In [40]:
import pandas as pd

In [41]:
weather_dict = {
    'day': ['1/1/2022', '1/2/2022', '1/3/2022', '1/4/2022', '1/5/2022', '1/6/2022'],
    'temperature': [32,35,27,29,14,25],
    'windspeed': [6,4,6,7,7,8],
    'event': ['Sunny', 'Rain', 'Sunny', 'Sunny', 'Snow', 'Sunny'], 
    'holiday': [True, False, False, False, False, False]
}

# you can make a df from a list of tuples, too
df = pd.DataFrame(weather_dict)
df

Unnamed: 0,day,temperature,windspeed,event,holiday
0,1/1/2022,32,6,Sunny,True
1,1/2/2022,35,4,Rain,False
2,1/3/2022,27,6,Sunny,False
3,1/4/2022,29,7,Sunny,False
4,1/5/2022,14,7,Snow,False
5,1/6/2022,25,8,Sunny,False


In [42]:
# you can extract values from .shape attribute of the dataframe
rows, cols = df.shape
print(rows, cols)

6 5


In [43]:
df.describe()

Unnamed: 0,temperature,windspeed
count,6.0,6.0
mean,27.0,6.333333
std,7.293833,1.36626
min,14.0,4.0
25%,25.5,6.0
50%,28.0,6.5
75%,31.25,7.0
max,35.0,8.0


In [44]:
# return rows using slicing
df[2:5]  # 5 is not inclusive

Unnamed: 0,day,temperature,windspeed,event,holiday
2,1/3/2022,27,6,Sunny,False
3,1/4/2022,29,7,Sunny,False
4,1/5/2022,14,7,Snow,False


In [45]:
# this notation also prints everything:
df[:]

Unnamed: 0,day,temperature,windspeed,event,holiday
0,1/1/2022,32,6,Sunny,True
1,1/2/2022,35,4,Rain,False
2,1/3/2022,27,6,Sunny,False
3,1/4/2022,29,7,Sunny,False
4,1/5/2022,14,7,Snow,False
5,1/6/2022,25,8,Sunny,False


In [46]:
# get name of all columns
df.columns

Index(['day', 'temperature', 'windspeed', 'event', 'holiday'], dtype='object')

In [47]:
# Note using [[]] returns a DataFrame, not a series
df[['day', 'temperature']]

Unnamed: 0,day,temperature
0,1/1/2022,32
1,1/2/2022,35
2,1/3/2022,27
3,1/4/2022,29
4,1/5/2022,14
5,1/6/2022,25


In [48]:
df.loc[[0, 1]]  # get rows 0 and 1

Unnamed: 0,day,temperature,windspeed,event,holiday
0,1/1/2022,32,6,Sunny,True
1,1/2/2022,35,4,Rain,False


In [49]:
df.loc[[0, 1], 'temperature'] # get temperature column of rows 0 and 1 

0    32
1    35
Name: temperature, dtype: int64

In [50]:
df.loc[:,['temperature', 'day']]

Unnamed: 0,temperature,day
0,32,1/1/2022
1,35,1/2/2022
2,27,1/3/2022
3,29,1/4/2022
4,14,1/5/2022
5,25,1/6/2022


In [51]:
# get max temperature
df['temperature'].max()

35

In [52]:
# this condition returns a mask.
# when you use it as a filter, only rows that are set to True are returned
df['temperature'] >= 27

0     True
1     True
2     True
3     True
4    False
5    False
Name: temperature, dtype: bool

In [53]:
# "give me the rows in the df where the row's temperature in the df is greater than 27"
filt = df['temperature'] >= 27
df[filt] # same as df[df['temperature'] >= 27]

Unnamed: 0,day,temperature,windspeed,event,holiday
0,1/1/2022,32,6,Sunny,True
1,1/2/2022,35,4,Rain,False
2,1/3/2022,27,6,Sunny,False
3,1/4/2022,29,7,Sunny,False


In [54]:
df[df['temperature'] == df['temperature'].max()]

Unnamed: 0,day,temperature,windspeed,event,holiday
1,1/2/2022,35,4,Rain,False


In [55]:
# remember, to grab multiple columns, you need to use [[]] notation
df[df['temperature'] == df['temperature'].max()][['day', 'temperature', 'holiday']]

Unnamed: 0,day,temperature,holiday
1,1/2/2022,35,False


In [56]:
df.at[1, 'temperature']

35

In [57]:
# don't forget you can submit a custom index.
# for most manipulations, you will need to set inplace=True
df.set_index(['day', 'temperature'], inplace=True)

In [58]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,windspeed,event,holiday
day,temperature,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1/1/2022,32,6,Sunny,True
1/2/2022,35,4,Rain,False
1/3/2022,27,6,Sunny,False
1/4/2022,29,7,Sunny,False
1/5/2022,14,7,Snow,False
1/6/2022,25,8,Sunny,False


In [59]:
df.loc['1/1/2022']

Unnamed: 0_level_0,windspeed,event,holiday
temperature,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
32,6,Sunny,True


In [60]:
# reset index
df.reset_index(inplace=True)
df

Unnamed: 0,day,temperature,windspeed,event,holiday
0,1/1/2022,32,6,Sunny,True
1,1/2/2022,35,4,Rain,False
2,1/3/2022,27,6,Sunny,False
3,1/4/2022,29,7,Sunny,False
4,1/5/2022,14,7,Snow,False
5,1/6/2022,25,8,Sunny,False


In [61]:
df.values  # returns a numpy representation of df

array([['1/1/2022', 32, 6, 'Sunny', True],
       ['1/2/2022', 35, 4, 'Rain', False],
       ['1/3/2022', 27, 6, 'Sunny', False],
       ['1/4/2022', 29, 7, 'Sunny', False],
       ['1/5/2022', 14, 7, 'Snow', False],
       ['1/6/2022', 25, 8, 'Sunny', False]], dtype=object)

In [62]:
df.keys()

Index(['day', 'temperature', 'windspeed', 'event', 'holiday'], dtype='object')

In [63]:
# This sets series df['day'] to a new series that = old series w/ datetime dtype
# the format arg specifies how to interpret the time to be converted to datetime dtype
df['day'] = pd.to_datetime(df['day'], format='%d/%m/%Y')
df.dtypes

day            datetime64[ns]
temperature             int64
windspeed               int64
event                  object
holiday                  bool
dtype: object

In [66]:
df.set_index('day')

Unnamed: 0_level_0,temperature,windspeed,event,holiday
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-01-01,32,6,Sunny,True
2022-02-01,35,4,Rain,False
2022-03-01,27,6,Sunny,False
2022-04-01,29,7,Sunny,False
2022-05-01,14,7,Snow,False
2022-06-01,25,8,Sunny,False
