# Data Frames

DataFrames are the workhorse of pandas and are directly inspired by the R programming language. We can think of a DataFrame as a bunch of Series objects put together to share the same index. Let's use pandas to explore this topic!

In [1]:
import numpy as np
import pandas as pd
from numpy.random import randn
# Seed NumPy's global RNG so the randn() draws used below are repeatable.
np.random.seed(101)

In [2]:
# Build a 5x4 frame of standard-normal draws: one row per weekday, one column
# per measurement. The values come from NumPy's global random state.
weather = pd.DataFrame(
    data=randn(5, 4),
    index=["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"],
    columns=["Rainfall", "Wind", "Temp", "Humidity"],
)

In [3]:
weather

Unnamed: 0,Rainfall,Wind,Temp,Humidity
Monday,2.70685,0.628133,0.907969,0.503826
Tuesday,0.651118,-0.319318,-0.848077,0.605965
Wednesday,-2.018168,0.740122,0.528813,-0.589001
Thursday,0.188695,-0.758872,-0.933237,0.955057
Friday,0.190794,1.978757,2.605967,0.683509


In [4]:
weather.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, Monday to Friday
Data columns (total 4 columns):
Rainfall    5 non-null float64
Wind        5 non-null float64
Temp        5 non-null float64
Humidity    5 non-null float64
dtypes: float64(4)
memory usage: 200.0+ bytes


In [5]:
weather.describe()

Unnamed: 0,Rainfall,Wind,Temp,Humidity
count,5.0,5.0,5.0,5.0
mean,0.343858,0.453764,0.452287,0.431871
std,1.681131,1.061385,1.454516,0.594708
min,-2.018168,-0.758872,-0.933237,-0.589001
25%,0.188695,-0.319318,-0.848077,0.503826
50%,0.190794,0.628133,0.528813,0.605965
75%,0.651118,0.740122,0.907969,0.683509
max,2.70685,1.978757,2.605967,0.955057


## Selection and Indexing

In [6]:
# Passing a list of labels returns a DataFrame with the columns in the order asked for.
weather.loc[:, ['Temp', 'Rainfall']]

Unnamed: 0,Temp,Rainfall
Monday,0.907969,2.70685
Tuesday,-0.848077,0.651118
Wednesday,0.528813,-2.018168
Thursday,-0.933237,0.188695
Friday,2.605967,0.190794


In [7]:
# Bracket lookup is the robust way to pull one column as a Series: attribute
# access (weather.Temp) cannot reach labels that contain spaces and can be
# shadowed by DataFrame attributes or methods of the same name.
weather['Temp']

Monday       0.907969
Tuesday     -0.848077
Wednesday    0.528813
Thursday    -0.933237
Friday       2.605967
Name: Temp, dtype: float64

In [8]:
# New column: element-wise product of the 'Rainfall' and 'Wind' columns.
weather['Wind Speed'] = weather['Rainfall'].mul(weather['Wind'])

In [9]:
weather

Unnamed: 0,Rainfall,Wind,Temp,Humidity,Wind Speed
Monday,2.70685,0.628133,0.907969,0.503826,1.700261
Tuesday,0.651118,-0.319318,-0.848077,0.605965,-0.207914
Wednesday,-2.018168,0.740122,0.528813,-0.589001,-1.493691
Thursday,0.188695,-0.758872,-0.933237,0.955057,-0.143196
Friday,0.190794,1.978757,2.605967,0.683509,0.377536


In [10]:
# New column: element-wise ratio of 'Humidity' to 'Temp'.
weather['Saturation'] = weather['Humidity'].div(weather['Temp'])

In [11]:
weather

Unnamed: 0,Rainfall,Wind,Temp,Humidity,Wind Speed,Saturation
Monday,2.70685,0.628133,0.907969,0.503826,1.700261,0.554893
Tuesday,0.651118,-0.319318,-0.848077,0.605965,-0.207914,-0.714517
Wednesday,-2.018168,0.740122,0.528813,-0.589001,-1.493691,-1.113815
Thursday,0.188695,-0.758872,-0.933237,0.955057,-0.143196,-1.02338
Friday,0.190794,1.978757,2.605967,0.683509,0.377536,0.262286


In [12]:
weather.loc['Monday']

Rainfall      2.706850
Wind          0.628133
Temp          0.907969
Humidity      0.503826
Wind Speed    1.700261
Saturation    0.554893
Name: Monday, dtype: float64

In [13]:
# Integer-position lookup: position 0 is the 'Monday' row.
weather.iloc[0]

Rainfall      2.706850
Wind          0.628133
Temp          0.907969
Humidity      0.503826
Wind Speed    1.700261
Saturation    0.554893
Name: Monday, dtype: float64

In [14]:
# Position 4 is the last of the five rows ('Friday').
weather.iloc[4]

Rainfall      0.190794
Wind          1.978757
Temp          2.605967
Humidity      0.683509
Wind Speed    0.377536
Saturation    0.262286
Name: Friday, dtype: float64

In [15]:
weather.loc['Friday']

Rainfall      0.190794
Wind          1.978757
Temp          2.605967
Humidity      0.683509
Wind Speed    0.377536
Saturation    0.262286
Name: Friday, dtype: float64

In [16]:
# drop() hands back a new frame without 'Wind'; `weather` itself is not modified.
weather.drop(columns='Wind')

Unnamed: 0,Rainfall,Temp,Humidity,Wind Speed,Saturation
Monday,2.70685,0.907969,0.503826,1.700261,0.554893
Tuesday,0.651118,-0.848077,0.605965,-0.207914,-0.714517
Wednesday,-2.018168,0.528813,-0.589001,-1.493691,-1.113815
Thursday,0.188695,-0.933237,0.955057,-0.143196,-1.02338
Friday,0.190794,2.605967,0.683509,0.377536,0.262286


In [17]:
weather

Unnamed: 0,Rainfall,Wind,Temp,Humidity,Wind Speed,Saturation
Monday,2.70685,0.628133,0.907969,0.503826,1.700261,0.554893
Tuesday,0.651118,-0.319318,-0.848077,0.605965,-0.207914,-0.714517
Wednesday,-2.018168,0.740122,0.528813,-0.589001,-1.493691,-1.113815
Thursday,0.188695,-0.758872,-0.933237,0.955057,-0.143196,-1.02338
Friday,0.190794,1.978757,2.605967,0.683509,0.377536,0.262286


In [18]:
# Make the removal stick by rebinding the name rather than mutating in place.
# errors='ignore' keeps this cell safe to re-run once 'Wind' is already gone
# (the inplace=True form raises KeyError on a second execution).
weather = weather.drop(columns='Wind', errors='ignore')

In [19]:
weather

Unnamed: 0,Rainfall,Temp,Humidity,Wind Speed,Saturation
Monday,2.70685,0.907969,0.503826,1.700261,0.554893
Tuesday,0.651118,-0.848077,0.605965,-0.207914,-0.714517
Wednesday,-2.018168,0.528813,-0.589001,-1.493691,-1.113815
Thursday,0.188695,-0.933237,0.955057,-0.143196,-1.02338
Friday,0.190794,2.605967,0.683509,0.377536,0.262286


In [20]:
# Remove a row by its index label; the result is a separate frame and
# `weather` keeps all five days.
weather_without_Friday = weather.drop(index="Friday")
weather_without_Friday

Unnamed: 0,Rainfall,Temp,Humidity,Wind Speed,Saturation
Monday,2.70685,0.907969,0.503826,1.700261,0.554893
Tuesday,0.651118,-0.848077,0.605965,-0.207914,-0.714517
Wednesday,-2.018168,0.528813,-0.589001,-1.493691,-1.113815
Thursday,0.188695,-0.933237,0.955057,-0.143196,-1.02338


## Conditional Selection

In [21]:
# Element-wise comparison: a same-shaped frame of booleans.
weather.gt(0)

Unnamed: 0,Rainfall,Temp,Humidity,Wind Speed,Saturation
Monday,True,True,True,True,True
Tuesday,True,False,True,False,False
Wednesday,False,True,False,False,False
Thursday,True,False,True,False,False
Friday,True,True,True,True,True


In [22]:
# Keep values where the condition holds; everything else becomes NaN.
weather.where(weather > 0)

Unnamed: 0,Rainfall,Temp,Humidity,Wind Speed,Saturation
Monday,2.70685,0.907969,0.503826,1.700261,0.554893
Tuesday,0.651118,,0.605965,,
Wednesday,,0.528813,,,
Thursday,0.188695,,0.955057,,
Friday,0.190794,2.605967,0.683509,0.377536,0.262286


In [23]:
# A boolean Series selects whole rows: only days with Temp above zero remain.
weather.loc[weather['Temp'] > 0]

Unnamed: 0,Rainfall,Temp,Humidity,Wind Speed,Saturation
Monday,2.70685,0.907969,0.503826,1.700261,0.554893
Wednesday,-2.018168,0.528813,-0.589001,-1.493691,-1.113815
Friday,0.190794,2.605967,0.683509,0.377536,0.262286


In [25]:
# Rows whose Humidity exceeds 0.3.
weather.loc[weather['Humidity'] > 0.3]

Unnamed: 0,Rainfall,Temp,Humidity,Wind Speed,Saturation
Monday,2.70685,0.907969,0.503826,1.700261,0.554893
Tuesday,0.651118,-0.848077,0.605965,-0.207914,-0.714517
Thursday,0.188695,-0.933237,0.955057,-0.143196,-1.02338
Friday,0.190794,2.605967,0.683509,0.377536,0.262286


In [30]:
weather.loc['Wednesday']

Rainfall     -2.018168
Temp          0.528813
Humidity     -0.589001
Wind Speed   -1.493691
Saturation   -1.113815
Name: Wednesday, dtype: float64

In [39]:
# Select rows and column in a single .loc call instead of chaining two
# [] lookups; chained indexing builds an intermediate frame and is unsafe
# if this expression is ever used as an assignment target.
weather.loc[weather["Temp"] > 0, 'Humidity']

Monday       0.503826
Wednesday   -0.589001
Friday       0.683509
Name: Humidity, dtype: float64

In [40]:
# Row filter and column list go in one .loc call rather than chained []
# lookups, so no intermediate filtered frame is created.
weather.loc[weather["Temp"] > 0, ['Humidity', "Rainfall"]]

Unnamed: 0,Humidity,Rainfall
Monday,0.503826,2.70685
Wednesday,-0.589001,-2.018168
Friday,0.683509,0.190794


In [41]:
# Both conditions must hold; & combines the two boolean Series element-wise.
weather[weather["Humidity"].gt(0) & weather["Rainfall"].gt(1)]

Unnamed: 0,Rainfall,Temp,Humidity,Wind Speed,Saturation
Monday,2.70685,0.907969,0.503826,1.700261,0.554893


In [42]:
# Either condition is enough; | combines the two boolean Series element-wise.
weather[weather["Humidity"].gt(0) | weather["Rainfall"].gt(1)]

Unnamed: 0,Rainfall,Temp,Humidity,Wind Speed,Saturation
Monday,2.70685,0.907969,0.503826,1.700261,0.554893
Tuesday,0.651118,-0.848077,0.605965,-0.207914,-0.714517
Thursday,0.188695,-0.933237,0.955057,-0.143196,-1.02338
Friday,0.190794,2.605967,0.683509,0.377536,0.262286
