In [1]:
import pandas as pd

In [2]:
# Two small frames that share only the 'number' column; they are the inputs
# for the pd.concat examples that follow.
d1 = pd.DataFrame({'col1': ['a', 'b'], 'number': [1, 2]})
d2 = pd.DataFrame({
    'letter': ['c', 'd'],
    'number': [3, 4],
    'animal': ['lion', 'tiger'],
})

In [3]:
d1

Unnamed: 0,col1,number
0,a,1
1,b,2


In [4]:
d2

Unnamed: 0,letter,number,animal
0,c,3,lion
1,d,4,tiger


In [5]:
# Stack d2's rows under d1's. Columns are unioned, so cells a frame does not
# have come out empty, and each frame keeps its own 0..1 row labels.
pd.concat([d1, d2], axis='index')

Unnamed: 0,col1,number,letter,animal
0,a,1,,
1,b,2,,
0,,3,c,lion
1,,4,d,tiger


In [7]:
# Same row-wise stack, but discard the original row labels and renumber 0..n-1.
pd.concat([d1, d2], ignore_index=True, axis='index')

Unnamed: 0,col1,number,letter,animal
0,a,1,,
1,b,2,,
2,,3,c,lion
3,,4,d,tiger


In [8]:
# Place the frames side by side, pairing rows by index label.
# Both inputs contribute a 'number' column, so that name appears twice.
pd.concat([d1, d2], axis='columns')

Unnamed: 0,col1,number,letter,number.1,animal
0,a,1,c,3,lion
1,b,2,d,4,tiger


### Group by

In [9]:
# Load the weather sample via a path relative to the working directory.
# Nothing is cleaned on load: the -99999 readings and the '0' events shown
# in the next output are kept as ordinary values.
df = pd.read_csv(filepath_or_buffer='weather_data.csv')

In [10]:
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,-99999,7,Sunny
2,1/3/2017,28,-99999,Snow
3,1/4/2017,-99999,7,0
4,1/5/2017,32,-99999,Rain
5,1/6/2017,31,2,Sunny
6,1/6/2017,34,5,0


In [11]:
# Partition the rows by their 'event' value; nothing is computed until the
# grouped object is iterated or aggregated.
df_group = df.groupby(by='event')

In [12]:
df_group

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x115f15040>

In [13]:
# Iterating the grouped object yields one (event value, sub-frame) pair per
# group; print each pair unchanged.
for event_and_rows in df_group:
    print(event_and_rows)

('0',         day  temperature  windspeed event
3  1/4/2017       -99999          7     0
6  1/6/2017           34          5     0)
('Rain',         day  temperature  windspeed event
0  1/1/2017           32          6  Rain
4  1/5/2017           32     -99999  Rain)
('Snow',         day  temperature  windspeed event
2  1/3/2017           28     -99999  Snow)
('Sunny',         day  temperature  windspeed  event
1  1/2/2017       -99999          7  Sunny
5  1/6/2017           31          2  Sunny)


In [14]:
# Pull out only the rows whose event is 'Rain'.
df_group.get_group(name='Rain')

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
4,1/5/2017,32,-99999,Rain


In [15]:
# Per-event summary statistics (count, mean, std, quartiles, ...) for the
# numeric columns.
# NOTE(review): the -99999 readings (presumably missing-value placeholders)
# are not excluded, so they dominate the mean/std/min of every group that
# contains one -- see the output below.
df_group.describe()

Unnamed: 0_level_0,temperature,temperature,temperature,temperature,temperature,temperature,temperature,temperature,windspeed,windspeed,windspeed,windspeed,windspeed,windspeed,windspeed,windspeed
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
event,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
0,2.0,-49982.5,70734.012642,-99999.0,-74990.75,-49982.5,-24974.25,34.0,2.0,6.0,1.414214,5.0,5.5,6.0,6.5,7.0
Rain,2.0,32.0,0.0,32.0,32.0,32.0,32.0,32.0,2.0,-49996.5,70714.213653,-99999.0,-74997.75,-49996.5,-24995.25,6.0
Snow,1.0,28.0,,28.0,28.0,28.0,28.0,28.0,1.0,-99999.0,,-99999.0,-99999.0,-99999.0,-99999.0,-99999.0
Sunny,2.0,-49984.0,70731.891322,-99999.0,-74991.5,-49984.0,-24976.5,31.0,2.0,4.5,3.535534,2.0,3.25,4.5,5.75,7.0


In [17]:
def hot_temp(x, threshold=30):
    """Return whether a reading is strictly greater than ``threshold``.

    Args:
        x: The reading to test; anything that supports ``>`` against
            ``threshold`` (for example a plain number).
        threshold: Cut-off the reading must exceed. Defaults to 30, the
            value this notebook uses, so existing one-argument calls such
            as ``Series.apply(hot_temp)`` behave exactly as before.

    Returns:
        The result of ``x > threshold``.
    """
    return x > threshold

In [18]:
# Run hot_temp on every temperature and store the results as a new
# 'hot_temp' column on df (the frame is modified in place).
df['hot_temp'] = df['temperature'].map(hot_temp)

In [19]:
df

Unnamed: 0,day,temperature,windspeed,event,hot_temp
0,1/1/2017,32,6,Rain,True
1,1/2/2017,-99999,7,Sunny,False
2,1/3/2017,28,-99999,Snow,False
3,1/4/2017,-99999,7,0,False
4,1/5/2017,32,-99999,Rain,True
5,1/6/2017,31,2,Sunny,True
6,1/6/2017,34,5,0,True


In [27]:
# Vectorised comparison: produces the same boolean column as applying
# `lambda x: x>30` to each element, without a Python-level call per row.
# The -99999 readings simply evaluate to False here.
df['hot_temp'] = df['temperature'] > 30

In [28]:
df

Unnamed: 0,day,temperature,windspeed,event,hot_temp
0,1/1/2017,32,6,Rain,True
1,1/2/2017,-99999,7,Sunny,False
2,1/3/2017,28,-99999,Snow,False
3,1/4/2017,-99999,7,0,False
4,1/5/2017,32,-99999,Rain,True
5,1/6/2017,31,2,Sunny,True
6,1/6/2017,34,5,0,True


### Merging 2: joining DataFrames on a key column with `pd.merge`

In [29]:
# Recap of the side-by-side concat: rows are paired by index label, not by
# the value of any column.
pd.concat([d1, d2], axis='columns')

Unnamed: 0,col1,number,letter,number.1,animal
0,a,1,c,3,lion
1,b,2,d,4,tiger


In [30]:
# City -> temperature table; the left-hand input of the merge examples.
# NOTE(review): this rebinds `d1`, replacing the frame used in the concat cells.
d1 = pd.DataFrame(
    [('lucknow', 32), ('kanpur', 45), ('agra', 30), ('delhi', 40)],
    columns=['city', 'temperature'],
)

In [31]:
d1

Unnamed: 0,city,temperature
0,lucknow,32
1,kanpur,45
2,agra,30
3,delhi,40


In [32]:
# City -> humidity table; the right-hand input of the merge examples.
# It lists the cities in a different order from d1 and has no row for 'agra'.
d2 = pd.DataFrame(
    [('kanpur', 68), ('lucknow', 65), ('delhi', 75)],
    columns=['city', 'humidity'],
)

In [33]:
d2

Unnamed: 0,city,humidity
0,kanpur,68
1,lucknow,65
2,delhi,75


In [35]:
# Join the two tables on their shared 'city' column. With the default
# how='inner', only cities present in both frames are kept.
# NOTE(review): this rebinds `df`, which held the weather data until now.
df = d1.merge(d2, on='city')

In [36]:
df

Unnamed: 0,city,temperature,humidity
0,lucknow,32,65
1,kanpur,45,68
2,delhi,40,75


In [37]:
# Outer join: keep every city found in either frame; a side with no match
# for a city contributes an empty (NaN) cell.
d1.merge(d2, how='outer', on='city')

Unnamed: 0,city,temperature,humidity
0,lucknow,32,65.0
1,kanpur,45,68.0
2,agra,30,
3,delhi,40,75.0


In [38]:
# Left join: keep every row of d1; humidity is empty (NaN) for any city that
# d2 does not list.
d1.merge(d2, how='left', on='city')

Unnamed: 0,city,temperature,humidity
0,lucknow,32,65.0
1,kanpur,45,68.0
2,agra,30,
3,delhi,40,75.0
