Concat DataFrames

In [1]:
import pandas as pd

In [6]:
# Sample weather readings for three Indian cities, one row per city.
ind = pd.DataFrame(
    data={
        "city": ["mumbai", "delhi", "blr"],
        "temperature": [23, 43, 54],
        "humidity": [54, 65, 34],
    }
)
ind

Unnamed: 0,city,temperature,humidity
0,mumbai,23,54
1,delhi,43,65
2,blr,54,34


In [8]:
# Sample weather readings for three US locations, with the same columns as `ind`.
us = pd.DataFrame(
    data={
        "city": ["michigan", "cali", "orlando"],
        "temperature": [36, 54, 34],
        "humidity": [34, 23, 67],
    }
)
us

Unnamed: 0,city,temperature,humidity
0,michigan,36,34
1,cali,54,23
2,orlando,34,67


In [19]:
# Row-wise concatenation: the rows of `us` are placed beneath the rows of `ind`
# in a brand-new DataFrame.
# Each input keeps its own row labels, so the resulting index repeats 0, 1, 2.
df = pd.concat(objs=[ind, us])
df

Unnamed: 0,city,temperature,humidity
0,mumbai,23,54
1,delhi,43,65
2,blr,54,34
0,michigan,36,34
1,cali,54,23
2,orlando,34,67


In [26]:
# ignore_index=True discards the inputs' row labels and renumbers the
# combined rows 0..5, which removes the repeated labels.
df1 = pd.concat(
    objs=[ind, us],
    ignore_index=True,
)
df1

Unnamed: 0,city,temperature,humidity
0,mumbai,23,54
1,delhi,43,65
2,blr,54,34
3,michigan,36,34
4,cali,54,23
5,orlando,34,67


In [28]:
# Tag each input with a key; the keys become an outer index level, so every
# row records which source frame it came from and can be selected by that key.
df2 = pd.concat(
    objs=[ind, us],
    keys=["ind", "us"],
)
df2

Unnamed: 0,Unnamed: 1,city,temperature,humidity
ind,0,mumbai,23,54
ind,1,delhi,43,65
ind,2,blr,54,34
us,0,michigan,36,34
us,1,cali,54,23
us,2,orlando,34,67


In [32]:
# Selecting the outer key "ind" returns only the rows that came from `ind`.
df2.loc["ind"]

Unnamed: 0,city,temperature,humidity
0,mumbai,23,54
1,delhi,43,65
2,blr,54,34


In [58]:
# `df` was built without ignore_index, so label 0 occurs twice and
# .loc[0] returns one row from each source frame.
df.loc[0]

Unnamed: 0,city,temperature,humidity
0,mumbai,23,54
0,michigan,36,34


In [33]:
# Temperature readings keyed by city (one row per city).
temp = pd.DataFrame(
    data={
        "city": ["mumbai", "delhi", "blr"],
        "temperature": [34, 5, 5],
    }
)
temp

Unnamed: 0,city,temperature
0,mumbai,34
1,delhi,5
2,blr,5


In [34]:
# Wind-speed readings for the same three cities, listed in the same order as `temp`.
windspeed = pd.DataFrame(
    data={
        "city": ["mumbai", "delhi", "blr"],
        "windspeed": [3, 545, 65],
    }
)
windspeed

Unnamed: 0,city,windspeed
0,mumbai,3
1,delhi,545
2,blr,65


In [39]:
# Column-wise concatenation: axis=1 places the frames side by side in a new
# DataFrame. Both inputs carry a `city` column, so it appears twice in the result.
df3 = pd.concat(
    objs=[temp, windspeed],
    axis="columns",
)
df3

Unnamed: 0,city,temperature,city.1,windspeed
0,mumbai,34,mumbai,3
1,delhi,5,delhi,545
2,blr,5,blr,65


In [None]:
# NOTE(review): this repeats the earlier `windspeed` definition verbatim;
# running it simply rebuilds the identical frame.
windspeed = pd.DataFrame(
    data={
        "city": ["mumbai", "delhi", "blr"],
        "windspeed": [3, 545, 65],
    }
)
windspeed

In [47]:
# Label the rows 0 and 1 explicitly so they can be matched by label
# when this frame is concatenated column-wise.
new_temp = pd.DataFrame(
    data={
        "city": ["mumbai", "delhi"],
        "temperature": [34, 5],
    },
    index=[0, 1],
)
new_temp

Unnamed: 0,city,temperature
0,mumbai,34
1,delhi,5


In [48]:
# The cities are listed in the opposite order here (delhi first), so the row
# labels are given as 1, 0: delhi keeps label 1 and mumbai keeps label 0,
# matching the labels those cities have in `new_temp`.
new_windspeed = pd.DataFrame(
    data={
        "city": ["delhi", "mumbai"],
        "windspeed": [3, 545],
    },
    index=[1, 0],
)
new_windspeed

Unnamed: 0,city,windspeed
1,delhi,3
0,mumbai,545


In [49]:
# Column-wise concat lines rows up by index label, not by position, so each
# city's temperature lands next to its own wind speed even though the two
# frames list the cities in different orders.
df4 = pd.concat(
    objs=[new_temp, new_windspeed],
    axis="columns",
)
df4

Unnamed: 0,city,temperature,city.1,windspeed
0,mumbai,34,mumbai,545
1,delhi,5,delhi,3


In [50]:
# A named Series of weather descriptions; the name "event" labels the data.
s = pd.Series(
    data=["humid", "dry"],
    name="event",
)
s

0    humid
1      dry
Name: event, dtype: object

In [52]:
# A Series can be concatenated with a DataFrame too: with axis=1 it is added
# as one more column, named after the Series ("event").
df5 = pd.concat(
    objs=[df4, s],
    axis="columns",
)
df5

Unnamed: 0,city,temperature,city.1,windspeed,event
0,mumbai,34,mumbai,545,humid
1,delhi,5,delhi,3,dry


In [56]:
# Same idea applied to the smaller frame: attach the `event` Series to
# `new_temp` as an extra column.
df6 = pd.concat(
    objs=[new_temp, s],
    axis="columns",
)
df6

Unnamed: 0,city,temperature,event
0,mumbai,34,humid
1,delhi,5,dry
