In [1]:
import pandas as pd

df_temp = pd.DataFrame({
    'city' : ['Lagos', 'Edo', 'Abuja', 'Jiagwa', 'Imo', 'Abia', 'Ebonyi', 'Ondo', 'Ogun', 'Adamawa', 'Zamfara'],
    'temp' : [26, 23, 34, 22, 28, 22, 34, 35, 20, 33, 23]
})
df_temp

Unnamed: 0,city,temp
0,Lagos,26
1,Edo,23
2,Abuja,34
3,Jiagwa,22
4,Imo,28
5,Abia,22
6,Ebonyi,34
7,Ondo,35
8,Ogun,20
9,Adamawa,33


In [29]:
df_humid = pd.DataFrame({
    'city' : ['Niger', 'Jos', 'Lagos', 'Edo', 'Abuja', 'Akwa-Ibom', 'Oyo', 'Bauchi', "Zamfara", 'Kano', 'Kaduna', 'Katsina'],
    'humidity' : [67, 92, 74, 76, 87, 90, 77, 71, 69, 88, 90, 79]
})
df_humid

Unnamed: 0,city,humidity
0,Niger,67
1,Jos,92
2,Lagos,74
3,Edo,76
4,Abuja,87
5,Akwa-Ibom,90
6,Oyo,77
7,Bauchi,71
8,Zamfara,69
9,Kano,88


In [30]:
# merge will perform only for those cities that are common in both, which means it by-default performs inner-join(INTERSECTION)
inner_merge = pd.merge(df_temp, df_humid, how = "inner")
inner_merge

Unnamed: 0,city,temp,humidity
0,Lagos,26,74
1,Edo,23,76
2,Abuja,34,87
3,Zamfara,23,69


In [12]:
inner_merge = pd.merge(df_temp, df_humid, how = "inner", indicator = True)
inner_merge

Unnamed: 0,city,temp,humidity,_merge
0,Lagos,26,74,both
1,Edo,23,76,both
2,Abuja,34,87,both


In [31]:
# to perform an outer join(UNION)
outer_merge = pd.merge(df_temp, df_humid, on = 'city', how = "outer", indicator = True)
outer_merge

Unnamed: 0,city,temp,humidity,_merge
0,Lagos,26.0,74.0,both
1,Edo,23.0,76.0,both
2,Abuja,34.0,87.0,both
3,Jiagwa,22.0,,left_only
4,Imo,28.0,,left_only
5,Abia,22.0,,left_only
6,Ebonyi,34.0,,left_only
7,Ondo,35.0,,left_only
8,Ogun,20.0,,left_only
9,Adamawa,33.0,,left_only


In [32]:
# In left outer join, it takes all the rows from left dataframe and only common rows from right dataframe


In [39]:
left_merge = pd.merge(df_temp, df_humid, on = 'city', how = 'left', indicator = True)
left_merge

Unnamed: 0,city,temp,humidity,_merge
0,Lagos,26,74.0,both
1,Edo,23,76.0,both
2,Abuja,34,87.0,both
3,Jiagwa,22,,left_only
4,Imo,28,,left_only
5,Abia,22,,left_only
6,Ebonyi,34,,left_only
7,Ondo,35,,left_only
8,Ogun,20,,left_only
9,Adamawa,33,,left_only


In [40]:
# In Right outer join, it takes all the rows from Right dataframe and only common rows from left dataframe


In [41]:
right_merge = pd.merge(df_temp, df_humid, on = 'city', how = 'right', indicator = True)
right_merge

Unnamed: 0,city,temp,humidity,_merge
0,Niger,,67,right_only
1,Jos,,92,right_only
2,Lagos,26.0,74,both
3,Edo,23.0,76,both
4,Abuja,34.0,87,both
5,Akwa-Ibom,,90,right_only
6,Oyo,,77,right_only
7,Bauchi,,71,right_only
8,Zamfara,23.0,69,both
9,Kano,,88,right_only


In [42]:
## 2. Additional Parameters to `pd.merge()` Method

In [50]:
sw = pd.DataFrame({
    'state': ['Lagos', 'Ogun', 'Ondo', 'Osun', 'Oyo', 'Ekiti'],
    'humidity': [78,92,68,90,88,79,],
    'temperature' : [22, 23, 30, 28, 29, 34]
})
sw

Unnamed: 0,city,humidity,temperature
0,Lagos,78,22
1,Ogun,92,23
2,Ondo,68,30
3,Osun,90,28
4,Oyo,88,29
5,Ekiti,79,34


In [49]:
ss= pd.DataFrame({
    'state': ['Edo', 'Bayelsa', 'Delta', 'Akwa-Ibom', 'Cross-River', 'Rivers'],
    'humidity': [78,92,88,79,81,84],
    'temperature' : [22, 28, 29, 34, 39, 27,]
})
ss

Unnamed: 0,city,humidity,temperature
0,Edo,78,22
1,Bayelsa,92,28
2,Delta,88,29
3,Akwa-Ibom,79,34
4,Cross-River,81,39
5,Rivers,84,27


In [52]:
merge1 = pd.merge(sw, ss, on = 'city', how = 'inner')
merge1

Unnamed: 0,city,humidity_x,temperature_x,humidity_y,temperature_y


In [51]:
merge1 = pd.merge(sw, ss, on = 'city', how = 'inner', suffixes = ('_1st', '_2nd'))
merge1

Unnamed: 0,city,humidity_1st,temperature_1st,humidity_2nd,temperature_2nd


In [60]:
concat = pd.concat([data1, data2], ignore_index = 'True')
concat

Unnamed: 0,city,humidity,temperature
0,Edo,78,22
1,Abia,92,23
2,Bauchi,68,30
3,Benue,90,28
4,Cross-River,88,29
5,Delta,79,34
6,Yobe,81,39
7,Enugu,84,27
8,Kwara,72,26
9,Lagos,85,36


In [65]:
concat = pd.concat([data1, data2], join = 'outer', ignore_index = 'True')
concat

Unnamed: 0,city,humidity,temperature
0,Edo,78,22
1,Abia,92,23
2,Bauchi,68,30
3,Benue,90,28
4,Cross-River,88,29
5,Delta,79,34
6,Yobe,81,39
7,Enugu,84,27
8,Kwara,72,26
9,Lagos,85,36


In [66]:
data1, data2

(           city  humidity  temperature
 0           Edo        78           22
 1          Abia        92           23
 2        Bauchi        68           30
 3         Benue        90           28
 4   Cross-River        88           29
 5         Delta        79           34
 6          Yobe        81           39
 7         Enugu        84           27
 8         Kwara        72           26
 9         Lagos        85           36
 10    Nassarawa        67           35
 11       Jigawa        78           27,
           city  humidity  temperature
 0          Edo        78           22
 1         Abia        92           28
 2       Bauchi        88           29
 3    Akwa-Ibom        79           34
 4  Cross-River        81           39
 5        Delta        84           27
 6      Anambra        72           26
 7       Ebonyi        85           36
 8       Kaduna        67           35
 9        Lagos        78           27)

In [74]:
merge = pd.merge(data1, data2, how = 'outer', on = 'city')
merge

Unnamed: 0,city,humidity_x,temperature_x,humidity_y,temperature_y
0,Edo,78.0,22.0,78.0,22.0
1,Abia,92.0,23.0,92.0,28.0
2,Bauchi,68.0,30.0,88.0,29.0
3,Benue,90.0,28.0,,
4,Cross-River,88.0,29.0,81.0,39.0
5,Delta,79.0,34.0,84.0,27.0
6,Yobe,81.0,39.0,,
7,Enugu,84.0,27.0,,
8,Kwara,72.0,26.0,,
9,Lagos,85.0,36.0,78.0,27.0


In [None]:
# one_to_one or 1:1: checks if merge keys are unique in both left and right dataframes, if not then throw exception
# if the value of EDO TEMP is the same in both dataframes, throw an error

In [75]:
merge = pd.merge(data1, data2, how = 'outer', on = 'city', validate = "one_to_one")
merge

Unnamed: 0,city,humidity_x,temperature_x,humidity_y,temperature_y
0,Edo,78.0,22.0,78.0,22.0
1,Abia,92.0,23.0,92.0,28.0
2,Bauchi,68.0,30.0,88.0,29.0
3,Benue,90.0,28.0,,
4,Cross-River,88.0,29.0,81.0,39.0
5,Delta,79.0,34.0,84.0,27.0
6,Yobe,81.0,39.0,,
7,Enugu,84.0,27.0,,
8,Kwara,72.0,26.0,,
9,Lagos,85.0,36.0,78.0,27.0


In [77]:
# >**`one_to_many` or `1:m`: checks if merge keys are unique in left dataframe, if not then throw exception**


In [11]:
merge = pd.merge(data1, data2, how = 'outer', on = 'city', validate = "one_to_many")
merge

NameError: name 'data1' is not defined

In [4]:
test1 = pd.DataFrame({
    'city' : ['Edo', 'Ogun'],
    'temp' : [23, 34]
})
test1

Unnamed: 0,city,temp
0,Edo,23
1,Ogun,34


In [18]:
# test2 = pd.DataFrame({
#     'city' : ['Lagos', 'Osun'],
#     'temp' : [22, 33]
# })
# test2

In [19]:
test2 = pd.concat([test1, test2], join = 'outer', ignore_index = 'True')
test2

Unnamed: 0,city,temp
0,Edo,23
1,Ogun,34
2,Edo,23
3,Ogun,34
4,Edo,23
5,Ogun,34
6,Lagos,22
7,Osun,33


In [20]:
merge = pd.merge(test1, test2, on = 'city', how = 'outer', validate = 'one_to_one')
merge

MergeError: Merge keys are not unique in right dataset; not a one-to-one merge

In [9]:
merge=pd.merge(test1,test2, on='city', how = 'outer', validate = 'one_to_many')
merge

Unnamed: 0,city,temp_x,temp_y
0,Edo,23.0,23
1,Ogun,34.0,34
2,Lagos,,22
3,Osun,,33


In [21]:

# trying again

In [23]:
import pandas as pd
df1 = pd.DataFrame({
    'city': [ 'Lagos', 'Kano', 'Ogun'],
    'temperature' : [35, 39, 15],
})
df1

Unnamed: 0,city,temperature
0,Lagos,35
1,Kano,39
2,Ogun,15


In [24]:
df2 = pd.DataFrame({
    'city': [ 'Lagos', 'Kano', 'Katsina', 'Lagos'],
    'humidity' : [76, 95, 72, 76],
})
df2

Unnamed: 0,city,humidity
0,Lagos,76
1,Kano,95
2,Katsina,72
3,Lagos,76


In [25]:
df1 = pd.concat([df1, df2] , ignore_index=True)
df1

Unnamed: 0,city,temperature,humidity
0,Lagos,35.0,
1,Kano,39.0,
2,Ogun,15.0,
3,Lagos,,76.0
4,Kano,,95.0
5,Katsina,,72.0
6,Lagos,,76.0


In [26]:
df2 = pd.concat([df1, df2], join='outer', ignore_index=True )
df2

Unnamed: 0,city,temperature,humidity
0,Lagos,35.0,
1,Kano,39.0,
2,Ogun,15.0,
3,Lagos,,76.0
4,Kano,,95.0
5,Katsina,,72.0
6,Lagos,,76.0
7,Lagos,,76.0
8,Kano,,95.0
9,Katsina,,72.0


In [27]:
df1, df2

(      city  temperature  humidity
 0    Lagos         35.0       NaN
 1     Kano         39.0       NaN
 2     Ogun         15.0       NaN
 3    Lagos          NaN      76.0
 4     Kano          NaN      95.0
 5  Katsina          NaN      72.0
 6    Lagos          NaN      76.0,
        city  temperature  humidity
 0     Lagos         35.0       NaN
 1      Kano         39.0       NaN
 2      Ogun         15.0       NaN
 3     Lagos          NaN      76.0
 4      Kano          NaN      95.0
 5   Katsina          NaN      72.0
 6     Lagos          NaN      76.0
 7     Lagos          NaN      76.0
 8      Kano          NaN      95.0
 9   Katsina          NaN      72.0
 10    Lagos          NaN      76.0)

In [29]:
# pd.merge(df1, df2, on='city', how='outer', validate='one_to_one')

In [30]:
# >**`one_to_many` or `1:m`: checks if merge keys are unique in left dataframe, if not then throw exception**

In [32]:
# pd.merge(df1, df2, on='city', how='outer', validate='one_to_many')

In [33]:
data1

Unnamed: 0,city,humidity,temperature
0,Edo,78,22
1,Abia,92,23
2,Bauchi,68,30
3,Benue,90,28
4,Cross-River,88,29
5,Delta,79,34
6,Yobe,81,39
7,Enugu,84,27
8,Kwara,72,26
9,Lagos,85,36


In [34]:
data2

Unnamed: 0,city,humidity,temperature
0,Edo,78,22
1,Abia,92,28
2,Bauchi,88,29
3,Akwa-Ibom,79,34
4,Cross-River,81,39
5,Delta,84,27
6,Anambra,72,26
7,Ebonyi,85,36
8,Kaduna,67,35
9,Lagos,78,27


In [39]:
concat1 = pd.concat([data1, data2], join = 'outer', ignore_index = True)
concat1

Unnamed: 0,city,humidity,temperature
0,Edo,78,22
1,Abia,92,23
2,Bauchi,68,30
3,Benue,90,28
4,Cross-River,88,29
5,Delta,79,34
6,Yobe,81,39
7,Enugu,84,27
8,Kwara,72,26
9,Lagos,85,36


In [41]:
# Concatenate Dataframes (row-wise)

In [42]:
concat_row = pd.concat([data1, data2], axis = 0)
concat_row

Unnamed: 0,city,humidity,temperature
0,Edo,78,22
1,Abia,92,23
2,Bauchi,68,30
3,Benue,90,28
4,Cross-River,88,29
5,Delta,79,34
6,Yobe,81,39
7,Enugu,84,27
8,Kwara,72,26
9,Lagos,85,36


In [None]:
# Notice the index is also concatenated as such
# To handle this pass ignore_index parameter a value of True, so that the resulting axis is be labeled 0, …, n - 1.
# Useful if you are concatenating objects where the concatenation axis does not have meaningful indexing information.
# Note the index values on the other axes (i.e., columns) have still respected in the join.
# Other than the numeric index, if you want to have an additional index for your sub groups, you can use the keys argument to pd.concat() method
# It provides multi-indexing
# Remember this will work only if the ignore_index argument is False which is the default

In [46]:
concat_row = pd.concat([data1, data2], axis = 0, keys=['state', 'city'])
concat_row

Unnamed: 0,Unnamed: 1,city,humidity,temperature
state,0,Edo,78,22
state,1,Abia,92,23
state,2,Bauchi,68,30
state,3,Benue,90,28
state,4,Cross-River,88,29
state,5,Delta,79,34
state,6,Yobe,81,39
state,7,Enugu,84,27
state,8,Kwara,72,26
state,9,Lagos,85,36


In [None]:
# The advantage of doing this is you can use df.loc to get a subset of your dataframe
# So, after getting a big dataframe if you want to get the dataframe from which it was created keys arg is useful

In [54]:
merge = pd.concat([sw, ss], axis = 0, keys=['South-West', 'South-South'])
merge

Unnamed: 0,Unnamed: 1,city,humidity,temperature
South-West,0,Lagos,78,22
South-West,1,Ogun,92,23
South-West,2,Ondo,68,30
South-West,3,Osun,90,28
South-West,4,Oyo,88,29
South-West,5,Ekiti,79,34
South-South,0,Edo,78,22
South-South,1,Bayelsa,92,28
South-South,2,Delta,88,29
South-South,3,Akwa-Ibom,79,34


In [57]:
merge.loc['South-South', :]

Unnamed: 0,city,humidity,temperature
0,Edo,78,22
1,Bayelsa,92,28
2,Delta,88,29
3,Akwa-Ibom,79,34
4,Cross-River,81,39
5,Rivers,84,27


In [58]:
merge.loc['South-West']

Unnamed: 0,city,humidity,temperature
0,Lagos,78,22
1,Ogun,92,23
2,Ondo,68,30
3,Osun,90,28
4,Oyo,88,29
5,Ekiti,79,34


In [None]:
# Concatenate Dataframes (column-wise)

In [43]:
concat_col = pd.concat([data1, data2], axis = 1)
concat_col

Unnamed: 0,city,humidity,temperature,city.1,humidity.1,temperature.1
0,Edo,78,22,Edo,78.0,22.0
1,Abia,92,23,Abia,92.0,28.0
2,Bauchi,68,30,Bauchi,88.0,29.0
3,Benue,90,28,Akwa-Ibom,79.0,34.0
4,Cross-River,88,29,Cross-River,81.0,39.0
5,Delta,79,34,Delta,84.0,27.0
6,Yobe,81,39,Anambra,72.0,26.0
7,Enugu,84,27,Ebonyi,85.0,36.0
8,Kwara,72,26,Kaduna,67.0,35.0
9,Lagos,85,36,Lagos,78.0,27.0


In [44]:
concat_col = pd.concat([data2, data1], axis = 1)
concat_col

Unnamed: 0,city,humidity,temperature,city.1,humidity.1,temperature.1
0,Edo,78.0,22.0,Edo,78,22
1,Abia,92.0,28.0,Abia,92,23
2,Bauchi,88.0,29.0,Bauchi,68,30
3,Akwa-Ibom,79.0,34.0,Benue,90,28
4,Cross-River,81.0,39.0,Cross-River,88,29
5,Delta,84.0,27.0,Delta,79,34
6,Anambra,72.0,26.0,Yobe,81,39
7,Ebonyi,85.0,36.0,Enugu,84,27
8,Kaduna,67.0,35.0,Kwara,72,26
9,Lagos,78.0,27.0,Lagos,85,36


In [59]:
temp_df = pd.DataFrame({
    'city': [ 'Lahore', 'Karachi', 'Peshawer', 'Islamabad', 'Muree'],
    'temperature' : [35, 39, 33, 29, 15],
})
temp_df

Unnamed: 0,city,temperature
0,Lahore,35
1,Karachi,39
2,Peshawer,33
3,Islamabad,29
4,Muree,15


In [60]:
wind_df = pd.DataFrame({
    'city': [ 'Lahore', 'Karachi', 'Peshawer', 'Islamabad', 'Muree'],
    'wind speed' : [9, 12, 7, 13, 18],
})
wind_df

Unnamed: 0,city,wind speed
0,Lahore,9
1,Karachi,12
2,Peshawer,7
3,Islamabad,13
4,Muree,18


In [61]:
# We have to use the argument axis=1
df = pd.concat([temp_df,wind_df], axis=1)
df

Unnamed: 0,city,temperature,city.1,wind speed
0,Lahore,35,Lahore,9
1,Karachi,39,Karachi,12
2,Peshawer,33,Peshawer,7
3,Islamabad,29,Islamabad,13
4,Muree,15,Muree,18


In [62]:
# c. What will happen if we have missing data in our dataframes

In [63]:
# This dataframe do not have the temperature for Lahore
temp_df = pd.DataFrame({
    'city': [ 'Karachi', 'Peshawer', 'Islamabad', 'Muree'],
    'temperature' : [39, 33, 29, 15],
})
temp_df

Unnamed: 0,city,temperature
0,Karachi,39
1,Peshawer,33
2,Islamabad,29
3,Muree,15


In [64]:
#This dataframe do not have the windspeed of Islamabad
wind_df = pd.DataFrame({
    'city': [ 'Lahore', 'Karachi', 'Peshawer', 'Muree'],
    'wind speed' : [9, 12, 7, 18],
})
wind_df

Unnamed: 0,city,wind speed
0,Lahore,9
1,Karachi,12
2,Peshawer,7
3,Muree,18


In [65]:
df1 = pd.concat([temp_df,wind_df], axis=1)
df1

Unnamed: 0,city,temperature,city.1,wind speed
0,Karachi,39,Lahore,9
1,Peshawer,33,Karachi,12
2,Islamabad,29,Peshawer,7
3,Muree,15,Muree,18


In [None]:
# This doesnot look correct

# We have missing data in the resulting dataframe, i.e., 
# it does not contain record for Lahore, which was there in the second dataframe but not in the first
# Solution is while creating the dataframe you pass it the index
# In Pandas, while creating a DataFrame, you can pass the index argument with appropriate related indices, which is a way to align rows from different dataframes

In [67]:

#### Note the indexes in above two dataframes match. Now concatenation will be OK


In [68]:
df = pd.concat([temp_df,wind_df], axis=1)
df

Unnamed: 0,city,temperature,city.1,wind speed
0,Karachi,39,Lahore,9
1,Peshawer,33,Karachi,12
2,Islamabad,29,Peshawer,7
3,Muree,15,Muree,18
