In [1]:
import pandas as pd
import numpy as np

In [2]:
# India: all three columns are fully populated.
india_weather = pd.DataFrame(
    {
        "city": ["mumbai", "chennai", "delhi", "lucknow"],
        "temperature": [45, 55, 38, 56],
        "humidity": [70, 80, 25, 30],
    }
)

# USA: same three columns as the India frame.
usa_weather = pd.DataFrame(
    {
        "city": ["new york", "los angeles", "las vegas", "washington dc"],
        "temperature": [25, 18, 26, 17],
        "humidity": [45, 34, 62, 49],
    }
)

# England: one temperature is missing (None), and this frame carries a
# "Population (in 10k)" column instead of "humidity".
england_weather = pd.DataFrame(
    {
        "city": ["london", "bermingham", "whales"],
        "temperature": [13, None, 24],
        "Population (in 10k)": [12, 6, 8],
    }
)

In [3]:
# Display the india_weather frame.
india_weather

Unnamed: 0,city,temperature,humidity
0,mumbai,45,70
1,chennai,55,80
2,delhi,38,25
3,lucknow,56,30


In [4]:
# Display the usa_weather frame.
usa_weather

Unnamed: 0,city,temperature,humidity
0,new york,25,45
1,los angeles,18,34
2,las vegas,26,62
3,washington dc,17,49


In [5]:
# Display the england_weather frame (note the missing temperature and the
# "Population (in 10k)" column in place of "humidity").
england_weather

Unnamed: 0,city,temperature,Population (in 10k)
0,london,13.0,12
1,bermingham,,6
2,whales,24.0,8


# Concatenation (default: row-wise)

In [6]:
# Stack india_weather on top of usa_weather (row-wise, axis=0, is the default).
# Each frame keeps its own index labels, so labels 0-3 appear twice in the result.
mix_weather = pd.concat(objs=[india_weather, usa_weather], axis=0)
mix_weather

Unnamed: 0,city,temperature,humidity
0,mumbai,45,70
1,chennai,55,80
2,delhi,38,25
3,lucknow,56,30
0,new york,25,45
1,los angeles,18,34
2,las vegas,26,62
3,washington dc,17,49


## ignore_index parameter
- instead of carrying over the index labels of the dataframes involved, it creates a new, continuous index (0 to n-1)

In [7]:
# ignore_index=True drops the per-frame index labels and numbers the
# combined rows continuously instead of repeating 0-3.
mix_weather = pd.concat(
    objs=[india_weather, usa_weather],
    ignore_index=True,
)
mix_weather

Unnamed: 0,city,temperature,humidity
0,mumbai,45,70
1,chennai,55,80
2,delhi,38,25
3,lucknow,56,30
4,new york,25,45
5,los angeles,18,34
6,las vegas,26,62
7,washington dc,17,49


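## keys parameter
- builds a hierarchical index (MultiIndex): each key becomes the outer index level for the rows of the corresponding dataframe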

In [8]:
# keys tags every input frame with a label; the labels form the outer level
# of the row index, so each row records which frame it came from.
mix_weather = pd.concat(
    objs=[india_weather, usa_weather],
    keys=["india", "usa"],
)
mix_weather

Unnamed: 0,Unnamed: 1,city,temperature,humidity
india,0,mumbai,45,70
india,1,chennai,55,80
india,2,delhi,38,25
india,3,lucknow,56,30
usa,0,new york,25,45
usa,1,los angeles,18,34
usa,2,las vegas,26,62
usa,3,washington dc,17,49


In [28]:
# Using .loc[] with an outer-level key of the hierarchical index:
# this selects the rows that were tagged 'india' by the keys argument.

mix_weather.loc['india']

Unnamed: 0,city,temperature,humidity
0,mumbai,45,70
1,chennai,55,80
2,delhi,38,25
3,lucknow,56,30


In [27]:
# Select the 'india' rows first, then take every second row of that subset.
mix_weather.loc['india'].loc[::2]

Unnamed: 0,city,temperature,humidity
0,mumbai,45,70
2,delhi,38,25


## Mismatched columns in row-wise concatenation

In [10]:
# When the inputs do not share all columns, the result contains the union of
# the columns; rows from a frame that lacks a column get NaN in that column.
mix_weather2 = pd.concat(objs=[india_weather, usa_weather, england_weather])
mix_weather2

Unnamed: 0,city,temperature,humidity,Population (in 10k)
0,mumbai,45.0,70.0,
1,chennai,55.0,80.0,
2,delhi,38.0,25.0,
3,lucknow,56.0,30.0,
0,new york,25.0,45.0,
1,los angeles,18.0,34.0,
2,las vegas,26.0,62.0,
3,washington dc,17.0,49.0,
0,london,13.0,,12.0
1,bermingham,,,6.0


## axis parameter
   - axis = 0 / row-wise concatenation (default)
   - axis = 1 / column-wise concatenation

In [11]:
# All three per-metric frames list the same cities in the same order.
usa_cities = ["new york", "los angeles", "las vegas", "washington dc"]

# One frame per metric, each pairing the city column with a single value column.
usa_temperature = pd.DataFrame(
    {
        "city": usa_cities,
        "temperature": [25, 18, 26, 17],
    }
)

usa_humidity = pd.DataFrame(
    {
        "city": usa_cities,
        "humidity": [45, 34, 62, 49],
    }
)

usa_population = pd.DataFrame(
    {
        "city": usa_cities,
        "population (in 10k)": [65, 32, 25, 54],
    }
)

In [12]:
# Display the usa_temperature frame.
usa_temperature

Unnamed: 0,city,temperature
0,new york,25
1,los angeles,18
2,las vegas,26
3,washington dc,17


In [13]:
# Display the usa_humidity frame.
usa_humidity

Unnamed: 0,city,humidity
0,new york,45
1,los angeles,34
2,las vegas,62
3,washington dc,49


In [14]:
# Display the usa_population frame.
usa_population

Unnamed: 0,city,population (in 10k)
0,new york,65
1,los angeles,32
2,las vegas,25
3,washington dc,54


### Attempt 1

In [15]:
# Column-wise concat of the complete frames: every input brings its own
# "city" column, so "city" is repeated once per frame in the result.
usa_weather_data = pd.concat(
    objs=[usa_temperature, usa_humidity, usa_population],
    axis=1,
)
usa_weather_data

Unnamed: 0,city,temperature,city.1,humidity,city.2,population (in 10k)
0,new york,25,new york,45,new york,65
1,los angeles,18,los angeles,34,los angeles,32
2,las vegas,26,las vegas,62,las vegas,25
3,washington dc,17,washington dc,49,washington dc,54


### Attempt 2
- making city appear only once

In [16]:
# Keep the whole temperature frame (it supplies "city") and append only the
# value column (a Series) from each of the other two frames, so "city"
# appears just once.
usa_weather_data = pd.concat(
    objs=[
        usa_temperature,
        usa_humidity["humidity"],
        usa_population["population (in 10k)"],
    ],
    axis=1,
)
usa_weather_data

Unnamed: 0,city,temperature,humidity,population (in 10k)
0,new york,25,45,65
1,los angeles,18,34,32
2,las vegas,26,62,25
3,washington dc,17,49,54


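### If the cities in different dataframes are not in order
- column-wise concatenation aligns rows by index label, not by city name, so pass the index parameter while creating each dataframe so that the same city gets the same index label everywhere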

In [17]:
# Reference ordering: default index 0-3 maps to
# new york, los angeles, las vegas, washington dc.
usa_temperature = pd.DataFrame(
    {
        "city": ["new york", "los angeles", "las vegas", "washington dc"],
        "temperature": [25, 18, 26, 17],
    }
)

# The cities below are listed in a different order, so each row is given an
# explicit index label equal to that city's label in usa_temperature.
# NOTE(review): the value lists are in the same order as in the earlier,
# unshuffled frames while the cities were shuffled, so e.g. 'los angeles'
# is paired with humidity 45 here (34 earlier) -- confirm this is intended.
usa_humidity = pd.DataFrame(
    {
        "city": ["los angeles", "las vegas", "new york", "washington dc"],
        "humidity": [45, 34, 62, 49],
    },
    index=[1, 2, 0, 3],
)

usa_population = pd.DataFrame(
    {
        "city": ["new york", "washington dc", "las vegas", "los angeles"],
        "population (in 10k)": [65, 32, 25, 54],
    },
    index=[0, 3, 2, 1],
)

In [18]:
# Display the usa_temperature frame (default index 0-3).
usa_temperature

Unnamed: 0,city,temperature
0,new york,25
1,los angeles,18
2,las vegas,26
3,washington dc,17


In [19]:
# Display the usa_humidity frame; rows appear in creation order with the
# explicitly assigned index labels.
usa_humidity

Unnamed: 0,city,humidity
1,los angeles,45
2,las vegas,34
0,new york,62
3,washington dc,49


In [20]:
# Display the usa_population frame; rows appear in creation order with the
# explicitly assigned index labels.
usa_population

Unnamed: 0,city,population (in 10k)
0,new york,65
3,washington dc,32
2,las vegas,25
1,los angeles,54


In [21]:
# Column-wise concat matches rows by index label, so the explicitly indexed
# humidity and population values line up with the right city row even though
# those frames were created in a different row order.
usa_weather_data = pd.concat(
    objs=[
        usa_temperature,
        usa_humidity["humidity"],
        usa_population["population (in 10k)"],
    ],
    axis=1,
)
usa_weather_data

Unnamed: 0,city,temperature,humidity,population (in 10k)
0,new york,25,62,65
1,los angeles,18,45,54
2,las vegas,26,34,25
3,washington dc,17,49,32


## join parameter
- join = 'inner' // intersection: keeps only the columns common to all the dataframes
- join = 'outer' // union: keeps every column, filling the gaps with NaN (default)

In [34]:
# join="outer" is the default: the result keeps the union of all columns and
# fills NaN where a frame does not have a given column.
mix_weather2 = pd.concat(
    objs=[india_weather, usa_weather, england_weather],
    join="outer",
)
mix_weather2

Unnamed: 0,city,temperature,humidity,Population (in 10k)
0,mumbai,45.0,70.0,
1,chennai,55.0,80.0,
2,delhi,38.0,25.0,
3,lucknow,56.0,30.0,
0,new york,25.0,45.0,
1,los angeles,18.0,34.0,
2,las vegas,26.0,62.0,
3,washington dc,17.0,49.0,
0,london,13.0,,12.0
1,bermingham,,,6.0


In [37]:
# join="inner" keeps only the columns that every input frame has
# (here "city" and "temperature").
mix_weather2 = pd.concat(
    objs=[india_weather, usa_weather, england_weather],
    join="inner",
)
mix_weather2

Unnamed: 0,city,temperature
0,mumbai,45.0
1,chennai,55.0
2,delhi,38.0
3,lucknow,56.0
0,new york,25.0
1,los angeles,18.0
2,las vegas,26.0
3,washington dc,17.0
0,london,13.0
1,bermingham,
