In [3]:
import pandas as pd

# Sample readings for three Texas cities, one row per city.
texas_weather = pd.DataFrame(
    [
        ('austin', 64, 36),
        ('dallas', 62, 40),
        ('houston', 65, 41),
    ],
    columns=['city', 'temperature', 'humidity'],
)

# Three California cities, using the same column layout as the Texas frame.
california_weather = pd.DataFrame(
    [
        ('san diego', 72, 16),
        ('los angeles', 74, 23),
        ('san francisco', 71, 22),
    ],
    columns=['city', 'temperature', 'humidity'],
)

In [4]:
texas_weather

Unnamed: 0,city,temperature,humidity
0,austin,64,36
1,dallas,62,40
2,houston,65,41


In [5]:
california_weather

Unnamed: 0,city,temperature,humidity
0,san diego,72,16
1,los angeles,74,23
2,san francisco,71,22


In [11]:
# With ignore_index=True the index values along the concatenation axis are
# not used; the resulting axis is labeled 0, …, n - 1 instead.
df = pd.concat(
    [texas_weather, california_weather],
    ignore_index=True,
)
df

Unnamed: 0,city,temperature,humidity
0,austin,64,36
1,dallas,62,40
2,houston,65,41
3,san diego,72,16
4,los angeles,74,23
5,san francisco,71,22


In [20]:
# Each entry of keys labels the rows of the frame at the same position in the
# list, so the original data frames can be pulled back out with .loc.
df = pd.concat(
    [texas_weather, california_weather],
    keys=['texas', 'california'],
)
df

Unnamed: 0,Unnamed: 1,city,temperature,humidity
texas,0,austin,64,36
texas,1,dallas,62,40
texas,2,houston,65,41
california,0,san diego,72,16
california,1,los angeles,74,23
california,2,san francisco,71,22


In [21]:
# Notice the additional index level? It holds the keys passed to concat,
# so selecting 'texas' on it returns just the rows of the Texas frame.
df.loc['texas']

Unnamed: 0,city,temperature,humidity
0,austin,64,36
1,dallas,62,40
2,houston,65,41


In [36]:
# What if you want to concatenate dataframes horizontally?
# Two frames about the same cities, both carrying the explicit row labels 0, 1, 2.
temp1 = pd.DataFrame(
    [
        ('austin', 72, 16),
        ('dallas', 74, 23),
        ('houston', 71, 22),
    ],
    columns=['city', 'temperature', 'humidity'],
    index=[0, 1, 2],
)

temp2 = pd.DataFrame(
    [
        ('austin', 'rain'),
        ('dallas', 'sunny'),
        ('houston', 'sunny'),
    ],
    columns=['city', 'event'],
    index=[0, 1, 2],
)

In [37]:
temp1

Unnamed: 0,city,temperature,humidity
0,austin,72,16
1,dallas,74,23
2,houston,71,22


In [38]:
temp2

Unnamed: 0,city,event
0,austin,rain
1,dallas,sunny
2,houston,sunny


In [35]:
# Concatenating along the columns puts the frames side by side.
# Both inputs have a 'city' column, so it appears twice in the result.
df = pd.concat([temp1, temp2], axis='columns')
df

Unnamed: 0,city,temperature,humidity,city.1,event
0,austin,72,16,austin,rain
1,dallas,74,23,dallas,sunny
2,houston,71,22,houston,sunny


In [39]:
temp1

Unnamed: 0,city,temperature,humidity
0,austin,72,16
1,dallas,74,23
2,houston,71,22


In [43]:
# A Series can be concatenated to a dataframe, too; its name ('event')
# becomes the label of the added column.
s = pd.Series(data=['Sunny', 'Rainy', 'Sunny'], name='event')
df = pd.concat((temp1, s), axis='columns')
df

Unnamed: 0,city,temperature,humidity,event
0,austin,72,16,Sunny
1,dallas,74,23,Rainy
2,houston,71,22,Sunny


In [59]:
# When concatenating, you need to be mindful of how your columns are ordered
# and of the indexes. Sometimes, it's better to use merge.

# Example 1
temp1 = pd.DataFrame(
    [
        ('austin', 72, 16),
        ('dallas', 74, 23),
        ('houston', 71, 22),
    ],
    columns=['city', 'temperature', 'humidity'],
)

temp2 = pd.DataFrame(
    [
        ('austin', 'rain'),
        ('dallas', 'sunny'),
        ('houston', 'sunny'),
    ],
    columns=['city', 'event'],
)

In [57]:
temp1

Unnamed: 0,city,temperature,humidity
0,austin,72,16
1,dallas,74,23
2,houston,71,22


In [54]:
temp2

Unnamed: 0,city,event
0,austin,rain
1,dallas,sunny
2,houston,sunny


In [58]:
# on names the column to join on; here the rows are matched by city.
temp1.merge(temp2, on='city')

Unnamed: 0,city,temperature,humidity,event
0,austin,72,16,rain
1,dallas,74,23,sunny
2,houston,71,22,sunny


In [60]:
# Merging - Example 2 (notice both dataframes don't have the same number of rows)

temp1 = pd.DataFrame(
    [
        ('austin', 72, 16),
        ('dallas', 74, 23),
        ('houston', 71, 22),
        ('san antonio', 72, 24),
    ],
    columns=['city', 'temperature', 'humidity'],
)

temp2 = pd.DataFrame(
    [
        ('austin', 'rain'),
        ('dallas', 'sunny'),
        ('houston', 'sunny'),
    ],
    columns=['city', 'event'],
)

In [61]:
temp1

Unnamed: 0,city,temperature,humidity
0,austin,72,16
1,dallas,74,23
2,houston,71,22
3,san antonio,72,24


In [62]:
temp2

Unnamed: 0,city,event
0,austin,rain
1,dallas,sunny
2,houston,sunny


In [73]:
# how specifies what kind of join you want to accomplish; inner is the default.
# indicator=True yields a _merge column that tells you which df the row came from.
df = pd.merge(
    temp1,
    temp2,
    on='city',
    how='outer',
    indicator=True,
)
df

Unnamed: 0,city,temperature,humidity,event,_merge
0,austin,72,16,rain,both
1,dallas,74,23,sunny,both
2,houston,71,22,sunny,both
3,san antonio,72,24,,left_only


In [78]:
# merge handles columns that are repeated in both frames by renaming them.
# The default names follow the format "col_name_x" / "col_name_y";
# pass the suffixes arg to choose different endings.
temp1 = pd.DataFrame(
    [
        ('austin', 'rain1'),
        ('dallas', 'sunny1'),
        ('houston', 'sunny1'),
    ],
    columns=['city', 'event'],
)
temp2 = pd.DataFrame(
    [
        ('austin', 'rain2'),
        ('dallas', 'sunny2'),
        ('houston', 'sunny2'),
    ],
    columns=['city', 'event'],
)

df = temp1.merge(temp2, on='city', suffixes=('_left', '_right'))
df

Unnamed: 0,city,event_left,event_right
0,austin,rain1,rain2
1,dallas,sunny1,sunny2
2,houston,sunny1,sunny2
