# Chapter 10: Merging, joining, and concatenating

## 10.1 Introducing the data sets

In [1]:
import pandas as pd

In [2]:
groups1 = pd.read_csv("data/ch10/meetup/groups1.csv")
groups1.head()

Unnamed: 0,group_id,name,category_id,city_id
0,6388,Alternative Health NYC,14,10001
1,6510,Alternative Energy Meetup,4,10001
2,8458,NYC Animal Rights,26,10001
3,8940,The New York City Anime Group,29,10001
4,10104,NYC Pit Bull Group,26,10001


In [3]:
groups2 = pd.read_csv("data/ch10/meetup/groups2.csv")
groups2.head()

Unnamed: 0,group_id,name,category_id,city_id
0,18879327,BachataMania,5,10001
1,18880221,Photoshoot Chicago - Photography and Modeling ...,27,60601
2,18880426,Chicago Adult Push / Kick Scooter Group Riding...,31,60601
3,18880495,Chicago International Soccer Club,32,60601
4,18880695,Impact.tech San Francisco Meetup,2,94101


In [4]:
categories = pd.read_csv("data/ch10/meetup/categories.csv")
categories.head()

Unnamed: 0,category_id,category_name
0,1,Arts & Culture
1,3,Cars & Motorcycles
2,4,Community & Environment
3,5,Dancing
4,6,Education & Learning


In [5]:
pd.read_csv("data/ch10/meetup/cities.csv").head()

Unnamed: 0,id,city,state,zip
0,7093,West New York,NJ,7093
1,10001,New York,NY,10001
2,13417,New York Mills,NY,13417
3,46312,East Chicago,IN,46312
4,56567,New York Mills,MN,56567


In [6]:
cities = pd.read_csv(
    "data/ch10/meetup/cities.csv", dtype={"zip": "string"}
)

cities.head()

Unnamed: 0,id,city,state,zip
0,7093,West New York,NJ,7093
1,10001,New York,NY,10001
2,13417,New York Mills,NY,13417
3,46312,East Chicago,IN,46312
4,56567,New York Mills,MN,56567


## 10.2 Concatenating data sets

In [7]:
pd.concat(objs=[groups1, groups2])

Unnamed: 0,group_id,name,category_id,city_id
0,6388,Alternative Health NYC,14,10001
1,6510,Alternative Energy Meetup,4,10001
2,8458,NYC Animal Rights,26,10001
3,8940,The New York City Anime Group,29,10001
4,10104,NYC Pit Bull Group,26,10001
...,...,...,...,...
8326,26377464,Shinect,34,94101
8327,26377698,The art of getting what you want [conference s...,14,94101
8328,26378067,Streeterville Running Group,9,60601
8329,26378128,Just Dance NYC,23,10001


In [8]:
len(groups1)

7999

In [9]:
len(groups2)

8331

In [10]:
len(groups1) + len(groups2)

16330

In [11]:
pd.concat(objs=[groups1, groups2], ignore_index=True)

Unnamed: 0,group_id,name,category_id,city_id
0,6388,Alternative Health NYC,14,10001
1,6510,Alternative Energy Meetup,4,10001
2,8458,NYC Animal Rights,26,10001
3,8940,The New York City Anime Group,29,10001
4,10104,NYC Pit Bull Group,26,10001
...,...,...,...,...
16325,26377464,Shinect,34,94101
16326,26377698,The art of getting what you want [conference s...,14,94101
16327,26378067,Streeterville Running Group,9,60601
16328,26378128,Just Dance NYC,23,10001


In [12]:
pd.concat(objs=[groups1, groups2], keys=["G1", "G2"])

Unnamed: 0,Unnamed: 1,group_id,name,category_id,city_id
G1,0,6388,Alternative Health NYC,14,10001
G1,1,6510,Alternative Energy Meetup,4,10001
G1,2,8458,NYC Animal Rights,26,10001
G1,3,8940,The New York City Anime Group,29,10001
G1,4,10104,NYC Pit Bull Group,26,10001
...,...,...,...,...,...
G2,8326,26377464,Shinect,34,94101
G2,8327,26377698,The art of getting what you want [conference s...,14,94101
G2,8328,26378067,Streeterville Running Group,9,60601
G2,8329,26378128,Just Dance NYC,23,10001


In [13]:
groups = pd.concat(objs=[groups1, groups2], ignore_index=True)

## 10.3 Missing values in concatenated DataFrames

In [14]:
sports_champions_A = pd.DataFrame(
    data=[
        ["New England Patriots", "Huston Astros"],
        ["Philadellphia Eagles", "Boston Red Sox"]
    ],
    columns=["Football", "Baseball"],
    index=[2017, 2018]
)

sports_champions_A

Unnamed: 0,Football,Baseball
2017,New England Patriots,Huston Astros
2018,Philadellphia Eagles,Boston Red Sox


In [15]:
sports_champions_B = pd.DataFrame(
    data=[
        ["New England Patriots", "St. Louis Blues"],
        ["Kansas City Chiefs", "Tampa Bay Lightning"]
    ],
    columns=["Football", "Hockey"],
    index=[2019, 2020]
)

sports_champions_B

Unnamed: 0,Football,Hockey
2019,New England Patriots,St. Louis Blues
2020,Kansas City Chiefs,Tampa Bay Lightning


In [16]:
pd.concat(objs=[sports_champions_A, sports_champions_B])

Unnamed: 0,Football,Baseball,Hockey
2017,New England Patriots,Huston Astros,
2018,Philadellphia Eagles,Boston Red Sox,
2019,New England Patriots,,St. Louis Blues
2020,Kansas City Chiefs,,Tampa Bay Lightning


In [17]:
sports_champions_C = pd.DataFrame(
    data=[
        ["Piisttsburgh Penguins", "Golden State Worriors"],
        ["Washington Capitals", "Golden State Worriors"]
    ],
    columns=["Hockey", "Basketball"],
    index=[2017, 2018]
)

sports_champions_C

Unnamed: 0,Hockey,Basketball
2017,Piisttsburgh Penguins,Golden State Worriors
2018,Washington Capitals,Golden State Worriors


In [18]:
pd.concat(objs=[sports_champions_A, sports_champions_C])

Unnamed: 0,Football,Baseball,Hockey,Basketball
2017,New England Patriots,Huston Astros,,
2018,Philadellphia Eagles,Boston Red Sox,,
2017,,,Piisttsburgh Penguins,Golden State Worriors
2018,,,Washington Capitals,Golden State Worriors


In [19]:
# The two lines below are equivalent
pd.concat(
    objs=[sports_champions_A, sports_champions_C],
    axis=1
)

pd.concat(
    objs=[sports_champions_A, sports_champions_C],
    axis="columns"
)

Unnamed: 0,Football,Baseball,Hockey,Basketball
2017,New England Patriots,Huston Astros,Piisttsburgh Penguins,Golden State Worriors
2018,Philadellphia Eagles,Boston Red Sox,Washington Capitals,Golden State Worriors


## 10.4 Left joins

In [20]:
groups.head(3)

Unnamed: 0,group_id,name,category_id,city_id
0,6388,Alternative Health NYC,14,10001
1,6510,Alternative Energy Meetup,4,10001
2,8458,NYC Animal Rights,26,10001


In [21]:
categories.head(3)

Unnamed: 0,category_id,category_name
0,1,Arts & Culture
1,3,Cars & Motorcycles
2,4,Community & Environment


In [22]:
groups.merge(categories, how="left", on="category_id").head()

Unnamed: 0,group_id,name,category_id,city_id,category_name
0,6388,Alternative Health NYC,14,10001,Health & Wellbeing
1,6510,Alternative Energy Meetup,4,10001,Community & Environment
2,8458,NYC Animal Rights,26,10001,
3,8940,The New York City Anime Group,29,10001,Sci-Fi & Fantasy
4,10104,NYC Pit Bull Group,26,10001,
