# Merging Datasets
Use pandas merges to create a combined dataset from `clean_08.csv` and `clean_18.csv`. You should have created these data files in the previous section, *Fixing Data Types Pt 3*.

In [10]:
# Notebook display setup: show the value of every top-level expression in a
# cell (e.g. both `.shape` and `.head()`), not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

# Enable IPython's "greedy" tab-completion option.
%config IPCompleter.greedy = True

In [11]:
import pandas as pd
import numpy as np

In [12]:
# Read the cleaned 2008 data, then report its dimensions and preview two rows
df_08 = pd.read_csv(filepath_or_buffer='clean_08.csv')
df_08.shape
df_08.head(2)

(987, 13)

Unnamed: 0,model,displ,cyl,trans,drive,fuel,veh_class,air_pollution_score,city_mpg,hwy_mpg,cmb_mpg,greenhouse_gas_score,smartway
0,ACURA MDX,3.7,6.0,Auto-S5,4WD,Gasoline,SUV,7.0,15.0,20.0,17.0,4.0,no
1,ACURA RDX,2.3,4.0,Auto-S5,4WD,Gasoline,SUV,7.0,17.0,22.0,19.0,5.0,no


In [13]:
# Read the cleaned 2018 data, then report its dimensions and preview two rows
df_18 = pd.read_csv(filepath_or_buffer='clean_18.csv')
df_18.shape
df_18.head(2)

(832, 13)

Unnamed: 0,model,displ,cyl,trans,drive,fuel,veh_class,air_pollution_score,city_mpg,hwy_mpg,cmb_mpg,greenhouse_gas_score,smartway
0,ACURA RDX,3.5,6.0,SemiAuto-6,2WD,Gasoline,small SUV,3.0,20.0,28.0,23.0,5.0,No
1,ACURA RDX,3.5,6.0,SemiAuto-6,4WD,Gasoline,small SUV,3.0,19.0,27.0,22.0,4.0,No


### Create combined dataset

In [14]:
# rename 2008 columns: trim each label to 10 characters and tag it with '_08'
# so the 2008 columns stay distinguishable from the 2018 ones after the merge
def _tag_08(column_name):
    """Return the label stripped, truncated to 10 characters, plus '_08'.

    Labels that already end in '_08' are returned unchanged, so re-running
    this cell does not stack suffixes (e.g. 'model_08_08') and break the
    later merge on 'model_08'.
    """
    cleaned = column_name.strip()
    if cleaned.endswith('_08'):
        return cleaned
    return cleaned[:10] + '_08'

# Reassign instead of inplace=True: rename() returns a new frame, and the
# guard above makes the cell safe to execute more than once.
df_08 = df_08.rename(columns=_tag_08)
df_08.head(n=3)

Unnamed: 0,model_08,displ_08,cyl_08,trans_08,drive_08,fuel_08,veh_class_08,air_pollut_08,city_mpg_08,hwy_mpg_08,cmb_mpg_08,greenhouse_08,smartway_08
0,ACURA MDX,3.7,6.0,Auto-S5,4WD,Gasoline,SUV,7.0,15.0,20.0,17.0,4.0,no
1,ACURA RDX,2.3,4.0,Auto-S5,4WD,Gasoline,SUV,7.0,17.0,22.0,19.0,5.0,no
2,ACURA RL,3.5,6.0,Auto-S5,4WD,Gasoline,midsize car,7.0,16.0,24.0,19.0,5.0,no


In [15]:
# Inner-join 2008 and 2018 rows on model name: keeps only models present in
# both years, pairing every matching 2008 row with every matching 2018 row
df_combined = df_08.merge(
    df_18,
    how='inner',
    left_on='model_08',
    right_on='model',
)
df_combined.head(3)

Unnamed: 0,model_08,displ_08,cyl_08,trans_08,drive_08,fuel_08,veh_class_08,air_pollut_08,city_mpg_08,hwy_mpg_08,...,trans,drive,fuel,veh_class,air_pollution_score,city_mpg,hwy_mpg,cmb_mpg,greenhouse_gas_score,smartway
0,ACURA RDX,2.3,4.0,Auto-S5,4WD,Gasoline,SUV,7.0,17.0,22.0,...,SemiAuto-6,2WD,Gasoline,small SUV,3.0,20.0,28.0,23.0,5.0,No
1,ACURA RDX,2.3,4.0,Auto-S5,4WD,Gasoline,SUV,7.0,17.0,22.0,...,SemiAuto-6,4WD,Gasoline,small SUV,3.0,19.0,27.0,22.0,4.0,No
2,AUDI A3,2.0,4.0,Man-6,2WD,Gasoline,station wagon,7.0,21.0,29.0,...,AMS-6,4WD,Gasoline,small car,7.0,24.0,31.0,27.0,6.0,No


Save the combined dataset

In [16]:
# Write the merged frame to disk without the row index
df_combined.to_csv(path_or_buf='combined_dataset.csv', index=False)