# MPG Cars

### Introduction:

The following exercise utilizes data from [UC Irvine Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/Auto+MPG)

### Step 1. Import the necessary libraries

In [16]:
import pandas as pd
import numpy as np

### Step 2. Import the first dataset [cars1](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv) and [cars2](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv).  

   ### Step 3. Assign each to a variable called cars1 and cars2

In [2]:
cars1 = pd.read_csv('https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv')
cars1.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu,,,,,
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320,,,,,
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite,,,,,
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst,,,,,
4,17.0,8,302,140,3449,10.5,70,1,ford torino,,,,,


In [3]:
cars2 = pd.read_csv('https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv')
cars2.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,33.0,4,91,53,1795,17.4,76,3,honda civic
1,20.0,6,225,100,3651,17.7,76,1,dodge aspen se
2,18.0,6,250,78,3574,21.0,76,1,ford granada ghia
3,18.5,6,250,110,3645,16.2,76,1,pontiac ventura sj
4,17.5,6,258,95,3193,17.8,76,1,amc pacer d/l


### Step 4. Oops, it seems our first dataset has some unnamed blank columns, fix cars1

In [12]:
cars1 = cars1.loc[:, 'mpg':'car']
cars1.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino


### Step 5. What is the number of observations in each dataset?

In [10]:
print('cars1:', cars1.shape[0])
print('cars2:', cars2.shape[0])

cars1: 198
cars2: 200


### Step 6. Join cars1 and cars2 into a single DataFrame called cars

In [15]:
cars = pd.concat([cars1, cars2])
cars

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino
...,...,...,...,...,...,...,...,...,...
195,27.0,4,140,86,2790,15.6,82,1,ford mustang gl
196,44.0,4,97,52,2130,24.6,82,2,vw pickup
197,32.0,4,135,84,2295,11.6,82,1,dodge rampage
198,28.0,4,120,79,2625,18.6,82,1,ford ranger


### Step 7. Oops, there is a column missing, called owners. Create a random number Series from 15,000 to 73,000.

In [18]:
owners = np.random.randint(15000, 73000, size=len(cars))
owners

array([37254, 40042, 60588, 72499, 69182, 53037, 30679, 69742, 28151,
       27565, 29168, 70981, 36535, 49346, 21566, 23665, 22722, 68417,
       53147, 68326, 17596, 28659, 70226, 20681, 37369, 70568, 20386,
       41830, 39870, 33969, 48648, 28941, 17135, 29115, 16352, 31771,
       60378, 61695, 72697, 15010, 25767, 42474, 22594, 68523, 60537,
       21225, 49117, 48555, 48864, 56540, 40845, 50388, 64379, 31084,
       53459, 19596, 49098, 18469, 47807, 55971, 64906, 60773, 29803,
       49885, 19183, 22308, 58542, 58701, 28978, 68163, 15129, 56393,
       17628, 69644, 40015, 44786, 26840, 72319, 26099, 67602, 43679,
       45670, 35509, 56677, 57147, 50829, 39731, 70302, 27904, 22630,
       48612, 48487, 67939, 43882, 40355, 36494, 31388, 24810, 45058,
       62848, 37783, 43292, 54498, 64099, 62629, 31705, 39700, 51525,
       39979, 54884, 18889, 21165, 27173, 31201, 47155, 65357, 44416,
       32165, 40917, 42574, 40007, 40537, 39048, 41484, 46300, 17605,
       36883, 51112,

### Step 8. Add the column owners to cars

In [19]:
cars['owners'] = owners
cars.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car,owners
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu,37254
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320,40042
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite,60588
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst,72499
4,17.0,8,302,140,3449,10.5,70,1,ford torino,69182
