# MPG Cars

### Introduction:

The following exercise utilizes data from [UC Irvine Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/Auto+MPG)

### Step 1. Import the necessary libraries

In [1]:
import pandas as pd
import numpy as np

### Step 2. Import the datasets [cars1](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv) and [cars2](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv).  

### Step 3. Assign each to a variable, called cars1 and cars2 respectively

In [3]:
# Locations of the two Auto MPG CSV files.
cars1_url = "https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv"
cars2_url = "https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv"

# Load each file into its own DataFrame (cars1 first, then cars2).
cars1 = pd.read_csv(cars1_url)
cars2 = pd.read_csv(cars2_url)

# Preview the top rows of each frame as plain text, in the same order.
for frame in (cars1, cars2):
    print(frame.head())

    mpg  cylinders  displacement  ... Unnamed: 11  Unnamed: 12  Unnamed: 13
0  18.0          8           307  ...         NaN          NaN          NaN
1  15.0          8           350  ...         NaN          NaN          NaN
2  18.0          8           318  ...         NaN          NaN          NaN
3  16.0          8           304  ...         NaN          NaN          NaN
4  17.0          8           302  ...         NaN          NaN          NaN

[5 rows x 14 columns]
    mpg  cylinders  displacement  ... model  origin                 car
0  33.0          4            91  ...    76       3         honda civic
1  20.0          6           225  ...    76       1      dodge aspen se
2  18.0          6           250  ...    76       1   ford granada ghia
3  18.5          6           250  ...    76       1  pontiac ventura sj
4  17.5          6           258  ...    76       1       amc pacer d/l

[5 rows x 9 columns]


### Step 4. Oops, it seems our first dataset has some unnamed blank columns. Fix cars1.

In [4]:
# Keep only the columns labelled "mpg" through "car". Label-based slices in
# .loc include both end points, so the "Unnamed: N" columns that follow "car"
# are the ones left out.
named_columns = slice("mpg", "car")
cars1 = cars1.loc[:, named_columns]
cars1.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino


### Step 5. What is the number of observations in each dataset?

In [6]:
# Report (rows, columns) for each dataset, one tuple per line.
print(cars1.shape, cars2.shape, sep="\n")

(198, 9)
(200, 9)


### Step 6. Join cars1 and cars2 into a single DataFrame called cars

In [7]:
# Stack cars2 underneath cars1 to build the combined frame.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, so
# pd.concat is used instead. As with append, each frame keeps its own row
# labels (pass ignore_index=True to renumber the rows 0..n-1 instead).
cars = pd.concat([cars1, cars2])
cars

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino
...,...,...,...,...,...,...,...,...,...
195,27.0,4,140,86,2790,15.6,82,1,ford mustang gl
196,44.0,4,97,52,2130,24.6,82,2,vw pickup
197,32.0,4,135,84,2295,11.6,82,1,dodge rampage
198,28.0,4,120,79,2625,18.6,82,1,ford ranger


### Step 7. Oops, there is a column missing, called owners. Create a Series of random numbers from 15,000 to 73,000.

In [8]:
# Draw one random owner count per row of `cars`, so the array length always
# matches the frame instead of relying on a hard-coded 398.
# randint's `high` bound is exclusive, hence 73001 so that 73,000 can be drawn.
nr_owners = np.random.randint(15000, high=73001, size=len(cars), dtype='l')
nr_owners

array([64692, 45577, 25424, 26374, 63330, 45100, 65804, 35608, 58399,
       59205, 39594, 48385, 52309, 50194, 68381, 18249, 65922, 40529,
       66957, 44864, 20876, 21210, 59979, 47451, 47004, 61383, 29184,
       30994, 50058, 66601, 56082, 26528, 27861, 16436, 48878, 60375,
       58146, 42657, 15654, 60278, 57419, 16776, 32709, 49829, 55080,
       48618, 58072, 52912, 50461, 47945, 51205, 15173, 55935, 68934,
       57013, 64547, 44502, 25803, 71133, 38786, 22792, 53510, 22414,
       19900, 53725, 57290, 17478, 29083, 52211, 30987, 66433, 30505,
       39435, 28031, 51295, 67154, 49050, 72583, 54607, 64896, 61121,
       56587, 67344, 59310, 47197, 46600, 32608, 45313, 16968, 51290,
       36211, 26321, 23509, 37063, 62823, 20679, 15372, 71009, 64471,
       48030, 45306, 43157, 59866, 49637, 43389, 27965, 42916, 23946,
       46913, 28839, 48241, 71931, 39208, 23615, 56667, 29461, 29642,
       42145, 23174, 44040, 55186, 41611, 70142, 38062, 23786, 50663,
       72466, 70269,

### Step 8. Add the column owners to cars

In [9]:
# Attach the random owner counts as a new "owners" column (values are matched
# to rows by position), then show the last rows to confirm the column exists.
cars = cars.assign(owners=nr_owners)
cars.tail()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car,owners
195,27.0,4,140,86,2790,15.6,82,1,ford mustang gl,66380
196,44.0,4,97,52,2130,24.6,82,2,vw pickup,20058
197,32.0,4,135,84,2295,11.6,82,1,dodge rampage,55299
198,28.0,4,120,79,2625,18.6,82,1,ford ranger,21460
199,31.0,4,119,82,2720,19.4,82,1,chevy s-10,49990
