# MPG Cars

### Introduction:

The following exercise utilizes data from [UC Irvine Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/Auto+MPG)

### Step 1. Import the necessary libraries

In [1]:
import pandas as pd
import numpy as np

### Step 2. Import the first dataset [cars1](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv) and [cars2](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv).  

   ### Step 3. Assign each to a variable called cars1 and cars2

In [2]:
cars1 = pd.read_csv("https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv")
cars2 = pd.read_csv("https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv")

print(cars1.head())
print(cars2.head())

    mpg  cylinders  displacement horsepower  weight  acceleration  model  \
0  18.0          8           307        130    3504          12.0     70   
1  15.0          8           350        165    3693          11.5     70   
2  18.0          8           318        150    3436          11.0     70   
3  16.0          8           304        150    3433          12.0     70   
4  17.0          8           302        140    3449          10.5     70   

   origin                        car  Unnamed: 9  Unnamed: 10  Unnamed: 11  \
0       1  chevrolet chevelle malibu         NaN          NaN          NaN   
1       1          buick skylark 320         NaN          NaN          NaN   
2       1         plymouth satellite         NaN          NaN          NaN   
3       1              amc rebel sst         NaN          NaN          NaN   
4       1                ford torino         NaN          NaN          NaN   

   Unnamed: 12  Unnamed: 13  
0          NaN          NaN  
1          NaN

### Step 4. Oops, it seems our first dataset has some unnamed blank columns, fix cars1

In [3]:
cars1 = cars1.loc[:, "mpg":"car"]
cars1.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino


### Step 5. What is the number of observations in each dataset?

In [4]:
print(cars1.shape)
print(cars2.shape)

(198, 9)
(200, 9)


### Step 6. Join cars1 and cars2 into a single DataFrame called cars

In [10]:
cars = pd.concat([cars1, cars2])
cars

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino
...,...,...,...,...,...,...,...,...,...
195,27.0,4,140,86,2790,15.6,82,1,ford mustang gl
196,44.0,4,97,52,2130,24.6,82,2,vw pickup
197,32.0,4,135,84,2295,11.6,82,1,dodge rampage
198,28.0,4,120,79,2625,18.6,82,1,ford ranger


### Step 7. Oops, there is a column missing, called owners. Create a random number Series from 15,000 to 73,000.

In [11]:
nr_owners = np.random.randint(15000, high=73001, size=398, dtype='l')
nr_owners

array([33502, 52226, 51114, 59387, 61358, 66355, 72784, 39199, 45164,
       50363, 40077, 63214, 69368, 23826, 52070, 18965, 37005, 34913,
       64569, 61958, 51235, 26814, 22507, 57756, 66261, 66830, 35795,
       69232, 66086, 47104, 47281, 21915, 60999, 42556, 28039, 67011,
       30890, 61908, 34967, 51201, 31280, 25359, 30299, 47799, 72478,
       27357, 40806, 51616, 68896, 72805, 30990, 61015, 41233, 50442,
       59570, 26104, 50402, 55738, 63282, 58300, 48510, 36319, 62562,
       18308, 27545, 16326, 20777, 24952, 65043, 33086, 63457, 19700,
       19659, 62986, 55737, 37222, 72261, 47823, 57587, 38413, 64470,
       20342, 49710, 67516, 70007, 63919, 32815, 48362, 64480, 63734,
       50097, 33682, 65462, 66774, 20742, 67266, 63834, 66852, 61837,
       27808, 52885, 69900, 48518, 54711, 71906, 59756, 57017, 60842,
       39812, 48738, 44995, 46189, 67921, 29645, 26856, 52305, 19353,
       40697, 26680, 40606, 18203, 52106, 65268, 18253, 69082, 64024,
       54085, 61490,

### Step 8. Add the column owners to cars

In [13]:
cars['owners'] = nr_owners
cars.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car,owners
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu,33502
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320,52226
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite,51114
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst,59387
4,17.0,8,302,140,3449,10.5,70,1,ford torino,61358
