# MPG Cars

### Introduction:

The following exercise uses data from the [UC Irvine Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/Auto+MPG).

### Step 1. Import the necessary libraries

In [27]:
import pandas as pd
import numpy as np

### Step 2. Import the two datasets, [cars1](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv) and [cars2](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv).

### Step 3. Assign each to a variable called cars1 and cars2

In [17]:
# Load both halves of the Auto MPG data in this cell. The cars2 preview further
# down runs before the later cell that reads cars2, so on a fresh kernel
# (Restart & Run All) it would raise NameError unless cars2 already exists here.
cars1 = pd.read_csv('https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv', sep = ',')
cars2 = pd.read_csv('https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv', sep = ',')

In [18]:
# Preview the first five rows of the freshly loaded cars1 frame.
cars1.head(n=5)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu,,,,,
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320,,,,,
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite,,,,,
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst,,,,,
4,17.0,8,302,140,3449,10.5,70,1,ford torino,,,,,


In [15]:
# Preview the first five rows of cars2 to compare its columns with cars1.
cars2.head(n=5)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,33.0,4,91,53,1795,17.4,76,3,honda civic
1,20.0,6,225,100,3651,17.7,76,1,dodge aspen se
2,18.0,6,250,78,3574,21.0,76,1,ford granada ghia
3,18.5,6,250,110,3645,16.2,76,1,pontiac ventura sj
4,17.5,6,258,95,3193,17.8,76,1,amc pacer d/l


In [7]:
# Read the second half of the dataset straight from GitHub.
# The file is comma-separated, which is read_csv's default delimiter.
cars2_url = 'https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv'
cars2 = pd.read_csv(cars2_url)

### Step 4. Oops, it seems our first dataset has some unnamed blank columns. Fix cars1 by dropping them.

In [23]:
# Keep only the labelled span of real columns, 'mpg' through 'car'
# (label slices include both endpoints). The blank 'Unnamed: N' columns
# come after 'car' in the loaded frame, so they fall outside the slice.
real_columns = slice('mpg', 'car')
cars1 = cars1.loc[:, real_columns]


In [24]:
# Confirm the cleanup: only the columns 'mpg' through 'car' should remain.
cars1.head(n=5)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino


### Step 5. What is the number of observations in each dataset?

In [14]:
# Print the number of rows (observations) in each frame, cars1 first,
# one count per line.
for frame in (cars1, cars2):
    print(len(frame))

198
200


### Step 6. Join cars1 and cars2 into a single DataFrame called cars

In [26]:
# Stack the two frames row-wise (concat's default axis) and renumber the
# index from 0 so the cars2 rows do not repeat the labels used by cars1.
frames = [cars1, cars2]
cars = pd.concat(frames, ignore_index=True)
cars

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino
...,...,...,...,...,...,...,...,...,...
393,27.0,4,140,86,2790,15.6,82,1,ford mustang gl
394,44.0,4,97,52,2130,24.6,82,2,vw pickup
395,32.0,4,135,84,2295,11.6,82,1,dodge rampage
396,28.0,4,120,79,2625,18.6,82,1,ford ranger


### Step 7. Oops, a column called owners is missing. Create a Series of random integers from 15,000 to 73,000.

In [28]:
# Draw one owner count per row of `cars`, uniformly from 15,000 to 73,000.
# - endpoint=True makes the upper bound inclusive; np.random.randint treats
#   `high` as exclusive, so 73,000 itself could never be drawn.
# - size=len(cars) keeps the array aligned with the frame instead of relying
#   on a hard-coded row count.
# - The fixed seed makes the values reproducible across kernel restarts.
rng = np.random.default_rng(seed=42)
owners = rng.integers(low=15000, high=73000, size=len(cars), endpoint=True)
owners

array([45571, 20135, 58345, 63172, 26077, 48955, 27224, 70944, 54294,
       39416, 54986, 41428, 70472, 50764, 70076, 35903, 29106, 51913,
       24555, 32532, 66342, 53431, 44820, 24892, 34582, 72266, 63300,
       58572, 65146, 46194, 28104, 17926, 15624, 52764, 51421, 47402,
       18427, 63096, 54483, 53627, 46600, 57191, 56491, 56298, 71829,
       60932, 43732, 62364, 52228, 45355, 41078, 61944, 36221, 43439,
       15324, 24994, 19921, 70844, 53256, 33512, 40446, 57526, 41657,
       42857, 22928, 16236, 48595, 55422, 41514, 27207, 63302, 28854,
       47704, 36662, 37529, 22092, 51371, 33731, 50637, 17912, 58417,
       35880, 40397, 54756, 55294, 68025, 17464, 21775, 60674, 31842,
       50409, 37644, 55381, 68028, 22737, 57980, 31000, 61896, 70791,
       34977, 28023, 21312, 51296, 61035, 58294, 24416, 49223, 39168,
       44391, 46184, 58951, 35976, 21036, 60025, 15114, 36092, 31388,
       41644, 55680, 66948, 26091, 55072, 26022, 48088, 47619, 56861,
       44225, 15749,

### Step 8. Add the column owners to cars

In [29]:
# Attach the random owner counts as a new 'owners' column (one value per
# row, in order), then preview the first five rows to check the result.
cars.loc[:, 'owners'] = owners
cars.head(n=5)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car,owners
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu,45571
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320,20135
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite,58345
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst,63172
4,17.0,8,302,140,3449,10.5,70,1,ford torino,26077
