# MPG Cars

### Introduction:

This exercise uses the Auto MPG dataset from the [UC Irvine Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/Auto+MPG).

### Step 1. Import the necessary libraries

In [1]:
import pandas as pd
import numpy as np
import re

### Step 2. Import the two datasets, [cars1](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv) and [cars2](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv).

### Step 3. Assign them to variables called cars1 and cars2, respectively

In [2]:
# Read the first Auto MPG file (cars1.csv) directly from the exercise repository.
cars1 = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv'
)

In [3]:
# Read the second Auto MPG file (cars2.csv) directly from the exercise repository.
cars2 = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv'
)

### Step 4. Oops, it seems our first dataset has some unnamed blank columns. Fix cars1.

In [4]:
# Preview the first five rows; the trailing "Unnamed" columns hold no values.
cars1.head(n=5)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu,,,,,
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320,,,,,
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite,,,,,
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst,,,,,
4,17.0,8,302,140,3449,10.5,70,1,ford torino,,,,,


In [5]:
# Preview the first five rows of the second file for comparison with cars1.
cars2.head(n=5)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,33.0,4,91,53,1795,17.4,76,3,honda civic
1,20.0,6,225,100,3651,17.7,76,1,dodge aspen se
2,18.0,6,250,78,3574,21.0,76,1,ford granada ghia
3,18.5,6,250,110,3645,16.2,76,1,pontiac ventura sj
4,17.5,6,258,95,3193,17.8,76,1,amc pacer d/l


In [6]:
# Labels of the last five columns, i.e. the candidates for removal.
cars1.iloc[:, -5:].columns

Index(['Unnamed: 9', 'Unnamed: 10', 'Unnamed: 11', 'Unnamed: 12',
       'Unnamed: 13'],
      dtype='object')

In [7]:
# Remove every column that contains no data at all (the blank "Unnamed" ones).
# Dropping "the last five columns" in place is not safe to re-run: a second
# execution would discard five real columns. Selecting all-NaN columns instead
# makes the cell idempotent, because once the blanks are gone nothing matches.
cars1 = cars1.dropna(axis='columns', how='all')

In [8]:
# Confirm the blank columns are gone: only the data columns should remain.
cars1.head(n=5)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino


### Step 5. What is the number of observations in each dataset?

In [9]:
# Number of observations (rows) in the first dataset.
len(cars1)

198

In [10]:
# Number of observations (rows) in the second dataset.
len(cars2)

200

### Step 6. Join cars1 and cars2 into a single DataFrame called cars

In [11]:
# Stack the two frames row-wise. ignore_index=True renumbers the rows from 0
# instead of keeping each frame's own row labels.
cars = pd.concat(objs=[cars1, cars2], axis=0, ignore_index=True)

In [12]:
# Display the combined frame (the rendered output abbreviates the middle rows).
cars

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino
...,...,...,...,...,...,...,...,...,...
393,27.0,4,140,86,2790,15.6,82,1,ford mustang gl
394,44.0,4,97,52,2130,24.6,82,2,vw pickup
395,32.0,4,135,84,2295,11.6,82,1,dodge rampage
396,28.0,4,120,79,2625,18.6,82,1,ford ranger


### Step 7. Oops, a column called owners is missing. Create a Series of random numbers from 15,000 to 73,000.

In [13]:
# Draw one owner count per row of `cars`, so the array always matches the frame
# it will be attached to (previously the length 398 was hard-coded).
# A locally seeded generator makes Restart & Run All reproduce the same values
# without touching NumPy's global random state.
owners_rng = np.random.default_rng(42)
# endpoint=True makes the upper bound inclusive, so 73,000 itself can be drawn;
# np.random.randint(15000, 73000, ...) treats the upper bound as exclusive.
gene = owners_rng.integers(15000, 73000, size=len(cars), endpoint=True)

In [14]:
# Inspect the generated values.
gene

array([55145, 65000, 71630, 69059, 55809, 23246, 72194, 52993, 58286,
       16476, 63532, 45824, 47315, 39117, 18540, 45723, 49762, 46217,
       58320, 41130, 33987, 72020, 31361, 15225, 32507, 40724, 50913,
       30282, 43977, 23603, 36114, 26073, 20617, 68352, 23576, 29245,
       63035, 36498, 22030, 59686, 24389, 48515, 72756, 35351, 50678,
       56225, 42270, 25574, 21300, 66746, 30018, 69125, 63098, 36048,
       67176, 56618, 34947, 35430, 66432, 52648, 70433, 67697, 72349,
       59064, 61012, 28568, 35434, 25216, 45054, 16131, 70492, 32393,
       72052, 70843, 27553, 24292, 56627, 26310, 39919, 33150, 26997,
       41551, 44339, 42676, 51291, 64739, 56489, 37390, 68660, 32472,
       39991, 32107, 45559, 15836, 49359, 68263, 33694, 21336, 48120,
       58336, 42638, 70486, 34525, 28510, 24713, 29553, 64689, 37708,
       50666, 41901, 53924, 63769, 56581, 30565, 45741, 67580, 72611,
       63002, 54936, 33404, 35732, 17399, 52501, 70094, 70475, 24252,
       58035, 31853,

In [15]:
# Element count of the generated array; it must equal the number of rows in cars.
gene.size

398

### Step 8. Add the column owners to cars

In [16]:
# Attach the generated values as a new `owners` column; the array is assigned
# positionally, so its length must match the number of rows in `cars`.
cars['owners'] = gene

In [17]:
# Display the frame again to confirm the `owners` column was added.
cars

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car,owners
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu,55145
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320,65000
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite,71630
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst,69059
4,17.0,8,302,140,3449,10.5,70,1,ford torino,55809
...,...,...,...,...,...,...,...,...,...,...
393,27.0,4,140,86,2790,15.6,82,1,ford mustang gl,68934
394,44.0,4,97,52,2130,24.6,82,2,vw pickup,72898
395,32.0,4,135,84,2295,11.6,82,1,dodge rampage,18249
396,28.0,4,120,79,2625,18.6,82,1,ford ranger,68843
