# MPG Cars

### Introduction:

The following exercise utilizes data from [UC Irvine Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/Auto+MPG)

### Step 1. Import the necessary libraries

In [22]:
import pandas as pd
import numpy as np

### Step 2. Import the first dataset [cars1](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv) and [cars2](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv).  

   ### Step 3. Assign each to a variable called cars1 and cars2

In [23]:
cars1 = pd.read_csv('https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv')
cars2 = pd.read_csv('https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv')
print(cars1.head())
print(cars2.head())

    mpg  cylinders  displacement horsepower  weight  acceleration  model  \
0  18.0          8           307        130    3504          12.0     70   
1  15.0          8           350        165    3693          11.5     70   
2  18.0          8           318        150    3436          11.0     70   
3  16.0          8           304        150    3433          12.0     70   
4  17.0          8           302        140    3449          10.5     70   

   origin                        car  Unnamed: 9  Unnamed: 10  Unnamed: 11  \
0       1  chevrolet chevelle malibu         NaN          NaN          NaN   
1       1          buick skylark 320         NaN          NaN          NaN   
2       1         plymouth satellite         NaN          NaN          NaN   
3       1              amc rebel sst         NaN          NaN          NaN   
4       1                ford torino         NaN          NaN          NaN   

   Unnamed: 12  Unnamed: 13  
0          NaN          NaN  
1          NaN

### Step 4. Oops, it seems our first dataset has some unnamed blank columns, fix cars1

In [24]:
cars1.dropna(axis = 1, inplace = True)
cars1.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino


### Step 5. What is the number of observations in each dataset?

In [25]:
print(cars1.info())
print(cars2.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 198 entries, 0 to 197
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   mpg           198 non-null    float64
 1   cylinders     198 non-null    int64  
 2   displacement  198 non-null    int64  
 3   horsepower    198 non-null    object 
 4   weight        198 non-null    int64  
 5   acceleration  198 non-null    float64
 6   model         198 non-null    int64  
 7   origin        198 non-null    int64  
 8   car           198 non-null    object 
dtypes: float64(2), int64(5), object(2)
memory usage: 14.1+ KB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   mpg           200 non-null    float64
 1   cylinders     200 non-null    int64  
 2   displacement  200 non-null    int64  
 3   horsepower    200 non-null    o

### Step 6. Join cars1 and cars2 into a single DataFrame called cars

In [26]:
cars_combined = pd.concat([cars1, cars2], axis=0, ignore_index=True)

print(cars_combined.shape)  # Should be (398, columns)
cars_combined.head()

(398, 9)


Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino


### Step 7. Oops, there is a column missing, called owners. Create a random number Series from 15,000 to 73,000.

In [27]:
owners = np.random.randint(15000, high=73001, size=398, dtype='l')
owners

array([55874, 44939, 17589, 33307, 34302, 28250, 41255, 64815, 38287,
       51273, 24982, 50316, 51199, 72034, 47130, 52277, 66676, 64568,
       30847, 54557, 52127, 40302, 26688, 20625, 64892, 51490, 50178,
       29328, 70196, 43840, 16224, 60809, 23090, 19655, 61273, 49105,
       64992, 44670, 45117, 72998, 31213, 50538, 16141, 59121, 65756,
       58365, 65389, 62849, 68930, 48015, 52175, 68323, 69186, 15184,
       57483, 46527, 55533, 20722, 23102, 24911, 39159, 19907, 16508,
       46828, 46598, 51305, 38980, 56567, 24205, 30052, 69510, 18001,
       33560, 64859, 49306, 38538, 50914, 72052, 63829, 42980, 51579,
       20616, 63919, 34587, 44361, 55119, 22904, 49823, 34451, 30179,
       24150, 36580, 15667, 53133, 29369, 43732, 38462, 33336, 26114,
       49850, 24941, 42475, 47609, 44277, 15541, 35634, 51199, 70539,
       66381, 32433, 57937, 46769, 46773, 16828, 26203, 21640, 35429,
       32933, 57717, 28494, 29849, 50336, 19926, 23619, 28804, 34015,
       23144, 44051,

### Step 8. Add the column owners to cars

In [28]:
cars_combined['owners'] = owners
cars_combined.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car,owners
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu,55874
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320,44939
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite,17589
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst,33307
4,17.0,8,302,140,3449,10.5,70,1,ford torino,34302
