In [1]:
import os
import pandas as pd
import numpy as np

# Cap how much of any frame is rendered: at most 7 columns and 5 rows.
for option_name, option_value in (('display.max_columns', 7),
                                  ('display.max_rows', 5)):
    pd.set_option(option_name, option_value)

# Read the Auto MPG CSV; cells holding 'NA' or '?' are parsed as missing values.
df = pd.read_csv(
    filepath_or_buffer="https://data.heatonresearch.com/data/t81-558/auto-mpg.csv",
    na_values=['NA', '?'],
)

# Render the (truncated) frame as rich output.
display(df)

Unnamed: 0,mpg,cylinders,displacement,...,year,origin,name
0,18.0,8,307.0,...,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,...,70,1,buick skylark 320
...,...,...,...,...,...,...,...
396,28.0,4,120.0,...,82,1,ford ranger
397,31.0,4,119.0,...,82,1,chevy s-10


In [2]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

Unnamed: 0,mpg,cylinders,displacement,...,year,origin,name
0,18.0,8,307.0,...,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,...,70,1,buick skylark 320
2,18.0,8,318.0,...,70,1,plymouth satellite
3,16.0,8,304.0,...,70,1,amc rebel sst
4,17.0,8,302.0,...,70,1,ford torino


In [3]:
# Print every column label of df on its own line.
for column_label in df.columns:
    print(column_label)

mpg
cylinders
displacement
horsepower
weight
acceleration
year
origin
name


In [4]:
# First five rows, restricted to the 'mpg' and 'year' columns.
df.loc[:, ['mpg', 'year']].head(n=5)

Unnamed: 0,mpg,year
0,18.0,70
1,15.0,70
2,18.0,70
3,16.0,70
4,17.0,70


In [5]:
# Show the column labels as a pandas Index object.
df.columns

Index(['mpg', 'cylinders', 'displacement', 'horsepower', 'weight',
       'acceleration', 'year', 'origin', 'name'],
      dtype='object')

In [6]:
# Materialize the column labels as a plain Python list.
col_lst = df.columns.tolist()

In [7]:
# Echo the list of column labels built from df.columns.
col_lst

['mpg',
 'cylinders',
 'displacement',
 'horsepower',
 'weight',
 'acceleration',
 'year',
 'origin',
 'name']

In [8]:
# Desired column order for the rearranged frame: 'year', 'mpg' and
# 'horsepower' first, followed by the remaining columns.
new_lst = [
    'year',
    'mpg',
    'horsepower',
    'cylinders',
    'displacement',
    'weight',
    'acceleration',
    'origin',
    'name',
]

In [9]:
# Build a new frame whose columns follow the order given in new_lst.
# Selecting with a list of labels (rather than reindex) raises KeyError when a
# label in new_lst does not exist in df, instead of silently adding an
# all-NaN column for a misspelled name. df itself is left unchanged.
new_df = df[new_lst]

In [10]:
# The source frame still shows its original column order.
df.head(n=5)

Unnamed: 0,mpg,cylinders,displacement,...,year,origin,name
0,18.0,8,307.0,...,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,...,70,1,buick skylark 320
2,18.0,8,318.0,...,70,1,plymouth satellite
3,16.0,8,304.0,...,70,1,amc rebel sst
4,17.0,8,302.0,...,70,1,ford torino


In [11]:
# First five rows of the reordered frame.
new_df.head(n=5)

Unnamed: 0,year,mpg,horsepower,...,acceleration,origin,name
0,70,18.0,130.0,...,12.0,1,chevrolet chevelle malibu
1,70,15.0,165.0,...,11.5,1,buick skylark 320
2,70,18.0,150.0,...,11.0,1,plymouth satellite
3,70,16.0,150.0,...,12.0,1,amc rebel sst
4,70,17.0,140.0,...,10.5,1,ford torino


In [12]:
# Number of non-null 'year' entries for each distinct 'cylinders' value.
new_df.groupby('cylinders')[['year']].count()

Unnamed: 0_level_0,year
cylinders,Unnamed: 1_level_1
3,4
4,204
5,3
6,84
8,103


In [13]:
# Keep only the rows whose 'cylinders' value equals 8, then summarize
# the subset (row count, per-column non-null counts, dtypes).
v8_df = new_df.loc[new_df['cylinders'].eq(8)]
v8_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 103 entries, 0 to 364
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   year          103 non-null    int64  
 1   mpg           103 non-null    float64
 2   horsepower    103 non-null    float64
 3   cylinders     103 non-null    int64  
 4   displacement  103 non-null    float64
 5   weight        103 non-null    int64  
 6   acceleration  103 non-null    float64
 7   origin        103 non-null    int64  
 8   name          103 non-null    object 
dtypes: float64(4), int64(4), object(1)
memory usage: 8.0+ KB


In [15]:
# Keep only the rows whose 'cylinders' value equals 6, then summarize
# the subset (row count, per-column non-null counts, dtypes).
v6_df = new_df.loc[new_df['cylinders'].eq(6)]
v6_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 84 entries, 15 to 389
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   year          84 non-null     int64  
 1   mpg           84 non-null     float64
 2   horsepower    83 non-null     float64
 3   cylinders     84 non-null     int64  
 4   displacement  84 non-null     float64
 5   weight        84 non-null     int64  
 6   acceleration  84 non-null     float64
 7   origin        84 non-null     int64  
 8   name          84 non-null     object 
dtypes: float64(4), int64(4), object(1)
memory usage: 6.6+ KB


In [16]:
# Stack the 8-cylinder rows on top of the 6-cylinder rows (original index
# labels are kept), then summarize the combined frame.
big_engines = pd.concat(objs=[v8_df, v6_df], axis=0)
big_engines.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 187 entries, 0 to 389
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   year          187 non-null    int64  
 1   mpg           187 non-null    float64
 2   horsepower    186 non-null    float64
 3   cylinders     187 non-null    int64  
 4   displacement  187 non-null    float64
 5   weight        187 non-null    int64  
 6   acceleration  187 non-null    float64
 7   origin        187 non-null    int64  
 8   name          187 non-null    object 
dtypes: float64(4), int64(4), object(1)
memory usage: 14.6+ KB


In [17]:
# Preview the first ten rows of the combined frame.
big_engines.head(n=10)

Unnamed: 0,year,mpg,horsepower,...,acceleration,origin,name
0,70,18.0,130.0,...,12.0,1,chevrolet chevelle malibu
1,70,15.0,165.0,...,11.5,1,buick skylark 320
...,...,...,...,...,...,...,...
8,70,14.0,225.0,...,10.0,1,pontiac catalina
9,70,15.0,190.0,...,8.5,1,amc ambassador dpl


In [21]:
# Mean 'mpg' for every (origin, year) combination present in big_engines.
cool_pivot = big_engines.pivot_table(
    values=['mpg'],
    index=['origin', 'year'],
    aggfunc='mean',
)
# Plain-text rendering of the pivot table.
print(cool_pivot)

                   mpg
origin year           
1      70    15.272727
       71    15.866667
...                ...
3      80    32.700000
       81    24.800000

[21 rows x 1 columns]


In [22]:
# Rich (table) rendering of the pivot of mean mpg by origin and year.
cool_pivot

Unnamed: 0_level_0,Unnamed: 1_level_0,mpg
origin,year,Unnamed: 2_level_1
1,70,15.272727
1,71,15.866667
...,...,...
2,76,16.500000
2,78,16.600000


In [24]:
# Distinct 'year' values in big_engines, in order of first appearance.
big_engines['year'].unique()

array([70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 81, 80, 82])