In [8]:
# import pandas, matplotlib and numpy
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# import plotly
# 'express' is the concise, high-level interface; lower-level alternatives such as 'graph_objects' (conventionally imported as go) are also available
import plotly.express as px

# ------- Notes --------
# plotly supports maps, interactivity, and custom controls (such as sliders and dropdowns)
# subscription application recommended
# ----------------------

In [9]:
# load the fuel-consumption CSV (path is relative to the notebook's working directory)
data = pd.read_csv(filepath_or_buffer='Fuel_Consumption_2000-2022.csv')

In [10]:
# display the frame's dimensions; .shape is a (rows, columns) tuple
data.shape

(22556, 12)

In [17]:
# cast the label columns to the categorical dtype; astype returns a new frame,
# so `data` itself is left untouched
data_group = data.astype({
    'YEAR': 'category',
    'VEHICLE CLASS': 'category',
    'MAKE': 'category'
})

# summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns, one row per (YEAR, VEHICLE CLASS) pair.
# observed=True keeps only the pairs that actually occur in the data. With
# categorical group keys, leaving it implicit makes the result depend on the
# pandas version (and newer releases warn about the changing default); being
# explicit also guarantees no all-NaN rows for year/class combinations that
# do not exist, which would otherwise leak into the charts built from this frame.
data_to_display = (
    data_group
    .groupby(['YEAR', 'VEHICLE CLASS'], observed=True)
    .describe()
)
data_to_display

Unnamed: 0_level_0,Unnamed: 1_level_0,ID,ID,ID,ID,ID,ID,ID,ID,ENGINE SIZE,ENGINE SIZE,...,COMB (mpg),COMB (mpg),EMISSIONS,EMISSIONS,EMISSIONS,EMISSIONS,EMISSIONS,EMISSIONS,EMISSIONS,EMISSIONS
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
YEAR,VEHICLE CLASS,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
2000,COMPACT,105.0,329.219048,221.470782,1.0,54.00,375.0,549.00,624.0,105.0,2.348571,...,34.00,55.0,105.0,213.371429,33.433993,138.0,193.0,216.0,232.0,331.0
2000,FULL-SIZE,28.0,241.642857,174.074955,50.0,104.75,142.5,431.25,544.0,28.0,3.710714,...,28.25,30.0,28.0,248.535714,22.802667,218.0,229.5,246.0,260.5,331.0
2000,MID-SIZE,77.0,347.064935,202.177250,3.0,136.00,390.0,492.00,632.0,77.0,3.029870,...,29.00,34.0,77.0,239.000000,22.919769,189.0,223.0,235.0,260.0,292.0
2000,MINICOMPACT,11.0,328.727273,193.007301,28.0,208.00,419.0,483.50,486.0,11.0,3.263636,...,27.50,29.0,11.0,257.090909,21.092437,225.0,239.5,264.0,271.5,285.0
2000,MINIVAN,26.0,275.807692,152.079195,134.0,167.25,186.5,387.75,586.0,26.0,3.357692,...,26.00,26.0,26.0,266.076923,20.478131,246.0,253.0,261.0,274.0,331.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022,STATION WAGON - SMALL,19.0,22235.894737,217.971429,21624.0,22143.50,22260.0,22345.50,22549.0,19.0,1.905263,...,36.00,61.0,19.0,192.473684,39.748331,110.0,184.0,200.0,217.0,260.0
2022,SUBCOMPACT,79.0,21945.189873,252.686695,21621.0,21697.50,21945.0,22232.00,22324.0,79.0,3.098734,...,31.00,40.0,79.0,248.341772,51.964704,165.0,211.0,242.0,286.0,392.0
2022,SUV - SMALL,197.0,22081.477157,275.913854,21603.0,21878.00,22103.0,22274.00,22554.0,197.0,2.328426,...,32.00,49.0,197.0,229.857868,41.351160,136.0,204.0,224.0,252.0,387.0
2022,SUV - STANDARD,149.0,22044.738255,239.832470,21619.0,21840.00,22027.0,22210.00,22556.0,149.0,3.970470,...,25.00,42.0,149.0,305.174497,46.350418,156.0,272.0,309.0,343.0,412.0


In [18]:
# look at the index of the grouped summary: a MultiIndex of (YEAR, VEHICLE CLASS) pairs
data_to_display.index

MultiIndex([(2000,                  'COMPACT'),
            (2000,                'FULL-SIZE'),
            (2000,                 'MID-SIZE'),
            (2000,              'MINICOMPACT'),
            (2000,                  'MINIVAN'),
            (2000,     'PICKUP TRUCK - SMALL'),
            (2000,  'PICKUP TRUCK - STANDARD'),
            (2000, 'STATION WAGON - MID-SIZE'),
            (2000,    'STATION WAGON - SMALL'),
            (2000,               'SUBCOMPACT'),
            ...
            (2022,                  'MINIVAN'),
            (2022,     'PICKUP TRUCK - SMALL'),
            (2022,  'PICKUP TRUCK - STANDARD'),
            (2022,  'SPECIAL PURPOSE VEHICLE'),
            (2022, 'STATION WAGON - MID-SIZE'),
            (2022,    'STATION WAGON - SMALL'),
            (2022,               'SUBCOMPACT'),
            (2022,              'SUV - SMALL'),
            (2022,           'SUV - STANDARD'),
            (2022,               'TWO-SEATER')],
           names=['YEAR

In [19]:
# look at the dtype of every (column, statistic) pair in the grouped summary
data_to_display.dtypes

ID                count    float64
                  mean     float64
                  std      float64
                  min      float64
                  25%      float64
                  50%      float64
                  75%      float64
                  max      float64
ENGINE SIZE       count    float64
                  mean     float64
                  std      float64
                  min      float64
                  25%      float64
                  50%      float64
                  75%      float64
                  max      float64
CYLINDERS         count    float64
                  mean     float64
                  std      float64
                  min      float64
                  25%      float64
                  50%      float64
                  75%      float64
                  max      float64
FUEL CONSUMPTION  count    float64
                  mean     float64
                  std      float64
                  min      float64
                  25

In [28]:
# split the (YEAR, VEHICLE CLASS) index tuples into two parallel lists,
# walking the index once and keeping its row order
Year, VehicleClass = [], []
for year_label, class_label in data_to_display.index:
    Year.append(year_label)
    VehicleClass.append(class_label)

In [29]:
# pull the per-group mean of combined MPG out of the summary as a plain Python
# list (plotly express would also accept the Series directly)
CombineMPG = (
    data_to_display[('COMB (mpg)', 'mean')]
    .to_numpy()
    .tolist()
)

In [35]:
# first pass: a bare line chart of mean combined MPG by year, one coloured
# line (with point markers) per vehicle class; no title or custom labels yet
fig = px.line(x=Year, y=CombineMPG, color=VehicleClass, markers=True)
fig.show()

In [38]:
# second pass: the same chart without point markers, plus a readable title and
# axis / legend labels (update_layout returns the figure, so it is chained here)
fig_2 = px.line(x=Year, y=CombineMPG, color=VehicleClass, markers=False).update_layout(
    title='Average Combined MPG per Vehicle Class per Year',
    xaxis_title='Year',
    yaxis_title='Combined MPG Average',
    legend_title='Vehicle Class',
)

fig_2.show()

In [53]:
# animated version of the chart with fixed axis ranges.
# - animation_frame = Year creates one frame per year; because the same list is
#   also used for x, every frame holds only that year's points, so the markers
#   (markers = True) are what is visible as the animation advances.
# - animation_group = VehicleClass tells plotly which marks are "the same
#   object" from one frame to the next.
# - range_x / range_y pin both axes so they do not rescale between frames.
# - the x axis is deliberately linear: calendar years are evenly spaced, and a
#   logarithmic year axis carries no meaning and degrades tick placement.
fig_3 = px.line(
    x = Year,
    y = CombineMPG,
    color = VehicleClass,
    markers = True,
    color_discrete_sequence = px.colors.qualitative.Dark24,
    animation_frame = Year,
    animation_group = VehicleClass,
    range_x = [1999, 2023],
    range_y = [14, 40]
)

# human-readable title, axis labels and legend heading
fig_3.update_layout(
    title = 'Average Combined MPG per Vehicle Class per Year',
    xaxis_title = 'Year',
    yaxis_title = 'Combined MPG Average',
    legend_title = 'Vehicle Class'
)

# slant the year labels and add minor ticks with their own grid lines
fig_3.update_xaxes(
    tickangle = 45,
    minor = dict(
        ticklen = 5,
        tickcolor = 'black',
        showgrid = True
    )
)

fig_3.show()