# In the earlier module on OOP, we used this example involving vehicles

In [1]:
class Vehicle:
    """A vehicle described by fuel efficiency, tank size, seating, and transmission.

    Every constructor argument is optional and defaults to None.
    ``max_range`` is derived once, at construction time, from ``mpg`` and
    ``tank_size``; it is not recomputed if those attributes change later.
    """

    def __init__(self, mpg=None, tank_size=None, passengers=None, transmission=None):
        self.mpg, self.tank_size = mpg, tank_size
        self.passengers, self.transmission = passengers, transmission
        # Computed last because it reads the mpg and tank_size stored above
        self.max_range = self.calculate_max_range()

    def calculate_max_range(self):
        """Return ``mpg * tank_size``, or None when either value is missing."""
        inputs_known = self.mpg is not None and self.tank_size is not None
        return self.mpg * self.tank_size if inputs_known else None
    
def print_vehicle(vehicle):
    """Print a five-line summary of a vehicle's attributes to standard output.

    Reads ``mpg``, ``tank_size``, ``passengers``, ``transmission`` and
    ``max_range`` from ``vehicle`` and returns None.
    """
    # A single print call with a newline separator emits one line per field
    print(
        f"Fuel efficiency: {vehicle.mpg} mpg",
        f"Fuel capacity: {vehicle.tank_size} gallons",
        f"Passenger capacity: {vehicle.passengers} passengers",
        f"Transmission: {vehicle.transmission}",
        f"Max range: {vehicle.max_range} miles",
        sep="\n",
    )

# A class like this works well for describing one vehicle at a time, but what if we want to compare different vehicles to each other?

In [2]:
# Each record is (model name, tank size, mpg); zip(*) transposes the records
# into one sequence per field, and each is converted to a list
vehicles, tank_size, mpg = map(list, zip(
    ("Honda Civic", 12.4, 35),
    ("Toyota Camry", 15.8, 32),
    ("Ford F-150", 26.0, 20),
))

In [3]:
import pandas as pd

In [4]:
# Start from an empty DataFrame that declares only its column labels;
# no rows exist yet
df = pd.DataFrame(
    data=None,
    columns=["vehicles", "tank_size", "mpg"],
)

In [5]:
# head() returns the first rows; the frame is still empty, so only the column labels appear
df.head()

Unnamed: 0,vehicles,tank_size,mpg


In [6]:
# Fill each column by assigning the matching list to its label
for column, values in (
    ("vehicles", vehicles),
    ("tank_size", tank_size),
    ("mpg", mpg),
):
    df[column] = values

# head() shows the first 5 rows unless a different count is passed
df.head()

Unnamed: 0,vehicles,tank_size,mpg
0,Honda Civic,12.4,35
1,Toyota Camry,15.8,32
2,Ford F-150,26.0,20


In [7]:
# Add a new column from a list: one value per existing row, in row order.
# NOTE(review): this cell previously defined passengers = [5, 5, 3] but then
# assigned a separate literal [5, 7, 3], leaving the variable unused. The
# assigned values are kept so the frame matches the displayed output;
# confirm which seating figures are intended.
passengers = [5, 7, 3]
df["passengers"] = passengers

df.head()

Unnamed: 0,vehicles,tank_size,mpg,passengers
0,Honda Civic,12.4,35,5
1,Toyota Camry,15.8,32,7
2,Ford F-150,26.0,20,3


In [8]:
# Derive a new column with element-wise math: tank size times fuel efficiency.
# Series.mul is the method form of the * operator and works row by row.
df["range"] = df["tank_size"].mul(df["mpg"])

df.head()

Unnamed: 0,vehicles,tank_size,mpg,passengers,range
0,Honda Civic,12.4,35,5,434.0
1,Toyota Camry,15.8,32,7,505.6
2,Ford F-150,26.0,20,3,520.0


In [9]:
# Selecting a single column with brackets returns a pandas Series
series = df["range"]
series.head()

0    434.0
1    505.6
2    520.0
Name: range, dtype: float64

In [10]:
# to_list() converts the Series values into a plain Python list
series.to_list()

[434.0, 505.6, 520.0]

# Not only can we create a DataFrame manually in Python, but we can also load in common spreadsheet formats like CSV and Excel

Vehicle data from the Government of Canada:<br /> https://open.canada.ca/data/en/dataset/98f1a129-f628-4ce4-b24d-6f16bf24dd64/resource/b6100f60-5e63-437d-b122-db76c467c0a7

In [11]:
# Load the CSV into a DataFrame; the path is relative, so the file is looked up
# in the notebook's working directory
vehicle_df = pd.read_csv("MY2023_Fuel_Consumption_Ratings.csv")
vehicle_df.head()

Unnamed: 0,Model Year,Make,Model,Vehicle Class,Engine Size (L),Cylinders,Transmission,Fuel Type,Fuel Consumption City (L/100 km),Fuel Consumption Hwy (L/100 km),Fuel Consumption Comb (L/100 km),Fuel Consumption Comb (mpg),CO2 Emissions (g/km),CO2 Rating,Smog Rating
0,2023,Acura,Integra,Full-size,1.5,4,AV7,Z,7.9,6.3,7.2,39,167,6,7
1,2023,Acura,Integra A-SPEC,Full-size,1.5,4,AV7,Z,8.1,6.5,7.4,38,172,6,7
2,2023,Acura,Integra A-SPEC,Full-size,1.5,4,M6,Z,8.9,6.5,7.8,36,181,6,6
3,2023,Acura,MDX SH-AWD,SUV: Small,3.5,6,AS10,Z,12.6,9.4,11.2,25,263,4,5
4,2023,Acura,MDX SH-AWD Type S,SUV: Standard,3.0,6,AS10,Z,13.8,11.2,12.4,23,291,4,5


In [12]:
# head() accepts the number of rows to return; here the first 10 instead of the default 5
vehicle_df.head(10)

Unnamed: 0,Model Year,Make,Model,Vehicle Class,Engine Size (L),Cylinders,Transmission,Fuel Type,Fuel Consumption City (L/100 km),Fuel Consumption Hwy (L/100 km),Fuel Consumption Comb (L/100 km),Fuel Consumption Comb (mpg),CO2 Emissions (g/km),CO2 Rating,Smog Rating
0,2023,Acura,Integra,Full-size,1.5,4,AV7,Z,7.9,6.3,7.2,39,167,6,7
1,2023,Acura,Integra A-SPEC,Full-size,1.5,4,AV7,Z,8.1,6.5,7.4,38,172,6,7
2,2023,Acura,Integra A-SPEC,Full-size,1.5,4,M6,Z,8.9,6.5,7.8,36,181,6,6
3,2023,Acura,MDX SH-AWD,SUV: Small,3.5,6,AS10,Z,12.6,9.4,11.2,25,263,4,5
4,2023,Acura,MDX SH-AWD Type S,SUV: Standard,3.0,6,AS10,Z,13.8,11.2,12.4,23,291,4,5
5,2023,Acura,RDX SH-AWD,SUV: Small,2.0,4,AS10,Z,11.0,8.6,9.9,29,232,5,6
6,2023,Acura,RDX SH-AWD A-SPEC,SUV: Small,2.0,4,AS10,Z,11.3,9.1,10.3,27,242,5,6
7,2023,Acura,TLX SH-AWD,Compact,2.0,4,AS10,Z,11.2,8.0,9.8,29,230,5,7
8,2023,Acura,TLX SH-AWD A-SPEC,Compact,2.0,4,AS10,Z,11.3,8.1,9.8,29,231,5,7
9,2023,Acura,TLX Type S,Compact,3.0,6,AS10,Z,12.3,9.4,11.0,26,256,5,5


In [13]:
# tail() is the counterpart of head(): it returns the last rows (5 when no count is given)
vehicle_df.tail()

Unnamed: 0,Model Year,Make,Model,Vehicle Class,Engine Size (L),Cylinders,Transmission,Fuel Type,Fuel Consumption City (L/100 km),Fuel Consumption Hwy (L/100 km),Fuel Consumption Comb (L/100 km),Fuel Consumption Comb (mpg),CO2 Emissions (g/km),CO2 Rating,Smog Rating
818,2023,Volvo,XC40 B5 AWD,SUV: Small,2.0,4,AS8,Z,10.2,7.9,9.2,31,215,5,5
819,2023,Volvo,XC60 B5 AWD,SUV: Small,2.0,4,AS8,Z,10.3,8.2,9.4,30,218,5,5
820,2023,Volvo,XC60 B6 AWD,SUV: Small,2.0,4,AS8,Z,11.1,8.7,10.0,28,233,5,7
821,2023,Volvo,XC90 B5 AWD,SUV: Standard,2.0,4,AS8,Z,10.5,8.4,9.6,29,223,5,5
822,2023,Volvo,XC90 B6 AWD,SUV: Standard,2.0,4,AS8,Z,11.9,9.1,10.6,27,249,5,7


In [18]:
# shape is a (rows, columns) tuple, the same convention NumPy arrays use
vehicle_df.shape

(823, 15)

In [19]:
# info() prints a per-column summary: column name, non-null count, and dtype
vehicle_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 823 entries, 0 to 822
Data columns (total 15 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   Model Year                        823 non-null    int64  
 1   Make                              823 non-null    object 
 2   Model                             823 non-null    object 
 3   Vehicle Class                     823 non-null    object 
 4   Engine Size (L)                   823 non-null    float64
 5   Cylinders                         823 non-null    int64  
 6   Transmission                      823 non-null    object 
 7   Fuel Type                         823 non-null    object 
 8   Fuel Consumption City (L/100 km)  823 non-null    float64
 9   Fuel Consumption Hwy (L/100 km)   823 non-null    float64
 10  Fuel Consumption Comb (L/100 km)  823 non-null    float64
 11  Fuel Consumption Comb (mpg)       823 non-null    int64  
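 12  CO2 Emissions (g/km)              823 non-null    int64  
 13  CO2 Rating                        823 non-null    int64  
 14  Smog Rating                       823 non-null    int64  
dtypes: float64(4), int64(6), object(5)
memory usage: 96.6+ KB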

In [20]:
# describe() reports count, mean, std, min, quartiles, and max for each column;
# by default only the numeric columns are summarized
vehicle_df.describe()

Unnamed: 0,Model Year,Engine Size (L),Cylinders,Fuel Consumption City (L/100 km),Fuel Consumption Hwy (L/100 km),Fuel Consumption Comb (L/100 km),Fuel Consumption Comb (mpg),CO2 Emissions (g/km),CO2 Rating,Smog Rating
count,823.0,823.0,823.0,823.0,823.0,823.0,823.0,823.0,823.0,823.0
mean,2023.0,3.139976,5.603888,12.37983,9.324666,11.005832,27.46051,256.45322,4.540705,5.243013
std,0.0,1.354157,1.954117,3.420741,2.273932,2.847258,7.549711,63.412909,1.266442,1.668878
min,2023.0,1.2,3.0,4.4,4.4,4.4,11.0,104.0,1.0,1.0
25%,2023.0,2.0,4.0,10.1,7.7,9.0,22.0,211.0,4.0,5.0
50%,2023.0,3.0,6.0,12.1,9.1,10.7,26.0,254.0,5.0,5.0
75%,2023.0,3.6,6.0,14.6,10.65,12.8,31.0,298.0,5.0,7.0
max,2023.0,8.0,16.0,30.3,20.9,26.1,64.0,608.0,9.0,8.0


In [24]:
# Write the frame to an Excel workbook at a relative path;
# index=False leaves the row index out of the file
vehicle_df.to_excel("fuel_consumption.xlsx", index=False)

In [25]:
# Read the workbook saved in the previous step back into a new DataFrame
excel = pd.read_excel("fuel_consumption.xlsx")
excel.head()

Unnamed: 0,Model Year,Make,Model,Vehicle Class,Engine Size (L),Cylinders,Transmission,Fuel Type,Fuel Consumption City (L/100 km),Fuel Consumption Hwy (L/100 km),Fuel Consumption Comb (L/100 km),Fuel Consumption Comb (mpg),CO2 Emissions (g/km),CO2 Rating,Smog Rating
0,2023,Acura,Integra,Full-size,1.5,4,AV7,Z,7.9,6.3,7.2,39,167,6,7
1,2023,Acura,Integra A-SPEC,Full-size,1.5,4,AV7,Z,8.1,6.5,7.4,38,172,6,7
2,2023,Acura,Integra A-SPEC,Full-size,1.5,4,M6,Z,8.9,6.5,7.8,36,181,6,6
3,2023,Acura,MDX SH-AWD,SUV: Small,3.5,6,AS10,Z,12.6,9.4,11.2,25,263,4,5
4,2023,Acura,MDX SH-AWD Type S,SUV: Standard,3.0,6,AS10,Z,13.8,11.2,12.4,23,291,4,5
