In [2]:
# Import libraries and dependencies
import pandas as pd
from pathlib import Path

In [3]:
# Import the 2016 - 2020 alternative fuel stations data. 
alt_fuel_stations_2016_path = Path('Resources/alt_fuel_stations_2016.csv')
alt_fuel_stations_2017_path = Path('Resources/alt_fuel_stations_2017.csv')
alt_fuel_stations_2018_path = Path('Resources/alt_fuel_stations_2018.csv')
alt_fuel_stations_2019_path = Path('Resources/alt_fuel_stations_2019.csv')
alt_fuel_stations_2020_path = Path('Resources/alt_fuel_stations_2020.csv')

# Read in data setting the `index_col` to the Fuel Type.
alt_fuel_stations_2016_df = pd.read_csv(alt_fuel_stations_2016_path, index_col="Fuel_Type")
alt_fuel_stations_2017_df = pd.read_csv(alt_fuel_stations_2017_path, index_col="Fuel_Type")
alt_fuel_stations_2018_df = pd.read_csv(alt_fuel_stations_2018_path, index_col="Fuel_Type")
alt_fuel_stations_2019_df = pd.read_csv(alt_fuel_stations_2019_path, index_col="Fuel_Type")
alt_fuel_stations_2020_df = pd.read_csv(alt_fuel_stations_2020_path, index_col="Fuel_Type")


In [5]:
# Display the 2016 data.
alt_fuel_stations_2016_df.head()

Unnamed: 0_level_0,Number_Stations,Year
Fuel_Type,Unnamed: 1_level_1,Unnamed: 2_level_1
85E,3095,2016
Biodiesel,716,2016
CNG,1730,2016
Electric,45124,2016
Hydrogen,58,2016


In [6]:
# Display the 2017 data.
alt_fuel_stations_2017_df.head()

Unnamed: 0_level_0,Number_Stations,Year
Fuel_Type,Unnamed: 1_level_1,Unnamed: 2_level_1
85E,3379,2017
Biodiesel,704,2017
CNG,1682,2017
Electric,53117,2017
Hydrogen,63,2017


In [7]:
# Display the 2018 data.
alt_fuel_stations_2018_df.head()

Unnamed: 0_level_0,Number_Stations,Year
Fuel_Type,Unnamed: 1_level_1,Unnamed: 2_level_1
85E,3661,2018
Biodiesel,672,2018
CNG,1586,2018
Electric,69161,2018
Hydrogen,62,2018


In [8]:
# Display the 2019 data.
alt_fuel_stations_2019_df.head()

Unnamed: 0_level_0,Number_Stations,Year
Fuel_Type,Unnamed: 1_level_1,Unnamed: 2_level_1
85E,3794,2019
Biodiesel,607,2019
CNG,1574,2019
Electric,89433,2019
Hydrogen,62,2019


In [10]:
# Display the 2020 data.
alt_fuel_stations_2020_df.head()

Unnamed: 0_level_0,Number_Stations,Year
Fuel_Type,Unnamed: 1_level_1,Unnamed: 2_level_1
85E,3717,2020
Biodiesel,301,2020
CNG,863,2020
Electric,96626,2020
Hydrogen,46,2020


### Join 2016 to 2017. 

In [11]:
# Join the 2016 and 2017 data where the left suffix is 2016 and right suffix is 2017. 
alt_2016_2017 = alt_fuel_stations_2016_df.join(alt_fuel_stations_2017_df, lsuffix="_2016", rsuffix="_2017")

alt_2016_2017

Unnamed: 0_level_0,Number_Stations_2016,Year_2016,Number_Stations_2017,Year_2017
Fuel_Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
85E,3095,2016,3379,2017
Biodiesel,716,2016,704,2017
CNG,1730,2016,1682,2017
Electric,45124,2016,53117,2017
Hydrogen,58,2016,63,2017
LNG,140,2016,137,2017
Propane,3654,2016,3510,2017


### Join 2018, 2019, and 2020 data to the 2016 and 2017 data.

In [12]:
# Create a list to hold the 2018, 2019, and 2020 DataFrames and add "_2018", "_2019" and, "_2020" 
# suffixes to the 2018, 2019, and 2020 DataFrames using the `add_suffix()` method.
fuel_2018_2020 = [alt_fuel_stations_2018_df.add_suffix("_2018"), 
                  alt_fuel_stations_2019_df.add_suffix("_2019"), 
                  alt_fuel_stations_2020_df.add_suffix("_2020")]

# Join the 2016-2017 data with the 2018-2020 data. 
all_fuel = alt_2016_2017.join(fuel_2018_2020, how="inner")
all_fuel

Unnamed: 0_level_0,Number_Stations_2016,Year_2016,Number_Stations_2017,Year_2017,Number_Stations_2018,Year_2018,Number_Stations_2019,Year_2019,Number_Stations_2020,Year_2020
Fuel_Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
85E,3095,2016,3379,2017,3661,2018,3794,2019,3717,2020
Biodiesel,716,2016,704,2017,672,2018,607,2019,301,2020
CNG,1730,2016,1682,2017,1586,2018,1574,2019,863,2020
Electric,45124,2016,53117,2017,69161,2018,89433,2019,96626,2020
Hydrogen,58,2016,63,2017,62,2018,62,2019,46,2020
LNG,140,2016,137,2017,121,2018,114,2019,58,2020
Propane,3654,2016,3510,2017,3302,2018,3118,2019,2671,2020


### Drop all the columns with "year_" and sort the DataFrame.

In [13]:
# Get the column names
all_fuel.columns

Index(['Number_Stations_2016', 'Year_2016', 'Number_Stations_2017',
       'Year_2017', 'Number_Stations_2018', 'Year_2018',
       'Number_Stations_2019', 'Year_2019', 'Number_Stations_2020',
       'Year_2020'],
      dtype='object')

In [14]:
# Drop the columns with the "year_<year>".
cleaned_fuel = all_fuel.drop(['Year_2017', 'Year_2018', 'Year_2019', 'Year_2020'], axis=1)
cleaned_fuel

Unnamed: 0_level_0,Number_Stations_2016,Year_2016,Number_Stations_2017,Number_Stations_2018,Number_Stations_2019,Number_Stations_2020
Fuel_Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
85E,3095,2016,3379,3661,3794,3717
Biodiesel,716,2016,704,672,607,301
CNG,1730,2016,1682,1586,1574,863
Electric,45124,2016,53117,69161,89433,96626
Hydrogen,58,2016,63,62,62,46
LNG,140,2016,137,121,114,58
Propane,3654,2016,3510,3302,3118,2671


In [15]:
# Drop the columns with the "year_<year>".
cleaned_fuel = all_fuel.drop(['CNG'], axis=0)
cleaned_fuel

Unnamed: 0_level_0,Number_Stations_2016,Year_2016,Number_Stations_2017,Year_2017,Number_Stations_2018,Year_2018,Number_Stations_2019,Year_2019,Number_Stations_2020,Year_2020
Fuel_Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
85E,3095,2016,3379,2017,3661,2018,3794,2019,3717,2020
Biodiesel,716,2016,704,2017,672,2018,607,2019,301,2020
Electric,45124,2016,53117,2017,69161,2018,89433,2019,96626,2020
Hydrogen,58,2016,63,2017,62,2018,62,2019,46,2020
LNG,140,2016,137,2017,121,2018,114,2019,58,2020
Propane,3654,2016,3510,2017,3302,2018,3118,2019,2671,2020


In [16]:
cleaned_fuel.columns

Index(['Number_Stations_2016', 'Year_2016', 'Number_Stations_2017',
       'Year_2017', 'Number_Stations_2018', 'Year_2018',
       'Number_Stations_2019', 'Year_2019', 'Number_Stations_2020',
       'Year_2020'],
      dtype='object')

In [19]:
# Sort the DataFrame on all the columns in ascending order.  
cleaned_fuel = all_fuel.sort_values(by=['Number_Stations_2016', 'Year_2016', 'Number_Stations_2017',
       'Year_2017', 'Number_Stations_2018', 'Year_2018',
       'Number_Stations_2019', 'Year_2019', 'Number_Stations_2020',
       'Year_2020']) # axis=0, ascending=True

# Display the sorted DataFrame.
cleaned_fuel

Unnamed: 0_level_0,Number_Stations_2016,Year_2016,Number_Stations_2017,Year_2017,Number_Stations_2018,Year_2018,Number_Stations_2019,Year_2019,Number_Stations_2020,Year_2020
Fuel_Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Hydrogen,58,2016,63,2017,62,2018,62,2019,46,2020
LNG,140,2016,137,2017,121,2018,114,2019,58,2020
Biodiesel,716,2016,704,2017,672,2018,607,2019,301,2020
CNG,1730,2016,1682,2017,1586,2018,1574,2019,863,2020
85E,3095,2016,3379,2017,3661,2018,3794,2019,3717,2020
Propane,3654,2016,3510,2017,3302,2018,3118,2019,2671,2020
Electric,45124,2016,53117,2017,69161,2018,89433,2019,96626,2020
