In [1]:
#import denpendencies
import pandas as pd
import os
import pymongo

# EV Vehicle by State

In [2]:
#Read excel file and convert to dataframe
ev_path = "Data/ev_by_state.xlsx"
ev_df = pd.read_excel(ev_path, skiprows = 2, skipfooter = 1)
ev_df.keys()
ev_df = ev_df[["State", "Aprx. Registration Count"]]
ev_df = ev_df.rename(columns = {"Aprx. Registration Count": "EV Count"})
ev_df.head()

Unnamed: 0,State,EV Count
0,Alabama,900
1,Alaska,370
2,Arizona,9820
3,Arkansas,270
4,California,179600


# Total Vehicle Registration by State

In [3]:
#Read excel file and convert it to dataframe
total_registration_path = "Data/mv1.xlsx"
total_registration_df = pd.read_excel(total_registration_path, skiprows = 11, skipfooter = 2)
total_regist_df = total_registration_df.iloc[:, 0:2]
total_regist_df.keys
total_regist_df = total_regist_df.rename(columns = {"Unnamed: 0":"State","TAXICABS)": "Total Regist Count"})
total_regist_df.head()

Unnamed: 0,State,Total Regist Count
0,Alabama,2021395
1,Alaska,169412
2,Arizona,2371199
3,Arkansas,914265
4,California,14615499


# Cleanup Data

In [4]:
# Align states (spelling) in two dataframes
total_regist_df["State"] = ev_df["State"]
total_regist_df.head()

Unnamed: 0,State,Total Regist Count
0,Alabama,2021395
1,Alaska,169412
2,Arizona,2371199
3,Arkansas,914265
4,California,14615499


In [5]:
# Merge two dataframe by State
registration_df = pd.merge(ev_df, total_regist_df, on="State")

In [6]:
registration_df

Unnamed: 0,State,EV Count,Total Regist Count
0,Alabama,900,2021395
1,Alaska,370,169412
2,Arizona,9820,2371199
3,Arkansas,270,914265
4,California,179600,14615499
5,Colorado,7750,1768221
6,Connecticut,2710,1301153
7,Delaware,400,417700
8,District Of Columbia,660,191723
9,Florida,16600,7670861


In [7]:
registration_df.keys()

Index(['State', 'EV Count', 'Total Regist Count'], dtype='object')

In [8]:
# Add 'EV Percentage' column and calculate the EV percentage by state
registration_df["EV Percentage (%)"] = round(registration_df['EV Count']/registration_df['Total Regist Count']*100, 4)

In [9]:
# Set index as State
registration_df.set_index('State')

Unnamed: 0_level_0,EV Count,Total Regist Count,EV Percentage (%)
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Alabama,900,2021395,0.0445
Alaska,370,169412,0.2184
Arizona,9820,2371199,0.4141
Arkansas,270,914265,0.0295
California,179600,14615499,1.2288
Colorado,7750,1768221,0.4383
Connecticut,2710,1301153,0.2083
Delaware,400,417700,0.0958
District Of Columbia,660,191723,0.3442
Florida,16600,7670861,0.2164


# MongoDB

In [10]:
# Convert dataframe to dictionary
registration_dict = registration_df.to_dict('record')
registration_dict

[{'State': 'Alabama',
  'EV Count': 900,
  'Total Regist Count': 2021395,
  'EV Percentage (%)': 0.0445},
 {'State': 'Alaska',
  'EV Count': 370,
  'Total Regist Count': 169412,
  'EV Percentage (%)': 0.2184},
 {'State': 'Arizona',
  'EV Count': 9820,
  'Total Regist Count': 2371199,
  'EV Percentage (%)': 0.4141},
 {'State': 'Arkansas',
  'EV Count': 270,
  'Total Regist Count': 914265,
  'EV Percentage (%)': 0.0295},
 {'State': 'California',
  'EV Count': 179600,
  'Total Regist Count': 14615499,
  'EV Percentage (%)': 1.2288},
 {'State': 'Colorado',
  'EV Count': 7750,
  'Total Regist Count': 1768221,
  'EV Percentage (%)': 0.4383},
 {'State': 'Connecticut',
  'EV Count': 2710,
  'Total Regist Count': 1301153,
  'EV Percentage (%)': 0.2083},
 {'State': 'Delaware',
  'EV Count': 400,
  'Total Regist Count': 417700,
  'EV Percentage (%)': 0.0958},
 {'State': 'District Of Columbia',
  'EV Count': 660,
  'Total Regist Count': 191723,
  'EV Percentage (%)': 0.3442},
 {'State': 'Florida',

In [11]:
# Connect to MongoDB
conn = "mongodb://localhost:27017"
client = pymongo.MongoClient(conn)

In [12]:
# Declare the database
db = client.etl_db

# Drop collection if existing
db.ev.drop()

# Declare collection
db.ev

Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'etl_project_db'), 'ev')

In [13]:
# Insert the document into the database
db.ev.insert_many(registration_dict)

<pymongo.results.InsertManyResult at 0x12215c4c8>