### Dependencies

In [44]:
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
import statsmodels.formula.api as smf

### Data Preprocessing

In [45]:
# Once Data is cleaned, the code for preprocessing, loading the data will go here! '
# Importing cleaned data into dataframe
oil_prices_df = pd.read_csv("Resources_Clean/brent_oil_prices.csv")
electric_car_data_df = pd.read_csv("Resources_Clean/ecar_data_clean_me.csv")
electric_car_sales_df = pd.read_csv("Resources_Clean/ecar_sales_by_model_in_usa.csv")
charging_stations_df = pd.read_csv("Resources_Clean/ev_charging_stations.csv")

In [46]:
# Check and Drop Null Values
oil_prices_df.isnull().sum()

Unnamed: 0    0
date          0
price         0
dtype: int64

In [47]:
electric_car_data_df.isnull().sum()

Unnamed: 0          0
brand               0
model               0
accel_sec           0
top_speed_kmh       0
range_km            0
battery_pack_kwh    0
efficiency_whkm     0
fast_charge_kmh     0
rapid_charge        0
power_train         0
plug_type           0
body_style          0
price_euro          0
dtype: int64

In [48]:
electric_car_sales_df.isnull().sum()

Unnamed: 0     0
make           0
model          0
jan_12        45
feb_12        52
              ..
aug_19         1
sep_19         1
oct_19         1
nov_19         1
dec_19         0
Length: 99, dtype: int64

In [49]:
# Filling Null Values with 0 and checking
electric_car_sales_df.fillna(value = 0, inplace = True)
electric_car_sales_df.isnull().sum()

Unnamed: 0    0
make          0
model         0
jan_12        0
feb_12        0
             ..
aug_19        0
sep_19        0
oct_19        0
nov_19        0
dec_19        0
Length: 99, dtype: int64

In [50]:
charging_stations_df.isnull().sum()

Unnamed: 0          0
station_name        0
street_address      0
city                0
access_days_time    0
dtype: int64

### Modelling with Oil Price Feature for June 2019

In [51]:
# Filtering Oil price data
new_oil_prices_df = oil_prices_df.loc[(oil_prices_df['date'] == "2019-06")]
new_oil_prices_df

Unnamed: 0.1,Unnamed: 0,date,price
385,385,2019-06,64.2205


In [52]:
electric_car_sales_df['jun_19'].sum() # Target

1249037.0

In [None]:
# Training Data
X_train = new_oil_prices_df["price"] # Our intended features, i.e Gas Prices, Infastructure, Crude Oil, can obtain these from dataframes created
y_train = electric_car_sales_df['jun_19'].sum()  # Target wanting to Achieve

model = LinearRegression() # Fitting our model
model.fit(X_train, y_train)

y_pred = pd.Series(model.predict(X_train), index = X_train.index) # Predicting total sales using Oil price of a month

### Multiple Linear Regression model using Car Features for June 2019

In [53]:
# Filtering the data
electric_car_data_df.head()

Unnamed: 0.1,Unnamed: 0,brand,model,accel_sec,top_speed_kmh,range_km,battery_pack_kwh,efficiency_whkm,fast_charge_kmh,rapid_charge,power_train,plug_type,body_style,price_euro
0,0,Tesla,Model 3 Long Range Dual Motor,4.6,233,460,70.0,161,940,Yes,AWD,Type 2 CCS,Sedan,55480
1,1,Volkswagen,ID.3 Pure,10.0,160,270,45.0,167,250,Yes,RWD,Type 2 CCS,Hatchback,30000
2,2,Polestar,2,4.7,210,400,75.0,181,620,Yes,AWD,Type 2 CCS,Liftback,56440
3,3,BMW,iX3,6.8,180,360,74.0,206,560,Yes,RWD,Type 2 CCS,SUV,68040
4,4,Honda,e,9.5,145,170,28.5,168,190,Yes,RWD,Type 2 CCS,Hatchback,32997


In [None]:
# Training Data
X_train = electric_car_data_df["range_km", "efficiency_whkm", "fast_charge_kmh"] # Our intended features, i.e Gas Prices, Infastructure, Crude Oil, can obtain these from dataframes created
y_train = electric_car_sales_df['jun_19'].sum()  # Target wanting to Achieve

model = LinearRegression() # Fitting our model
model.fit(X_train, y_train)

y_pred = pd.Series(model.predict(X_train), index = X_train.index) # Predicting total sales using Car Features

### Model using Sum of Charging Stations 

In [56]:
# Getting our numbers for the Machine Learning Model
charging_stations_df.head()

Unnamed: 0.1,Unnamed: 0,station_name,street_address,city,access_days_time
0,0,BMW OF DARIEN,138-142 Ledge Rd,Darien,24 hours daily
1,1,Dunkin’ - Tesla Supercharger,893 E Main St,Meriden,24 hours daily; for Tesla use only
2,2,Town of Beacon Falls - Commuter Lot,105 N Main St,Beacon Falls,24 hours daily
3,3,OLD SAYBROOK VW,319 Middlesex Turnpike,Old Saybrook,24 hours daily
4,4,Fairfield Rail Station,80 Mill Plain Rd,Fairfield,24 hours daily


In [60]:
# Getting the sum of the stations
charging_stations_df['station_name'].count()

385

In [None]:
# Training Data
X_train = charging_stations_df["station_name"].count() # Our intended features, i.e Gas Prices, Infastructure, Crude Oil, can obtain these from dataframes created
y_train = electric_car_sales_df["jun_19"].sum()  # Target wanting to Achieve

model = LinearRegression() # Fitting our model
model.fit(X_train, y_train)

y_pred = pd.Series(model.predict(X_train), index = X_train.index) # Predicting total sales using sum of charging stations