# Import Libraries


In [None]:
#Maths
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#Model
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from xgboost.sklearn import XGBRegressor
from sklearn.svm import SVR

#Processing
from sklearn.preprocessing import MinMaxScaler
from sklearn import metrics

# **Data Processing**

*   Load Data








In [None]:
data = pd.read_csv("tesladeliv2.csv",index_col=False)
data.head(20)
data.columns

Index(['Deliveries', 'Tesla Model 3 & Y', 'Tesla Model S & X', 'Unnamed: 3'], dtype='object')



*   Pre-processing/Cleaning Data




In [None]:
data.info()
print(data.shape)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24 entries, 0 to 23
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Deliveries         24 non-null     object
 1   Tesla Model 3 & Y  24 non-null     int64 
 2   Tesla Model S & X  24 non-null     int64 
 3   Unnamed: 3         24 non-null     int64 
dtypes: int64(3), object(1)
memory usage: 896.0+ bytes
(24, 4)


In [None]:
data["Total Q"] = data["Unnamed: 3"]
data.drop(columns=["Unnamed: 3"],inplace=True)
#Reformatting our Dataset quater column (Deliveries) to Pandas Datetime format
data['Deliveries'] = data["Deliveries"].str.split().apply(lambda x: x[::-1]).apply(lambda x: "-".join(x))

In [None]:
data.tail(2)

Unnamed: 0,Deliveries,Tesla Model 3 & Y,Tesla Model S & X,Total Q
22,2021-Q4,296850,11750,308600
23,2022-Q1,295324,14724,310048


In [None]:
data["Deliveries"] = pd.to_datetime(data["Deliveries"])

In [None]:
#Changing the Datatype of our Variables
data['Tesla Model 3 & Y'] = data['Tesla Model 3 & Y'].astype(float)
data['Tesla Model S & X'] = data['Tesla Model S & X'].astype(float)
data['Total Q'] = data['Total Q'].astype(float)
data.head(10)
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24 entries, 0 to 23
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Deliveries         24 non-null     datetime64[ns]
 1   Tesla Model 3 & Y  24 non-null     float64       
 2   Tesla Model S & X  24 non-null     float64       
 3   Total Q            24 non-null     float64       
dtypes: datetime64[ns](1), float64(3)
memory usage: 896.0 bytes


In [None]:
data.head(25)
# data.info()

Unnamed: 0,Deliveries,Tesla Model 3 & Y,Tesla Model S & X,Total Q
0,2016-04-01,0.0,14402.0,14402.0
1,2016-07-01,0.0,24821.0,24821.0
2,2016-10-01,0.0,22200.0,22200.0
3,2017-01-01,0.0,25051.0,25051.0
4,2017-04-01,0.0,22020.0,22020.0
5,2017-07-01,220.0,25930.0,26150.0
6,2017-10-01,1550.0,28320.0,29870.0
7,2018-01-01,8180.0,21800.0,29980.0
8,2018-04-01,18440.0,22300.0,40740.0
9,2018-07-01,55840.0,27660.0,83500.0




*   Processing Data



In [None]:
#Creating a array of Toatl Q column
total_np = np.array(data["Total Q"]).reshape(-1,1)

#Scaling our Total Q values for model and storing in new array
scaler = MinMaxScaler(feature_range=(0,1))
data_total_scaled = scaler.fit_transform(total_np)

#Decding a Training length, I chose 80%
train_len = math.ceil(len(total_np) * .8)
train_data_scaled = data_total_scaled[0:train_len, :]
print(data_total_scaled)
print(train_data_scaled ,train_len)

[[0.        ]
 [0.03524147]
 [0.02637614]
 [0.03601943]
 [0.0257673 ]
 [0.03973671]
 [0.05231933]
 [0.05269139]
 [0.08908627]
 [0.2337187 ]
 [0.25807215]
 [0.16437902]
 [0.27382072]
 [0.28001055]
 [0.33043911]
 [0.25061729]
 [0.2587182 ]
 [0.42344899]
 [0.56237866]
 [0.57661866]
 [0.63218173]
 [0.76746514]
 [0.99510225]
 [1.        ]]
[[0.        ]
 [0.03524147]
 [0.02637614]
 [0.03601943]
 [0.0257673 ]
 [0.03973671]
 [0.05231933]
 [0.05269139]
 [0.08908627]
 [0.2337187 ]
 [0.25807215]
 [0.16437902]
 [0.27382072]
 [0.28001055]
 [0.33043911]
 [0.25061729]
 [0.2587182 ]
 [0.42344899]
 [0.56237866]
 [0.57661866]] 20




*Spliting Data into Train/Test*






In [None]:
print(data_total_scaled.shape)
x_train = []
y_train = []

#Based on any 3 previous Quaters our model will predict the 4th Quater Accordingly
n = 3

#Training Data
#This is 80% of our Toal Data
for i in range(n,len(train_data_scaled )):
  x_train.append(train_data_scaled [i-n:i,0])
  y_train.append(train_data_scaled[i,0])

x_train, y_train = np.array(x_train), np.array(y_train)

print(x_train)
print(y_train)

(24, 1)
[[0.         0.03524147 0.02637614]
 [0.03524147 0.02637614 0.03601943]
 [0.02637614 0.03601943 0.0257673 ]
 [0.03601943 0.0257673  0.03973671]
 [0.0257673  0.03973671 0.05231933]
 [0.03973671 0.05231933 0.05269139]
 [0.05231933 0.05269139 0.08908627]
 [0.05269139 0.08908627 0.2337187 ]
 [0.08908627 0.2337187  0.25807215]
 [0.2337187  0.25807215 0.16437902]
 [0.25807215 0.16437902 0.27382072]
 [0.16437902 0.27382072 0.28001055]
 [0.27382072 0.28001055 0.33043911]
 [0.28001055 0.33043911 0.25061729]
 [0.33043911 0.25061729 0.2587182 ]
 [0.25061729 0.2587182  0.42344899]
 [0.2587182  0.42344899 0.56237866]]
[0.03601943 0.0257673  0.03973671 0.05231933 0.05269139 0.08908627
 0.2337187  0.25807215 0.16437902 0.27382072 0.28001055 0.33043911
 0.25061729 0.2587182  0.42344899 0.56237866 0.57661866]


In [None]:
#Testing Data
#This is the rest 20% of our Toal Data

test_data_scaled = data_total_scaled[train_len:,:]
x_test = []
y_test = []
for i in range(n,len(test_data_scaled)):
  x_test.append(test_data_scaled[i-n:i,0])
  y_test.append(test_data_scaled[i,0])

x_test,y_test = np.array(x_test), np.array(y_test)
print(x_test,y_test)


[[0.63218173 0.76746514 0.99510225]] [1.]


# Creating our ML Models

*   Making Various Models



In [None]:
model = LinearRegression()
model.fit(x_train,y_train)

model2 = DecisionTreeRegressor()
model2.fit(x_train,y_train)

model3 = XGBRegressor()
model3.fit(x_train,y_train)

model4 = SVR()
model4.fit(x_train,y_train)



SVR()





*   Testing our Models (based on x_test)





In [None]:
pred = model.predict(x_test)
print(pred)

pred2 = model2.predict(x_test)
print(pred2)

pred3 = model3.predict(x_test)
print(pred3)

pred4 = model4.predict(x_test)
print(pred4)

[1.05351715]
[0.57661866]
[0.56964]
[0.32111407]




*   Evaluation our Model (compaing our prediction with y_test)



In [None]:
# Model Rsquared Error Calculation
#On How similar our prediction was to the ACTUAL value y_test
mse = metrics.mean_squared_error(y_test, pred)
print(mse)
mse2 = metrics.mean_squared_error(y_test, pred2)
print(mse2)
mse3 = metrics.mean_squared_error(y_test, pred3)
print(mse3)
mse4 = metrics.mean_squared_error(y_test, pred4)
print(mse4)

0.002864085797932802
0.17925176007512164
0.18520974613185004
0.46088610448234507


In [None]:
print(scaler.inverse_transform(np.array(pred).reshape(-1,1)))
print(scaler.inverse_transform(np.array(y_test).reshape(-1,1)))
print(data_total_scaled[len(data_total_scaled)-1],y_test)
print(pred)

[[325870.1325824]]
[[310048.]]
[1.] [1.]
[1.05351715]


In [None]:
new_data = [x[0] for x in data_total_scaled[len(data_total_scaled)-3:]]
print(new_data)
new = [new_data]
pred2 = model.predict(new)
print(pred2)
print(scaler.inverse_transform(np.array(y_test).reshape(-1,1)))
print(scaler.inverse_transform(np.array(pred2).reshape(-1,1)))

[0.7674651441250685, 0.9951022506646463, 1.0]
[1.04109197]
[[310048.]]
[[322196.67737369]]


# Creating our Prediction Class
This is just to organize my code efficiently and use the various functions i can create within this class easily across our code.

*New array which we can now trust for all our predictions*

In [None]:
x_pred_data = data_total_scaled
xpdlen = len(x_pred_data)

In [51]:
class TeslaPredictor:
  def __init__(self,data,datalen,model,num,scaler):
    self.model = model
    self.scaler = scaler
    self.num = num
    self.dataset = data
    self.datasetlen = datalen
    self.x_pred = None
    self.prediction = None
    self.pred_value = None
    self.universal = self.dataset #Includes Real world scaled values from Total Q + Our Predicted scaled values by our model
    self.universallen = len(self.universal)
    self.all_transformed = [] #Transorming universal array scaled values to actual deliveries (including our predicted deliveries)
    self.yearly = {"Year":[],"Deliveries":[],"Savings":[],"Status":[]}
    self.quaterly = {"Year":[],"Deliveries":[],"Status":[]}

  def GetLastThree(self):
    self.x_pred = [[x[0] for x in self.universal[int(self.universallen)-3:]]]
    return self.x_pred
    
  def Prediction(self):
    for i in range(self.num):
      self.x_pred = self.GetLastThree()
      self.prediction = self.model.predict(self.x_pred)
      #Update our Universal array & its len with our predictions
      self.universal = np.append(self.universal,np.array(self.prediction).reshape(-1,1),axis=0)
      self.universallen = len(self.universal)
  
  def getdeliveries(self):
    for i in range(self.universallen):
      self.all_transformed = np.append(self.all_transformed,int(self.scaler.inverse_transform(np.array(self.universal[i]).reshape(-1,1))))

  def getyearly(self):
    n =4
    self.arr = self.all_transformed[3:]
    startyear = 2017

    for i in range(0,len(self.arr),n):
       self.yearly["Year"].append(f"{startyear}")
       self.yearly["Deliveries"].append(sum(self.arr[i:i+n].tolist()))
       self.yearly["Savings"].append(sum(self.arr[:i+n].tolist())*3)

       if i < self.datasetlen:
         self.yearly["Status"].append("Reported")
       else:
          self.yearly["Status"].append("Predicted")

       startyear +=1

  def getquaterly(self):
    n = 1
    self.arr = self.all_transformed[len(self.all_transformed)-4:]
    for i in range(0,len(self.arr),n):
      self.quaterly["Year"].append(f"2022-Q{i +1}")
      self.quaterly["Deliveries"].append(sum(self.arr[i:i+n].tolist()))
      self.quaterly["Status"].append("Predicted")




  
  def Report(self):
    print("All Delv + Pred delv from Q2 2016 \n",self.all_transformed)
    print(self.yearly)


In [None]:
years = 9
q = (4 * years) - 1
tp = TeslaPredictor(x_pred_data,xpdlen,model,q,scaler)
tp.Prediction()
tp.getdeliveries()
tp.getyearly()
tp.Report()

All Delv + Pred delv from Q2 2016 
 [  14402.   24821.   22200.   25051.   22020.   26150.   29870.   29980.
   40740.   83500.   90700.   63000.   95355.   97186.  112095.   88496.
   90891.  139593.  180667.  184877.  201304.  241300.  308600.  310048.
  322196.  367577.  409430.  440666.  479179.  527214.  576798.  628023.
  685090.  748196.  816019.  889255.  969200. 1056235. 1150606. 1253070.
 1364485. 1485555. 1617049. 1759905. 1915133. 2083782. 2267004. 2466066.
 2682344. 2917320. 3172609. 3449971. 3751314. 4078711. 4434413. 4820869.
 5240737. 5696906. 6192515.]
{'Year': ['2017', '2018', '2019', '2020', '2021', '2022', '2023', '2024', '2025', '2026', '2027', '2028', '2029', '2030'], 'Deliveries': [103091.0, 244920.0, 367636.0, 499647.0, 936081.0, 1409251.0, 2023857.0, 2877328.0, 4065296.0, 5720159.0, 8025824.0, 11238339.0, 15714409.0, 21951027.0], 'Savings': [309273.0, 1044033.0, 2146941.0, 3645882.0, 6454125.0, 10681878.0, 16753449.0, 25385433.0, 37581321.0, 54741798.0, 7881927

In [None]:
finaldata = pd.DataFrame(tp.yearly)
finaldata.head(20)
finaldata.to_csv("TeslaCO2MLReport.csv")

In [None]:
finaldata.head(20)

Unnamed: 0,Year,Deliveries,Savings,Status
0,2017,103091.0,309273.0,Reported
1,2018,244920.0,1044033.0,Reported
2,2019,367636.0,2146941.0,Reported
3,2020,499647.0,3645882.0,Reported
4,2021,936081.0,6454125.0,Reported
5,2022,1409251.0,10681878.0,Reported
6,2023,2023857.0,16753449.0,Predicted
7,2024,2877328.0,25385433.0,Predicted
8,2025,4065296.0,37581321.0,Predicted
9,2026,5720159.0,54741798.0,Predicted


In [52]:
years2 = 1
q2 = (4 * years2) - 1
tp2 = TeslaPredictor(x_pred_data,xpdlen,model,q2,scaler)
tp2.Prediction()
tp2.getdeliveries()
tp2.getyearly()
tp2.getquaterly()
tp2.Report()
print(tp2.quaterly)

All Delv + Pred delv from Q2 2016 
 [ 14402.  24821.  22200.  25051.  22020.  26150.  29870.  29980.  40740.
  83500.  90700.  63000.  95355.  97186. 112095.  88496.  90891. 139593.
 180667. 184877. 201304. 241300. 308600. 310048. 322196. 367577. 409430.]
{'Year': ['2017', '2018', '2019', '2020', '2021', '2022'], 'Deliveries': [103091.0, 244920.0, 367636.0, 499647.0, 936081.0, 1409251.0], 'Savings': [309273.0, 1044033.0, 2146941.0, 3645882.0, 6454125.0, 10681878.0], 'Status': ['Reported', 'Reported', 'Reported', 'Reported', 'Reported', 'Reported']}
{'Year': ['2022-Q1', '2022-Q2', '2022-Q3', '2022-Q4'], 'Deliveries': [310048.0, 322196.0, 367577.0, 409430.0], 'Status': ['Predicted', 'Predicted', 'Predicted', 'Predicted']}


In [53]:
finaldata3 = pd.DataFrame(tp2.quaterly)
finaldata3["Status"][0] = "Reported"
finaldata3.to_csv("TeslaCO2MLReport2.csv")

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [None]:
finaldata3.head(20)

Unnamed: 0,Year,Deliveries,Status
0,2022,310048.0,Reported
1,2022,322196.0,Predicted
2,2022,367577.0,Predicted
3,2022,409430.0,Predicted
