# Importing Modules

In [33]:
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#Model
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from xgboost.sklearn import XGBRegressor
from sklearn.svm import SVR

#Processing
from sklearn.preprocessing import MinMaxScaler
from sklearn import metrics

# Data Processing

In [34]:
#Electricity Consumption
data = pd.read_csv("elec-mix-bar.csv",index_col=False)[6280:6317]
data.head(20)
#Electricity Production
data2 = pd.read_csv("elec-fossil-nuclear-renewables.csv",index_col=False)[6280:6317]
data2.tail(20)

Unnamed: 0,Entity,Code,Year,Electricity from fossil fuels (TWh),Electricity from nuclear (TWh),Electricity from renewables (TWh)
6297,World,OWID_WRL,2002,10249.42,2600.35,2882.15
6298,World,OWID_WRL,2003,10805.79,2576.19,2910.59
6299,World,OWID_WRL,2004,11255.24,2681.18,3142.96
6300,World,OWID_WRL,2005,11768.76,2685.38,3291.33
6301,World,OWID_WRL,2006,12295.7,2719.87,3440.17
6302,World,OWID_WRL,2007,13128.37,2665.34,3548.53
6303,World,OWID_WRL,2008,13182.88,2654.51,3799.0
6304,World,OWID_WRL,2009,13036.77,2617.32,3891.98
6305,World,OWID_WRL,2010,14005.49,2686.63,4195.52
6306,World,OWID_WRL,2011,14655.33,2576.2,4408.04


In [35]:
#Creating Total Electricity  Consumption in Twh and KWh
data["Total Consumption (TWh)"] = (data["Electricity from fossil fuels (TWh)"] +	data["Electricity from nuclear (TWh)"] + data["Electricity from renewables (TWh)"])
data["Total Consumption (kWh)"] = (data["Electricity from fossil fuels (TWh)"] +	data["Electricity from nuclear (TWh)"] + data["Electricity from renewables (TWh)"]) * 1000000000
data["% of Renewables"] = (((data["Electricity from nuclear (TWh)"] + data["Electricity from renewables (TWh)"]) * 1000000000) / data["Total Consumption (kWh)"] ) * 100
data.drop(columns=["Code"],inplace=True)
data.reset_index(inplace=True,drop=True)
data.head()

#Creating Total Electricity Production in Twh for only Renewables
data2["Total Renewable Production (TWh)"] = (data2["Electricity from nuclear (TWh)"] + data2["Electricity from renewables (TWh)"])
data2.drop(columns=["Code"],inplace=True)
data2.reset_index(inplace=True,drop=True)

In [36]:
data2.tail()

Unnamed: 0,Entity,Year,Electricity from fossil fuels (TWh),Electricity from nuclear (TWh),Electricity from renewables (TWh),Total Renewable Production (TWh)
32,World,2017,16090.55,2548.19,6238.26,8786.45
33,World,2018,16573.16,2620.13,6660.83,9280.96
34,World,2019,16544.52,2723.79,7022.66,9746.45
35,World,2020,16024.75,2635.81,7446.28,10082.09
36,World,2021,16992.07,2735.52,7792.93,10528.45


# Calculating Solar and Nuclear Metrics

Number of Solar Panels and Nuclear Power plants needed to satisfy the worlds 2021 Electricity consumption

In [37]:
TEC = data["Total Consumption (kWh)"].tail(1).values[0] #kWh
# Total Solar Power Production for a 400W Tesla Solar Panel
TSPP = (400 * 7 * 365)/1000 #kWh in 7hrs for a 400w in 1 year
n = TEC / TSPP
print(n/1000000000,"B","Tesla Solar Panels")
solarpanels = n/1000000000



26.928101761252446 B Tesla Solar Panels


In [38]:
TNPP = 3937000 * 24 * 365 #kwh for a 3937000W Nuclear Power Plant for 24hrs in a year at full capacity
n2 = TEC / TNPP
print(n2,"Palo Verde nuclear power plant")
plants = n2

#Creating Dataset
newmetrics = {"Tesla 400W Solar Panels (B)":solarpanels,"Palo Verde Nuclear power plants":plants,"Total Electricty Consumption 2021":data["Total Consumption (TWh)"].tail(1).values[0],"1 TSPP/ Year":"3.8KWh * 356","1 TNPP":"3937000KW * 24 * 365"}

797.9710114671371 Palo Verde nuclear power plant


Area of solar panles needed compared to cities

In [39]:
asp = 1.995 # Sqm #Area of 1 Tesla Solar Panel
totalarea = asp * n #Sqm

texasarea = 695662 * 1000000 #Sqm
nycarea = 	783.8  * 1000000 #Sqm
london = 1572 * 1000000 #Sqm
mumbai = 603.4 * 1000000 #Sqm
usa = 9372610	* 1000000 #Sqm

print(totalarea/texasarea,"Texas")
print(totalarea/nycarea,"NYCs")
print(totalarea/london,"Londons")
print(totalarea/mumbai,"Mumbais")
print((totalarea/usa),"USAs")

datacity = {"Total Area (Sqm)":totalarea,"Mumbai":totalarea/mumbai,"NYC":totalarea/nycarea,"London":totalarea/london,"Texas":totalarea/texasarea,"USA":totalarea/usa}
print(datacity)

0.07722365604805011 Texas
68.53988646810237 NYCs
34.17402227334519 Londons
89.03142693685554 Mumbais
0.005731761271801412 USAs
{'Total Area (Sqm)': 53721563013.69863, 'Mumbai': 89.03142693685554, 'NYC': 68.53988646810237, 'London': 34.17402227334519, 'Texas': 0.07722365604805011, 'USA': 0.005731761271801412}


# Data processing

In [40]:
#Data needed to be predicted is Total Renewable production till 2100
totaldatanp = np.array(data2["Total Renewable Production (TWh)"].apply(pd.to_numeric)).reshape(-1,1)
print(totaldatanp)

[[ 3546.565]
 [ 3687.041]
 [ 3860.288]
 [ 4084.592]
 [ 4139.842]
 [ 4280.392]
 [ 4432.196]
 [ 4457.337]
 [ 4667.631]
 [ 4730.874]
 [ 4963.294]
 [ 5087.775]
 [ 5130.474]
 [ 5203.837]
 [ 5329.563]
 [ 5380.33 ]
 [ 5387.2  ]
 [ 5482.5  ]
 [ 5486.78 ]
 [ 5824.14 ]
 [ 5976.71 ]
 [ 6160.04 ]
 [ 6213.87 ]
 [ 6453.51 ]
 [ 6509.3  ]
 [ 6882.15 ]
 [ 6984.24 ]
 [ 7128.4  ]
 [ 7455.36 ]
 [ 7776.93 ]
 [ 8022.77 ]
 [ 8402.23 ]
 [ 8786.45 ]
 [ 9280.96 ]
 [ 9746.45 ]
 [10082.09 ]
 [10528.45 ]]


In [41]:
#Scalling the data
scaler = MinMaxScaler(feature_range=(0,1))
totaldata = scaler.fit_transform(totaldatanp)

#Training length
train_len = math.ceil(len(totaldata) * .8) # 80%
train_data = totaldata[0:train_len]
# print(train_data)

x_train,y_train = [],[]

#For every 5 years we will predict the 6th year based on the previous 5 years

#Training Data
n = 5
for i in range(n,len(train_data)):
  x_train.append(train_data[i-n:i,0])
  y_train.append(train_data[i,0])

x_train, y_train = np.array(x_train), np.array(y_train)
print(x_train)
print(y_train)

#Testing Data
test_data = totaldata[train_len:]
x_test = []
y_test = []

for i in range(n,len(test_data)):
  x_test.append(test_data[i-n:i,0])
  y_test.append(test_data[i,0])

x_test,y_test = np.array(x_test), np.array(y_test)
print(x_test,y_test)


[[0.         0.02012007 0.04493385 0.07706042 0.08497376]
 [0.02012007 0.04493385 0.07706042 0.08497376 0.10510442]
 [0.04493385 0.07706042 0.08497376 0.10510442 0.12684698]
 [0.07706042 0.08497376 0.10510442 0.12684698 0.13044787]
 [0.08497376 0.10510442 0.12684698 0.13044787 0.16056781]
 [0.10510442 0.12684698 0.13044787 0.16056781 0.16962597]
 [0.12684698 0.13044787 0.16056781 0.16962597 0.20291497]
 [0.13044787 0.16056781 0.16962597 0.20291497 0.22074411]
 [0.16056781 0.16962597 0.20291497 0.22074411 0.2268598 ]
 [0.16962597 0.20291497 0.22074411 0.2268598  0.23736742]
 [0.20291497 0.22074411 0.2268598  0.23736742 0.25537487]
 [0.22074411 0.2268598  0.23736742 0.25537487 0.26264612]
 [0.2268598  0.23736742 0.25537487 0.26264612 0.26363009]
 [0.23736742 0.25537487 0.26264612 0.26363009 0.2772797 ]
 [0.25537487 0.26264612 0.26363009 0.2772797  0.27789272]
 [0.26264612 0.26363009 0.2772797  0.27789272 0.32621205]
 [0.26363009 0.2772797  0.27789272 0.32621205 0.34806431]
 [0.2772797  0

# **ML Modeling**

Creating models

In [42]:
model = LinearRegression()
model.fit(x_train,y_train)

model2 = DecisionTreeRegressor()
model2.fit(x_train,y_train)

model3 = XGBRegressor()
model3.fit(x_train,y_train)

model4 = SVR()
model4.fit(x_train,y_train)



SVR()

Predicting

In [43]:
pred = model.predict(x_test)
print(pred)

pred2 = model2.predict(x_test)
print(pred2)

pred3 = model3.predict(x_test)
print(pred3)

pred4 = model4.predict(x_test)
print(pred4)

[0.93730577 0.99300837]
[0.60590585 0.60590585]
[0.60461855 0.60461855]
[0.35728778 0.35571747]


MSE Calculation

In [44]:
mse = metrics.mean_squared_error(y_test, pred)
print(mse)
mse2 = metrics.mean_squared_error(y_test, pred2)
print(mse2)
mse3 = metrics.mean_squared_error(y_test, pred3)
print(mse3)
mse4 = metrics.mean_squared_error(y_test, pred4)
print(mse4)

2.5206462172338024e-05
0.13215889627174401
0.13309289368131752
0.37504374845293753


In [45]:
print(scaler.inverse_transform(np.array(pred).reshape(-1,1)))
print(scaler.inverse_transform(np.array(y_test).reshape(-1,1)))
print(pred)

[[10090.72609318]
 [10479.63522243]]
[[10082.09]
 [10528.45]]
[0.93730577 0.99300837]


# Creating our Prediction Class

In [46]:
class RenewablePredictor:
  def __init__(self,data,model,num,scaler):
    self.scaler = scaler
    self.data = data
    self.datalen = len(self.data)
    self.model = model

    self.x_pred = None
    self.num = num
    self.newdata = self.data
    self.newdatalen = len(self.newdata)

    self.converted = []
    self.yearly = {"Year":[],"Electricity from All Renewables (TWh)":[],"Status":[]}
  
  def GetLast(self):
    self.x_pred = [[x[0] for x in self.newdata[int(self.newdatalen)-5:]]]
    return self.x_pred
    
  def Prediction(self):
    for i in range(self.num):
      self.x_pred = self.GetLast()
      self.prediction = self.model.predict(self.x_pred)
      #Update our Universal array & its len with our predictions
      self.newdata = np.append(self.newdata,np.array(self.prediction).reshape(-1,1),axis=0)
      self.newdatalen = len(self.newdata)

  def transformdata(self):
    for i in range(self.newdatalen):
      self.converted = np.append(self.converted,int(self.scaler.inverse_transform(np.array(self.newdata[i]).reshape(-1,1))))

  def getyearly(self):
    self.arr = self.converted
    startyear = 1985

    for i in range(0,len(self.arr)):
       self.yearly["Year"].append(f"{startyear}")
       self.yearly["Electricity from All Renewables (TWh)"].append(self.arr[i].tolist())

       if i < self.datalen:
         self.yearly["Status"].append("Reported")
       else:
          self.yearly["Status"].append("Predicted")

       startyear +=1
  

  def Report(self):
    return self.yearly

   


# Prediction & Saving Datasets

Predicting for next 80 years till 2100

In [47]:
years = 80
rp = RenewablePredictor(totaldata,model,years,scaler)
rp.Prediction()
rp.transformdata()
rp.getyearly()

Creating Datasets for all information we aquired in the code


*   Number of Solar Panels 
*   Number of Nuclear Power Plants 


*   Area of solar panels needed
*   Predicting Total Global Renewable(Nuclear +other Renewables) electricity production till 2100






In [48]:
finaldata = pd.DataFrame(rp.Report())
cityarea = pd.DataFrame(datacity,index=[0])
metricsdata = pd.DataFrame(newmetrics,index=[0])

finaldata.to_csv("ReneablesReport.csv")
data.to_csv("ElectricityConsumption.csv")
cityarea.to_csv("CityAreaData.csv")
metricsdata.to_csv("Information.csv")