## This Analysis will focus on answering the following questions: 

Q5/ Electricity production from renewable sources in the US

Q6/ Correlation of electricity production from renewable sources with the US dollar (USD) index

In [120]:
import pandas as pd 
import os 
import csv
import datetime as dt 

In [121]:
# Build the input paths component by component so they resolve on any OS.
# The previous single-string form embedded backslashes in a normal (non-raw)
# literal, which only worked on Windows and only because "\R" and "\T" are
# not recognised escape sequences.
raw_data_dir = os.path.join("Resources", "Raw Data")
US_filepath = os.path.join(raw_data_dir, "Table 2 - US_electricity_net_gen_from_renewable_en_1990_2020.csv")
USD_filepath = os.path.join(raw_data_dir, "Table 3 - US-dollar-index-historical-chart.csv")

# Load both raw tables with pandas' default CSV parsing.
US_electricity = pd.read_csv(US_filepath)
USD_data = pd.read_csv(USD_filepath)

In [122]:
# Keep only the rows whose YYYYMM code lies in 200501..201912 (both ends included).

in_study_window = US_electricity["YYYYMM"].between(200501, 201912)
US_electricity = US_electricity.loc[in_study_window]


In [124]:
# No day is provided in the date data, so assume every observation was
# collected on the first day of its month.

# Work on an explicit copy: US_electricity comes from a boolean .loc filter,
# and adding columns to such a slice triggers pandas' SettingWithCopyWarning.
US_electricity = US_electricity.copy()

US_electricity["YYYYMM"] = US_electricity["YYYYMM"].astype(str)

# Append the day "01", giving strings such as "20050101".
US_electricity["Date (YYYY-MM-01)"] = US_electricity["YYYYMM"] + "01"

# There is a 13th "month" that holds the cumulative total. Remove those rows.

# The month is the last two characters of the YYYYMM string.
US_electricity["Month"] = US_electricity["YYYYMM"].str[-2:].astype(int)

US_electricity = US_electricity.loc[US_electricity["Month"] != 13]



Unnamed: 0,MSN,YYYYMM,Value,Column_Order,Description,Unit,Date (YYYY-MM-01),Month
440,CLETPUS,200501,177013.907,1,"Electricity Net Generation From Coal, All Sectors",Million Kilowatthours,20050101,1
441,CLETPUS,200502,155818.014,1,"Electricity Net Generation From Coal, All Sectors",Million Kilowatthours,20050201,2
442,CLETPUS,200503,163612.7,1,"Electricity Net Generation From Coal, All Sectors",Million Kilowatthours,20050301,3
443,CLETPUS,200504,143083.239,1,"Electricity Net Generation From Coal, All Sectors",Million Kilowatthours,20050401,4
444,CLETPUS,200505,153957.962,1,"Electricity Net Generation From Coal, All Sectors",Million Kilowatthours,20050501,5


In [125]:
# Tidy the columns.
# Per the original author's note, the "Unit" column only contains million
# kilowatthours, so it carries no information and can be removed.

columns_to_drop = ["YYYYMM", "Month", "MSN", "Column_Order", "Unit"]
US_electricity = (
    US_electricity
    .drop(columns=columns_to_drop)
    .rename(columns={"Date (YYYY-MM-01)": "Date"})
)


Unnamed: 0,Value,Description,Date
440,177013.907,"Electricity Net Generation From Coal, All Sectors",20050101
441,155818.014,"Electricity Net Generation From Coal, All Sectors",20050201
442,163612.7,"Electricity Net Generation From Coal, All Sectors",20050301
443,143083.239,"Electricity Net Generation From Coal, All Sectors",20050401
444,153957.962,"Electricity Net Generation From Coal, All Sectors",20050501


In [127]:
# Convert the date strings to datetime objects.
# The strings are YYYYMM with "01" appended (e.g. "20050101"), so they contain
# no separators and the matching format is "%Y%m%d", not "%Y-%m-%d".
US_electricity["Date"] = pd.to_datetime(US_electricity["Date"], format="%Y%m%d")

US_electricity.head()

Unnamed: 0,Value,Description,Date
440,177013.907,"Electricity Net Generation From Coal, All Sectors",2005-01-01
441,155818.014,"Electricity Net Generation From Coal, All Sectors",2005-02-01
442,163612.7,"Electricity Net Generation From Coal, All Sectors",2005-03-01
443,143083.239,"Electricity Net Generation From Coal, All Sectors",2005-04-01
444,153957.962,"Electricity Net Generation From Coal, All Sectors",2005-05-01


In [128]:
# Split the table into two groups based on the "Description" column.
#   US_renewable: hydroelectric pumped storage, wood, waste, geothermal, solar, wind
#   US_oil:       coal, petroleum, natural gas, other gases, conventional hydroelectric power
# NOTE(review): conventional hydroelectric power is grouped with the fossil
# sources while pumped storage is grouped with the renewables. That looks
# swapped; the grouping is kept exactly as originally written. Confirm intent.

# Discard rows whose Value is the placeholder text "Not Available".
has_value = US_electricity["Value"].ne("Not Available")
US_electricity = US_electricity.loc[has_value]

renewable_descriptions = [
    "Electricity Net Generation From Hydroelectric Pumped Storage, All Sectors",
    "Electricity Net Generation From Wood, All Sectors",
    "Electricity Net Generation From Waste, All Sectors",
    "Electricity Net Generation From Geothermal, All Sectors",
    "Electricity Net Generation From Solar, All Sectors",
    "Electricity Net Generation From Wind, All Sectors",
]
US_renewable = US_electricity.loc[US_electricity["Description"].isin(renewable_descriptions)]

oil_descriptions = [
    "Electricity Net Generation From Coal, All Sectors",
    "Electricity Net Generation From Petroleum, All Sectors",
    "Electricity Net Generation From Natural Gas, All Sectors",
    "Electricity Net Generation From Other Gases, All Sectors",
    "Electricity Net Generation From Conventional Hydroelectric Power, All Sectors",
]
US_oil = US_electricity.loc[US_electricity["Description"].isin(oil_descriptions)]


In [129]:
# Cleaning data: give the two columns meaningful names, then drop the first
# 15 rows (index labels 0-14) of USD_data.
# NOTE(review): the earlier comment here said "top 30 rows", but the code has
# always removed 15. Confirm against the raw file.
rows = list(range(15))
USD_data = (
    USD_data
    .rename(columns={"Macrotrends Data Download": "Date", "Unnamed: 1": "Value"})
    .drop(index=rows)
)


In [130]:
# Parse the "Date" column as YYYY-MM-DD strings into datetime values.
parsed_dates = pd.to_datetime(USD_data["Date"], format="%Y-%m-%d")
USD_data = USD_data.assign(Date=parsed_dates)

In [131]:
# Keep only observations from 2005 through 2019 (both bounds inclusive).
sdate = dt.datetime(2005, 1, 1)    # start date
# End on 31 December; the previous bound of 30 December silently excluded
# any observation dated on the last day of 2019.
edate = dt.datetime(2019, 12, 31)  # end date

USD_data = USD_data.loc[(USD_data["Date"] >= sdate) & (USD_data["Date"] <= edate)]


In [134]:
# Make "Date" the index. set_index with a column name moves the column into
# the index, so no separate drop of the "Date" column is needed.

US_renewable = US_renewable.set_index("Date")
US_renewable.head()

Unnamed: 0_level_0,Value,Description
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2005-01-01,-725.387,Electricity Net Generation From Hydroelectric ...
2005-02-01,-345.608,Electricity Net Generation From Hydroelectric ...
2005-03-01,-496.889,Electricity Net Generation From Hydroelectric ...
2005-04-01,-337.538,Electricity Net Generation From Hydroelectric ...
2005-05-01,-466.254,Electricity Net Generation From Hydroelectric ...


In [135]:
# Make "Date" the index; the column is moved into the index in one step.
US_oil = US_oil.set_index("Date")
US_oil.head()

Unnamed: 0_level_0,Value,Description
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2005-01-01,177013.907,"Electricity Net Generation From Coal, All Sectors"
2005-02-01,155818.014,"Electricity Net Generation From Coal, All Sectors"
2005-03-01,163612.7,"Electricity Net Generation From Coal, All Sectors"
2005-04-01,143083.239,"Electricity Net Generation From Coal, All Sectors"
2005-05-01,153957.962,"Electricity Net Generation From Coal, All Sectors"


In [137]:
# Make "Date" the index; the column is moved into the index in one step.
USD_data = USD_data.set_index("Date")
USD_data.head()

Unnamed: 0_level_0,Value
Date,Unnamed: 1_level_1
2005-01-01,95.45
2005-02-01,95.698
2005-03-01,95.307
2005-04-01,96.234
2005-05-01,96.47


In [141]:
# Export the cleaned data sets as CSV, keeping the date index as the first column.

# Join the path components with os.path.join so the export also works outside
# Windows, where a backslash is part of the file name and not a separator.
clean_data_dir = os.path.join("Resources", "Clean Data")
os.makedirs(clean_data_dir, exist_ok=True)  # to_csv does not create missing folders

US_renewable.to_csv(os.path.join(clean_data_dir, "Table 2a.csv"), index=True)

# NOTE(review): the two files below are written without a ".csv" extension,
# unlike "Table 2a.csv". The names are kept unchanged in case other notebooks
# read them by these exact names. Confirm and align.
US_oil.to_csv(os.path.join(clean_data_dir, "Table 2b"), index=True)

USD_data.to_csv(os.path.join(clean_data_dir, "Table 3"), index=True)