### EnergyETL

In [None]:
# import dependencies
import pandas as pd
from sqlalchemy import create_engine

In [None]:
# Path to the raw renewable-energy-potential CSV (relative to the notebook)
file_path = "Resources/us_potential_energy.csv"

# Load the CSV into a DataFrame and preview the first five rows
energy_df = pd.read_csv(filepath_or_buffer=file_path)
energy_df.head(5)

In [None]:
# Columns to keep, in output order: the auto-named "Unnamed: 0" column
# followed by one *_GWh column per energy technology
kept_columns = [
    "Unnamed: 0",
    "urbanUtilityScalePV_GWh",
    "ruralUtilityScalePV_GWh",
    "rooftopPV_GWh",
    "CSP_GWh",
    "onshoreWind_GWh",
    "offshoreWind_GWh",
    "biopowerSolid_GWh",
    "biopowerGaseous_GWh",
    "geothermalHydrothermal_GWh",
    "EGSGeothermal_GWh",
    "hydropower_GWh",
]
energy_df = energy_df[kept_columns]

# Preview the trimmed DataFrame
energy_df.head()

In [None]:
# Replacement labels, assigned positionally: the list must stay in the same
# order (and have the same length) as the DataFrame's current columns
renamed_columns = [
    "State",
    "Urban_Solar",
    "Rural_Solar",
    "Rooftop_Solar",
    "CSP_Solar",
    "Onshore_Wind",
    "Offshore_Wind",
    "Biopower_Solid",
    "Biopower_Gaseous",
    "Geothermal_Hydrothermal",
    "EGS_Geothermal",
    "Hydropower",
]
energy_df.columns = renamed_columns
energy_df.head()

In [None]:
# Replace missing (NaN) values with 0.
# The result is rebound rather than filled with inplace=True: energy_df was
# produced by selecting columns from another DataFrame, and mutating such a
# frame in place can trigger pandas' SettingWithCopyWarning. The rebinding
# form yields the same values without that ambiguity.
energy_df = energy_df.fillna(0)
energy_df.head()

In [None]:
# Write the cleaned DataFrame to CSV, leaving out the index column
potential_energy = "Resources/us_potential_energy(clean).csv"
energy_df.to_csv(path_or_buf=potential_energy, index=False)

### Scrape Wikipedia for the US Census Population Estimates

In [None]:
# Wikipedia page listing US states and territories by population
wiki_url = "https://en.wikipedia.org/wiki/List_of_states_and_territories_of_the_United_States_by_population#cite_note-5"

# Parse every HTML table found on the page into a list of DataFrames
wiki_tables = pd.read_html(wiki_url)

# Headers of the two source columns that are kept
name_col = "Name"
pop_col = "Population estimate, July 1, 2018[5]"

# From the first table: keep the name and 2018 estimate columns, rename them
# to State / Population, and keep only the first 52 rows.
# NOTE(review): the 52-row cut is meant to drop the territories; this assumes
# they come after the first 52 rows of the table -- confirm against the page.
pop_est_df = (
    wiki_tables[0][[name_col, pop_col]]
    .rename(columns={name_col: "State", pop_col: "Population"})
    .iloc[0:52]
)

# Show the last rows of the resulting DataFrame
pop_est_df.tail()

In [None]:
# Read the energy consumption CSV into a DataFrame
tot_consum_csv = "Resources/total_energy_consumed.csv"
tot_consum_df = pd.read_csv(tot_consum_csv)

# Attach each state's population; the inner join keeps only the states that
# appear in both DataFrames
tot_consum_df = tot_consum_df.merge(pop_est_df, on='State', how="inner")

# Unit-conversion constants.
# 1 BTU = 1055.05585 J and 1 GWh = 3.6e12 J, so 1 BTU = 2.93071e-10 GWh.
# (The factor was previously truncated to 2.9e-10, which understated every
# total by roughly 1%.)
BTU_PER_MILLION_BTU = 1000000
GWH_PER_BTU = 2.93071e-10

# Total consumption per state in gigawatt hours.
# NOTE(review): the formula implies Total_Energy_Consumed is expressed in
# million BTU per capita (it is scaled by 1e6 and by population) -- confirm
# against the data source.
tot_energy_gwh = (
    tot_consum_df['Total_Energy_Consumed']
    * BTU_PER_MILLION_BTU
    * GWH_PER_BTU
    * tot_consum_df['Population']
)

# Add the GWh totals to the DataFrame and keep only the reporting columns
tot_consum_df['Total_Energy_Consumed_Gwh'] = tot_energy_gwh
tot_consum_df = tot_consum_df[['Rank', 'State', 'Total_Energy_Consumed_Gwh']]
tot_consum_df.head()

In [None]:
# Connect to the database.
# `import config.py` does not work: Python would look for a submodule named
# `py` inside a package called `config`, and the statement would not bind
# `sqldb_connect` in any case. Import the name directly from the local
# config module instead.
# NOTE(review): config.py is expected to define `sqldb_connect`, presumably a
# SQLAlchemy connection URL -- confirm.
from config import sqldb_connect

engine = create_engine(sqldb_connect)

In [None]:
# Load each DataFrame into its own database table; if a table with that
# name already exists it is replaced
pop_est_df.to_sql(
    name='state_population',
    con=engine,
    if_exists='replace',
)
energy_df.to_sql(
    name='energy_renewable',
    con=engine,
    if_exists='replace',
)
tot_consum_df.to_sql(
    name='energy_consumption',
    con=engine,
    if_exists='replace',
)

In [None]:
# Calculate each state's total renewable energy potential by summing across
# all energy columns, with State moved into the index so it is not summed
renewable_master_sum = energy_df.set_index('State').sum(axis=1)

# Turn the row totals into a one-column DataFrame named 'Total Renewable'
renewable_master_df = renewable_master_sum.to_frame(name='Total Renewable')

# Join the totals onto the consumption figures to build the master DataFrame
energy_master_df = pd.merge(tot_consum_df, renewable_master_df, on='State')
energy_master_df.head()

In [None]:
# Calculate renewable potential minus consumption for each state; a negative
# value means potential falls short of consumption
energy_diff = energy_master_df['Total Renewable'].sub(
    energy_master_df['Total_Energy_Consumed_Gwh']
)

# Add the difference to the master DataFrame as a new column
energy_master_df['Energy Difference'] = energy_diff

energy_master_df.head()

In [None]:
# Save the comparison DataFrame to CSV, leaving out the index column
energy_compare = "Resources/energy_totals_comparison.csv"
energy_master_df.to_csv(path_or_buf=energy_compare, index=False)

# Copy the same DataFrame into the database, replacing any existing table
energy_master_df.to_sql(
    name='energy_compare',
    con=engine,
    if_exists='replace',
)

In [None]:
# Query the database for states with a negative energy difference.
# Engine.execute() was removed in SQLAlchemy 2.0, so the query is run through
# pandas instead, which accepts the engine directly and returns a DataFrame.
negative_diff_query = 'select "State" from energy_compare where "Energy Difference" < 0'
negative_diff_df = pd.read_sql_query(negative_diff_query, con=engine)

# The query selects a single column; flatten it into a plain list of names
not_100_renew = negative_diff_df.iloc[:, 0].tolist()

# Output the query results
print('The following states do not have sufficient potential sources of renewable energy to meet their current energy consumption:')
for state in not_100_renew:
    print(state)
