# Electricity Usage Analytics of All States in the USA
## Exploratory Data Analysis and Data Preprocessing

#### Import Libraries and Packages

In [1]:
# Import necessary packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
plt.style.use('ggplot')

In [2]:
# Function to read CSV data from a URL into a DataFrame
def read_data_from_URL(url, **read_csv_kwargs):
    """Load a CSV file into a pandas DataFrame.

    Parameters
    ----------
    url : str, path object or file-like object
        Location of the CSV data; passed unchanged to ``pd.read_csv``.
    **read_csv_kwargs
        Optional keyword arguments forwarded to ``pd.read_csv`` (for
        example ``index_col=0`` to use a saved index column as the index,
        or ``dtype=...`` / ``low_memory=False`` for columns with mixed
        types). When none are given the call is exactly
        ``pd.read_csv(url)``, so existing callers are unaffected.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the CSV.
    """
    return pd.read_csv(url, **read_csv_kwargs)

In [3]:
# Base location of the raw CSV files in the project's GitHub repository
parent_url = 'https://raw.githubusercontent.com/nthammadi-uncc/electricity_usage_analysis/main/Data/'

# Electricity datasets (price, consumption, customers, emissions, generation)
electric_price_url = f'{parent_url}avgprice_annual.csv'
monthly_consumption_url = f'{parent_url}consumption_monthly.csv'
annual_customers_url = f'{parent_url}customers_annual.csv'
annual_emission_url = f'{parent_url}emission_annual.csv'
monthly_generation_url = f'{parent_url}generation_monthly.csv'

# Supplementary datasets (household income, presidential election results)
median_household_income_url = f'{parent_url}median_household_income.csv'
presidential_election_url = f'{parent_url}presidential_election_results.csv'

### Get Electricity Price data

In [4]:
# Load the annual average electricity price CSV into a DataFrame.
# NOTE(review): the preview shows an 'Unnamed: 0' column — presumably an index
# that was written into the CSV when it was saved; confirm and drop it if unused.
price_df = read_data_from_URL(electric_price_url)
# Preview the first rows to check the columns that were read
price_df.head()

Unnamed: 0,Year,State,Industry Sector Category,Residential,Commercial,Industrial,Transportation,Other,Total
0,2020,AK,Total Electric Industry,22.57,19.58,15.88,0.0,,19.82
1,2020,AL,Total Electric Industry,12.58,11.55,5.87,0.0,,9.84
2,2020,AR,Total Electric Industry,10.41,8.61,5.89,13.32,,8.32
3,2020,AZ,Total Electric Industry,12.27,10.11,6.07,9.38,,10.44
4,2020,CA,Total Electric Industry,20.45,17.53,14.27,10.07,,18.0


### Get Monthly Electricity Consumption & Generation data

In [5]:
# Load the monthly consumption CSV into a DataFrame.
# NOTE(review): the recorded output of this cell contains a stray warning
# fragment — possibly a pandas DtypeWarning about mixed-type columns; confirm
# and consider specifying dtypes when reading.
# NOTE(review): ENERGY_SOURCE values in the preview carry their own units
# (Short Tons, Barrels, Mcf), so the CONSUMPTION_MWh column name may not
# reflect the actual unit of each row — verify against the data source.
consumption_df = read_data_from_URL(monthly_consumption_url)
# Preview the first rows to check the columns that were read
consumption_df.head()

  if (await self.run_code(code, result,  async_=asy)):


Unnamed: 0,YEAR,MONTH,STATE,TYPE_OF_PRODUCER,ENERGY_SOURCE,CONSUMPTION_MWh
0,2001,1,AK,Total Electric Power Industry,Coal (Short Tons),47615.0
1,2001,1,AK,Total Electric Power Industry,Petroleum (Barrels),124998.0
2,2001,1,AK,Total Electric Power Industry,Natural Gas (Mcf),3941267.0
3,2001,1,AK,"Electric Generators, Electric Utilities",Coal (Short Tons),16535.0
4,2001,1,AK,"Electric Generators, Electric Utilities",Petroleum (Barrels),114198.0


In [6]:
# Load the monthly generation CSV into a DataFrame
generation_df = read_data_from_URL(monthly_generation_url)
# Preview the first rows to check the columns that were read
generation_df.head()

Unnamed: 0,YEAR,MONTH,STATE,TYPE_OF_PRODUCER,ENERGY_SOURCE,GENERATION_MWh
0,2001,1,AK,Total Electric Power Industry,Coal,46903.0
1,2001,1,AK,Total Electric Power Industry,Petroleum,71085.0
2,2001,1,AK,Total Electric Power Industry,Natural Gas,367521.0
3,2001,1,AK,Total Electric Power Industry,Hydroelectric Conventional,104549.0
4,2001,1,AK,Total Electric Power Industry,Wind,87.0


### Get Annual Electricity Customers (Consumers) Data

In [7]:
# Load the annual customers CSV into a DataFrame
customers_df = read_data_from_URL(annual_customers_url)
# Preview the first rows to check the columns that were read
customers_df.head()

Unnamed: 0,Year,State,Industry Sector Category,Residential,Commercial,Industrial,Transportation,Other,Total
0,2020,AK,Total Electric Industry,315208.0,61993.0,1129.0,0.0,,378330.0
1,2020,AL,Total Electric Industry,2280741.0,371888.0,7240.0,0.0,,2659869.0
2,2020,AR,Total Electric Industry,1413490.0,197869.0,35978.0,2.0,,1647339.0
3,2020,AZ,Total Electric Industry,2896339.0,331229.0,7595.0,2.0,,3235165.0
4,2020,CA,Total Electric Industry,13834719.0,1725533.0,148130.0,13.0,,15708395.0


### Get Annual Emissions Data (CO2, SO2, NOx)

In [8]:
# Load the annual emissions CSV into a DataFrame.
# NOTE(review): the preview header shows '\n' inside the CO2/SO2/NOx column
# names — presumably embedded line breaks from the source file; confirm, and
# rename these columns before selecting them by name.
emission_df = read_data_from_URL(annual_emission_url)
# Preview the first rows to check the columns that were read
emission_df.head()

Unnamed: 0,Year,State,Producer Type,Energy Source,CO2\n(Metric Tons),SO2\n(Metric Tons),NOx\n(Metric Tons)
0,1990,AK,Commercial Cogen,All Sources,824004.0,13198.0,3011.0
1,1990,AK,Commercial Cogen,Coal,821929.0,13191.0,3009.0
2,1990,AK,Commercial Cogen,Petroleum,2075.0,6.0,2.0
3,1990,AK,Commercial Non-Cogen,All Sources,0.0,149.0,42.0
4,1990,AK,Commercial Non-Cogen,Petroleum,0.0,149.0,42.0


### Get Median Household Income Data

In [9]:
# Load the median household income CSV into a DataFrame.
# The preview shows one column per year and full state names (plus a
# 'United States' row), unlike the two-letter state codes in the other tables.
# NOTE(review): this presumably needs reshaping and a state-name-to-code
# mapping before it can be joined with the other datasets — confirm downstream.
median_income_df = read_data_from_URL(median_household_income_url)
# Preview the first rows to check the columns that were read
median_income_df.head()

Unnamed: 0,State,2020,2019,2018,2017,2016,2015,2014,2013,2012,...,2009,2008,2007,2006,2005,2004,2003,2002,2001,2000
0,United States,67521.0,68703.0,63179.0,61136.0,59039.0,56516.0,53657.0,53585.0,51017.0,...,49777.0,50303.0,50233.0,48201.0,46326.0,44334.0,43318.0,42409.0,42228.0,41990.0
1,Alabama,54393.0,56200.0,49936.0,50865.0,47221.0,44509.0,42278.0,47320.0,43464.0,...,39980.0,44476.0,42212.0,37952.0,37150.0,36629.0,37255.0,37603.0,35160.0,35424.0
2,Alaska,74476.0,78394.0,68734.0,77987.0,75723.0,75112.0,67629.0,72472.0,63648.0,...,61604.0,63989.0,62993.0,56418.0,55891.0,55063.0,51837.0,52774.0,57363.0,52847.0
3,Arizona,66628.0,70674.0,62283.0,59700.0,57100.0,52248.0,49254.0,52611.0,47044.0,...,45739.0,46914.0,47215.0,46657.0,45245.0,43846.0,41166.0,39734.0,42704.0,39783.0
4,Arkansas,50540.0,54539.0,49781.0,49751.0,45907.0,42798.0,44922.0,39376.0,39018.0,...,36538.0,39586.0,40795.0,37057.0,36658.0,34984.0,32002.0,32387.0,33339.0,29697.0


### Get Presidential Election Results Data

In [10]:
# Load the presidential election results CSV into a DataFrame
election_results_df = read_data_from_URL(presidential_election_url)
# Preview the first rows to check the columns that were read
election_results_df.head()

Unnamed: 0,year,state,state_po,candidate,party_simplified,writein,candidate_votes,total_votes
0,2000,ALABAMA,AL,"BUSH, GEORGE W.",REPUBLICAN,False,941173,1666272
1,2000,ALABAMA,AL,"GORE, AL",DEMOCRAT,False,692611,1666272
2,2000,ALABAMA,AL,,OTHER,False,25896,1666272
3,2000,ALABAMA,AL,"BROWNE, HARRY",LIBERTARIAN,False,5893,1666272
4,2000,ALABAMA,AL,,OTHER,True,699,1666272
