# Load Database with state and energy data

## Import libraries

In [1]:
# Dependencies
import pandas as pd
import os
import sqlite3

In [2]:
# Relative path of the raw state latitude/longitude CSV
state_path = os.path.join(
    os.path.join("assets", "data", "state_coordinates"),
    "statelatlong.csv",
)

In [3]:
# Load the state coordinates CSV into a DataFrame, decoding it as ISO-8859-1
state_coordinates_df = pd.read_csv(
    filepath_or_buffer=state_path,
    encoding="ISO-8859-1",
)

In [4]:
# Preview the first five rows of the state coordinates data
state_coordinates_df.head(n=5)

Unnamed: 0,State,Latitude,Longitude,City
0,AL,32.601011,-86.680736,Alabama
1,AK,61.302501,-158.77502,Alaska
2,AZ,34.168219,-111.930907,Arizona
3,AR,34.751928,-92.131378,Arkansas
4,CA,37.271875,-119.270415,California


In [5]:
# Rename columns: 'State' holds the two-letter code and 'City' holds the
# full state name (see the preview above), so label them accordingly
state_coordinates_df = state_coordinates_df.rename(
    columns={
        'State': 'Abbreviation',
        'City': 'US_State',
    }
)
state_coordinates_df.head(n=5)

Unnamed: 0,Abbreviation,Latitude,Longitude,US_State
0,AL,32.601011,-86.680736,Alabama
1,AK,61.302501,-158.77502,Alaska
2,AZ,34.168219,-111.930907,Arizona
3,AR,34.751928,-92.131378,Arkansas
4,CA,37.271875,-119.270415,California


In [6]:
# Write the renamed state coordinates to a new CSV file, keeping the header
# row and leaving out the DataFrame index
state_coordinates_path = os.path.join(
    os.path.join("assets", "data", "state_coordinates"),
    "state_coordinates.csv",
)
state_coordinates_df.to_csv(
    state_coordinates_path,
    header=True,
    index=False,
)

## Store data in a SQLite database

In [7]:
# Create connection to the database
database_path = os.path.join("assets", "db", "us_energy.sqlite")
# sqlite3.connect creates a missing database file but not a missing folder,
# so make sure the parent directory exists before connecting
os.makedirs(os.path.dirname(database_path), exist_ok=True)
conn = sqlite3.connect(database_path)

In [13]:
# Store state coordinates data in a table
state_coordinates_df.to_sql('state_coordinates', conn, if_exists='replace', index=False)

# Rebuild the table written by to_sql so that Abbreviation is declared as the
# primary key: rename the table, create the final schema, copy the rows
# across, then drop the renamed copy.
c = conn.cursor()
try:
    c.executescript('''
        PRAGMA foreign_keys=off;

        BEGIN TRANSACTION;
        ALTER TABLE state_coordinates RENAME TO old_table;

        /*create a new table with the same column names while
        defining a primary key for the desired column*/
        CREATE TABLE state_coordinates (Abbreviation TEXT PRIMARY KEY NOT NULL,
                                        Latitude NUMERIC,
                                        Longitude NUMERIC,
                                        US_State TEXT);

        /*name the columns so the copy does not depend on column order*/
        INSERT INTO state_coordinates (Abbreviation, Latitude, Longitude, US_State)
            SELECT Abbreviation, Latitude, Longitude, US_State FROM old_table;

        DROP TABLE old_table;
        COMMIT TRANSACTION;

        PRAGMA foreign_keys=on;''')
except sqlite3.Error:
    # A failed script would otherwise leave the explicit transaction open and
    # foreign-key enforcement switched off on this shared connection
    conn.rollback()
    conn.execute('PRAGMA foreign_keys=on')
    raise
finally:
    # Close only the cursor; the connection itself is reused by later cells
    c.close()

In [15]:
# Query the state_coordinates table back from SQLite and preview five rows
pd.read_sql(sql='select * from state_coordinates', con=conn).head(n=5)

Unnamed: 0,Abbreviation,Latitude,Longitude,US_State
0,AL,32.601011,-86.680736,Alabama
1,AK,61.302501,-158.77502,Alaska
2,AZ,34.168219,-111.930907,Arizona
3,AR,34.751928,-92.131378,Arkansas
4,CA,37.271875,-119.270415,California


In [16]:
# Relative path of the combined energy data CSV
final_path = os.path.join(
    os.path.join("assets", "data", "EIA_Project_Data"),
    "final_combine_table.csv",
)

In [17]:
# Load the combined energy CSV into a DataFrame, decoding it as ISO-8859-1
final_combine_table_df = pd.read_csv(
    filepath_or_buffer=final_path,
    encoding="ISO-8859-1",
)

In [18]:
# Preview the first five rows of the combined energy data
final_combine_table_df.head(n=5)

Unnamed: 0,State,Year,Total_co2_emission,CO2_Unit,Average_Price,Price_Unit,resident_population,Pop_Unit,Total_energy,ENERGY_Unit,Total_renewable_energy,Renew_Unit
0,AL,2016,115.088298,million metric tons CO2,14.81,Dollars per million Btu,4865,Thousand,1124482,Billion Btu,235494,Billion Btu
1,AL,2015,120.142812,million metric tons CO2,15.68,Dollars per million Btu,4853,Thousand,1269743,Billion Btu,261340,Billion Btu
2,AL,2014,123.161802,million metric tons CO2,18.62,Dollars per million Btu,4842,Thousand,1367345,Billion Btu,268551,Billion Btu
3,AL,2013,120.908517,million metric tons CO2,18.89,Dollars per million Btu,4830,Thousand,1478402,Billion Btu,310608,Billion Btu
4,AL,2012,123.201027,million metric tons CO2,19.86,Dollars per million Btu,4816,Thousand,1443558,Billion Btu,241822,Billion Btu


In [22]:
# Store energy data in a table
final_combine_table_df.to_sql('final_combine_table', conn, if_exists='replace', index=False)

# Rebuild the table written by to_sql so that (State, Year) is declared as a
# composite primary key: rename the table, create the final schema, copy the
# rows across, then drop the renamed copy.
c = conn.cursor()
try:
    c.executescript('''
        PRAGMA foreign_keys=off;

        BEGIN TRANSACTION;
        ALTER TABLE final_combine_table RENAME TO old_table;

        /*create a new table with a composite primary key on (State, Year);
        resident_population is stored as Average_resident_population*/
        CREATE TABLE final_combine_table (State TEXT NOT NULL,
                                        Year INTEGER NOT NULL,
                                        Total_co2_emission NUMERIC,
                                        CO2_Unit TEXT,
                                        Average_Price NUMERIC,
                                        Price_Unit TEXT,
                                        Average_resident_population NUMERIC,
                                        Pop_Unit TEXT,
                                        Total_energy NUMERIC,
                                        ENERGY_Unit TEXT,
                                        Total_renewable_energy NUMERIC,
                                        Renew_Unit TEXT,
                                         PRIMARY KEY (State, Year));

        /*name the columns so the copy does not depend on column order and
        the resident_population rename is explicit*/
        INSERT INTO final_combine_table (State, Year,
                                         Total_co2_emission, CO2_Unit,
                                         Average_Price, Price_Unit,
                                         Average_resident_population, Pop_Unit,
                                         Total_energy, ENERGY_Unit,
                                         Total_renewable_energy, Renew_Unit)
            SELECT State, Year,
                   Total_co2_emission, CO2_Unit,
                   Average_Price, Price_Unit,
                   resident_population, Pop_Unit,
                   Total_energy, ENERGY_Unit,
                   Total_renewable_energy, Renew_Unit
            FROM old_table;

        DROP TABLE old_table;
        COMMIT TRANSACTION;

        PRAGMA foreign_keys=on;''')
except sqlite3.Error:
    # A failed script would otherwise leave the explicit transaction open and
    # foreign-key enforcement switched off on this shared connection
    conn.rollback()
    conn.execute('PRAGMA foreign_keys=on')
    raise
finally:
    # Close only the cursor; the connection itself is reused by later cells
    c.close()

In [23]:
# Query the final_combine_table table back from SQLite and preview five rows
pd.read_sql(sql='select * from final_combine_table', con=conn).head(n=5)

Unnamed: 0,State,Year,Total_co2_emission,CO2_Unit,Average_Price,Price_Unit,Average_resident_population,Pop_Unit,Total_energy,ENERGY_Unit,Total_renewable_energy,Renew_Unit
0,AL,2016,115.088298,million metric tons CO2,14.81,Dollars per million Btu,4865,Thousand,1124482,Billion Btu,235494,Billion Btu
1,AL,2015,120.142812,million metric tons CO2,15.68,Dollars per million Btu,4853,Thousand,1269743,Billion Btu,261340,Billion Btu
2,AL,2014,123.161802,million metric tons CO2,18.62,Dollars per million Btu,4842,Thousand,1367345,Billion Btu,268551,Billion Btu
3,AL,2013,120.908517,million metric tons CO2,18.89,Dollars per million Btu,4830,Thousand,1478402,Billion Btu,310608,Billion Btu
4,AL,2012,123.201027,million metric tons CO2,19.86,Dollars per million Btu,4816,Thousand,1443558,Billion Btu,241822,Billion Btu
