In [1]:
# Dependencies and setup (will need more)
import numpy as np
import os
import pandas as pd
import warnings
warnings.simplefilter("ignore")

In [2]:
# Matt's Code Begins Here

# United States EV Market Share (1990-2018)

In [3]:
# Reference link for the EV market-share data and the local CSV copy that gets loaded
shareURL = "https://www.anl.gov/es/vision-model"
shareFilepath = "data/evMarketShare.csv"

# Print the CSV's size on disk (bytes -> MB, rounded to 2 places; files under ~5 KB show as 0.0)
# followed by the reference link
print(f"{shareFilepath} is {round(os.path.getsize(shareFilepath)/1024/1024, 2)} megabytes (MB).",
      "More info here:",
      shareURL,
      sep = "\n")

# Load the CSV into a DataFrame and display it
shareData = pd.read_csv(shareFilepath)
shareData

data/evMarketShare.csv is 0.0 megabytes (MB).
More info here:
https://www.anl.gov/es/vision-model


Unnamed: 0,Year,EV Market Share
0,1990,0.0
1,1991,0.0
2,1992,0.0
3,1993,0.0
4,1994,0.0
5,1995,0.0
6,1996,0.0
7,1997,0.0
8,1998,0.0
9,1999,0.0


# Charging Stations by State (as of April 5, 2019)

In [4]:
# Scrape every HTML table on the charging-stations page with pandas.read_html.
# The result is a list of DataFrames (one per table found), fetched live from the web,
# so the values depend on the page's contents at the time the cell is run.
stationsURL = "https://evadoption.com/ev-charging-stations-statistics/charging-stations-by-state/"
evStationsTable = pd.read_html(io = stationsURL)

# Show the raw list of scraped tables
evStationsTable

[                   State  Charging Locations (1)  Charging Outlets (2)  \
 0                Alabama                     115                   268   
 1                 Alaska                      16                    26   
 2                Arizona                     454                  1223   
 3               Arkansas                      72                   196   
 4             California                    5095                 19687   
 5               Colorado                     692                  1857   
 6            Connecticut                     335                   814   
 7               Delaware                      49                   159   
 8   District of Columbia                     122                   340   
 9                Florida                    1165                  3010   
 10               Georgia                     773                  2335   
 11                Hawaii                     265                   523   
 12                 Idaho

In [5]:
# Slice off desired DataFrame using normal indexing
# (the first table in the scraped list; this binds the same DataFrame object that is
# stored in the list, not a copy, so any in-place edit to evStations_df also changes
# evStationsTable[0])
evStations_df = evStationsTable[0]

# Display DataFrame
evStations_df

Unnamed: 0,State,Charging Locations (1),Charging Outlets (2),Outlets Per Location,EV Stock (3),EVs to Charging Outlets
0,Alabama,115,268,2.33,2487,9.28
1,Alaska,16,26,1.62,534,20.54
2,Arizona,454,1223,2.69,18129,14.82
3,Arkansas,72,196,2.72,1194,6.09
4,California,5095,19687,3.86,506608,25.73
5,Colorado,692,1857,2.68,19738,10.63
6,Connecticut,335,814,2.43,10916,13.41
7,Delaware,49,159,3.24,1895,11.92
8,District of Columbia,122,340,2.79,2321,6.83
9,Florida,1165,3010,2.58,40548,13.47


In [6]:
# Drop the trailing "median" summary row so that only the per-state rows remain.
# The frame is rebuilt from the scraped table (evStationsTable[0]) instead of being
# trimmed in place: an in-place drop of the last row removes one more (real) row every
# time the cell is re-run, and it also mutates the table held in evStationsTable.
# .copy() makes the result independent of the scraped table.
evStations_df = evStationsTable[0].iloc[:-1].copy()

# Display the last rows to confirm the summary row is gone
evStations_df.tail()

Unnamed: 0,State,Charging Locations (1),Charging Outlets (2),Outlets Per Location,EV Stock (3),EVs to Charging Outlets
46,Virginia,578,1356,2.35,16505,12.17
47,Washington,874,2383,2.73,41459,17.4
48,West Virginia,87,212,2.44,746,3.52
49,Wisconsin,285,542,1.9,8271,15.26
50,Wyoming,51,139,2.73,269,1.94


In [7]:
# Keep only the five states of interest (row order is preserved) and renumber
# the rows 0..n-1, discarding the old index
evStations = evStations_df[
    evStations_df["State"].isin(["Iowa", "Minnesota", "North Dakota", "South Dakota", "Wisconsin"])
].reset_index(drop = True)

# Display the filtered DataFrame
evStations

Unnamed: 0,State,Charging Locations (1),Charging Outlets (2),Outlets Per Location,EV Stock (3),EVs to Charging Outlets
0,Iowa,127,316,2.49,2799,8.86
1,Minnesota,326,793,2.43,8845,11.15
2,North Dakota,19,29,1.53,291,10.03
3,South Dakota,33,89,2.7,424,4.76
4,Wisconsin,285,542,1.9,8271,15.26


In [8]:
# Export to CSV
# evStations.to_csv("chargingStations.csv", index = False, header = True)

# Minnesota Vehicle Miles Traveled (1990-2018)

In [9]:
# Reference link for the Minnesota VMT data and the local CSV copy that gets loaded
mnVMTURL = "http://www.dot.state.mn.us/roadway/data/data-products.html#VMT"
mnVMTFilepath = "data/minnesotaVMT.csv"

# Print the CSV's size on disk (bytes -> MB, rounded to 2 places; files under ~5 KB show as 0.0)
# followed by the reference link
print(f"{mnVMTFilepath} is {round(os.path.getsize(mnVMTFilepath)/1024/1024, 2)} megabytes (MB).",
      "More info here:",
      mnVMTURL,
      sep = "\n")

# Load the CSV into a DataFrame and display it
mnVMTData = pd.read_csv(mnVMTFilepath)
mnVMTData

data/minnesotaVMT.csv is 0.0 megabytes (MB).
More info here:
http://www.dot.state.mn.us/roadway/data/data-products.html#VMT


Unnamed: 0,Year,MN VMT
0,2018,60438313272
1,2017,59970745402
2,2016,58856547322
3,2015,57795772499
4,2014,57393220230
5,2013,56993471770
6,2012,57018582582
7,2011,56675049785
8,2010,56762480425
9,2009,56970430780


# Minnesota Total Gasoline All Sales (1990-2018)

In [10]:
# Reference link for the Minnesota gasoline data and the local CSV copy that gets loaded
mnGasURL = "https://www.eia.gov/dnav/pet/hist/LeafHandler.ashx?n=PET&s=C100020271&f=A"
mnGasFilepath = "data/mnFuelConsumption.csv"

# Print the CSV's size on disk (bytes -> MB, rounded to 2 places; files under ~5 KB show as 0.0)
# followed by the reference link
print(f"{mnGasFilepath} is {round(os.path.getsize(mnGasFilepath)/1024/1024, 2)} megabytes (MB).",
      "More info here:",
      mnGasURL,
      sep = "\n")

# Load the CSV into a DataFrame and display it
mnGasData = pd.read_csv(mnGasFilepath)
mnGasData

data/mnFuelConsumption.csv is 0.0 megabytes (MB).
More info here:
https://www.eia.gov/dnav/pet/hist/LeafHandler.ashx?n=PET&s=C100020271&f=A


Unnamed: 0,Year,MN Gas
0,1990,5409.4
1,1991,5390.1
2,1992,5429.6
3,1993,5594.5
4,1994,5902.9
5,1995,6080.0
6,1996,6234.2
7,1997,6316.1
8,1998,6274.9
9,1999,6592.6


In [11]:
# The CSV states "MN Gas" in thousand gallons per day: x1,000 gives gallons per day and
# x365.25 (average days per year) scales that to a yearly figure.
# The rows are then ordered newest year first and renumbered 0..n-1.
# NOTE: this rescales whatever mnGasData currently holds, so running the cell twice
# multiplies the values twice; re-run the CSV load cell first.
mnGasData = (
    mnGasData
    .assign(**{"MN Gas": (mnGasData["MN Gas"] * 1000) * 365.25})
    .sort_values("Year", ascending = False)
    .reset_index(drop = True)
)

# Display the converted DataFrame
mnGasData

Unnamed: 0,Year,MN Gas
0,2018,2271234000.0
1,2017,2331281000.0
2,2016,2407911000.0
3,2015,2367989000.0
4,2014,2289606000.0
5,2013,2307029000.0
6,2012,2266559000.0
7,2011,2207060000.0
8,2010,2218309000.0
9,2009,2213890000.0


In [12]:
# Start the combined table: outer-join Minnesota VMT with Minnesota gasoline on "Year",
# so a year present in either frame is kept
milesGas = mnVMTData.merge(mnGasData, how = "outer", on = "Year")

# Display the merged DataFrame
milesGas

Unnamed: 0,Year,MN VMT,MN Gas
0,2018,60438313272,2271234000.0
1,2017,59970745402,2331281000.0
2,2016,58856547322,2407911000.0
3,2015,57795772499,2367989000.0
4,2014,57393220230,2289606000.0
5,2013,56993471770,2307029000.0
6,2012,57018582582,2266559000.0
7,2011,56675049785,2207060000.0
8,2010,56762480425,2218309000.0
9,2009,56970430780,2213890000.0


# Iowa Vehicle Miles Traveled (1990-2018)

In [13]:
# Reference link for the Iowa VMT data and the local CSV copy that gets loaded
iaVMTURL = "https://iowadot.gov/maps/msp/vmt/30yearvmt.pdf"
iaVMTFilepath = "data/iowaVMT.csv"

# Print the CSV's size on disk (bytes -> MB, rounded to 2 places; files under ~5 KB show as 0.0)
# followed by the reference link
print(f"{iaVMTFilepath} is {round(os.path.getsize(iaVMTFilepath)/1024/1024, 2)} megabytes (MB).",
      "More info here:",
      iaVMTURL,
      sep = "\n")

# Load the CSV into a DataFrame and display it
iaVMTData = pd.read_csv(iaVMTFilepath)
iaVMTData

data/iowaVMT.csv is 0.0 megabytes (MB).
More info here:
https://iowadot.gov/maps/msp/vmt/30yearvmt.pdf


Unnamed: 0,Year,IA VMT,U.S. Total
0,1990,23165,2147501
1,1991,23572,2172214
2,1992,24411,2247152
3,1993,25396,2296705
4,1994,26039,2357587
5,1995,26658,2422776
6,1996,27587,2482201
7,1997,28404,2560373
8,1998,29192,2625363
9,1999,29726,2679459


In [14]:
# Scale both mileage columns by 1,000,000, then order the rows newest year first
# and renumber them 0..n-1.
# NOTE: this rescales whatever iaVMTData currently holds, so running the cell twice
# multiplies the values twice; re-run the CSV load cell first.
iaVMTData = (
    iaVMTData
    .assign(**{column: iaVMTData[column] * 1000000 for column in ("IA VMT", "U.S. Total")})
    .sort_values("Year", ascending = False)
    .reset_index(drop = True)
)

# Display the converted DataFrame
iaVMTData

Unnamed: 0,Year,IA VMT,U.S. Total
0,2018,33507000000,3240327000000
1,2017,33751000000,3212347000000
2,2016,33263000000,3174408000000
3,2015,33109000000,3095373000000
4,2014,32332000000,3025656000000
5,2013,31542000000,2988280000000
6,2012,31581000000,2968570000000
7,2011,31411000000,2950402000000
8,2010,31579000000,2967266000000
9,2009,31295000000,2956764000000


In [15]:
# Add the Iowa / U.S. VMT columns to the combined table; the left join on "Year"
# keeps every row already in milesGas
milesGas = pd.merge(milesGas, iaVMTData, how = "left", on = "Year")

# Display the merged DataFrame
milesGas

Unnamed: 0,Year,MN VMT,MN Gas,IA VMT,U.S. Total
0,2018,60438313272,2271234000.0,33507000000,3240327000000
1,2017,59970745402,2331281000.0,33751000000,3212347000000
2,2016,58856547322,2407911000.0,33263000000,3174408000000
3,2015,57795772499,2367989000.0,33109000000,3095373000000
4,2014,57393220230,2289606000.0,32332000000,3025656000000
5,2013,56993471770,2307029000.0,31542000000,2988280000000
6,2012,57018582582,2266559000.0,31581000000,2968570000000
7,2011,56675049785,2207060000.0,31411000000,2950402000000
8,2010,56762480425,2218309000.0,31579000000,2967266000000
9,2009,56970430780,2213890000.0,31295000000,2956764000000


# Iowa Total Gasoline All Sales (1990-2018)

In [16]:
# Reference link for the Iowa gasoline data and the local CSV copy that gets loaded
iaGasURL = "https://www.eia.gov/dnav/pet/hist/LeafHandler.ashx?n=PET&s=C100020191&f=A"
iaGasFilepath = "data/iaFuelConsumption.csv"

# Print the CSV's size on disk (bytes -> MB, rounded to 2 places; files under ~5 KB show as 0.0)
# followed by the reference link
print(f"{iaGasFilepath} is {round(os.path.getsize(iaGasFilepath)/1024/1024, 2)} megabytes (MB).",
      "More info here:",
      iaGasURL,
      sep = "\n")

# Load the CSV into a DataFrame and display it
iaGasData = pd.read_csv(iaGasFilepath)
iaGasData

data/iaFuelConsumption.csv is 0.0 megabytes (MB).
More info here:
https://www.eia.gov/dnav/pet/hist/LeafHandler.ashx?n=PET&s=C100020191&f=A


Unnamed: 0,Year,IA Gas
0,1990,3886.9
1,1991,3780.7
2,1992,3782.3
3,1993,3795.5
4,1994,3920.9
5,1995,3870.9
6,1996,4005.6
7,1997,4017.1
8,1998,4080.0
9,1999,4272.0


In [17]:
# The CSV states "IA Gas" in thousand gallons per day: x1,000 gives gallons per day and
# x365.25 (average days per year) scales that to a yearly figure.
# The rows are then ordered newest year first and renumbered 0..n-1.
# NOTE: this rescales whatever iaGasData currently holds, so running the cell twice
# multiplies the values twice; re-run the CSV load cell first.
iaGasData = (
    iaGasData
    .assign(**{"IA Gas": (iaGasData["IA Gas"] * 1000) * 365.25})
    .sort_values("Year", ascending = False)
    .reset_index(drop = True)
)

# Display the converted DataFrame
iaGasData

Unnamed: 0,Year,IA Gas
0,2018,1140457000.0
1,2017,1181657000.0
2,2016,1198349000.0
3,2015,1149113000.0
4,2014,1186003000.0
5,2013,1261610000.0
6,2012,1300947000.0
7,2011,1340687000.0
8,2010,1387475000.0
9,2009,1304308000.0


In [18]:
# Add the Iowa gasoline column to the combined table; the left join on "Year"
# keeps every row already in milesGas
milesGas = pd.merge(milesGas, iaGasData, how = "left", on = "Year")

# Display the merged DataFrame
milesGas

Unnamed: 0,Year,MN VMT,MN Gas,IA VMT,U.S. Total,IA Gas
0,2018,60438313272,2271234000.0,33507000000,3240327000000,1140457000.0
1,2017,59970745402,2331281000.0,33751000000,3212347000000,1181657000.0
2,2016,58856547322,2407911000.0,33263000000,3174408000000,1198349000.0
3,2015,57795772499,2367989000.0,33109000000,3095373000000,1149113000.0
4,2014,57393220230,2289606000.0,32332000000,3025656000000,1186003000.0
5,2013,56993471770,2307029000.0,31542000000,2988280000000,1261610000.0
6,2012,57018582582,2266559000.0,31581000000,2968570000000,1300947000.0
7,2011,56675049785,2207060000.0,31411000000,2950402000000,1340687000.0
8,2010,56762480425,2218309000.0,31579000000,2967266000000,1387475000.0
9,2009,56970430780,2213890000.0,31295000000,2956764000000,1304308000.0


# North Dakota Vehicle Miles Traveled (1990-2018)

In [19]:
# Reference link for the North Dakota VMT data and the local CSV copy that gets loaded
ndVMTURL = "https://www.dot.nd.gov/business/docs/trafficreports/Annual-Traffic-Report-2019.pdf"
ndVMTFilepath = "data/northdakotaVMT.csv"

# Print the CSV's size on disk (bytes -> MB, rounded to 2 places; files under ~5 KB show as 0.0)
# followed by the reference link
print(f"{ndVMTFilepath} is {round(os.path.getsize(ndVMTFilepath)/1024/1024, 2)} megabytes (MB).",
      "More info here:",
      ndVMTURL,
      sep = "\n")

# Load the CSV into a DataFrame and display it
ndVMTData = pd.read_csv(ndVMTFilepath)
ndVMTData

data/northdakotaVMT.csv is 0.0 megabytes (MB).
More info here:
https://www.dot.nd.gov/business/docs/trafficreports/Annual-Traffic-Report-2019.pdf


Unnamed: 0,Year,ND VMT
0,1990,5957
1,1991,5999
2,1992,6105
3,1993,6204
4,1994,6388
5,1995,6546
6,1996,6767
7,1997,6942
8,1998,7093
9,1999,7101


In [20]:
# Scale "ND VMT" by 1,000,000, then order the rows newest year first and renumber them 0..n-1.
# NOTE: this rescales whatever ndVMTData currently holds, so running the cell twice
# multiplies the values twice; re-run the CSV load cell first.
ndVMTData = (
    ndVMTData
    .assign(**{"ND VMT": ndVMTData["ND VMT"] * 1000000})
    .sort_values("Year", ascending = False)
    .reset_index(drop = True)
)

# Display the converted DataFrame
ndVMTData

Unnamed: 0,Year,ND VMT
0,2018,9866000000
1,2017,9703000000
2,2016,9740000000
3,2015,10079000000
4,2014,10437000000
5,2013,10100000000
6,2012,10093000000
7,2011,9166000000
8,2010,8303000000
9,2009,7943000000


In [21]:
# Add the North Dakota VMT column to the combined table; the left join on "Year"
# keeps every row already in milesGas
milesGas = pd.merge(milesGas, ndVMTData, how = "left", on = "Year")

# Display the merged DataFrame
milesGas

Unnamed: 0,Year,MN VMT,MN Gas,IA VMT,U.S. Total,IA Gas,ND VMT
0,2018,60438313272,2271234000.0,33507000000,3240327000000,1140457000.0,9866000000
1,2017,59970745402,2331281000.0,33751000000,3212347000000,1181657000.0,9703000000
2,2016,58856547322,2407911000.0,33263000000,3174408000000,1198349000.0,9740000000
3,2015,57795772499,2367989000.0,33109000000,3095373000000,1149113000.0,10079000000
4,2014,57393220230,2289606000.0,32332000000,3025656000000,1186003000.0,10437000000
5,2013,56993471770,2307029000.0,31542000000,2988280000000,1261610000.0,10100000000
6,2012,57018582582,2266559000.0,31581000000,2968570000000,1300947000.0,10093000000
7,2011,56675049785,2207060000.0,31411000000,2950402000000,1340687000.0,9166000000
8,2010,56762480425,2218309000.0,31579000000,2967266000000,1387475000.0,8303000000
9,2009,56970430780,2213890000.0,31295000000,2956764000000,1304308000.0,7943000000


# North Dakota Total Gasoline All Sales (1990-2018)

In [22]:
# Reference link for the North Dakota gasoline data and the local CSV copy that gets loaded
ndGasURL = "https://www.eia.gov/dnav/pet/hist/LeafHandler.ashx?n=PET&s=C100020381&f=A"
ndGasFilepath = "data/ndFuelConsumption.csv"

# Print the CSV's size on disk (bytes -> MB, rounded to 2 places; files under ~5 KB show as 0.0)
# followed by the reference link
print(f"{ndGasFilepath} is {round(os.path.getsize(ndGasFilepath)/1024/1024, 2)} megabytes (MB).",
      "More info here:",
      ndGasURL,
      sep = "\n")

# Load the CSV into a DataFrame and display it
ndGasData = pd.read_csv(ndGasFilepath)
ndGasData

data/ndFuelConsumption.csv is 0.0 megabytes (MB).
More info here:
https://www.eia.gov/dnav/pet/hist/LeafHandler.ashx?n=PET&s=C100020381&f=A


Unnamed: 0,Year,ND Gas
0,1990,876.5
1,1991,884.7
2,1992,917.1
3,1993,988.8
4,1994,998.1
5,1995,1011.6
6,1996,977.8
7,1997,992.3
8,1998,1022.7
9,1999,1039.4


In [23]:
# The CSV states "ND Gas" in thousand gallons per day: x1,000 gives gallons per day and
# x365.25 (average days per year) scales that to a yearly figure.
# The rows are then ordered newest year first and renumbered 0..n-1.
# NOTE: this rescales whatever ndGasData currently holds, so running the cell twice
# multiplies the values twice; re-run the CSV load cell first.
ndGasData = (
    ndGasData
    .assign(**{"ND Gas": (ndGasData["ND Gas"] * 1000) * 365.25})
    .sort_values("Year", ascending = False)
    .reset_index(drop = True)
)

# Display the converted DataFrame
ndGasData

Unnamed: 0,Year,ND Gas
0,2018,396844125.0
1,2017,402176775.0
2,2016,400058325.0
3,2015,430921950.0
4,2014,443669175.0
5,2013,435268425.0
6,2012,417152025.0
7,2011,391328850.0
8,2010,370400025.0
9,2009,346183950.0


In [24]:
# Add the North Dakota gasoline column to the combined table; the left join on "Year"
# keeps every row already in milesGas
milesGas = pd.merge(milesGas, ndGasData, how = "left", on = "Year")

# Display the merged DataFrame
milesGas

Unnamed: 0,Year,MN VMT,MN Gas,IA VMT,U.S. Total,IA Gas,ND VMT,ND Gas
0,2018,60438313272,2271234000.0,33507000000,3240327000000,1140457000.0,9866000000,396844125.0
1,2017,59970745402,2331281000.0,33751000000,3212347000000,1181657000.0,9703000000,402176775.0
2,2016,58856547322,2407911000.0,33263000000,3174408000000,1198349000.0,9740000000,400058325.0
3,2015,57795772499,2367989000.0,33109000000,3095373000000,1149113000.0,10079000000,430921950.0
4,2014,57393220230,2289606000.0,32332000000,3025656000000,1186003000.0,10437000000,443669175.0
5,2013,56993471770,2307029000.0,31542000000,2988280000000,1261610000.0,10100000000,435268425.0
6,2012,57018582582,2266559000.0,31581000000,2968570000000,1300947000.0,10093000000,417152025.0
7,2011,56675049785,2207060000.0,31411000000,2950402000000,1340687000.0,9166000000,391328850.0
8,2010,56762480425,2218309000.0,31579000000,2967266000000,1387475000.0,8303000000,370400025.0
9,2009,56970430780,2213890000.0,31295000000,2956764000000,1304308000.0,7943000000,346183950.0


# South Dakota Vehicle Miles Traveled (1990-2018)

In [25]:
# Reference link for the South Dakota VMT data and the local CSV copy that gets loaded
sdVMTURL = "https://dot.sd.gov/transportation/highways/traffic"
sdVMTFilepath = "data/southdakotaVMT.csv"

# Print the CSV's size on disk (bytes -> MB, rounded to 2 places; files under ~5 KB show as 0.0)
# followed by the reference link
print(f"{sdVMTFilepath} is {round(os.path.getsize(sdVMTFilepath)/1024/1024, 2)} megabytes (MB).",
      "More info here:",
      sdVMTURL,
      sep = "\n")

# Load the CSV into a DataFrame and display it
sdVMTData = pd.read_csv(sdVMTFilepath)
sdVMTData

data/southdakotaVMT.csv is 0.0 megabytes (MB).
More info here:
https://dot.sd.gov/transportation/highways/traffic


Unnamed: 0,Year,SD VMT,Unnamed: 2
0,1990,6985,
1,1991,6690,
2,1992,7176,
3,1993,7352,
4,1994,7550,
5,1995,7569,
6,1996,7696,
7,1997,7797,
8,1998,7931,
9,1999,8054,


In [26]:
# Remove the empty "Unnamed: 2" column that the CSV load produced.
# A non-inplace drop with errors="ignore" keeps the cell re-runnable: once the column is
# gone, a second run is a no-op instead of raising KeyError.
sdVMTData = sdVMTData.drop(columns = ["Unnamed: 2"], errors = "ignore")

# Display the first rows of the cleaned DataFrame
sdVMTData.head()

Unnamed: 0,Year,SD VMT
0,1990,6985
1,1991,6690
2,1992,7176
3,1993,7352
4,1994,7550


In [27]:
# Scale "SD VMT" by 1,000,000, then order the rows newest year first and renumber them 0..n-1.
# NOTE: this rescales whatever sdVMTData currently holds, so running the cell twice
# multiplies the values twice; re-run the CSV load cell first.
sdVMTData = (
    sdVMTData
    .assign(**{"SD VMT": sdVMTData["SD VMT"] * 1000000})
    .sort_values("Year", ascending = False)
    .reset_index(drop = True)
)

# Display the converted DataFrame
sdVMTData

Unnamed: 0,Year,SD VMT
0,2018,9736000000
1,2017,9638000000
2,2016,9508000000
3,2015,9325000000
4,2014,9226000000
5,2013,9114000000
6,2012,9077000000
7,2011,8993000000
8,2010,8861000000
9,2009,8740000000


In [28]:
# Add the South Dakota VMT column to the combined table; the left join on "Year"
# keeps every row already in milesGas
milesGas = pd.merge(milesGas, sdVMTData, how = "left", on = "Year")

# Display the merged DataFrame
milesGas

Unnamed: 0,Year,MN VMT,MN Gas,IA VMT,U.S. Total,IA Gas,ND VMT,ND Gas,SD VMT
0,2018,60438313272,2271234000.0,33507000000,3240327000000,1140457000.0,9866000000,396844125.0,9736000000
1,2017,59970745402,2331281000.0,33751000000,3212347000000,1181657000.0,9703000000,402176775.0,9638000000
2,2016,58856547322,2407911000.0,33263000000,3174408000000,1198349000.0,9740000000,400058325.0,9508000000
3,2015,57795772499,2367989000.0,33109000000,3095373000000,1149113000.0,10079000000,430921950.0,9325000000
4,2014,57393220230,2289606000.0,32332000000,3025656000000,1186003000.0,10437000000,443669175.0,9226000000
5,2013,56993471770,2307029000.0,31542000000,2988280000000,1261610000.0,10100000000,435268425.0,9114000000
6,2012,57018582582,2266559000.0,31581000000,2968570000000,1300947000.0,10093000000,417152025.0,9077000000
7,2011,56675049785,2207060000.0,31411000000,2950402000000,1340687000.0,9166000000,391328850.0,8993000000
8,2010,56762480425,2218309000.0,31579000000,2967266000000,1387475000.0,8303000000,370400025.0,8861000000
9,2009,56970430780,2213890000.0,31295000000,2956764000000,1304308000.0,7943000000,346183950.0,8740000000


# South Dakota Total Gasoline All Sales (1990-2018)

In [29]:
# Reference link for the South Dakota gasoline data and the local CSV copy that gets loaded
sdGasURL = "https://www.eia.gov/dnav/pet/hist/LeafHandler.ashx?n=PET&s=C100020461&f=A"
sdGasFilepath = "data/sdFuelConsumption.csv"

# Print the CSV's size on disk (bytes -> MB, rounded to 2 places; files under ~5 KB show as 0.0)
# followed by the reference link
print(f"{sdGasFilepath} is {round(os.path.getsize(sdGasFilepath)/1024/1024, 2)} megabytes (MB).",
      "More info here:",
      sdGasURL,
      sep = "\n")

# Load the CSV into a DataFrame and display it
sdGasData = pd.read_csv(sdGasFilepath)
sdGasData

data/sdFuelConsumption.csv is 0.0 megabytes (MB).
More info here:
https://www.eia.gov/dnav/pet/hist/LeafHandler.ashx?n=PET&s=C100020461&f=A


Unnamed: 0,Year,SD Gas
0,1990,1030.4
1,1991,1041.8
2,1992,1100.8
3,1993,1106.0
4,1994,1146.5
5,1995,1120.0
6,1996,1134.7
7,1997,1134.2
8,1998,1208.1
9,1999,1155.7


In [30]:
# The CSV states "SD Gas" in thousand gallons per day: x1,000 gives gallons per day and
# x365.25 (average days per year) scales that to a yearly figure.
# The rows are then ordered newest year first and renumbered 0..n-1.
# NOTE: this rescales whatever sdGasData currently holds, so running the cell twice
# multiplies the values twice; re-run the CSV load cell first.
sdGasData = (
    sdGasData
    .assign(**{"SD Gas": (sdGasData["SD Gas"] * 1000) * 365.25})
    .sort_values("Year", ascending = False)
    .reset_index(drop = True)
)

# Display the converted DataFrame
sdGasData

Unnamed: 0,Year,SD Gas
0,2018,453202200.0
1,2017,441222000.0
2,2016,427050300.0
3,2015,418065150.0
4,2014,411052350.0
5,2013,432163800.0
6,2012,413389950.0
7,2011,442646475.0
8,2010,439432275.0
9,2009,424237875.0


In [31]:
# Add the South Dakota gasoline column to the combined table; the left join on "Year"
# keeps every row already in milesGas
milesGas = pd.merge(milesGas, sdGasData, how = "left", on = "Year")

# Display the merged DataFrame
milesGas

Unnamed: 0,Year,MN VMT,MN Gas,IA VMT,U.S. Total,IA Gas,ND VMT,ND Gas,SD VMT,SD Gas
0,2018,60438313272,2271234000.0,33507000000,3240327000000,1140457000.0,9866000000,396844125.0,9736000000,453202200.0
1,2017,59970745402,2331281000.0,33751000000,3212347000000,1181657000.0,9703000000,402176775.0,9638000000,441222000.0
2,2016,58856547322,2407911000.0,33263000000,3174408000000,1198349000.0,9740000000,400058325.0,9508000000,427050300.0
3,2015,57795772499,2367989000.0,33109000000,3095373000000,1149113000.0,10079000000,430921950.0,9325000000,418065150.0
4,2014,57393220230,2289606000.0,32332000000,3025656000000,1186003000.0,10437000000,443669175.0,9226000000,411052350.0
5,2013,56993471770,2307029000.0,31542000000,2988280000000,1261610000.0,10100000000,435268425.0,9114000000,432163800.0
6,2012,57018582582,2266559000.0,31581000000,2968570000000,1300947000.0,10093000000,417152025.0,9077000000,413389950.0
7,2011,56675049785,2207060000.0,31411000000,2950402000000,1340687000.0,9166000000,391328850.0,8993000000,442646475.0
8,2010,56762480425,2218309000.0,31579000000,2967266000000,1387475000.0,8303000000,370400025.0,8861000000,439432275.0
9,2009,56970430780,2213890000.0,31295000000,2956764000000,1304308000.0,7943000000,346183950.0,8740000000,424237875.0


# Wisconsin Vehicle Miles Traveled (1990-2018)

In [32]:
# Reference link for the Wisconsin VMT data and the local CSV copy that gets loaded
wiVMTURL = "https://wisconsindot.gov/Documents/projects/data-plan/veh-miles/vmt-hist.pdf"
wiVMTFilepath = "data/wisconsinVMT.csv"

# Print the CSV's size on disk (bytes -> MB, rounded to 2 places; files under ~5 KB show as 0.0)
# followed by the reference link
print(f"{wiVMTFilepath} is {round(os.path.getsize(wiVMTFilepath)/1024/1024, 2)} megabytes (MB).",
      "More info here:",
      wiVMTURL,
      sep = "\n")

# Load the CSV into a DataFrame and display it
wiVMTData = pd.read_csv(wiVMTFilepath)
wiVMTData

data/wisconsinVMT.csv is 0.0 megabytes (MB).
More info here:
https://wisconsindot.gov/Documents/projects/data-plan/veh-miles/vmt-hist.pdf


Unnamed: 0,Year,WI VMT
0,2018,65884
1,2017,65324
2,2016,63870
3,2015,62140
4,2014,60044
5,2013,59484
6,2012,59020
7,2011,58554
8,2010,59420
9,2009,58157


In [33]:
# Scale "WI VMT" by 1,000,000. No sort or re-index is applied in this cell.
# NOTE: this rescales whatever wiVMTData currently holds, so running the cell twice
# multiplies the values twice; re-run the CSV load cell first.
wiVMTData = wiVMTData.assign(**{"WI VMT": wiVMTData["WI VMT"] * 1000000})

# Display the converted DataFrame
wiVMTData

Unnamed: 0,Year,WI VMT
0,2018,65884000000
1,2017,65324000000
2,2016,63870000000
3,2015,62140000000
4,2014,60044000000
5,2013,59484000000
6,2012,59020000000
7,2011,58554000000
8,2010,59420000000
9,2009,58157000000


In [34]:
# Add the Wisconsin VMT column to the combined table; the left join on "Year"
# keeps every row already in milesGas
milesGas = pd.merge(milesGas, wiVMTData, how = "left", on = "Year")

# Display the merged DataFrame
milesGas

Unnamed: 0,Year,MN VMT,MN Gas,IA VMT,U.S. Total,IA Gas,ND VMT,ND Gas,SD VMT,SD Gas,WI VMT
0,2018,60438313272,2271234000.0,33507000000,3240327000000,1140457000.0,9866000000,396844125.0,9736000000,453202200.0,65884000000
1,2017,59970745402,2331281000.0,33751000000,3212347000000,1181657000.0,9703000000,402176775.0,9638000000,441222000.0,65324000000
2,2016,58856547322,2407911000.0,33263000000,3174408000000,1198349000.0,9740000000,400058325.0,9508000000,427050300.0,63870000000
3,2015,57795772499,2367989000.0,33109000000,3095373000000,1149113000.0,10079000000,430921950.0,9325000000,418065150.0,62140000000
4,2014,57393220230,2289606000.0,32332000000,3025656000000,1186003000.0,10437000000,443669175.0,9226000000,411052350.0,60044000000
5,2013,56993471770,2307029000.0,31542000000,2988280000000,1261610000.0,10100000000,435268425.0,9114000000,432163800.0,59484000000
6,2012,57018582582,2266559000.0,31581000000,2968570000000,1300947000.0,10093000000,417152025.0,9077000000,413389950.0,59020000000
7,2011,56675049785,2207060000.0,31411000000,2950402000000,1340687000.0,9166000000,391328850.0,8993000000,442646475.0,58554000000
8,2010,56762480425,2218309000.0,31579000000,2967266000000,1387475000.0,8303000000,370400025.0,8861000000,439432275.0,59420000000
9,2009,56970430780,2213890000.0,31295000000,2956764000000,1304308000.0,7943000000,346183950.0,8740000000,424237875.0,58157000000


# Wisconsin Total Gasoline All Sales (1990-2018)

In [35]:
# Reference link for the Wisconsin gasoline data and the local CSV copy that gets loaded
wiGasURL = "https://www.eia.gov/dnav/pet/hist/LeafHandler.ashx?n=PET&s=C100020551&f=A"
wiGasFilepath = "data/wiFuelConsumption.csv"

# Print the CSV's size on disk (bytes -> MB, rounded to 2 places; files under ~5 KB show as 0.0)
# followed by the reference link
print(f"{wiGasFilepath} is {round(os.path.getsize(wiGasFilepath)/1024/1024, 2)} megabytes (MB).",
      "More info here:",
      wiGasURL,
      sep = "\n")

# Load the CSV into a DataFrame and display it
wiGasData = pd.read_csv(wiGasFilepath)
wiGasData

data/wiFuelConsumption.csv is 0.0 megabytes (MB).
More info here:
https://www.eia.gov/dnav/pet/hist/LeafHandler.ashx?n=PET&s=C100020551&f=A


Unnamed: 0,Year,WI Gas
0,1990,5816.9
1,1991,5850.4
2,1992,5917.1
3,1993,6012.0
4,1994,6135.2
5,1995,6329.4
6,1996,6478.1
7,1997,6596.4
8,1998,6768.0
9,1999,6713.5


In [36]:
# The CSV states "WI Gas" in thousand gallons per day: x1,000 gives gallons per day and
# x365.25 (average days per year) scales that to a yearly figure.
# The rows are then ordered newest year first and renumbered 0..n-1.
# NOTE: this rescales whatever wiGasData currently holds, so running the cell twice
# multiplies the values twice; re-run the CSV load cell first.
wiGasData = (
    wiGasData
    .assign(**{"WI Gas": (wiGasData["WI Gas"] * 1000) * 365.25})
    .sort_values("Year", ascending = False)
    .reset_index(drop = True)
)

# Display the converted DataFrame
wiGasData

Unnamed: 0,Year,WI Gas
0,2018,2518508000.0
1,2017,2625417000.0
2,2016,2683346000.0
3,2015,2653395000.0
4,2014,2575633000.0
5,2013,2597001000.0
6,2012,2463392000.0
7,2011,2554084000.0
8,2010,2584801000.0
9,2009,2512847000.0


In [37]:
# Add the Wisconsin gasoline column to the combined table; the left join on "Year"
# keeps every row already in milesGas
milesGas = pd.merge(milesGas, wiGasData, how = "left", on = "Year")

# Display the merged DataFrame
milesGas

Unnamed: 0,Year,MN VMT,MN Gas,IA VMT,U.S. Total,IA Gas,ND VMT,ND Gas,SD VMT,SD Gas,WI VMT,WI Gas
0,2018,60438313272,2271234000.0,33507000000,3240327000000,1140457000.0,9866000000,396844125.0,9736000000,453202200.0,65884000000,2518508000.0
1,2017,59970745402,2331281000.0,33751000000,3212347000000,1181657000.0,9703000000,402176775.0,9638000000,441222000.0,65324000000,2625417000.0
2,2016,58856547322,2407911000.0,33263000000,3174408000000,1198349000.0,9740000000,400058325.0,9508000000,427050300.0,63870000000,2683346000.0
3,2015,57795772499,2367989000.0,33109000000,3095373000000,1149113000.0,10079000000,430921950.0,9325000000,418065150.0,62140000000,2653395000.0
4,2014,57393220230,2289606000.0,32332000000,3025656000000,1186003000.0,10437000000,443669175.0,9226000000,411052350.0,60044000000,2575633000.0
5,2013,56993471770,2307029000.0,31542000000,2988280000000,1261610000.0,10100000000,435268425.0,9114000000,432163800.0,59484000000,2597001000.0
6,2012,57018582582,2266559000.0,31581000000,2968570000000,1300947000.0,10093000000,417152025.0,9077000000,413389950.0,59020000000,2463392000.0
7,2011,56675049785,2207060000.0,31411000000,2950402000000,1340687000.0,9166000000,391328850.0,8993000000,442646475.0,58554000000,2554084000.0
8,2010,56762480425,2218309000.0,31579000000,2967266000000,1387475000.0,8303000000,370400025.0,8861000000,439432275.0,59420000000,2584801000.0
9,2009,56970430780,2213890000.0,31295000000,2956764000000,1304308000.0,7943000000,346183950.0,8740000000,424237875.0,58157000000,2512847000.0


In [38]:
# Reorder the columns: Year, the U.S. total, then each state's VMT / Gas pair
# in alphabetical state order (IA, MN, ND, SD, WI)
milesGas = milesGas.loc[:, [
    "Year", "U.S. Total",
    "IA VMT", "IA Gas",
    "MN VMT", "MN Gas",
    "ND VMT", "ND Gas",
    "SD VMT", "SD Gas",
    "WI VMT", "WI Gas",
]]

# Display the first rows of the reordered DataFrame
milesGas.head()

Unnamed: 0,Year,U.S. Total,IA VMT,IA Gas,MN VMT,MN Gas,ND VMT,ND Gas,SD VMT,SD Gas,WI VMT,WI Gas
0,2018,3240327000000,33507000000,1140457000.0,60438313272,2271234000.0,9866000000,396844125.0,9736000000,453202200.0,65884000000,2518508000.0
1,2017,3212347000000,33751000000,1181657000.0,59970745402,2331281000.0,9703000000,402176775.0,9638000000,441222000.0,65324000000,2625417000.0
2,2016,3174408000000,33263000000,1198349000.0,58856547322,2407911000.0,9740000000,400058325.0,9508000000,427050300.0,63870000000,2683346000.0
3,2015,3095373000000,33109000000,1149113000.0,57795772499,2367989000.0,10079000000,430921950.0,9325000000,418065150.0,62140000000,2653395000.0
4,2014,3025656000000,32332000000,1186003000.0,57393220230,2289606000.0,10437000000,443669175.0,9226000000,411052350.0,60044000000,2575633000.0


In [39]:
# Relabel the national total so it follows the "<region> VMT" naming of the state columns
milesGas = milesGas.rename({"U.S. Total": "USA VMT"}, axis = "columns")

# Display the first rows of the renamed DataFrame
milesGas.head()

Unnamed: 0,Year,USA VMT,IA VMT,IA Gas,MN VMT,MN Gas,ND VMT,ND Gas,SD VMT,SD Gas,WI VMT,WI Gas
0,2018,3240327000000,33507000000,1140457000.0,60438313272,2271234000.0,9866000000,396844125.0,9736000000,453202200.0,65884000000,2518508000.0
1,2017,3212347000000,33751000000,1181657000.0,59970745402,2331281000.0,9703000000,402176775.0,9638000000,441222000.0,65324000000,2625417000.0
2,2016,3174408000000,33263000000,1198349000.0,58856547322,2407911000.0,9740000000,400058325.0,9508000000,427050300.0,63870000000,2683346000.0
3,2015,3095373000000,33109000000,1149113000.0,57795772499,2367989000.0,10079000000,430921950.0,9325000000,418065150.0,62140000000,2653395000.0
4,2014,3025656000000,32332000000,1186003000.0,57393220230,2289606000.0,10437000000,443669175.0,9226000000,411052350.0,60044000000,2575633000.0


# Median Household Incomes by State (1990-2018)

In [40]:
# Reference link for the median household income data and the local CSV copy that gets loaded
incomeURL = "https://www2.census.gov/programs-surveys/cps/tables/time-series/historical-income-households/h08.xls"
incomeFilepath = "data/medianIncomes.csv"

# Print the CSV's size on disk (bytes -> MB, rounded to 2 places; files under ~5 KB show as 0.0)
# followed by the reference link
print(f"{incomeFilepath} is {round(os.path.getsize(incomeFilepath)/1024/1024, 2)} megabytes (MB).",
      "More info here:",
      incomeURL,
      sep = "\n")

# Load the CSV into a DataFrame and display it
medianIncomes = pd.read_csv(incomeFilepath)
medianIncomes

data/medianIncomes.csv is 0.0 megabytes (MB).
More info here:
https://www2.census.gov/programs-surveys/cps/tables/time-series/historical-income-households/h08.xls


Unnamed: 0,Year,Iowa,Minnesota,North Dakota,South Dakota,Wisconsin
0,2018,68718,71817,66505,59463,62629
1,2017,63467,69975,60167,56914,63482
2,2016,59094,70218,60184,57450,59817
3,2015,60855,68730,57415,55065,55425
4,2014,57810,67244,60730,53053,58080
5,2013,54855,60907,52888,54453,55258
6,2012,53442,61795,55766,49415,53079
7,2011,50219,57820,56361,47223,52058
8,2010,49016,52321,51006,45352,50351
9,2009,50721,56090,50075,45826,51237


In [41]:
# Replace the full state names with "<abbreviation> Income" column labels
medianIncomes = medianIncomes.rename(
    {
        "Iowa": "IA Income",
        "Minnesota": "MN Income",
        "North Dakota": "ND Income",
        "South Dakota": "SD Income",
        "Wisconsin": "WI Income",
    },
    axis = "columns",
)

# Display the first rows of the renamed DataFrame
medianIncomes.head()

Unnamed: 0,Year,IA Income,MN Income,ND Income,SD Income,WI Income
0,2018,68718,71817,66505,59463,62629
1,2017,63467,69975,60167,56914,63482
2,2016,59094,70218,60184,57450,59817
3,2015,60855,68730,57415,55065,55425
4,2014,57810,67244,60730,53053,58080


# Estimated Population by State (1990-2018)

In [42]:
# Reference link for the state population estimates and the local CSV copy that gets loaded
popURL = "https://www.census.gov/data/tables/time-series/demo/popest/2010s-state-total.html"
popFilepath = "data/statePopulations.csv"

# Print the CSV's size on disk (bytes -> MB, rounded to 2 places; files under ~5 KB show as 0.0)
# followed by the reference link
print(f"{popFilepath} is {round(os.path.getsize(popFilepath)/1024/1024, 2)} megabytes (MB).",
      "More info here:",
      popURL,
      sep = "\n")

# Load the CSV into a DataFrame and display it
statePops = pd.read_csv(popFilepath)
statePops

data/statePopulations.csv is 0.0 megabytes (MB).
More info here:
https://www.census.gov/data/tables/time-series/demo/popest/2010s-state-total.html


Unnamed: 0,Year,.Iowa,.Minnesota,.North Dakota,.South Dakota,.Wisconsin
0,1990,2776755,4387283,637364,696667,4902265
1,1991,2791227,4427429,634199,701445,4952675
2,1992,2806923,4471503,635427,708698,5004636
3,1993,2820525,4521709,637229,716258,5055318
4,1994,2829422,4566028,639762,723038,5095504
5,1995,2840860,4605445,641548,728251,5137004
6,1996,2848473,4647723,642858,730699,5173828
7,1997,2854396,4687726,640945,730855,5200235
8,1998,2861025,4726411,637808,730789,5222124
9,1999,2869413,4775508,633666,733133,5250446


In [43]:
# Remove every "." from the column names (the raw header prefixes each state with
# one, e.g. ".Iowa" -> "Iowa").
# regex = False forces a literal match: read as a regular expression, "." matches
# ANY character, so the result must not depend on the pandas version's default for
# `regex` or on its special handling of single-character patterns.
statePops.columns = statePops.columns.str.replace(".", "", regex = False)

# Display DataFrame
statePops

Unnamed: 0,Year,Iowa,Minnesota,North Dakota,South Dakota,Wisconsin
0,1990,2776755,4387283,637364,696667,4902265
1,1991,2791227,4427429,634199,701445,4952675
2,1992,2806923,4471503,635427,708698,5004636
3,1993,2820525,4521709,637229,716258,5055318
4,1994,2829422,4566028,639762,723038,5095504
5,1995,2840860,4605445,641548,728251,5137004
6,1996,2848473,4647723,642858,730699,5173828
7,1997,2854396,4687726,640945,730855,5200235
8,1998,2861025,4726411,637808,730789,5222124
9,1999,2869413,4775508,633666,733133,5250446


# State Landmass in Square Miles

In [44]:
# State landmasses: local CSV path and the URL with more information
areaFilepath = "data/stateArea.csv"
areaURL = "https://en.wikipedia.org/wiki/List_of_U.S._states_and_territories_by_area"

# Report the file size in MB (rounded to 2 decimals, so a small file prints 0.0) and the source link
print(f"{areaFilepath} is {round(os.path.getsize(areaFilepath)/1024/1024, 2)} megabytes (MB).\nMore info here:\n{areaURL}")

# Load the CSV into a DataFrame and show it as the cell output
area = pd.read_csv(areaFilepath)
area

data/stateArea.csv is 0.0 megabytes (MB).
More info here:
https://en.wikipedia.org/wiki/List_of_U.S._states_and_territories_by_area


Unnamed: 0,Year,Iowa,Minnesota,North Dakota,South Dakota,Wisconsin
0,2018,56272.81,86935.83,70698.32,77115.68,65496.38
1,2017,56272.81,86935.83,70698.32,77115.68,65496.38
2,2016,56272.81,86935.83,70698.32,77115.68,65496.38
3,2015,56272.81,86935.83,70698.32,77115.68,65496.38
4,2014,56272.81,86935.83,70698.32,77115.68,65496.38
5,2013,56272.81,86935.83,70698.32,77115.68,65496.38
6,2012,56272.81,86935.83,70698.32,77115.68,65496.38
7,2011,56272.81,86935.83,70698.32,77115.68,65496.38
8,2010,56272.81,86935.83,70698.32,77115.68,65496.38
9,2009,56272.81,86935.83,70698.32,77115.68,65496.38


In [45]:
# Left-join the landmass table onto the population table by year.
# Both tables label their columns with the state names, so the overlapping columns
# come back with pandas' default suffixes: "_x" (population) and "_y" (area).
stateCensus = statePops.merge(area, how = "left", on = "Year")

# Display DataFrame
stateCensus

Unnamed: 0,Year,Iowa_x,Minnesota_x,North Dakota_x,South Dakota_x,Wisconsin_x,Iowa_y,Minnesota_y,North Dakota_y,South Dakota_y,Wisconsin_y
0,1990,2776755,4387283,637364,696667,4902265,56272.81,86935.83,70698.32,77115.68,65496.38
1,1991,2791227,4427429,634199,701445,4952675,56272.81,86935.83,70698.32,77115.68,65496.38
2,1992,2806923,4471503,635427,708698,5004636,56272.81,86935.83,70698.32,77115.68,65496.38
3,1993,2820525,4521709,637229,716258,5055318,56272.81,86935.83,70698.32,77115.68,65496.38
4,1994,2829422,4566028,639762,723038,5095504,56272.81,86935.83,70698.32,77115.68,65496.38
5,1995,2840860,4605445,641548,728251,5137004,56272.81,86935.83,70698.32,77115.68,65496.38
6,1996,2848473,4647723,642858,730699,5173828,56272.81,86935.83,70698.32,77115.68,65496.38
7,1997,2854396,4687726,640945,730855,5200235,56272.81,86935.83,70698.32,77115.68,65496.38
8,1998,2861025,4726411,637808,730789,5222124,56272.81,86935.83,70698.32,77115.68,65496.38
9,1999,2869413,4775508,633666,733133,5250446,56272.81,86935.83,70698.32,77115.68,65496.38


In [46]:
# Replace the merge suffixes with readable labels:
# "<State>_x" holds population, "<State>_y" holds area
stateCensus = stateCensus.rename(columns = {
    **{"Iowa_x":"IA Pop", "Minnesota_x":"MN Pop", "North Dakota_x":"ND Pop",
       "South Dakota_x":"SD Pop", "Wisconsin_x":"WI Pop"},
    **{"Iowa_y":"IA Area", "Minnesota_y":"MN Area", "North Dakota_y":"ND Area",
       "South Dakota_y":"SD Area", "Wisconsin_y":"WI Area"},
})

# Display DataFrame
stateCensus

Unnamed: 0,Year,IA Pop,MN Pop,ND Pop,SD Pop,WI Pop,IA Area,MN Area,ND Area,SD Area,WI Area
0,1990,2776755,4387283,637364,696667,4902265,56272.81,86935.83,70698.32,77115.68,65496.38
1,1991,2791227,4427429,634199,701445,4952675,56272.81,86935.83,70698.32,77115.68,65496.38
2,1992,2806923,4471503,635427,708698,5004636,56272.81,86935.83,70698.32,77115.68,65496.38
3,1993,2820525,4521709,637229,716258,5055318,56272.81,86935.83,70698.32,77115.68,65496.38
4,1994,2829422,4566028,639762,723038,5095504,56272.81,86935.83,70698.32,77115.68,65496.38
5,1995,2840860,4605445,641548,728251,5137004,56272.81,86935.83,70698.32,77115.68,65496.38
6,1996,2848473,4647723,642858,730699,5173828,56272.81,86935.83,70698.32,77115.68,65496.38
7,1997,2854396,4687726,640945,730855,5200235,56272.81,86935.83,70698.32,77115.68,65496.38
8,1998,2861025,4726411,637808,730789,5222124,56272.81,86935.83,70698.32,77115.68,65496.38
9,1999,2869413,4775508,633666,733133,5250446,56272.81,86935.83,70698.32,77115.68,65496.38


In [47]:
# Population density = population / area, one state at a time.
# Each result is kept under its own name and also appended to stateCensus
# as a "<abbreviation> Dens" column (appended in IA, MN, ND, SD, WI order).
stateCensus["IA Dens"] = iowa = stateCensus["IA Pop"] / stateCensus["IA Area"]
stateCensus["MN Dens"] = minnesota = stateCensus["MN Pop"] / stateCensus["MN Area"]
stateCensus["ND Dens"] = northdakota = stateCensus["ND Pop"] / stateCensus["ND Area"]
stateCensus["SD Dens"] = southdakota = stateCensus["SD Pop"] / stateCensus["SD Area"]
stateCensus["WI Dens"] = wisconsin = stateCensus["WI Pop"] / stateCensus["WI Area"]

# Display DataFrame
stateCensus

Unnamed: 0,Year,IA Pop,MN Pop,ND Pop,SD Pop,WI Pop,IA Area,MN Area,ND Area,SD Area,WI Area,IA Dens,MN Dens,ND Dens,SD Dens,WI Dens
0,1990,2776755,4387283,637364,696667,4902265,56272.81,86935.83,70698.32,77115.68,65496.38,49.344524,50.465763,9.015264,9.034051,74.847877
1,1991,2791227,4427429,634199,701445,4952675,56272.81,86935.83,70698.32,77115.68,65496.38,49.601699,50.927552,8.970496,9.09601,75.617538
2,1992,2806923,4471503,635427,708698,5004636,56272.81,86935.83,70698.32,77115.68,65496.38,49.880626,51.434524,8.987866,9.190064,76.41088
3,1993,2820525,4521709,637229,716258,5055318,56272.81,86935.83,70698.32,77115.68,65496.38,50.122342,52.01203,9.013354,9.288098,77.184693
4,1994,2829422,4566028,639762,723038,5095504,56272.81,86935.83,70698.32,77115.68,65496.38,50.280446,52.52182,9.049182,9.376018,77.798254
5,1995,2840860,4605445,641548,728251,5137004,56272.81,86935.83,70698.32,77115.68,65496.38,50.483706,52.975223,9.074445,9.443618,78.431877
6,1996,2848473,4647723,642858,730699,5173828,56272.81,86935.83,70698.32,77115.68,65496.38,50.618993,53.461536,9.092974,9.475362,78.994106
7,1997,2854396,4687726,640945,730855,5200235,56272.81,86935.83,70698.32,77115.68,65496.38,50.724249,53.92168,9.065916,9.477385,79.397289
8,1998,2861025,4726411,637808,730789,5222124,56272.81,86935.83,70698.32,77115.68,65496.38,50.84205,54.366663,9.021544,9.476529,79.731491
9,1999,2869413,4775508,633666,733133,5250446,56272.81,86935.83,70698.32,77115.68,65496.38,50.991109,54.931413,8.962957,9.506925,80.163911


In [48]:
# Group the columns by state (Pop, Area, Dens for each), with Year first
stateCensus = stateCensus.loc[:, [
    "Year",
    "IA Pop", "IA Area", "IA Dens",
    "MN Pop", "MN Area", "MN Dens",
    "ND Pop", "ND Area", "ND Dens",
    "SD Pop", "SD Area", "SD Dens",
    "WI Pop", "WI Area", "WI Dens",
]]

# Display DataFrame
stateCensus

Unnamed: 0,Year,IA Pop,IA Area,IA Dens,MN Pop,MN Area,MN Dens,ND Pop,ND Area,ND Dens,SD Pop,SD Area,SD Dens,WI Pop,WI Area,WI Dens
0,1990,2776755,56272.81,49.344524,4387283,86935.83,50.465763,637364,70698.32,9.015264,696667,77115.68,9.034051,4902265,65496.38,74.847877
1,1991,2791227,56272.81,49.601699,4427429,86935.83,50.927552,634199,70698.32,8.970496,701445,77115.68,9.09601,4952675,65496.38,75.617538
2,1992,2806923,56272.81,49.880626,4471503,86935.83,51.434524,635427,70698.32,8.987866,708698,77115.68,9.190064,5004636,65496.38,76.41088
3,1993,2820525,56272.81,50.122342,4521709,86935.83,52.01203,637229,70698.32,9.013354,716258,77115.68,9.288098,5055318,65496.38,77.184693
4,1994,2829422,56272.81,50.280446,4566028,86935.83,52.52182,639762,70698.32,9.049182,723038,77115.68,9.376018,5095504,65496.38,77.798254
5,1995,2840860,56272.81,50.483706,4605445,86935.83,52.975223,641548,70698.32,9.074445,728251,77115.68,9.443618,5137004,65496.38,78.431877
6,1996,2848473,56272.81,50.618993,4647723,86935.83,53.461536,642858,70698.32,9.092974,730699,77115.68,9.475362,5173828,65496.38,78.994106
7,1997,2854396,56272.81,50.724249,4687726,86935.83,53.92168,640945,70698.32,9.065916,730855,77115.68,9.477385,5200235,65496.38,79.397289
8,1998,2861025,56272.81,50.84205,4726411,86935.83,54.366663,637808,70698.32,9.021544,730789,77115.68,9.476529,5222124,65496.38,79.731491
9,1999,2869413,56272.81,50.991109,4775508,86935.83,54.931413,633666,70698.32,8.962957,733133,77115.68,9.506925,5250446,65496.38,80.163911


### *Merge Mileage/Gas DataFrame with Census DataFrame*

In [49]:
# Left-join the census columns (population, area, density) onto the mileage/gas
# table by year; every milesGas row is kept
DataFrame = pd.merge(milesGas, stateCensus, how = "left", on = "Year")

# Display DataFrame
DataFrame

Unnamed: 0,Year,USA VMT,IA VMT,IA Gas,MN VMT,MN Gas,ND VMT,ND Gas,SD VMT,SD Gas,...,MN Dens,ND Pop,ND Area,ND Dens,SD Pop,SD Area,SD Dens,WI Pop,WI Area,WI Dens
0,2018,3240327000000,33507000000,1140457000.0,60438313272,2271234000.0,9866000000,396844125.0,9736000000,453202200.0,...,64.487209,758080,70698.32,10.722744,878698,77115.68,11.394544,5807406,65496.38,88.667587
1,2017,3212347000000,33751000000,1181657000.0,59970745402,2331281000.0,9703000000,402176775.0,9638000000,441222000.0,...,64.026881,754942,70698.32,10.678358,872868,77115.68,11.318943,5790186,65496.38,88.404672
2,2016,3174408000000,33263000000,1198349000.0,58856547322,2407911000.0,9740000000,400058325.0,9508000000,427050300.0,...,63.526672,754434,70698.32,10.671173,862996,77115.68,11.190928,5772628,65496.38,88.136596
3,2015,3095373000000,33109000000,1149113000.0,57795772499,2367989000.0,10079000000,430921950.0,9325000000,418065150.0,...,63.058373,754066,70698.32,10.665968,853988,77115.68,11.074116,5760940,65496.38,87.958144
4,2014,3025656000000,32332000000,1186003000.0,57393220230,2289606000.0,10437000000,443669175.0,9226000000,411052350.0,...,62.702329,737401,70698.32,10.430248,849129,77115.68,11.011107,5751525,65496.38,87.814395
5,2013,2988280000000,31542000000,1261610000.0,56993471770,2307029000.0,10100000000,435268425.0,9114000000,432163800.0,...,62.269826,722036,70698.32,10.212916,842316,77115.68,10.922759,5736754,65496.38,87.588871
6,2012,2968570000000,31581000000,1300947000.0,57018582582,2266559000.0,10093000000,417152025.0,9077000000,413389950.0,...,61.846111,701176,70698.32,9.917859,833566,77115.68,10.809293,5719960,65496.38,87.33246
7,2011,2950402000000,31411000000,1340687000.0,56675049785,2207060000.0,9166000000,391328850.0,8993000000,442646475.0,...,61.495278,685225,70698.32,9.692239,823579,77115.68,10.679787,5705288,65496.38,87.108448
8,2010,2967266000000,31579000000,1387475000.0,56762480425,2218309000.0,8303000000,370400025.0,8861000000,439432275.0,...,61.089058,674715,70698.32,9.543579,816166,77115.68,10.583658,5690475,65496.38,86.882283
9,2009,2956764000000,31295000000,1304308000.0,56970430780,2213890000.0,7943000000,346183950.0,8740000000,424237875.0,...,60.74829,664968,70698.32,9.405711,810814,77115.68,10.514256,5669264,65496.38,86.558433


In [50]:
# List every column label of the merged frame, one per line
for y in list(DataFrame.columns):
    print(y)

Year
USA VMT
IA VMT
IA Gas
MN VMT
MN Gas
ND VMT
ND Gas
SD VMT
SD Gas
WI VMT
WI Gas
IA Pop
IA Area
IA Dens
MN Pop
MN Area
MN Dens
ND Pop
ND Area
ND Dens
SD Pop
SD Area
SD Dens
WI Pop
WI Area
WI Dens


In [51]:
# Compact the labels: "<ST> <Metric>" becomes "<st><METRIC>" (e.g. "IA Gas" -> "iaGAS").
# The mapping covers the national VMT column plus the five metrics for each of the
# five states; "Year" is not in the mapping and keeps its name.
DataFrame = DataFrame.rename(columns = {
    "USA VMT": "usaVMT",
    **{f"{state} {metric}": f"{state.lower()}{metric.upper()}"
       for state in ("IA", "MN", "ND", "SD", "WI")
       for metric in ("VMT", "Gas", "Pop", "Area", "Dens")},
})

# Preview the first rows
DataFrame.head()

Unnamed: 0,Year,usaVMT,iaVMT,iaGAS,mnVMT,mnGAS,ndVMT,ndGAS,sdVMT,sdGAS,...,mnDENS,ndPOP,ndAREA,ndDENS,sdPOP,sdAREA,sdDENS,wiPOP,wiAREA,wiDENS
0,2018,3240327000000,33507000000,1140457000.0,60438313272,2271234000.0,9866000000,396844125.0,9736000000,453202200.0,...,64.487209,758080,70698.32,10.722744,878698,77115.68,11.394544,5807406,65496.38,88.667587
1,2017,3212347000000,33751000000,1181657000.0,59970745402,2331281000.0,9703000000,402176775.0,9638000000,441222000.0,...,64.026881,754942,70698.32,10.678358,872868,77115.68,11.318943,5790186,65496.38,88.404672
2,2016,3174408000000,33263000000,1198349000.0,58856547322,2407911000.0,9740000000,400058325.0,9508000000,427050300.0,...,63.526672,754434,70698.32,10.671173,862996,77115.68,11.190928,5772628,65496.38,88.136596
3,2015,3095373000000,33109000000,1149113000.0,57795772499,2367989000.0,10079000000,430921950.0,9325000000,418065150.0,...,63.058373,754066,70698.32,10.665968,853988,77115.68,11.074116,5760940,65496.38,87.958144
4,2014,3025656000000,32332000000,1186003000.0,57393220230,2289606000.0,10437000000,443669175.0,9226000000,411052350.0,...,62.702329,737401,70698.32,10.430248,849129,77115.68,11.011107,5751525,65496.38,87.814395


In [52]:
# List every renamed column label, one per line
for z in list(DataFrame.columns):
    print(z)

Year
usaVMT
iaVMT
iaGAS
mnVMT
mnGAS
ndVMT
ndGAS
sdVMT
sdGAS
wiVMT
wiGAS
iaPOP
iaAREA
iaDENS
mnPOP
mnAREA
mnDENS
ndPOP
ndAREA
ndDENS
sdPOP
sdAREA
sdDENS
wiPOP
wiAREA
wiDENS


In [53]:
# Nick's Code Begins Here

In [54]:
# Load the pre-cleaned CSV of state temperature highs and lows, then preview it
temp_df = pd.read_csv(filepath_or_buffer='data/state_temp_highs_and_lows.csv')
temp_df.head(n=5)

Unnamed: 0,Date,mnTempHigh,mnTempLow,iaTempHigh,iaTempLow,wiTempHigh,wiTempLow,ndTempHigh,ndTempLow,sdTempHigh,sdTempLow
0,2000,52.4,31.2,59.3,38.0,54.2,33.5,52.6,29.5,58.4,33.5
1,2001,53.4,32.7,59.3,38.8,55.6,35.0,53.7,30.8,58.4,34.6
2,2002,52.1,32.0,59.9,38.4,54.5,34.7,52.3,29.7,58.6,33.8
3,2003,52.4,30.7,58.9,36.9,54.0,32.4,52.5,29.0,58.4,33.7
4,2004,51.7,30.9,58.9,38.1,53.8,33.2,52.0,29.1,58.6,34.0


In [55]:
# Left-join the temperature columns onto the main DataFrame; the year is called
# "Year" on the left and "Date" on the right, so both key columns remain afterwards
DataFrame = DataFrame.merge(temp_df, how='left', left_on='Year', right_on='Date')

In [56]:
# "Date" was only the join key brought in from temp_df; "Year" stays as the year column
DataFrame = DataFrame.drop('Date', axis='columns')

In [57]:
# Load the commute-mode data for each state
commute_df = pd.read_csv(filepath_or_buffer='data/Commute_mode_2000__to_2019_data.csv')

In [58]:
# keep only the five midwest states
midwest_commute_df = commute_df.loc[
    commute_df['State'].isin(['Minnesota', 'Wisconsin', 'North Dakota', 'South Dakota', 'Iowa'])
]

# keep only the years 2000 through 2018 (both ends included)
midwest_commute_df = midwest_commute_df.loc[midwest_commute_df['Year'].between(2000, 2018)]
midwest_commute_df

Unnamed: 0,State,Mode,Year,Commute mode share (percent)
15,Iowa,Carpool,2000,0.108000
23,Minnesota,Carpool,2000,0.104000
34,North Dakota,Carpool,2000,0.100000
41,South Dakota,Carpool,2000,0.140000
50,Wisconsin,Carpool,2000,0.127000
...,...,...,...,...
3915,Iowa,Worked at home,2018,0.051177
3923,Minnesota,Worked at home,2018,0.060798
3934,North Dakota,Worked at home,2018,0.037365
3941,South Dakota,Worked at home,2018,0.049919


In [59]:
# swap each full state name for its lowercase two-letter abbreviation
# (the replacement is applied to matching values anywhere in the frame)
midwest_commute_df = midwest_commute_df.replace({
    'Minnesota': 'mn',
    'Iowa': 'ia',
    'North Dakota': 'nd',
    'South Dakota': 'sd',
    'Wisconsin': 'wi',
})

In [60]:
# build the future column names for the main DataFrame (stored as stateMode):
# state abbreviation + commute mode, with spaces and then commas removed
midwest_commute_df['stateMode'] = (
    (midwest_commute_df['State'] + midwest_commute_df['Mode'])
    .str.replace(" ","")
    .str.replace(",","")
)

In [61]:
# reshape to one row per year and one column per stateMode value,
# filled with the commute mode share
midwest_commute_df_formated = midwest_commute_df.pivot(
    values='Commute mode share (percent)',
    columns='stateMode',
    index='Year',
)
midwest_commute_df_formated

stateMode,iaBicycle,iaCarpool,iaDrovealone,iaPublictransportation,iaTaximotorcycleorother,iaWalked,iaWorkedathome,mnBicycle,mnCarpool,mnDrovealone,...,sdTaximotorcycleorother,sdWalked,sdWorkedathome,wiBicycle,wiCarpool,wiDrovealone,wiPublictransportation,wiTaximotorcycleorother,wiWalked,wiWorkedathome
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000,,0.108,0.786,0.01,0.009,0.04,0.047,,0.104,0.776,...,0.013,0.023,0.021,,0.127,0.803,0.008,0.009,0.029,0.024
2005,0.005431,0.100735,0.800977,0.007862,0.010169,0.03158,0.043246,0.005303,0.094981,0.789822,...,0.007195,0.040016,0.060855,0.006358,0.090278,0.811286,0.014979,0.009523,0.029258,0.038318
2010,0.00522,0.09702,0.79326,0.010999,0.009276,0.035891,0.048334,0.006517,0.085386,0.781922,...,0.012354,0.04384,0.052067,0.007523,0.088048,0.805139,0.01669,0.009324,0.031448,0.041827
2011,0.004708,0.0891,0.805276,0.010904,0.008858,0.035042,0.046113,0.0071,0.093057,0.777315,...,0.008492,0.042675,0.055357,0.006883,0.089087,0.798362,0.01963,0.008866,0.034014,0.043158
2012,0.005099,0.090446,0.798969,0.010867,0.011219,0.036396,0.047004,0.008136,0.089489,0.780284,...,0.010916,0.039457,0.057693,0.008734,0.089374,0.802066,0.017995,0.010028,0.032734,0.03907
2013,0.005526,0.085967,0.808612,0.010974,0.01013,0.037175,0.041616,0.007893,0.085778,0.783857,...,0.009883,0.041651,0.05241,0.008319,0.082066,0.805216,0.019802,0.008554,0.035409,0.040634
2014,0.004888,0.086139,0.808054,0.010435,0.009282,0.033641,0.04756,0.008867,0.088149,0.776997,...,0.009275,0.042341,0.055342,0.007928,0.081654,0.808791,0.018979,0.009663,0.032232,0.040752
2015,0.005778,0.086244,0.809587,0.013746,0.00953,0.034317,0.040797,0.008634,0.083212,0.781317,...,0.007861,0.031211,0.057953,0.008183,0.078866,0.812343,0.019135,0.009002,0.031603,0.040868
2016,0.004651,0.081657,0.81246,0.010776,0.008813,0.034385,0.047258,0.00704,0.085877,0.778296,...,0.009843,0.038315,0.055273,0.007031,0.082198,0.80692,0.017426,0.008973,0.030303,0.047148
2017,0.00509,0.079951,0.814114,0.008555,0.009203,0.031941,0.051146,0.007084,0.081501,0.780208,...,0.006978,0.033904,0.058456,0.006296,0.076339,0.811193,0.017505,0.009206,0.029962,0.049499


In [62]:
# left-join the pivoted commute shares onto the main DataFrame by year
DataFrame = DataFrame.merge(midwest_commute_df_formated, how='left', on='Year')

In [63]:
# load the pre-cleaned laws and incentives data for each state
laws_df = pd.read_csv(filepath_or_buffer='data/laws_and_incentives_cleaned.csv')

In [64]:
# left-join the laws and incentives data onto the main DataFrame by year
DataFrame = DataFrame.merge(laws_df, how='left', on='Year')

In [65]:
# Show the column labels of the merged DataFrame
print(DataFrame.columns)

Index(['Year', 'usaVMT', 'iaVMT', 'iaGAS', 'mnVMT', 'mnGAS', 'ndVMT', 'ndGAS',
       'sdVMT', 'sdGAS', 'wiVMT', 'wiGAS', 'iaPOP', 'iaAREA', 'iaDENS',
       'mnPOP', 'mnAREA', 'mnDENS', 'ndPOP', 'ndAREA', 'ndDENS', 'sdPOP',
       'sdAREA', 'sdDENS', 'wiPOP', 'wiAREA', 'wiDENS', 'mnTempHigh',
       'mnTempLow', 'iaTempHigh', 'iaTempLow', 'wiTempHigh', 'wiTempLow',
       'ndTempHigh', 'ndTempLow', 'sdTempHigh', 'sdTempLow', 'iaBicycle',
       'iaCarpool', 'iaDrovealone', 'iaPublictransportation',
       'iaTaximotorcycleorother', 'iaWalked', 'iaWorkedathome', 'mnBicycle',
       'mnCarpool', 'mnDrovealone', 'mnPublictransportation',
       'mnTaximotorcycleorother', 'mnWalked', 'mnWorkedathome', 'ndBicycle',
       'ndCarpool', 'ndDrovealone', 'ndPublictransportation',
       'ndTaximotorcycleorother', 'ndWalked', 'ndWorkedathome', 'sdBicycle',
       'sdCarpool', 'sdDrovealone', 'sdPublictransportation',
       'sdTaximotorcycleorother', 'sdWalked', 'sdWorkedathome', 'wiBicycle',
 

In [66]:
# Nick's Code Ends Here

In [67]:
# Export the merged annual data to CSV (header row written, row index omitted).
# NOTE(review): this cell runs before the GHG emissions columns are merged into
# DataFrame further down, so they are not part of the file written here. Confirm
# whether that is intended.
# Create the output folder first: to_csv does not create missing directories.
os.makedirs("resources", exist_ok = True)
DataFrame.to_csv("resources/annualData.csv", index = False, header = True)

# GHG Emissions by State, measured in CO2 equivalents (1990-2018)

In [68]:
# Carbon emissions: local CSV path and the URL with more information
carbonFilepath = "data/climatewatch-usemissions.csv"
carbonURL = "https://www.wri.org/data/climate-watch-historical-emissions-data-countries-us-states-unfccc"

# Report the file size in MB (rounded to 2 decimals) and the source link
print(f"{carbonFilepath} is {round(os.path.getsize(carbonFilepath)/1024/1024, 2)} megabytes (MB).\nMore info here:\n{carbonURL}")

# Load the CSV into a DataFrame and show it as the cell output
carbon = pd.read_csv(carbonFilepath)
carbon

data/climatewatch-usemissions.csv is 0.35 megabytes (MB).
More info here:
https://www.wri.org/data/climate-watch-historical-emissions-data-countries-us-states-unfccc


Unnamed: 0,State,Year,Total GHG Emissions Excluding LUCF (MtCO2e),Total GHG Emissions Including LUCF (MtCO2e),Total CO2 (excluding LUCF) (MtCO2e),Total CH4 (MtCO2e),Total N2O (MtCO2e),Total F-Gas (MtCO2e),Energy (MtCO2e),Industrial Processes (MtCO2e),...,Bunker Fuels (MtCO2e),Electric Power (MtCO2e),Commercial (MtCO2e),Residential (MtCO2e),Industrial (MtCO2e),Transportation (MtCO2e),Fugitive Emissions (MtCO2e),State GDP (Million US$ (chained 1997/2005)),Population (People),Total Energy Use (Thous. tonnes oil eq. (ktoe))
0,Alabama,1990,142.417267,81.645894,112.157128,23.710782,6.034132,0.515225,127.881865,3.442879,...,0.000000,50.580225,2.455356,3.233194,25.450395,29.582733,16.579961,83766.0,4050055,4.197098e+04
1,Alaska,1990,47.394637,44.956391,35.893979,11.185360,0.278483,0.036815,45.867810,1.093494,...,0.000000,2.612978,2.210446,1.612772,15.851075,12.300752,11.279787,28772.0,553290,1.471864e+04
2,Arizona,1990,71.687896,68.048443,63.291388,4.090110,3.480351,0.826048,64.545882,1.155930,...,0.000000,32.680547,1.915212,1.901865,3.887329,24.064867,0.096061,81606.0,3684097,2.366177e+04
3,Arkansas,1990,71.973584,24.753434,52.211375,9.563331,9.963139,0.235740,53.107620,1.151356,...,0.000000,21.697883,1.630468,2.559084,9.488513,16.882259,0.849414,44496.0,2356586,2.156930e+04
4,California,1990,430.660671,363.225554,368.717790,35.785491,23.600369,2.557020,379.550134,7.731291,...,0.000000,40.343922,19.101576,30.189209,71.881356,212.571891,5.462181,906103.0,29959515,1.876164e+05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1503,Washington,2018,97.296446,65.342674,82.046289,6.185934,4.346750,4.717473,81.620211,5.939311,...,0.027622,10.061289,4.766244,5.857520,13.327250,47.607909,0.000000,524486.9,7523869,2.078665e+06
1504,West Virginia,2018,115.714795,100.078012,89.895261,23.579975,1.029359,1.210201,109.725451,3.371314,...,0.000000,59.915487,1.828314,1.872126,11.468839,13.154700,21.485985,71858.7,1804291,8.329140e+05
1505,Wisconsin,2018,128.884615,109.999975,101.752437,12.627558,11.445894,3.058726,101.452129,4.234074,...,0.000000,39.238932,6.683348,10.194157,14.078687,31.257005,0.000000,303767.4,5807406,1.885868e+06
1506,Wyoming,2018,88.995627,100.170035,66.742306,18.373199,3.565572,0.314551,78.844546,3.421720,...,0.000000,39.958798,0.969447,0.990317,13.544744,8.110488,15.270751,38696.3,577601,5.585940e+05


In [69]:
# Keep only the rows for the five states of interest
emissions = carbon[carbon.State.isin(["Iowa", "Minnesota", "North Dakota", "South Dakota", "Wisconsin"])]

# Renumber the rows from 0, discarding the old index
emissions = emissions.reset_index(drop = True)

# Display DataFrame
emissions

Unnamed: 0,State,Year,Total GHG Emissions Excluding LUCF (MtCO2e),Total GHG Emissions Including LUCF (MtCO2e),Total CO2 (excluding LUCF) (MtCO2e),Total CH4 (MtCO2e),Total N2O (MtCO2e),Total F-Gas (MtCO2e),Energy (MtCO2e),Industrial Processes (MtCO2e),...,Bunker Fuels (MtCO2e),Electric Power (MtCO2e),Commercial (MtCO2e),Residential (MtCO2e),Industrial (MtCO2e),Transportation (MtCO2e),Fugitive Emissions (MtCO2e),State GDP (Million US$ (chained 1997/2005)),Population (People),Total Energy Use (Thous. tonnes oil eq. (ktoe))
0,Iowa,1990,110.915841,102.566194,65.620191,14.514050,30.527785,0.253815,65.026922,1.918627,...,0.000000,26.781547,3.208394,5.104766,12.801368,17.116484,0.014363,63460.0,2781018,2.383555e+04
1,Minnesota,1990,109.839495,101.088556,80.162016,8.782568,20.471405,0.423506,81.435380,0.498996,...,0.000000,29.753372,5.712355,8.226778,12.597242,25.145633,0.000000,120078.0,4389857,3.530511e+04
2,North Dakota,1990,53.127887,47.526957,40.856044,4.398852,7.730953,0.142038,42.146760,0.217319,...,0.000000,28.068425,0.855737,1.138863,6.106187,4.844608,1.132939,13161.0,637685,7.874918e+03
3,South Dakota,1990,28.386372,24.913264,12.130215,6.999835,9.201620,0.054702,12.619985,0.076289,...,0.000000,3.112679,0.692023,1.383120,2.124677,4.916453,0.391034,14696.0,697101,5.476271e+03
4,Wisconsin,1990,113.305703,94.052375,86.657388,14.027264,12.193197,0.427854,88.186750,0.814344,...,0.000000,33.383448,4.851392,9.621040,14.522145,25.808726,0.000000,115638.0,4904562,3.741558e+04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
140,Iowa,2018,136.704605,127.527828,85.857356,17.355389,31.819182,1.672678,82.959792,4.996532,...,0.000000,28.897697,4.283267,5.512929,23.457281,20.808618,0.000000,172844.7,3148618,1.616101e+06
141,Minnesota,2018,127.398881,117.619766,94.019543,9.814089,20.576959,2.988290,93.166898,3.514935,...,0.000017,26.341183,7.372015,10.065932,17.775893,31.611875,0.000000,337215.9,5606249,1.913919e+06
142,North Dakota,2018,94.269542,87.732223,61.427017,15.273381,17.157851,0.411293,72.335296,0.849768,...,0.000000,31.305894,1.184410,1.184930,11.426276,9.480680,17.753106,53472.8,758080,6.609590e+05
143,South Dakota,2018,42.687138,38.637315,16.160010,9.283869,16.778220,0.465039,15.621569,1.142578,...,0.000000,2.841137,0.871226,1.175242,3.842051,6.856952,0.034960,47287.0,878698,3.968370e+05


In [70]:
# List every column label of the emissions frame, one per line
for a in list(emissions.columns):
    print(a)

State
Year
Total GHG Emissions Excluding LUCF (MtCO2e)
Total GHG Emissions Including LUCF (MtCO2e)
Total CO2 (excluding LUCF) (MtCO2e)
Total CH4 (MtCO2e)
Total N2O (MtCO2e)
Total F-Gas (MtCO2e)
Energy (MtCO2e)
Industrial Processes (MtCO2e)
Agriculture (MtCO2e)
Waste (MtCO2e)
Land Use and Forestry (MtCO2e)
Bunker Fuels (MtCO2e)
Electric Power (MtCO2e)
Commercial (MtCO2e)
Residential (MtCO2e)
Industrial (MtCO2e)
Transportation (MtCO2e)
Fugitive Emissions (MtCO2e)
State GDP (Million US$ (chained 1997/2005))
Population (People)
Total Energy Use (Thous. tonnes oil eq. (ktoe))


In [71]:
# Take an independent copy holding just the year, the state and the
# transportation emissions column
carbonEmissions = emissions.loc[:, ["Year", "State", "Transportation (MtCO2e)"]].copy()

# Preview the first rows
carbonEmissions.head()

Unnamed: 0,Year,State,Transportation (MtCO2e)
0,1990,Iowa,17.116484
1,1990,Minnesota,25.145633
2,1990,North Dakota,4.844608
3,1990,South Dakota,4.916453
4,1990,Wisconsin,25.808726


In [72]:
# Shorten the transportation emissions label to "Emissions"
carbonEmissions = carbonEmissions.rename({"Transportation (MtCO2e)":"Emissions"}, axis = "columns")

# Preview the first rows
carbonEmissions.head()

Unnamed: 0,Year,State,Emissions
0,1990,Iowa,17.116484
1,1990,Minnesota,25.145633
2,1990,North Dakota,4.844608
3,1990,South Dakota,4.916453
4,1990,Wisconsin,25.808726


In [73]:
# Reshape with pivot: one row per year, one column per state, filled with Emissions
carbonEmissions = carbonEmissions.pivot(
    values = "Emissions",
    columns = "State",
    index = "Year",
)

# Display DataFrame
carbonEmissions

State,Iowa,Minnesota,North Dakota,South Dakota,Wisconsin
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1990,17.116484,25.145633,4.844608,4.916453,25.808726
1991,16.564712,26.3108,4.913473,4.687325,25.71648
1992,16.431845,27.720538,5.14365,5.288593,26.368581
1993,17.064168,29.233824,5.402739,5.384092,27.180918
1994,18.053886,30.217382,5.357596,5.72423,28.721214
1995,18.724337,31.288081,5.362954,5.841534,29.297354
1996,20.019862,32.2763,5.496267,5.838887,29.752413
1997,19.550619,31.868159,5.582767,5.688363,29.899735
1998,20.188845,33.368612,5.017909,5.854561,31.497489
1999,19.991046,35.429096,5.922174,5.997861,32.150269


In [74]:
# Copy the Year index into a regular "Year" column
# (this assignment leaves the index itself in place)
carbonEmissions["Year"] = carbonEmissions.index

# Display DataFrame
carbonEmissions.head()

State,Iowa,Minnesota,North Dakota,South Dakota,Wisconsin,Year
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1990,17.116484,25.145633,4.844608,4.916453,25.808726,1990
1991,16.564712,26.3108,4.913473,4.687325,25.71648,1991
1992,16.431845,27.720538,5.14365,5.288593,26.368581,1992
1993,17.064168,29.233824,5.402739,5.384092,27.180918,1993
1994,18.053886,30.217382,5.357596,5.72423,28.721214,1994


In [75]:
# Swap the Year index for a plain 0..n-1 index; the old index is discarded
# because its values already live in the "Year" column
carbonEmissions = carbonEmissions.reset_index(drop = True)

# Display DataFrame
carbonEmissions

State,Iowa,Minnesota,North Dakota,South Dakota,Wisconsin,Year
0,17.116484,25.145633,4.844608,4.916453,25.808726,1990
1,16.564712,26.3108,4.913473,4.687325,25.71648,1991
2,16.431845,27.720538,5.14365,5.288593,26.368581,1992
3,17.064168,29.233824,5.402739,5.384092,27.180918,1993
4,18.053886,30.217382,5.357596,5.72423,28.721214,1994
5,18.724337,31.288081,5.362954,5.841534,29.297354,1995
6,20.019862,32.2763,5.496267,5.838887,29.752413,1996
7,19.550619,31.868159,5.582767,5.688363,29.899735,1997
8,20.188845,33.368612,5.017909,5.854561,31.497489,1998
9,19.991046,35.429096,5.922174,5.997861,32.150269,1999


In [76]:
# Tidy the pivoted table in one chain:
#   1. put Year first, followed by the five states
#   2. relabel each state column as "<abbreviation>GHG"
#   3. order the rows from the most recent year to the oldest
#   4. renumber the rows from 0
carbonEmissions = (
    carbonEmissions[["Year", "Iowa", "Minnesota", "North Dakota", "South Dakota", "Wisconsin"]]
    .rename(columns = {"Iowa":"iaGHG", "Minnesota":"mnGHG", "North Dakota":"ndGHG",
                       "South Dakota":"sdGHG", "Wisconsin":"wiGHG"})
    .sort_values("Year", ascending = False)
    .reset_index(drop = True)
)

# Display DataFrame
carbonEmissions

State,Year,iaGHG,mnGHG,ndGHG,sdGHG,wiGHG
0,2018,20.808618,31.611875,9.48068,6.856952,31.257005
1,2017,20.600683,31.390331,8.930514,6.711773,29.37521
2,2016,21.6935,31.904079,8.440201,6.927084,29.988725
3,2015,20.568721,30.408227,9.398703,6.953502,30.318151
4,2014,21.278089,30.787186,10.149001,6.860861,30.751677
5,2013,20.658018,30.666647,9.538938,6.599205,28.580062
6,2012,20.301501,32.215651,9.144185,6.875894,28.870989
7,2011,21.222488,31.813273,8.032738,6.489779,28.91537
8,2010,21.055694,32.164674,6.876221,6.502886,30.012055
9,2009,21.117831,32.333745,6.022292,6.288773,29.549675


In [77]:
# Clear the name attached to the column axis (it renders as "State" in the
# header row of the previous output); the column labels themselves are unchanged.
carbonEmissions = carbonEmissions.rename_axis(None, axis="columns")

# Preview the first rows
carbonEmissions.head()

Unnamed: 0,Year,iaGHG,mnGHG,ndGHG,sdGHG,wiGHG
0,2018,20.808618,31.611875,9.48068,6.856952,31.257005
1,2017,20.600683,31.390331,8.930514,6.711773,29.37521
2,2016,21.6935,31.904079,8.440201,6.927084,29.988725
3,2015,20.568721,30.408227,9.398703,6.953502,30.318151
4,2014,21.278089,30.787186,10.149001,6.860861,30.751677


### *Merge DataFrame with carbonEmissions*

In [78]:
# Left-join the per-state GHG columns onto the combined frame, matching on Year.
# A left join keeps every row of DataFrame; years with no match in
# carbonEmissions get NaN in the new columns.
DataFrame = pd.merge(DataFrame, carbonEmissions, on="Year", how="left")

# Show the merged frame
DataFrame

Unnamed: 0,Year,usaVMT,iaVMT,iaGAS,mnVMT,mnGAS,ndVMT,ndGAS,sdVMT,sdGAS,...,iaLaws,mnLaws,sdLaws,ndLaws,wiLaws,iaGHG,mnGHG,ndGHG,sdGHG,wiGHG
0,2018,3240327000000,33507000000,1140457000.0,60438313272,2271234000.0,9866000000,396844125.0,9736000000,453202200.0,...,18.0,28.0,12.0,13.0,19.0,20.808618,31.611875,9.48068,6.856952,31.257005
1,2017,3212347000000,33751000000,1181657000.0,59970745402,2331281000.0,9703000000,402176775.0,9638000000,441222000.0,...,16.0,23.0,12.0,13.0,18.0,20.600683,31.390331,8.930514,6.711773,29.37521
2,2016,3174408000000,33263000000,1198349000.0,58856547322,2407911000.0,9740000000,400058325.0,9508000000,427050300.0,...,14.0,20.0,12.0,12.0,13.0,21.6935,31.904079,8.440201,6.927084,29.988725
3,2015,3095373000000,33109000000,1149113000.0,57795772499,2367989000.0,10079000000,430921950.0,9325000000,418065150.0,...,11.0,17.0,12.0,12.0,13.0,20.568721,30.408227,9.398703,6.953502,30.318151
4,2014,3025656000000,32332000000,1186003000.0,57393220230,2289606000.0,10437000000,443669175.0,9226000000,411052350.0,...,11.0,17.0,12.0,12.0,13.0,21.278089,30.787186,10.149001,6.860861,30.751677
5,2013,2988280000000,31542000000,1261610000.0,56993471770,2307029000.0,10100000000,435268425.0,9114000000,432163800.0,...,7.0,15.0,12.0,12.0,12.0,20.658018,30.666647,9.538938,6.599205,28.580062
6,2012,2968570000000,31581000000,1300947000.0,57018582582,2266559000.0,10093000000,417152025.0,9077000000,413389950.0,...,6.0,12.0,12.0,10.0,12.0,20.301501,32.215651,9.144185,6.875894,28.870989
7,2011,2950402000000,31411000000,1340687000.0,56675049785,2207060000.0,9166000000,391328850.0,8993000000,442646475.0,...,6.0,12.0,10.0,10.0,12.0,21.222488,31.813273,8.032738,6.489779,28.91537
8,2010,2967266000000,31579000000,1387475000.0,56762480425,2218309000.0,8303000000,370400025.0,8861000000,439432275.0,...,2.0,11.0,9.0,7.0,12.0,21.055694,32.164674,6.876221,6.502886,30.012055
9,2009,2956764000000,31295000000,1304308000.0,56970430780,2213890000.0,7943000000,346183950.0,8740000000,424237875.0,...,2.0,11.0,9.0,7.0,7.0,21.117831,32.333745,6.022292,6.288773,29.549675


In [79]:
# List every column of the combined frame, one name per line
for columnName in DataFrame.columns.tolist():
    print(columnName)

Year
usaVMT
iaVMT
iaGAS
mnVMT
mnGAS
ndVMT
ndGAS
sdVMT
sdGAS
wiVMT
wiGAS
iaPOP
iaAREA
iaDENS
mnPOP
mnAREA
mnDENS
ndPOP
ndAREA
ndDENS
sdPOP
sdAREA
sdDENS
wiPOP
wiAREA
wiDENS
mnTempHigh
mnTempLow
iaTempHigh
iaTempLow
wiTempHigh
wiTempLow
ndTempHigh
ndTempLow
sdTempHigh
sdTempLow
iaBicycle
iaCarpool
iaDrovealone
iaPublictransportation
iaTaximotorcycleorother
iaWalked
iaWorkedathome
mnBicycle
mnCarpool
mnDrovealone
mnPublictransportation
mnTaximotorcycleorother
mnWalked
mnWorkedathome
ndBicycle
ndCarpool
ndDrovealone
ndPublictransportation
ndTaximotorcycleorother
ndWalked
ndWorkedathome
sdBicycle
sdCarpool
sdDrovealone
sdPublictransportation
sdTaximotorcycleorother
sdWalked
sdWorkedathome
wiBicycle
wiCarpool
wiDrovealone
wiPublictransportation
wiTaximotorcycleorother
wiWalked
wiWorkedathome
iaLaws
mnLaws
sdLaws
ndLaws
wiLaws
iaGHG
mnGHG
ndGHG
sdGHG
wiGHG


In [80]:
# Save the combined frame as CSV: column names on the first line, row index omitted
DataFrame.to_csv(path_or_buf="resources/annualData.csv", header=True, index=False)

### *Merge DataFrame with EV Market Share Data*

In [81]:
# Left-join the EV market share figures onto the combined frame, matching on Year.
# A left join keeps every row of DataFrame; years with no match in shareData get NaN.
DataFrame = pd.merge(DataFrame, shareData, on="Year", how="left")

# Show the merged frame
DataFrame

Unnamed: 0,Year,usaVMT,iaVMT,iaGAS,mnVMT,mnGAS,ndVMT,ndGAS,sdVMT,sdGAS,...,mnLaws,sdLaws,ndLaws,wiLaws,iaGHG,mnGHG,ndGHG,sdGHG,wiGHG,EV Market Share
0,2018,3240327000000,33507000000,1140457000.0,60438313272,2271234000.0,9866000000,396844125.0,9736000000,453202200.0,...,28.0,12.0,13.0,19.0,20.808618,31.611875,9.48068,6.856952,31.257005,0.0254
1,2017,3212347000000,33751000000,1181657000.0,59970745402,2331281000.0,9703000000,402176775.0,9638000000,441222000.0,...,23.0,12.0,13.0,18.0,20.600683,31.390331,8.930514,6.711773,29.37521,0.012
2,2016,3174408000000,33263000000,1198349000.0,58856547322,2407911000.0,9740000000,400058325.0,9508000000,427050300.0,...,20.0,12.0,12.0,13.0,21.6935,31.904079,8.440201,6.927084,29.988725,0.0099
3,2015,3095373000000,33109000000,1149113000.0,57795772499,2367989000.0,10079000000,430921950.0,9325000000,418065150.0,...,17.0,12.0,12.0,13.0,20.568721,30.408227,9.398703,6.953502,30.318151,0.0094
4,2014,3025656000000,32332000000,1186003000.0,57393220230,2289606000.0,10437000000,443669175.0,9226000000,411052350.0,...,17.0,12.0,12.0,13.0,21.278089,30.787186,10.149001,6.860861,30.751677,0.0082
5,2013,2988280000000,31542000000,1261610000.0,56993471770,2307029000.0,10100000000,435268425.0,9114000000,432163800.0,...,15.0,12.0,12.0,12.0,20.658018,30.666647,9.538938,6.599205,28.580062,0.0064
6,2012,2968570000000,31581000000,1300947000.0,57018582582,2266559000.0,10093000000,417152025.0,9077000000,413389950.0,...,12.0,12.0,10.0,12.0,20.301501,32.215651,9.144185,6.875894,28.870989,0.0021
7,2011,2950402000000,31411000000,1340687000.0,56675049785,2207060000.0,9166000000,391328850.0,8993000000,442646475.0,...,12.0,10.0,10.0,12.0,21.222488,31.813273,8.032738,6.489779,28.91537,0.0017
8,2010,2967266000000,31579000000,1387475000.0,56762480425,2218309000.0,8303000000,370400025.0,8861000000,439432275.0,...,11.0,9.0,7.0,12.0,21.055694,32.164674,6.876221,6.502886,30.012055,0.0
9,2009,2956764000000,31295000000,1304308000.0,56970430780,2213890000.0,7943000000,346183950.0,8740000000,424237875.0,...,11.0,9.0,7.0,7.0,21.117831,32.333745,6.022292,6.288773,29.549675,0.0


In [82]:
# Give the market-share column a short name without spaces
DataFrame = DataFrame.rename({"EV Market Share": "evSHARE"}, axis="columns")

# Show the frame with the renamed column
DataFrame

Unnamed: 0,Year,usaVMT,iaVMT,iaGAS,mnVMT,mnGAS,ndVMT,ndGAS,sdVMT,sdGAS,...,mnLaws,sdLaws,ndLaws,wiLaws,iaGHG,mnGHG,ndGHG,sdGHG,wiGHG,evSHARE
0,2018,3240327000000,33507000000,1140457000.0,60438313272,2271234000.0,9866000000,396844125.0,9736000000,453202200.0,...,28.0,12.0,13.0,19.0,20.808618,31.611875,9.48068,6.856952,31.257005,0.0254
1,2017,3212347000000,33751000000,1181657000.0,59970745402,2331281000.0,9703000000,402176775.0,9638000000,441222000.0,...,23.0,12.0,13.0,18.0,20.600683,31.390331,8.930514,6.711773,29.37521,0.012
2,2016,3174408000000,33263000000,1198349000.0,58856547322,2407911000.0,9740000000,400058325.0,9508000000,427050300.0,...,20.0,12.0,12.0,13.0,21.6935,31.904079,8.440201,6.927084,29.988725,0.0099
3,2015,3095373000000,33109000000,1149113000.0,57795772499,2367989000.0,10079000000,430921950.0,9325000000,418065150.0,...,17.0,12.0,12.0,13.0,20.568721,30.408227,9.398703,6.953502,30.318151,0.0094
4,2014,3025656000000,32332000000,1186003000.0,57393220230,2289606000.0,10437000000,443669175.0,9226000000,411052350.0,...,17.0,12.0,12.0,13.0,21.278089,30.787186,10.149001,6.860861,30.751677,0.0082
5,2013,2988280000000,31542000000,1261610000.0,56993471770,2307029000.0,10100000000,435268425.0,9114000000,432163800.0,...,15.0,12.0,12.0,12.0,20.658018,30.666647,9.538938,6.599205,28.580062,0.0064
6,2012,2968570000000,31581000000,1300947000.0,57018582582,2266559000.0,10093000000,417152025.0,9077000000,413389950.0,...,12.0,12.0,10.0,12.0,20.301501,32.215651,9.144185,6.875894,28.870989,0.0021
7,2011,2950402000000,31411000000,1340687000.0,56675049785,2207060000.0,9166000000,391328850.0,8993000000,442646475.0,...,12.0,10.0,10.0,12.0,21.222488,31.813273,8.032738,6.489779,28.91537,0.0017
8,2010,2967266000000,31579000000,1387475000.0,56762480425,2218309000.0,8303000000,370400025.0,8861000000,439432275.0,...,11.0,9.0,7.0,12.0,21.055694,32.164674,6.876221,6.502886,30.012055,0.0
9,2009,2956764000000,31295000000,1304308000.0,56970430780,2213890000.0,7943000000,346183950.0,8740000000,424237875.0,...,11.0,9.0,7.0,7.0,21.117831,32.333745,6.022292,6.288773,29.549675,0.0


In [83]:
# Put the rows in chronological order (oldest year first), then renumber them
# 0..n-1; drop=True discards the pre-sort index instead of keeping it as a column.
DataFrame = DataFrame.sort_values(by="Year", ascending=True).reset_index(drop=True)

# Preview the first rows
DataFrame.head()

Unnamed: 0,Year,usaVMT,iaVMT,iaGAS,mnVMT,mnGAS,ndVMT,ndGAS,sdVMT,sdGAS,...,mnLaws,sdLaws,ndLaws,wiLaws,iaGHG,mnGHG,ndGHG,sdGHG,wiGHG,evSHARE
0,1990,2147501000000,23165000000,1419690000.0,39216794645,1975783000.0,5957000000,320141625.0,6985000000,376353600.0,...,,,,,17.116484,25.145633,4.844608,4.916453,25.808726,0.0
1,1991,2172214000000,23572000000,1380901000.0,39301381935,1968734000.0,5999000000,323136675.0,6690000000,380517450.0,...,,,,,16.564712,26.3108,4.913473,4.687325,25.71648,0.0
2,1992,2247152000000,24411000000,1381485000.0,41685288180,1983161000.0,6105000000,334970775.0,7176000000,402067200.0,...,,,,,16.431845,27.720538,5.14365,5.288593,26.368581,0.0
3,1993,2296705000000,25396000000,1386306000.0,42473511890,2043391000.0,6204000000,361159200.0,7352000000,403966500.0,...,,,,,17.064168,29.233824,5.402739,5.384092,27.180918,0.0
4,1994,2357587000000,26039000000,1432109000.0,43262537865,2156034000.0,6388000000,364556025.0,7550000000,418759125.0,...,,,,,18.053886,30.217382,5.357596,5.72423,28.721214,0.0


In [84]:
# Write the year-sorted frame (now including evSHARE) to the same CSV path used by
# the earlier export, replacing that file; header row kept, row index omitted.
DataFrame.to_csv(path_or_buf="resources/annualData.csv", header=True, index=False)

In [85]:
# Matt's Code Ends Here