In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import gmaps

# Google developer API key
from config import gkey

# Authenticate gmaps with the Google developer API key loaded from config
gmaps.configure(api_key=gkey)

# Load the county-level coordinate table; the file is decoded as ISO-8859-1
csv_path_county = "../data/County_Coordinates.csv"
county_df = pd.read_csv(
    csv_path_county,
    sep=",",
    encoding="ISO-8859-1",
)
county_df.head()


Unnamed: 0,county,state,population,lat,lon,lat_gmap,lon_gmap
0,Abbeville County,sc,16713,34.3015,-82.42675,34.189122,-82.475276
1,Acadia Parish,la,44719,30.249556,-92.393,30.229735,-92.381362
2,Accomack County,va,33115,37.78425,-75.64975,37.706332,-75.806908
3,Ada County,id,417501,43.607966,-116.273847,43.478767,-116.24203
4,Adair County,ia,4561,41.28,-94.4855,41.274173,-94.479976


In [2]:
# Load the cleaned city-level solar data; the file is decoded as ISO-8859-1
csv_path_clean = "../data/Cleaned_Data.csv"
solar_df = pd.read_csv(
    csv_path_clean,
    sep=",",
    encoding="ISO-8859-1",
)
solar_df.head()

Unnamed: 0.1,Unnamed: 0,tile_count,solar_system_count,total_panel_area,fips,average_household_income,county,education_bachelor,education_college,education_doctoral,...,incentive_residential_state_level,incentive_nonresidential_state_level,net_metering,feedin_tariff,cooperate_tax,property_tax,sales_tax,rebate,avg_electricity_retail_rate,Solar_Status
0,0,0,0,0.0,27145011200,70352.78987,Stearns County,569,1690,13,...,11,13,34,0,0,25,12,0,9.46,No
1,2,3,3,64.505776,27145011302,71496.88658,Stearns County,854,1459,31,...,11,13,34,0,0,25,12,0,9.46,Yes
2,3,0,0,0.0,27145011304,86840.15275,Stearns County,640,1116,68,...,11,13,34,0,0,25,12,0,9.46,No
3,4,5,5,164.583303,27145011400,89135.3156,Stearns County,654,1314,15,...,11,13,34,0,0,25,12,0,9.46,Yes
4,5,0,0,0.0,27145011500,62225.90361,Stearns County,522,1395,24,...,11,13,34,0,0,25,12,0,9.46,No


In [27]:
# Per-row population-scaled totals; summed per county later, they become the
# numerators of the population-weighted income and Democratic-vote averages.
solar_df["county_personal_income"] = solar_df["population"].mul(
    solar_df["per_capita_income"]
)
solar_df["total_dem_voters_weighted"] = solar_df["population"].mul(
    solar_df["voting_2016_dem_percentage"]
)

# Transpose the first three rows so each column name sits next to sample
# values, then export that view as a quick units/categories reference.
units_df = solar_df.head(3).transpose()
units_df.to_csv(
    "../data/Units_Data.csv",
    index=True,
    header=True,
    encoding="ISO-8859-1",
)

In [30]:
# Group the rows of solar_df so each (county, state) pair becomes one record
grouped_location = solar_df.groupby(["county", "state"])

# Total population per (county, state); shared denominator for the ratios below
county_population = grouped_location["population"].sum()

# Start from an empty frame; the first assigned Series supplies the
# (county, state) index that every later column aligns to.
location_df = pd.DataFrame()

# Dependent variables: total panel area and solar systems per person
location_df["total_panel_area"] = grouped_location["total_panel_area"].sum()
location_df["number_of_solar_system_per_person"] = (
    grouped_location["solar_system_count"].sum().div(county_population)
)

# Welfare/political variables as population-weighted averages
location_df["per_capita_income"] = (
    grouped_location["county_personal_income"].sum().div(county_population)
)
location_df["voting_2016_dem_weighted"] = (
    grouped_location["total_dem_voters_weighted"].sum().div(county_population)
)

# Remaining independent variables, each summarised by its plain group mean
independent_var = [
    "housing_unit_median_value",
    "daily_solar_radiation",
    "electricity_price_residential",
    "electricity_price_commercial",
    "electricity_price_industrial",
    "incentive_count_residential",
    "incentive_count_nonresidential",
    "education_high_school_graduate_rate",
]


def grouped_mean(x):
    """Store the per-(county, state) mean of column ``x`` in ``location_df``.

    Writes into the notebook-level ``location_df`` using the notebook-level
    ``grouped_location`` groupby object; returns nothing.
    """
    location_df[x] = grouped_location[x].mean()


# Add one mean column per independent variable, in the order listed above
for var in independent_var:
    grouped_mean(var)

# Move county and state out of the index so they show up as ordinary columns
location_df = location_df.reset_index()
location_df.head()

Unnamed: 0,county,state,total_panel_area,number_of_solar_system_per_person,per_capita_income,voting_2016_dem_weighted,housing_unit_median_value,daily_solar_radiation,electricity_price_residential,electricity_price_commercial,electricity_price_industrial,incentive_count_residential,incentive_count_nonresidential,education_high_school_graduate_rate
0,Abbeville County,sc,55.207033,0.00018,19351.627655,0.346139,101050.0,4.31,12.57,10.21,6.05,16,25,0.359489
1,Acadia Parish,la,2312.784175,0.001029,20755.342472,0.205872,90300.0,4.36,9.33,8.66,5.41,16,17,0.402662
2,Accomack County,va,271.344537,0.000242,23230.53281,0.42761,151862.5,4.37,11.37,8.21,6.95,19,16,0.408055
3,Ada County,id,19231.515912,0.001293,28850.999202,0.386917,199111.864407,4.45,9.93,7.8,6.6,14,19,0.211122
4,Adair County,ia,568.099858,0.001096,26577.583205,0.299814,92750.0,3.94,11.63,8.92,5.9,18,26,0.462849


In [31]:
# Attach the aggregated solar/demographic columns to each county's coordinates.
# The left join keeps every row of county_df; counties with no match in
# location_df end up with NaN in the aggregated columns.
new_df = county_df.merge(location_df, how="left", on=["county", "state"])
new_df.head()

Unnamed: 0,county,state,population,lat,lon,lat_gmap,lon_gmap,total_panel_area,number_of_solar_system_per_person,per_capita_income,voting_2016_dem_weighted,housing_unit_median_value,daily_solar_radiation,electricity_price_residential,electricity_price_commercial,electricity_price_industrial,incentive_count_residential,incentive_count_nonresidential,education_high_school_graduate_rate
0,Abbeville County,sc,16713,34.3015,-82.42675,34.189122,-82.475276,55.207033,0.00018,19351.627655,0.346139,101050.0,4.31,12.57,10.21,6.05,16,25,0.359489
1,Acadia Parish,la,44719,30.249556,-92.393,30.229735,-92.381362,2312.784175,0.001029,20755.342472,0.205872,90300.0,4.36,9.33,8.66,5.41,16,17,0.402662
2,Accomack County,va,33115,37.78425,-75.64975,37.706332,-75.806908,271.344537,0.000242,23230.53281,0.42761,151862.5,4.37,11.37,8.21,6.95,19,16,0.408055
3,Ada County,id,417501,43.607966,-116.273847,43.478767,-116.24203,19231.515912,0.001293,28850.999202,0.386917,199111.864407,4.45,9.93,7.8,6.6,14,19,0.211122
4,Adair County,ia,4561,41.28,-94.4855,41.274173,-94.479976,568.099858,0.001096,26577.583205,0.299814,92750.0,3.94,11.63,8.92,5.9,18,26,0.462849


In [33]:
#create_map function for two layered map
#x is heat map variable
#y is symbol map variable

def create_map_country(x, y, df=None, point_radius=0.45):
    """Build a two-layer Google map: a heat map of ``x`` under markers shaded by ``y``.

    Parameters
    ----------
    x : str
        Name of the column used as the heat-map weight of each county.
    y : str
        Name of the column shown by the symbol layer. Its values are min-max
        normalized to [0, 1] and used as each marker's fill/stroke opacity.
    df : pandas.DataFrame, optional
        Table providing ``lat_gmap``, ``lon_gmap``, ``x`` and ``y``.
        Defaults to the notebook-level ``new_df``.
    point_radius : float, optional
        Radius handed to the heat-map layer. Defaults to 0.45.

    Returns
    -------
    The gmaps figure holding both layers.

    Raises
    ------
    ValueError
        If no row has values for the coordinates, ``x`` and ``y``.
    """
    if df is None:
        df = new_df

    # new_df is produced by a left merge, so counties without solar records
    # carry NaN. Drop incomplete rows so NaN never reaches the heat-map
    # weights or the marker opacities.
    required = list(dict.fromkeys(["lat_gmap", "lon_gmap", x, y]))
    plot_df = df.dropna(subset=required)
    if plot_df.empty:
        raise ValueError(f"no rows with complete data for columns {required}")

    # gmaps latitude/longitude of each county
    coordinates = plot_df[["lat_gmap", "lon_gmap"]]

    # Heat-map intensity is proportional to the values of column x
    density = plot_df[x].astype(float).tolist()

    # Keep the figure in a variable so several layers can be added to it
    fig = gmaps.figure()
    heat_layer = gmaps.heatmap_layer(
        coordinates,
        weights=density,
        point_radius=point_radius,
        dissipating=False,
    )
    fig.add_layer(heat_layer)

    # Min-max normalize column y to [0, 1]. Series.min()/max() are used
    # instead of the built-ins, whose result depends on element order when
    # NaN is present.
    data = plot_df[y].astype(float)
    low, high = data.min(), data.max()
    if high > low:
        norm_data = (data - low) / (high - low)
    else:
        # Constant column: normalization is undefined (0/0), so draw every
        # marker fully opaque rather than passing NaN opacities.
        norm_data = pd.Series(1.0, index=data.index)
    opacity = norm_data.tolist()

    # Symbol layer: one marker per county, opacity encodes column y
    data_layer = gmaps.symbol_layer(
        coordinates,
        fill_color="green",
        stroke_color="navy",
        fill_opacity=opacity,
        stroke_opacity=opacity,
    )
    fig.add_layer(data_layer)

    # Returning the figure lets the notebook render it as the cell output
    return fig

In [34]:
# Heat map weighted by the population-weighted 2016 Democratic vote share;
# markers shaded by the number of solar systems per person.
create_map_country("voting_2016_dem_weighted","number_of_solar_system_per_person")

Figure(layout=FigureLayout(height='420px'))

In [9]:
# Export the merged county table (coordinates plus aggregated variables) to
# CSV, without the row index and encoded as ISO-8859-1.
new_df.to_csv(
    "../data/Solar_Data.csv",
    index=False,
    header=True,
    encoding="ISO-8859-1",
)

In [17]:
#What's up with Louisiana?

# Louisiana plus the surrounding states used for the regional heat map
neighbor_states = ["tx", "la", "ok", "ar", "ms", "al"]

# Index the merged table by state, then pull the selected states; rows come
# back grouped in the order the states are listed above.
state_all_df = new_df.set_index("state")
state_df = state_all_df.loc[neighbor_states]

state_df.head()

Unnamed: 0_level_0,county,population,lat,lon,lat_gmap,lon_gmap,total_panel_area,number_of_solar_system_per_person,per_capita_income,housing_unit_median_value,daily_solar_radiation,electricity_price_residential,electricity_price_commercial,electricity_price_industrial,incentive_count_residential,incentive_count_nonresidential,education_high_school_graduate_rate,voting_2016_dem_percentage
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
tx,Anderson County,45947,31.780556,-95.637889,31.776932,-95.645795,1178.287555,0.000239,21836.639672,82700.0,4.47,11.56,8.15,5.59,47,54,0.376864,0.198851
tx,Andrews County,16775,32.288,-102.5695,32.342627,-102.713512,1434.505237,0.001371,29423.214903,127150.0,5.18,11.56,8.15,5.59,47,54,0.328533,0.169712
tx,Angelina County,83296,31.306125,-94.7,31.27047,-94.645035,5380.836686,0.000636,21709.13051,93268.75,4.41,11.56,8.15,5.59,47,54,0.309692,0.25236
tx,Aransas County,24292,28.0192,-97.0514,28.076126,-96.963862,152.020801,0.000247,28472.468632,168380.0,4.628,11.56,8.15,5.59,47,54,0.285331,0.234833
tx,Archer County,8779,33.648667,-98.614,33.576151,-98.748117,169.55075,0.000683,29380.011163,104900.0,4.79,11.56,8.15,5.59,47,54,0.364515,0.092293


In [15]:
#create_map function for two layered map
#x is heat map variable
#y is symbol map variable

def create_map_state(x, y, df=None, point_radius=0.4):
    """Build a two-layer Google map for the selected states.

    A heat map weighted by column ``x`` is drawn under one marker per county
    whose opacity encodes column ``y``; each marker's info box shows the
    county's ``y`` value.

    Parameters
    ----------
    x : str
        Name of the column used as the heat-map weight of each county.
    y : str
        Name of the column shown by the symbol layer. Its values are min-max
        normalized to [0, 1] and used as each marker's fill/stroke opacity.
    df : pandas.DataFrame, optional
        Table providing ``lat_gmap``, ``lon_gmap``, ``x`` and ``y``.
        Defaults to the notebook-level ``state_df``.
    point_radius : float, optional
        Radius handed to the heat-map layer. Defaults to 0.4.

    Returns
    -------
    The gmaps figure holding both layers.

    Raises
    ------
    ValueError
        If no row has values for the coordinates, ``x`` and ``y``.
    """
    if df is None:
        df = state_df

    # state_df is derived from a left merge, so counties without solar
    # records carry NaN. Drop incomplete rows so NaN never reaches the
    # heat-map weights or the marker opacities.
    required = list(dict.fromkeys(["lat_gmap", "lon_gmap", x, y]))
    plot_df = df.dropna(subset=required)
    if plot_df.empty:
        raise ValueError(f"no rows with complete data for columns {required}")

    # gmaps latitude/longitude of each county
    coordinates = plot_df[["lat_gmap", "lon_gmap"]]

    # Heat-map intensity is proportional to the values of column x
    density = plot_df[x].astype(float).tolist()

    # Keep the figure in a variable so several layers can be added to it
    fig = gmaps.figure()
    heat_layer = gmaps.heatmap_layer(
        coordinates,
        weights=density,
        point_radius=point_radius,
        dissipating=False,
    )
    fig.add_layer(heat_layer)

    # Min-max normalize column y to [0, 1]. Series.min()/max() are used
    # instead of the built-ins, whose result depends on element order when
    # NaN is present.
    data = plot_df[y]
    numeric = data.astype(float)
    low, high = numeric.min(), numeric.max()
    if high > low:
        norm_data = (numeric - low) / (high - low)
    else:
        # Constant column: normalization is undefined (0/0), so draw every
        # marker fully opaque rather than passing NaN opacities.
        norm_data = pd.Series(1.0, index=numeric.index)
    opacity = norm_data.tolist()

    # Symbol layer: one marker per county, opacity encodes column y. The
    # info box is labelled with the column name; the loop variable is kept
    # distinct from the parameter y so the label is not shadowed.
    data_layer = gmaps.symbol_layer(
        coordinates,
        fill_color="green",
        stroke_color="navy",
        fill_opacity=opacity,
        stroke_opacity=opacity,
        info_box_content=[f"{y}: {value}" for value in data],
    )
    fig.add_layer(data_layer)

    # Return the figure so the calling cell renders it; without a return
    # value the notebook shows nothing for the call.
    return fig

In [16]:
# Heat map weighted by the residential incentive count;
# markers shaded by the number of solar systems per person.
create_map_state("incentive_count_residential","number_of_solar_system_per_person")