In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import gmaps

# Google developer API key
from config import gkey

# Register the Google API key (imported from config) with gmaps before any map is built
gmaps.configure(api_key=gkey)

# Load the county coordinate table.
# "ANSI" is only an alias of the Windows-only "mbcs" codec, so it raises
# LookupError on macOS/Linux. "cp1252" names the Western Windows code page
# explicitly and works on every platform.
# NOTE(review): assumes the CSV was saved on a Western-locale Windows machine - confirm.
csv_path_county = "../data/County_Coordinates.csv"
county_df = pd.read_csv(csv_path_county, delimiter=",", encoding="cp1252")
county_df.head()

Unnamed: 0,county,state,population,lat,lon,lat_gmap,lon_gmap
0,Abbeville County,sc,16713,34.3015,-82.42675,34.189122,-82.475276
1,Acadia Parish,la,44719,30.249556,-92.393,30.229735,-92.381362
2,Accomack County,va,33115,37.78425,-75.64975,37.706332,-75.806908
3,Ada County,id,417501,43.607966,-116.273847,43.478767,-116.24203
4,Adair County,ia,4561,41.28,-94.4855,41.274173,-94.479976


In [2]:
# Load the cleaned solar data set.
# "ANSI" is only an alias of the Windows-only "mbcs" codec, so it raises
# LookupError on macOS/Linux. "cp1252" names the Western Windows code page
# explicitly and works on every platform.
# NOTE(review): assumes the CSV was saved on a Western-locale Windows machine - confirm.
csv_path_clean = "../data/Cleaned_Data.csv"
solar_clean_df = pd.read_csv(csv_path_clean, delimiter=",", encoding="cp1252")
solar_clean_df.head()

Unnamed: 0.1,Unnamed: 0,tile_count,solar_system_count,total_panel_area,fips,average_household_income,county,education_bachelor,education_college,education_doctoral,...,incentive_residential_state_level,incentive_nonresidential_state_level,net_metering,feedin_tariff,cooperate_tax,property_tax,sales_tax,rebate,avg_electricity_retail_rate,Solar_Status
0,0,0,0,0.0,27145011200,70352.78987,Stearns County,569,1690,13,...,11,13,34,0,0,25,12,0,9.46,No
1,2,3,3,64.505776,27145011302,71496.88658,Stearns County,854,1459,31,...,11,13,34,0,0,25,12,0,9.46,Yes
2,3,0,0,0.0,27145011304,86840.15275,Stearns County,640,1116,68,...,11,13,34,0,0,25,12,0,9.46,No
3,4,5,5,164.583303,27145011400,89135.3156,Stearns County,654,1314,15,...,11,13,34,0,0,25,12,0,9.46,Yes
4,5,0,0,0.0,27145011500,62225.90361,Stearns County,522,1395,24,...,11,13,34,0,0,25,12,0,9.46,No


In [3]:
# Add an income-total column per row: population multiplied by the average
# household income. The next aggregation divides its sum by the summed
# population to get a population-weighted average income.
solar_clean_df["county_household_income"] = solar_clean_df["population"].mul(
    solar_clean_df["average_household_income"]
)
solar_clean_df.head()

Unnamed: 0.1,Unnamed: 0,tile_count,solar_system_count,total_panel_area,fips,average_household_income,county,education_bachelor,education_college,education_doctoral,...,incentive_nonresidential_state_level,net_metering,feedin_tariff,cooperate_tax,property_tax,sales_tax,rebate,avg_electricity_retail_rate,Solar_Status,county_household_income
0,0,0,0,0.0,27145011200,70352.78987,Stearns County,569,1690,13,...,13,34,0,0,25,12,0,9.46,No,462710300.0
1,2,3,3,64.505776,27145011302,71496.88658,Stearns County,854,1459,31,...,13,34,0,0,25,12,0,9.46,Yes,498404800.0
2,3,0,0,0.0,27145011304,86840.15275,Stearns County,640,1116,68,...,13,34,0,0,25,12,0,9.46,No,549524500.0
3,4,5,5,164.583303,27145011400,89135.3156,Stearns County,654,1314,15,...,13,34,0,0,25,12,0,9.46,Yes,469208300.0
4,5,0,0,0.0,27145011500,62225.90361,Stearns County,522,1395,24,...,13,34,0,0,25,12,0,9.46,No,406397400.0


In [4]:
# Group the row-level data by (county, state)
grouped_location = solar_clean_df.groupby(["county", "state"])

# Build the county-level table in one step; every Series below shares the
# (county, state) group index, so the columns line up row for row.
location_df = pd.DataFrame(
    {
        # dependent variables: solar deployment, mostly scaled per 1000 residents
        "tile_count_per1000": (
            grouped_location["tile_count"].sum() * 1000
            / grouped_location["population"].sum()
        ),
        "solar_system_count_per1000": (
            grouped_location["solar_system_count"].sum() * 1000
            / grouped_location["population"].sum()
        ),
        "total_panel_area_per1000": (
            grouped_location["total_panel_area"].sum() * 1000
            / grouped_location["population"].sum()
        ),
        "total_panel_area": grouped_location["total_panel_area"].sum(),
        # independent variables on welfare
        # (average income is population-weighted; the others are plain means)
        "average_household_income": (
            grouped_location["county_household_income"].sum()
            / grouped_location["population"].sum()
        ),
        "per_capita_income": grouped_location["per_capita_income"].mean(),
        "median_household_income": grouped_location["median_household_income"].mean(),
        "housing_unit_median_value": grouped_location["housing_unit_median_value"].mean(),
        # independent variable on weather conditions
        "daily_solar_radiation": grouped_location["daily_solar_radiation"].mean(),
    }
).reset_index()  # turn county/state back into ordinary columns on every row

location_df.head()


Unnamed: 0,county,state,tile_count_per1000,solar_system_count_per1000,total_panel_area_per1000,total_panel_area,average_household_income,per_capita_income,median_household_income,housing_unit_median_value,daily_solar_radiation
0,Abbeville County,sc,0.179501,0.179501,3.303239,55.207033,49709.200865,19624.75,37707.25,101050.0,4.31
1,Acadia Parish,la,1.252264,1.028646,51.718155,2312.784175,54859.088846,20804.666667,40660.888889,90300.0,4.36
2,Accomack County,va,0.241582,0.241582,8.194007,271.344537,54158.288332,23293.375,39467.0,151862.5,4.37
3,Ada County,id,1.607182,1.29341,46.063401,19231.515912,76074.034496,29962.338983,59137.40678,199111.864407,4.45
4,Adair County,ia,1.096251,1.096251,124.555987,568.099858,58670.624386,27300.0,47743.0,92750.0,3.94


In [5]:
# Attach the aggregated solar/welfare columns to the county coordinates.
# A left join keeps every row of county_df, matched on county and state.
new_df = county_df.merge(location_df, on=["county", "state"], how="left")
new_df.head()

Unnamed: 0,county,state,population,lat,lon,lat_gmap,lon_gmap,tile_count_per1000,solar_system_count_per1000,total_panel_area_per1000,total_panel_area,average_household_income,per_capita_income,median_household_income,housing_unit_median_value,daily_solar_radiation
0,Abbeville County,sc,16713,34.3015,-82.42675,34.189122,-82.475276,0.179501,0.179501,3.303239,55.207033,49709.200865,19624.75,37707.25,101050.0,4.31
1,Acadia Parish,la,44719,30.249556,-92.393,30.229735,-92.381362,1.252264,1.028646,51.718155,2312.784175,54859.088846,20804.666667,40660.888889,90300.0,4.36
2,Accomack County,va,33115,37.78425,-75.64975,37.706332,-75.806908,0.241582,0.241582,8.194007,271.344537,54158.288332,23293.375,39467.0,151862.5,4.37
3,Ada County,id,417501,43.607966,-116.273847,43.478767,-116.24203,1.607182,1.29341,46.063401,19231.515912,76074.034496,29962.338983,59137.40678,199111.864407,4.45
4,Adair County,ia,4561,41.28,-94.4855,41.274173,-94.479976,1.096251,1.096251,124.555987,568.099858,58670.624386,27300.0,47743.0,92750.0,3.94


In [30]:
# Take a random sample of 1000 rows for graphing purposes.
# A fixed random_state makes the draw reproducible, so "Restart & Run All"
# plots the same rows every time.
randomize = new_df.sample(n=1000, random_state=42)

# reset_index() keeps the original row label in an "index" column
random_df = randomize.reset_index()

# Preview only the first rows instead of rendering the whole sample
random_df.head(10)


Unnamed: 0,index,county,state,population,lat,lon,lat_gmap,lon_gmap,tile_count_per1000,solar_system_count_per1000,total_panel_area_per1000,total_panel_area,average_household_income,per_capita_income,median_household_income,housing_unit_median_value,daily_solar_radiation
0,708,Danville city,va,42450,36.586733,-79.403333,36.585972,-79.395023,0.636042,0.424028,28.168525,1195.753893,47808.062526,20617.066667,32850.800000,87553.333333,4.120000
1,77,Archuleta County,co,12174,37.201000,-107.077250,37.160432,-107.006703,1.149992,0.985707,22.082007,268.826349,65310.626403,29756.000000,47176.500000,282425.000000,4.802500
2,2054,Nodaway County,mo,23186,40.366000,-94.882600,40.290728,-94.810596,4.485465,1.940826,244.553903,5670.226792,47810.123451,19735.600000,39810.200000,108600.000000,4.050000
3,194,Berrien County,mi,151457,41.980174,-86.411935,42.032074,-86.741637,0.719676,0.600831,26.039409,3943.850785,62453.294526,24885.826087,47529.478261,128771.739130,3.850652
4,859,Ellsworth County,ks,6410,38.735000,-98.269500,38.615225,-98.221298,1.092044,0.936037,98.457300,631.111292,58942.870735,25525.000000,49803.500000,77650.000000,4.420000
5,1804,Mason County,wa,60791,47.287714,-123.079429,47.425115,-123.195138,0.806040,0.361896,42.940922,2610.421568,62417.759360,23512.214286,47626.571429,192585.714286,3.350000
6,1315,Ida County,ia,4454,42.344500,-95.497000,42.341868,-95.477781,0.224517,0.224517,10.144637,45.184212,60662.478785,27088.500000,44973.000000,75350.000000,3.900000
7,2012,Nassau County,ny,1220505,40.719715,-73.602496,40.654615,-73.559413,9.228967,6.798006,260.420910,317845.022869,125650.465256,43559.394309,102232.991870,502528.455285,3.980000
8,889,Fairfield County,sc,19510,34.351750,-81.123000,34.447835,-81.075466,0.461302,0.461302,13.250657,258.520317,49090.373815,19815.500000,36126.750000,110050.000000,4.355000
9,112,Bailey County,tx,7126,34.069000,-102.831000,34.091306,-102.897510,0.701656,0.701656,30.837196,219.745861,47640.593650,17007.000000,37397.000000,58300.000000,5.170000


In [31]:
def create_map(x, y, df=None):
    """Build a gmaps figure with a heatmap of column ``x`` and symbols shaded by column ``y``.

    Parameters
    ----------
    x : str
        Column whose values (cast to float) weight the heatmap layer.
    y : str
        Column that is min-max scaled to [0, 1] and used as the fill and
        stroke opacity of the symbol layer; its raw values fill the info boxes.
    df : pandas.DataFrame, optional
        Frame holding ``lat_gmap``, ``lon_gmap``, ``x`` and ``y``. Defaults to
        the notebook-level ``random_df`` so existing two-argument calls keep
        working.

    Returns
    -------
    The figure created by ``gmaps.figure()`` with both layers added.

    Raises
    ------
    ValueError
        If no row has a coordinate pair together with both ``x`` and ``y``.
    """
    source = random_df if df is None else df

    # Skip incomplete rows: a missing value would otherwise flow straight into
    # the coordinates, heatmap weights or opacities.
    # dict.fromkeys de-duplicates the column list when x == y.
    required = list(dict.fromkeys(["lat_gmap", "lon_gmap", x, y]))
    plot_df = source.dropna(subset=required)
    if plot_df.empty:
        raise ValueError(f"no rows with complete data for columns {required}")

    # Store the figure in a variable so layers can be added to the same map
    fig = gmaps.figure()
    coordinates = plot_df[["lat_gmap", "lon_gmap"]]
    density = plot_df[x].astype(float)

    # Heatmap layer weighted by column x
    heat_layer = gmaps.heatmap_layer(coordinates, weights=density, dissipating=True)
    fig.add_layer(heat_layer)

    data = plot_df[y]

    # Min-max scale column y to [0, 1]. Series.min()/max() are used instead of
    # the builtins, which iterate element by element and give order-dependent
    # results when NaN is present.
    low = data.min()
    span = data.max() - low
    if span > 0:
        norm_data = ((data - low) / span).tolist()
    else:
        # Every value is identical: avoid 0/0 and draw all symbols fully opaque
        norm_data = [1.0] * len(data)

    # Symbol layer: one marker per row, opacity driven by the scaled y value
    data_layer = gmaps.symbol_layer(
        coordinates,
        fill_color="green",
        stroke_color='navy',
        fill_opacity=norm_data,
        stroke_opacity=norm_data,
        # loop variable renamed so it no longer shadows the y parameter
        info_box_content=[f"y: {value}" for value in data],
    )
    fig.add_layer(data_layer)

    # Return the figure so the calling cell displays it
    return fig

In [34]:
# Heatmap weighted by median housing value; symbol opacity tracks panel area per 1000 residents
create_map(x="housing_unit_median_value", y="total_panel_area_per1000")

Figure(layout=FigureLayout(height='420px'))

In [None]:
# Write the merged county table to disk with a header row and without the row index
new_df.to_csv("../data/Solar_Data.csv", header=True, index=False)

In [None]:
# Show the merged table ordered from highest to lowest median household income
new_df.sort_values("median_household_income", ascending=False)