In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import gmaps

# Google developer API key
from config import gkey

# Hand the API key (gkey, imported from the local config module above) to gmaps
# before any map figure is built.
gmaps.configure(api_key=gkey)

#import County Level Coordinate Data
# The file is described as "ANSI" encoded. Python only recognises "ANSI" as a
# codec alias on Windows (it maps to mbcs), so the original call fails with
# LookupError on macOS/Linux. Name the code page explicitly instead.
# NOTE(review): assumes the Western European ANSI code page (cp1252) - confirm.
csv_path_county = "../data/County_Coordinates.csv"
county_df = pd.read_csv(csv_path_county, delimiter=",", encoding="cp1252")
county_df.head()

Unnamed: 0,county,state,population,lat,lon,lat_gmap,lon_gmap
0,Abbeville County,sc,16713,34.3015,-82.42675,34.189122,-82.475276
1,Acadia Parish,la,44719,30.249556,-92.393,30.229735,-92.381362
2,Accomack County,va,33115,37.78425,-75.64975,37.706332,-75.806908
3,Ada County,id,417501,43.607966,-116.273847,43.478767,-116.24203
4,Adair County,ia,4561,41.28,-94.4855,41.274173,-94.479976


In [2]:
#import cleaned data at City Level
# "ANSI" is only a valid codec alias on Windows (it maps to mbcs) and raises
# LookupError elsewhere, so the code page is spelled out to keep the notebook
# portable.
# NOTE(review): assumes the Western European ANSI code page (cp1252) - confirm.

csv_path_clean = "../data/Cleaned_Data.csv"
solar_clean_df = pd.read_csv(csv_path_clean, delimiter=",", encoding="cp1252")
solar_clean_df.head()

Unnamed: 0.1,Unnamed: 0,tile_count,solar_system_count,total_panel_area,fips,average_household_income,county,education_bachelor,education_college,education_doctoral,...,incentive_residential_state_level,incentive_nonresidential_state_level,net_metering,feedin_tariff,cooperate_tax,property_tax,sales_tax,rebate,avg_electricity_retail_rate,Solar_Status
0,0,0,0,0.0,27145011200,70352.78987,Stearns County,569,1690,13,...,11,13,34,0,0,25,12,0,9.46,No
1,2,3,3,64.505776,27145011302,71496.88658,Stearns County,854,1459,31,...,11,13,34,0,0,25,12,0,9.46,Yes
2,3,0,0,0.0,27145011304,86840.15275,Stearns County,640,1116,68,...,11,13,34,0,0,25,12,0,9.46,No
3,4,5,5,164.583303,27145011400,89135.3156,Stearns County,654,1314,15,...,11,13,34,0,0,25,12,0,9.46,Yes
4,5,0,0,0.0,27145011500,62225.90361,Stearns County,522,1395,24,...,11,13,34,0,0,25,12,0,9.46,No


In [3]:
#per-row income total: population x average household income
#(summed per county further down to form a population-weighted average)
solar_clean_df["county_household_income"] = (
    solar_clean_df["population"].mul(solar_clean_df["average_household_income"])
)
solar_clean_df.head()

Unnamed: 0.1,Unnamed: 0,tile_count,solar_system_count,total_panel_area,fips,average_household_income,county,education_bachelor,education_college,education_doctoral,...,incentive_nonresidential_state_level,net_metering,feedin_tariff,cooperate_tax,property_tax,sales_tax,rebate,avg_electricity_retail_rate,Solar_Status,county_household_income
0,0,0,0,0.0,27145011200,70352.78987,Stearns County,569,1690,13,...,13,34,0,0,25,12,0,9.46,No,462710300.0
1,2,3,3,64.505776,27145011302,71496.88658,Stearns County,854,1459,31,...,13,34,0,0,25,12,0,9.46,Yes,498404800.0
2,3,0,0,0.0,27145011304,86840.15275,Stearns County,640,1116,68,...,13,34,0,0,25,12,0,9.46,No,549524500.0
3,4,5,5,164.583303,27145011400,89135.3156,Stearns County,654,1314,15,...,13,34,0,0,25,12,0,9.46,Yes,469208300.0
4,5,0,0,0.0,27145011500,62225.90361,Stearns County,522,1395,24,...,13,34,0,0,25,12,0,9.46,No,406397400.0


In [35]:
#collapse the cleaned rows to one row per (county, state) pair
grouped_location = solar_clean_df.groupby(["county", "state"])

#group totals that are reused by several of the columns below
county_population = grouped_location["population"].sum()
county_panel_area = grouped_location["total_panel_area"].sum()

location_df = pd.DataFrame(
    {
        #target variables, expressed per 1000 residents (plus the raw panel area)
        "tile_count_per1000": grouped_location["tile_count"].sum() * 1000 / county_population,
        "solar_system_count_per1000": grouped_location["solar_system_count"].sum() * 1000 / county_population,
        "total_panel_area_per1000": county_panel_area * 1000 / county_population,
        "total_panel_area": county_panel_area,
        #welfare variables; average household income is population-weighted
        "average_household_income": grouped_location["county_household_income"].sum() / county_population,
        "per_capita_income": grouped_location["per_capita_income"].mean(),
        "median_household_income": grouped_location["median_household_income"].mean(),
        "housing_unit_median_value": grouped_location["housing_unit_median_value"].mean(),
        #weather conditions
        "daily_solar_radiation": grouped_location["daily_solar_radiation"].mean(),
    }
)

#move county and state out of the index so they appear as ordinary columns
location_df = location_df.reset_index()
location_df.head()


Unnamed: 0,county,state,tile_count_per1000,solar_system_count_per1000,total_panel_area_per1000,total_panel_area,average_household_income,per_capita_income,median_household_income,housing_unit_median_value,daily_solar_radiation
0,Abbeville County,sc,0.179501,0.179501,3.303239,55.207033,49709.200865,19624.75,37707.25,101050.0,4.31
1,Acadia Parish,la,1.252264,1.028646,51.718155,2312.784175,54859.088846,20804.666667,40660.888889,90300.0,4.36
2,Accomack County,va,0.241582,0.241582,8.194007,271.344537,54158.288332,23293.375,39467.0,151862.5,4.37
3,Ada County,id,1.607182,1.29341,46.063401,19231.515912,76074.034496,29962.338983,59137.40678,199111.864407,4.45
4,Adair County,ia,1.096251,1.096251,124.555987,568.099858,58670.624386,27300.0,47743.0,92750.0,3.94


In [36]:
#attach the per-county aggregates to the coordinate table; the left join keeps
#every row of county_df even when no aggregate row matches
new_df = county_df.merge(location_df, on=["county", "state"], how="left")
new_df.head()

Unnamed: 0,county,state,population,lat,lon,lat_gmap,lon_gmap,tile_count_per1000,solar_system_count_per1000,total_panel_area_per1000,total_panel_area,average_household_income,per_capita_income,median_household_income,housing_unit_median_value,daily_solar_radiation
0,Abbeville County,sc,16713,34.3015,-82.42675,34.189122,-82.475276,0.179501,0.179501,3.303239,55.207033,49709.200865,19624.75,37707.25,101050.0,4.31
1,Acadia Parish,la,44719,30.249556,-92.393,30.229735,-92.381362,1.252264,1.028646,51.718155,2312.784175,54859.088846,20804.666667,40660.888889,90300.0,4.36
2,Accomack County,va,33115,37.78425,-75.64975,37.706332,-75.806908,0.241582,0.241582,8.194007,271.344537,54158.288332,23293.375,39467.0,151862.5,4.37
3,Ada County,id,417501,43.607966,-116.273847,43.478767,-116.24203,1.607182,1.29341,46.063401,19231.515912,76074.034496,29962.338983,59137.40678,199111.864407,4.45
4,Adair County,ia,4561,41.28,-94.4855,41.274173,-94.479976,1.096251,1.096251,124.555987,568.099858,58670.624386,27300.0,47743.0,92750.0,3.94


In [37]:
#take a random sample of up to 1000 data points for graphing purposes (enables a smaller file size)
#a fixed seed keeps the sample, and therefore the map, reproducible across kernel restarts;
#capping n avoids the ValueError that sample() raises when fewer than 1000 rows exist
sample_size = min(1000, len(new_df))
randomize = new_df.sample(n=sample_size, random_state=42)
#reset index so it's in numerical order (the previous row labels are kept in an "index" column)
random_df = randomize.reset_index()

random_df.head()


Unnamed: 0,index,county,state,population,lat,lon,lat_gmap,lon_gmap,tile_count_per1000,solar_system_count_per1000,total_panel_area_per1000,total_panel_area,average_household_income,per_capita_income,median_household_income,housing_unit_median_value,daily_solar_radiation
0,1253,Hill County,tx,34923,31.993455,-97.174364,32.062177,-97.179026,0.085903,0.085903,3.77624,131.877641,55634.526681,20242.363636,39638.272727,82390.909091,4.668182
1,765,Deuel County,ne,1946,41.112,-102.336,41.140175,-102.30058,1.027749,1.027749,12.619399,24.55735,61419.22141,27439.0,50962.0,69400.0,4.39
2,1978,Morgan County,oh,11176,39.609333,-81.809667,39.652191,-81.822436,0.0,0.0,0.0,0.0,48537.844611,19336.666667,35695.333333,86766.666667,3.78
3,1850,Mecosta County,mi,41349,43.6606,-85.3724,43.620103,-85.354965,0.120922,0.120922,3.369743,139.335516,53332.161062,21191.0,42001.0,109240.0,3.71
4,1412,Jersey County,il,22625,39.069333,-90.342833,39.070475,-90.374835,2.519337,1.635359,76.614793,1733.409687,64394.344246,25579.333333,51273.5,129116.666667,4.021667


In [41]:
#create_map function for two layered map
#x is heat map variable
#y is symbol map variable

def create_map(x, y, df=None):
    """Build a gmaps figure with a heatmap layer for ``x`` and a symbol layer for ``y``.

    Parameters
    ----------
    x : str
        Column whose values are used as heatmap weights.
    y : str
        Column whose values set the symbol opacity (min-max scaled to 0..1)
        and are shown in each symbol's info box.
    df : pandas.DataFrame, optional
        Frame providing ``lat_gmap``, ``lon_gmap``, ``x`` and ``y``.
        Defaults to the notebook-level ``random_df``.

    Returns
    -------
    The figure created by ``gmaps.figure()`` with the layers added.
    """
    source = random_df if df is None else df
    coordinate_columns = ["lat_gmap", "lon_gmap"]

    # store the figure in a variable so we can continue to add layers to the same map
    fig = gmaps.figure()

    # Rows with a missing coordinate or a missing weight carry no usable
    # information for the heatmap, so they are left out. The index is reset so
    # the values handed to gmaps stay positionally aligned with the coordinates.
    heat_points = source.dropna(
        subset=list(dict.fromkeys(coordinate_columns + [x]))
    ).reset_index(drop=True)
    if not heat_points.empty:
        # density is proportional to the column values associated with x
        density = heat_points[x].astype(float)
        heat_layer = gmaps.heatmap_layer(
            heat_points[coordinate_columns], weights=density, dissipating=True
        )
        fig.add_layer(heat_layer)

    # Same filtering for the symbol layer, based on column y
    symbol_points = source.dropna(
        subset=list(dict.fromkeys(coordinate_columns + [y]))
    ).reset_index(drop=True)
    if not symbol_points.empty:
        data = symbol_points[y]

        # Normalize the data from column y to the range 0..1. Series.min/max are
        # used rather than the built-ins, whose result depends on element order
        # when NaNs are present.
        low, high = data.min(), data.max()
        span = high - low
        if span > 0:
            norm_data = (data - low) / span
        else:
            # Every value is identical: avoid 0/0 and draw fully opaque symbols
            norm_data = pd.Series(1.0, index=data.index)

        #create data layer for variable y
        data_layer = gmaps.symbol_layer(
            symbol_points[coordinate_columns],
            fill_color="green",
            stroke_color='navy',
            fill_opacity=norm_data,
            stroke_opacity=norm_data,
            # loop variable renamed so it no longer shadows the parameter y
            info_box_content=[f"y: {value}" for value in data],
        )
        fig.add_layer(data_layer)

    # Return the figure so the notebook displays it
    return fig

In [42]:
# heatmap weighted by housing_unit_median_value; symbol opacity driven by total_panel_area_per1000
create_map("housing_unit_median_value","total_panel_area_per1000")

Figure(layout=FigureLayout(height='420px'))

In [None]:
#write the merged county dataframe to csv, with a header row and without the row index
new_df.to_csv(
    "../data/Solar_Data.csv",
    header=True,
    index=False,
)