In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import gmaps

# Google developer API key
from config import gkey

# Register the Google API key with gmaps before any map is built
gmaps.configure(api_key=gkey)

# Load the county-level coordinate table (the file is read as ISO-8859-1)
csv_path_county = "../data/County_Coordinates.csv"
county_df = pd.read_csv(csv_path_county, sep=",", encoding="ISO-8859-1")

# Show how many county rows were loaded
len(county_df)

3091

In [2]:
# Load the cleaned solar dataset (read as ISO-8859-1) and preview its first rows.
# NOTE(review): the preview shows "Unnamed: 0" / "Unnamed: 0.1" columns, which
# look like index columns written by earlier to_csv calls - confirm whether
# they should be dropped.
csv_path_clean = "../data/Cleaned_Data.csv"
solar_df = pd.read_csv(csv_path_clean, sep=",", encoding="ISO-8859-1")
solar_df.head(5)

Unnamed: 0.1,Unnamed: 0,tile_count,solar_system_count,total_panel_area,fips,average_household_income,county,education_bachelor,education_college,education_doctoral,...,incentive_residential_state_level,incentive_nonresidential_state_level,net_metering,feedin_tariff,cooperate_tax,property_tax,sales_tax,rebate,avg_electricity_retail_rate,Solar_Status
0,0,0,0,0.0,27145011200,70352.78987,Stearns County,569,1690,13,...,11,13,34,0,0,25,12,0,9.46,No
1,2,3,3,64.505776,27145011302,71496.88658,Stearns County,854,1459,31,...,11,13,34,0,0,25,12,0,9.46,Yes
2,3,0,0,0.0,27145011304,86840.15275,Stearns County,640,1116,68,...,11,13,34,0,0,25,12,0,9.46,No
3,4,5,5,164.583303,27145011400,89135.3156,Stearns County,654,1314,15,...,11,13,34,0,0,25,12,0,9.46,Yes
4,5,0,0,0.0,27145011500,62225.90361,Stearns County,522,1395,24,...,11,13,34,0,0,25,12,0,9.46,No


In [3]:
# Total personal income for each row: population times per-capita income
solar_df["county_personal_income"] = solar_df["population"].mul(solar_df["per_capita_income"])

# Write the first three rows, transposed so each column becomes a line,
# to Units_Data.csv
units_df = solar_df.head(3).transpose()
units_df.to_csv(
    "../data/Units_Data.csv",
    index=True,
    header=True,
    encoding="ISO-8859-1",
)

In [4]:
# Collapse solar_df to one row per (county, state) pair
grouped_location = solar_df.groupby(["county", "state"])

# Independent variables that are carried over as the plain mean of each group
independent_var = [
    "housing_unit_median_value",
    "daily_solar_radiation",
    "electricity_price_residential",
    "electricity_price_commercial",
    "electricity_price_industrial",
    "incentive_count_residential",
    "incentive_count_nonresidential",
    "education_high_school_graduate_rate",
    "voting_2016_dem_percentage",
]


def grouped_mean(x):
    """Store the per-group mean of column ``x`` as ``location_df[x]``."""
    location_df[x] = grouped_location[x].mean()


# Empty frame that receives one column per aggregated variable
location_df = pd.DataFrame()

# Group population, used as the denominator of both ratio columns below
population_total = grouped_location["population"].sum()

# Target (dependent) variables: summed panel area and solar systems per person
location_df["total_panel_area"] = grouped_location["total_panel_area"].sum()
location_df["number_of_solar_system_per_person"] = (
    grouped_location["solar_system_count"].sum() / population_total
)

# Welfare variable: summed personal income divided by summed population
location_df["per_capita_income"] = (
    grouped_location["county_personal_income"].sum() / population_total
)

# Remaining independent variables are simple group means
for var in independent_var:
    grouped_mean(var)

# Turn the (county, state) index back into ordinary columns so both names
# appear in every row
location_df = location_df.reset_index()
location_df.head()




Unnamed: 0,county,state,total_panel_area,number_of_solar_system_per_person,per_capita_income,housing_unit_median_value,daily_solar_radiation,electricity_price_residential,electricity_price_commercial,electricity_price_industrial,incentive_count_residential,incentive_count_nonresidential,education_high_school_graduate_rate,voting_2016_dem_percentage
0,Abbeville County,sc,55.207033,0.00018,19351.627655,101050.0,4.31,12.57,10.21,6.05,16,25,0.359489,0.346139
1,Acadia Parish,la,2312.784175,0.001029,20755.342472,90300.0,4.36,9.33,8.66,5.41,16,17,0.402662,0.205872
2,Accomack County,va,271.344537,0.000242,23230.53281,151862.5,4.37,11.37,8.21,6.95,19,16,0.408055,0.42761
3,Ada County,id,19231.515912,0.001293,28850.999202,199111.864407,4.45,9.93,7.8,6.6,14,19,0.211122,0.386917
4,Adair County,ia,568.099858,0.001096,26577.583205,92750.0,3.94,11.63,8.92,5.9,18,26,0.462849,0.299814


In [5]:
# Left-join the aggregated solar columns onto the county table, matching on
# county and state; every row of county_df is kept
merge_keys = ["county", "state"]
new_df = county_df.merge(location_df, how="left", on=merge_keys)
new_df.head()

Unnamed: 0,county,state,population,lat,lon,lat_gmap,lon_gmap,total_panel_area,number_of_solar_system_per_person,per_capita_income,housing_unit_median_value,daily_solar_radiation,electricity_price_residential,electricity_price_commercial,electricity_price_industrial,incentive_count_residential,incentive_count_nonresidential,education_high_school_graduate_rate,voting_2016_dem_percentage
0,Abbeville County,sc,16713,34.3015,-82.42675,34.189122,-82.475276,55.207033,0.00018,19351.627655,101050.0,4.31,12.57,10.21,6.05,16,25,0.359489,0.346139
1,Acadia Parish,la,44719,30.249556,-92.393,30.229735,-92.381362,2312.784175,0.001029,20755.342472,90300.0,4.36,9.33,8.66,5.41,16,17,0.402662,0.205872
2,Accomack County,va,33115,37.78425,-75.64975,37.706332,-75.806908,271.344537,0.000242,23230.53281,151862.5,4.37,11.37,8.21,6.95,19,16,0.408055,0.42761
3,Ada County,id,417501,43.607966,-116.273847,43.478767,-116.24203,19231.515912,0.001293,28850.999202,199111.864407,4.45,9.93,7.8,6.6,14,19,0.211122,0.386917
4,Adair County,ia,4561,41.28,-94.4855,41.274173,-94.479976,568.099858,0.001096,26577.583205,92750.0,3.94,11.63,8.92,5.9,18,26,0.462849,0.299814


In [6]:
# NOTE: despite the names, nothing is sampled here - `randomize` is another
# reference to new_df, so random_df contains every row of the merged table.
randomize = new_df

# reset_index() renumbers the rows and keeps the previous index as an
# extra "index" column
random_df = randomize.reset_index()

# Display the rows ordered by ascending daily solar radiation
# (the sorted frame is only shown, not stored)
random_df.sort_values(by="daily_solar_radiation")


Unnamed: 0,index,county,state,population,lat,lon,lat_gmap,lon_gmap,total_panel_area,number_of_solar_system_per_person,per_capita_income,housing_unit_median_value,daily_solar_radiation,electricity_price_residential,electricity_price_commercial,electricity_price_industrial,incentive_count_residential,incentive_count_nonresidential,education_high_school_graduate_rate,voting_2016_dem_percentage
1493,1493,Kitsap County,wa,235235,47.610041,-122.633633,47.647661,-122.641258,1.341403e+04,0.001828,32540.076370,264559.183673,3.350000,9.09,8.22,4.35,23,30,0.236842,0.510525
1804,1804,Mason County,wa,60791,47.287714,-123.079429,47.425115,-123.195138,2.610422e+03,0.000362,25014.763024,192585.714286,3.350000,9.09,8.22,4.35,23,30,0.297722,0.427558
2221,2221,Pierce County,wa,710719,47.180699,-122.400384,47.067588,-122.129527,4.565318e+04,0.001323,29244.233358,238308.904110,3.355342,9.09,8.22,4.35,23,30,0.290408,0.496931
1479,1479,King County,wa,1808974,47.555325,-122.233186,47.548034,-121.983603,2.149603e+05,0.002677,42083.326508,412760.734463,3.359322,9.09,8.22,4.35,23,30,0.165916,0.721489
2744,2744,Thurston County,wa,225381,47.000762,-122.839548,46.864575,-122.769599,1.430604e+04,0.001948,29954.072038,234147.619048,3.371429,9.09,8.22,4.35,23,30,0.221578,0.531598
2584,2584,Snohomish County,wa,663262,47.931424,-122.185818,48.032998,-121.833947,3.602242e+04,0.001642,32733.993532,298293.939394,3.373636,9.09,8.22,4.35,23,30,0.244091,0.542634
1337,1337,Island County,wa,76081,48.200150,-122.573900,48.197650,-122.579457,3.247616e+03,0.001249,32064.438020,283510.000000,3.392500,9.09,8.22,4.35,23,30,0.224908,0.489476
509,509,Clallam County,wa,72397,48.084810,-123.553048,48.040512,-124.016766,3.203595e+03,0.001395,26999.799923,212504.761905,3.396190,9.09,8.22,4.35,23,30,0.284256,0.459421
2469,2469,San Juan County,wa,15956,48.592750,-122.988750,48.611804,-122.960262,6.786407e+01,0.000188,39266.383680,452450.000000,3.400000,9.09,8.22,4.35,23,30,0.178762,0.666069
1087,1087,Grays Harbor County,wa,71419,47.043312,-123.778375,46.995353,-123.701247,3.170641e+03,0.001456,22386.673588,142956.250000,3.400000,9.09,8.22,4.35,23,30,0.320100,0.425569


In [7]:
#create_map function for two layered map
#x is heat map variable
#y is symbol map variable

def create_map_country(x, y, df=None):
    """Build a two-layer gmaps figure: a weighted heat map plus a symbol layer.

    Parameters
    ----------
    x : str
        Column whose values are used as the heat-map weights.
    y : str
        Column drawn on the symbol layer. Its values are min-max scaled to
        [0, 1] and used as the fill and stroke opacity of each marker.
    df : pandas.DataFrame, optional
        Table to plot; it must contain ``lat_gmap``, ``lon_gmap``, ``x`` and
        ``y``. Defaults to the module-level ``random_df``.

    Returns
    -------
    The gmaps figure holding both layers.

    Raises
    ------
    ValueError
        If no row has coordinates and values for both ``x`` and ``y``.
    """
    source = random_df if df is None else df

    # The left merge that builds the county table can leave NaN for counties
    # with no solar data. Such rows cannot be weighted or shaded, so they are
    # left off the map instead of being handed to gmaps.
    plot_df = source.dropna(subset=["lat_gmap", "lon_gmap", x, y])
    if plot_df.empty:
        raise ValueError(
            "no rows with coordinates and values for {!r} and {!r}".format(x, y)
        )

    # store the figure in a variable so we can continue to add layers to the same map
    fig = gmaps.figure()

    # gmap latitude/longitude of every plotted county
    coordinates = plot_df[["lat_gmap", "lon_gmap"]]

    # heat-map intensity is proportional to the values of column x
    density = plot_df[x].astype(float)
    heat_layer = gmaps.heatmap_layer(
        coordinates,
        weights=density,
        point_radius=0.45,
        dissipating=False,
    )
    fig.add_layer(heat_layer)

    # Min-max scale column y to [0, 1]. Series.min()/max() are used because the
    # built-in min()/max() give order-dependent results when NaN is present.
    data = plot_df[y]
    low = data.min()
    span = data.max() - low
    if span > 0:
        norm_data = (data - low) / span
    else:
        # Every value is identical: avoid 0/0 and draw all markers fully opaque.
        norm_data = pd.Series(1.0, index=data.index)

    # Symbol layer for column y; no info boxes are attached to the markers
    data_layer = gmaps.symbol_layer(
        coordinates,
        fill_color="green",
        stroke_color="navy",
        fill_opacity=norm_data,
        stroke_opacity=norm_data,
    )
    fig.add_layer(data_layer)

    # Returned so that a notebook cell ending in this call displays the map
    return fig

In [8]:
# Heat map of daily solar radiation with markers shaded by solar systems per person
create_map_country(
    x="daily_solar_radiation",
    y="number_of_solar_system_per_person",
)

Figure(layout=FigureLayout(height='420px'))

In [9]:
# Persist the merged county table to Solar_Data.csv
# (header row included, index column omitted)
new_df.to_csv(
    "../data/Solar_Data.csv",
    index=False,
    header=True,
    encoding="ISO-8859-1",
)

In [10]:
#What's up with Louisiana?

# Index the merged table by state code, then pull Louisiana together with
# the other listed states; rows come back in the order the codes are listed
state_all_df = new_df.set_index("state")
comparison_states = ["tx", "la", "ok", "ar", "ms", "al"]
state_df = state_all_df.loc[comparison_states]

state_df

Unnamed: 0_level_0,county,population,lat,lon,lat_gmap,lon_gmap,total_panel_area,number_of_solar_system_per_person,per_capita_income,housing_unit_median_value,daily_solar_radiation,electricity_price_residential,electricity_price_commercial,electricity_price_industrial,incentive_count_residential,incentive_count_nonresidential,education_high_school_graduate_rate,voting_2016_dem_percentage
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
tx,Anderson County,45947,31.780556,-95.637889,31.776932,-95.645795,1178.287555,0.000239,21836.639672,82700.000000,4.470000,11.56,8.15,5.59,47,54,0.376864,0.198851
tx,Andrews County,16775,32.288000,-102.569500,32.342627,-102.713512,1434.505237,0.001371,29423.214903,127150.000000,5.180000,11.56,8.15,5.59,47,54,0.328533,0.169712
tx,Angelina County,83296,31.306125,-94.700000,31.270470,-94.645035,5380.836686,0.000636,21709.130510,93268.750000,4.410000,11.56,8.15,5.59,47,54,0.309692,0.252360
tx,Aransas County,24292,28.019200,-97.051400,28.076126,-96.963862,152.020801,0.000247,28472.468632,168380.000000,4.628000,11.56,8.15,5.59,47,54,0.285331,0.234833
tx,Archer County,8779,33.648667,-98.614000,33.576151,-98.748117,169.550750,0.000683,29380.011163,104900.000000,4.790000,11.56,8.15,5.59,47,54,0.364515,0.092293
tx,Armstrong County,1943,34.965000,-101.358000,34.937162,-101.433915,0.000000,0.000000,28669.000000,106600.000000,5.030000,11.56,8.15,5.59,47,54,0.242909,0.068830
tx,Atascosa County,29209,28.958667,-98.549167,28.866128,-98.572102,2240.049342,0.000616,21292.766716,81733.333333,4.715000,11.56,8.15,5.59,47,54,0.364953,0.340684
tx,Austin County,28886,29.861667,-96.238000,29.871129,-96.280086,137.547977,0.000138,27658.432978,158766.666667,4.523333,11.56,8.15,5.59,47,54,0.316224,0.189229
tx,Bailey County,7126,34.069000,-102.831000,34.091306,-102.897510,219.745861,0.000702,17007.000000,58300.000000,5.170000,11.56,8.15,5.59,47,54,0.324261,0.222534
tx,Bandera County,20796,29.693200,-99.070400,29.740414,-99.278558,16.385763,0.000048,26662.520869,140920.000000,4.710000,11.56,8.15,5.59,47,54,0.298570,0.169000


In [11]:
#create_map function for two layered map
#x is heat map variable
#y is symbol map variable

def create_map_state(x, y, df=None):
    """Build a two-layer gmaps figure: a weighted heat map plus labelled symbols.

    Parameters
    ----------
    x : str
        Column whose values are used as the heat-map weights.
    y : str
        Column drawn on the symbol layer. Its values are min-max scaled to
        [0, 1] for the marker fill/stroke opacity, and the raw value is shown
        in each marker's info box, labelled with the column name.
    df : pandas.DataFrame, optional
        Table to plot; it must contain ``lat_gmap``, ``lon_gmap``, ``x`` and
        ``y``. Defaults to the module-level ``state_df``.

    Returns
    -------
    The gmaps figure holding both layers.

    Raises
    ------
    ValueError
        If no row has coordinates and values for both ``x`` and ``y``.
    """
    source = state_df if df is None else df

    # The left merge that builds the county table can leave NaN for counties
    # with no solar data. Such rows cannot be weighted or shaded, so they are
    # left off the map instead of being handed to gmaps.
    plot_df = source.dropna(subset=["lat_gmap", "lon_gmap", x, y])
    if plot_df.empty:
        raise ValueError(
            "no rows with coordinates and values for {!r} and {!r}".format(x, y)
        )

    # store the figure in a variable so we can continue to add layers to the same map
    fig = gmaps.figure()

    # gmap latitude/longitude of every plotted county
    coordinates = plot_df[["lat_gmap", "lon_gmap"]]

    # heat-map intensity is proportional to the values of column x
    density = plot_df[x].astype(float)
    heat_layer = gmaps.heatmap_layer(
        coordinates,
        weights=density,
        point_radius=0.4,
        dissipating=False,
    )
    fig.add_layer(heat_layer)

    # Min-max scale column y to [0, 1]. Series.min()/max() are used because the
    # built-in min()/max() give order-dependent results when NaN is present.
    data = plot_df[y]
    low = data.min()
    span = data.max() - low
    if span > 0:
        norm_data = (data - low) / span
    else:
        # Every value is identical: avoid 0/0 and draw all markers fully opaque.
        norm_data = pd.Series(1.0, index=data.index)

    # Symbol layer for column y. Each info box shows "<column name>: <value>";
    # the loop variable is named `value` so it does not shadow the parameter y.
    data_layer = gmaps.symbol_layer(
        coordinates,
        fill_color="green",
        stroke_color="navy",
        fill_opacity=norm_data,
        stroke_opacity=norm_data,
        info_box_content=[f"{y}: {value}" for value in data],
    )
    fig.add_layer(data_layer)

    # Returned so that a notebook cell ending in this call displays the map
    return fig

In [12]:
# Heat map of residential incentive counts with markers shaded by solar systems per person
create_map_state(
    x="incentive_count_residential",
    y="number_of_solar_system_per_person",
)

Figure(layout=FigureLayout(height='420px'))

In [13]:
# Distinct residential incentive counts among the selected states
state_df.loc[:, "incentive_count_residential"].unique()

array([47, 16,  8, 10,  9], dtype=int64)