# Data Analysis and Visualization

## Pulling and Formatting Solar Data

In [None]:
# Import Libraries
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import os
from datetime import datetime
%config IPCompleter.greedy=True


## Penetration by Sector

In [None]:
# Line chart of 'Project Count' against 'Date Completed', one line per sector.
plt.style.use('seaborn')
penetration_fig, penetration_ax = plt.subplots(figsize=(8.5, 3.7))

# (source frame, line colour, legend label), drawn in this order.
penetration_series = (
    (final_res_df, 'blue', 'Residential'),
    (final_non_res_df, 'red', 'Non-Residential'),
)
for sector_df, line_color, sector_label in penetration_series:
    penetration_ax.plot(
        sector_df['Date Completed'],
        sector_df['Project Count'],
        color=line_color,
        label=sector_label,
    )

penetration_ax.set_xlabel('Year-Month')
penetration_ax.set_ylabel('Total Projects')
penetration_ax.set_title("Solar Penetration in New York")
penetration_ax.legend()

# Write the PNG before showing the figure.
penetration_fig.savefig('SolarPenetrationNY.png')
plt.show()

## Output by Sector

In [None]:
# Line chart of 'Expected KWh Annual Production' against 'Date Completed',
# one line per sector.
plt.style.use('seaborn')
generation_fig, generation_ax = plt.subplots(figsize=(8.5, 3.7))

# (source frame, line colour, legend label), drawn in this order.
generation_series = (
    (final_res_df, 'blue', 'Residential'),
    (final_non_res_df, 'red', 'Non-Residential'),
)
for sector_df, line_color, sector_label in generation_series:
    generation_ax.plot(
        sector_df['Date Completed'],
        sector_df['Expected KWh Annual Production'],
        color=line_color,
        label=sector_label,
    )

generation_ax.set_xlabel('Year-Month')
generation_ax.set_ylabel('Total KWh')
generation_ax.set_title("Solar Generation Output in New York")
generation_ax.legend()

# Write the PNG before showing the figure.
generation_fig.savefig('SolarGenerationNY.png')
plt.show()

## Adoption by Type

In [None]:
# Line chart of 'Project Count' against 'Date Completed', one line per
# acquisition type (purchase, lease, PPA).
plt.style.use('seaborn')
adoption_fig, adoption_ax = plt.subplots(figsize=(8.5, 3.7))

# (source frame, line colour, legend label), drawn in this order.
adoption_series = (
    (final_purchase_df, 'blue', 'Purchase'),
    (final_lease_df, 'green', 'Lease'),
    (final_ppa_df, 'yellow', 'PPA'),
)
for type_df, line_color, type_label in adoption_series:
    adoption_ax.plot(
        type_df["Date Completed"],
        type_df["Project Count"],
        color=line_color,
        label=type_label,
    )

adoption_ax.set_xlabel('Year')
adoption_ax.set_ylabel('Count')
adoption_ax.set_title("Solar Power Installation")
adoption_ax.legend()

# Write the PNG before showing the figure.
adoption_fig.savefig("SolarInstallation_per_Year.png")
plt.show()

## Installation by Income Distribution

In [None]:
# [RESIDENTIAL - PURCHASE] INCOME DISTRIBUTION
# Bucket every row of r_purchase_df by its 'income' value and plot how many
# rows fall into each bracket, with brackets in ascending order on the x axis.

plt.style.use('seaborn')

# Bracket edges; pd.cut's default makes each bracket (low, high], so values
# of 0 or below and values above the last edge are left out of the chart.
bins = [0, 20000, 40000, 60000, 80000, 100000, 120000,
        140000, 160000, 180000, 200000, 220000, 240000]

# One label per bracket, generated from the edges so the two cannot drift apart.
labels = ['<$20k'] + [
    f'${low // 1000}k-${high // 1000}k'
    for low, high in zip(bins[1:-1], bins[2:])
]

# NOTE: test_df is the same object as r_purchase_df (no copy is made), so the
# 'income bins' column is added to r_purchase_df as well.
test_df = r_purchase_df
test_df['income bins'] = pd.cut(test_df['income'], bins=bins, labels=labels)

# Count rows per bracket in bracket order. The default value_counts() order is
# by descending count, which would not line up with the tick labels below.
# reindex() also guarantees one row per bracket, including empty ones.
bracket_counts = (
    test_df['income bins']
    .value_counts(sort=False)
    .reindex(labels, fill_value=0)
)

# Build the frame explicitly rather than via reset_index()/rename(), whose
# resulting column names differ between pandas versions.
rp_projects_per_income_bin = pd.DataFrame({
    'income bin': pd.Categorical(labels, categories=labels, ordered=True),
    'Purchase': bracket_counts.to_numpy(),
})

# One tick per bracket (one fewer than the number of edges).
x_axis = np.arange(len(labels))
plt.figure(figsize=(8.5, 3.7))
plt.bar(x_axis, rp_projects_per_income_bin['Purchase'])
# Range labels are wider than the bars, so tilt them to avoid overlap.
plt.xticks(x_axis, labels, rotation=45, ha='right')
plt.xlabel('Income Bracket')
plt.ylabel('Total Installations')
plt.title("Installation per Income in New York")
#plt.savefig("Installation_per_income.png")
plt.show()

## Adoption Cost and Incentives

In [None]:
# Bubble chart with one series per purchase type: x is years_type_*, y is
# percent_covered_type_*, and the marker size is count_type_* scaled down by
# normalize_factor.
# NOTE(review): `sns` (seaborn) is not imported in the visible cells - confirm
# it is imported elsewhere in the notebook.

# Apply seaborn theming first. sns.set() resets the style to seaborn's
# default, so set_style('dark') has to come after it to take effect.
sns.set()
sns.set_style('dark')

# Set the default figure size to 10 wide by 7 tall (inches). Done after the
# seaborn calls so theming cannot override it.
fig_size = [10, 7]
plt.rcParams["figure.figsize"] = fig_size

# Divisor applied to the raw counts to get readable marker sizes.
normalize_factor = 25
plt.scatter(years_type_purchase, percent_covered_type_purchase,
            s=count_type_purchase / normalize_factor,
            color='blue', label='Purchase', alpha=.5)
plt.scatter(years_type_lease, percent_covered_type_lease,
            s=count_type_lease / normalize_factor,
            color='green', label='Lease', alpha=.5)
plt.scatter(years_type_power, percent_covered_type_power,
            s=count_type_power / normalize_factor,
            color='red', label='Power Purchase Agreement', alpha=.5)

# One tick per year across the range covered by the purchase series.
plt.xticks(np.arange(min(years_type_purchase), max(years_type_purchase) + 1, 1.0))

plt.xlabel('Year')
plt.ylabel('Mean Incentivization of Project Costs')
plt.title('Mean of Subsidies Awarded Yearly to Each Purchase Type')
plt.ylim(0, 80)
plt.legend(framealpha=1, loc='best', frameon=True)

# TODO: a caption was intended at data coordinates (10, -10) via
# plt.text(10, -10, <caption>, wrap=True); no caption string is available
# here, and calling plt.text without one raises TypeError.

# Lay out and save BEFORE show(): once show() returns the figure is no longer
# current, so a later savefig would write an empty image.
plt.tight_layout()
plt.savefig("incent_by_year.png")
plt.show()

## Per Capita Solar Penetration Heat Map

In [None]:
# Clean solar_byzip in place, then extract the lat/long pairs for the map layers
import numpy as np
non_finite_values = [np.inf, -np.inf]
solar_byzip.replace(non_finite_values, np.nan, inplace=True)  # +/-inf -> NaN
solar_byzip.dropna(how='any', inplace=True)                   # drop rows with any NaN
locations = solar_byzip[["lat", "long"]]

# Heat weight: percentile rank of solar_per_000 (rank(pct=True) is in (0, 1]),
# rescaled to at most 9.9
solar_byzip['solar_weight'] = solar_byzip['solar_per_000'].rank(pct=True) * 8.9 + 1

# Base map
fig = gmaps.figure(map_type="TERRAIN", zoom_level=7, center=(42.9, -75))

# Heat layer over the lat/long pairs, weighted by solar_weight
heat_layer_options = {
    'dissipating': False,
    'weights': solar_byzip['solar_weight'],
    'max_intensity': 50,
    'point_radius': 0.10,
    'opacity': 0.4,
}
heat_layer = gmaps.heatmap_layer(locations, **heat_layer_options)
fig.add_layer(heat_layer)

# Leave the figure as the cell's last expression so the notebook renders it
fig


## Per Capita Solar Penetration Symbol Layer Map

In [None]:
# Base map
from ipywidgets.embed import embed_minimal_html
fig = gmaps.figure(map_type="TERRAIN", zoom_level=7, center=(42.9, -75))

# Info box content: build one HTML string per row of solar_byzip.
# The row's index label is what gets shown as the zip code.
popup_list = [
    f"Zip Code: {row_label}<br>\n"
    f"Solar per 1,000 (kW): {record['solar_per_000']:.2f}<br>\n"
    f"Population: {record['population']:,.0f}<br>\n"
    f"Total solar (kW): {record['Total_installedkW']:,.0f}"
    for row_label, record in solar_byzip.iterrows()
]

# Symbol size: percentile rank of solar_per_000 rescaled to at most 9.9,
# then truncated to whole numbers for the layer's scale argument
solar_byzip['solar_weight'] = solar_byzip['solar_per_000'].rank(pct=True) * 8.9 + 1
symbol_scales = list(map(int, solar_byzip['solar_weight'].values))

# Symbol layer (with hover and popups)
symbol_layer = gmaps.symbol_layer(
    locations,
    fill_color='rgba(0, 152, 215, 0.4)',
    stroke_color='rgba(0, 152, 215, 0.2)',
    scale=symbol_scales,
    info_box_content=popup_list,
)

# Add the layer and export the map as a standalone HTML page
fig.add_layer(symbol_layer)
embed_minimal_html('Solar Concentration.html', views=[fig])

## Per Capita Solar Concentration Choropleth Map

In [None]:
# Choropleth of zipdata: one polygon per row, coloured by its 'solar_rank'.
# NOTE(review): PuBu8, LinearColorMapper, ColumnDataSource, figure, HoverTool
# and show are not imported in the visible cells - presumably from bokeh;
# confirm they are imported elsewhere in the notebook.

#A. Set color scale
# Use a reversed COPY of the palette. Reversing PuBu8 in place would flip the
# colours back every time the cell is re-run, and fails outright when the
# palette is an immutable tuple.
reversed_palette = PuBu8[::-1]
color_mapper = LinearColorMapper(palette=reversed_palette)

#B. Define source
source = ColumnDataSource(zipdata)

#C. Specify Figure
TOOLS = "pan,wheel_zoom,reset,hover,box_zoom,save"
p = figure(
    title="New York Solar Penetration by zip code", tools=TOOLS,
    x_axis_location=None, y_axis_location=None,
    plot_width = 1200, plot_height = 800
)

#D. Add Polygons
# NOTE(review): 'lats' is passed as the x coordinates and 'longs' as the y
# coordinates, while the hover tooltip labels (x, y) as "(Long, Lat)" -
# confirm which column actually holds which coordinate.
p.patches('lats', 'longs', source=source,
          fill_color={'field': 'solar_rank', 'transform': color_mapper},
          fill_alpha=0.7, line_color="white", line_width=0.5)

#E. Specify Hover Options
# The hover tool was created through the "hover" entry in TOOLS.
hover = p.select_one(HoverTool)
hover.point_policy = "follow_mouse"
hover.tooltips = [
    ("Zip Code:", "@zipcode"),
    ("Solar (kW) per 1,000 people", "@solar_per_000{0,0.0}"),
    ("Total Solar Installed (kW)", "@Total_installedkW{0,0}"),
    ("Population", "@population{0,0}"),
    ("(Long, Lat)", "($x, $y)"),
]

#F. Remove grid lines
p.grid.grid_line_color = None

#G. Add Scale
#H.Show and save figure
show(p)