In [1]:
import pandas as pd
df_input = pd.read_csv('../Data/Processed/input_data.csv')
df_input

Unnamed: 0,zip,lat,lng,timezone,county_name,city,state_id,state_name,population,density,...,pct_cloudy_days_Dec,pct_cloudy_days_Feb,pct_cloudy_days_Jan,pct_cloudy_days_Jul,pct_cloudy_days_Jun,pct_cloudy_days_Mar,pct_cloudy_days_May,pct_cloudy_days_Nov,pct_cloudy_days_Oct,pct_cloudy_days_Sep
0,1001,42.06259,-72.62589,America/New_York,Hampden,Agawam,MA,Massachusetts,17312.0,581.0,...,71.65,69.15,65.30,64.25,67.2,74.10,70.35,74.70,67.25,63.65
1,1002,42.37492,-72.46210,America/New_York,Hampshire,Amherst,MA,Massachusetts,30014.0,210.5,...,69.15,67.90,64.30,63.85,67.0,73.90,68.80,73.60,66.05,63.90
2,1003,42.39192,-72.52479,America/New_York,Hampshire,Amherst,MA,Massachusetts,11357.0,6164.3,...,71.65,69.15,65.30,64.25,67.2,74.10,70.35,74.70,67.25,63.65
3,1005,42.42017,-72.10615,America/New_York,Worcester,Barre,MA,Massachusetts,5128.0,44.7,...,69.15,67.90,64.30,63.85,67.0,73.90,68.80,73.60,66.05,63.90
4,1007,42.27875,-72.40036,America/New_York,Hampshire,Belchertown,MA,Massachusetts,15005.0,110.1,...,69.15,67.90,64.30,63.85,67.0,73.90,68.80,73.60,66.05,63.90
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32629,99363,46.06652,-118.88846,America/Los_Angeles,Walla Walla,Wallula,WA,Washington,350.0,3.0,...,85.90,86.25,85.80,37.00,56.8,72.85,61.35,80.85,63.40,53.20
32630,99371,46.80678,-118.31679,America/Los_Angeles,Adams,Washtucna,WA,Washington,325.0,0.6,...,85.75,85.35,86.65,36.95,58.1,71.45,62.65,81.35,65.05,53.85
32631,99401,46.08744,-117.25143,America/Los_Angeles,Asotin,Anatone,WA,Washington,642.0,1.1,...,83.35,84.75,86.20,37.30,59.7,73.40,64.75,79.40,62.75,52.25
32632,99402,46.19394,-117.14736,America/Los_Angeles,Asotin,Asotin,WA,Washington,1325.0,1.8,...,83.35,84.75,86.20,37.30,59.7,73.40,64.75,79.40,62.75,52.25


In [2]:
# Use 3hr as the frequency as ran into issues with DST and spring forward (03-11-2018 2:00 is a non-existent time since it jumps from 1:59AM to 3:00AM)
naive_times = pd.date_range(start='2018', end='2019', freq='3h')

df_input_set = df_input[['lat', 'lng', 'zip', 'timezone']]

# Convert our dataframe into an array of tuples
coordinates = list(df_input_set.itertuples(index=False))

In [3]:
#!pip install pvlib
#!pip install tqdm
#!pip install tables

import pvlib
# Library for displaying progress
from tqdm.notebook import tqdm
sandia_modules = pvlib.pvsystem.retrieve_sam('SandiaMod')
sapm_inverters = pvlib.pvsystem.retrieve_sam('cecinverter')
module = sandia_modules['Canadian_Solar_CS5P_220M___2009_']
# 5ft x 4 ft, average 30 panels per household
inverter = sapm_inverters['ABB__MICRO_0_25_I_OUTD_US_208__208V_']
temperature_model_parameters = pvlib.temperature.TEMPERATURE_MODEL_PARAMETERS['sapm']['open_rack_glass_glass']
# This is something we could incorporate when running the script instead of keeping it constant
temp_air = 20
wind_speed = 0


system = {'module': module, 'inverter': inverter,
           'surface_azimuth': 180}


energies = {}

for latitude, longitude, name, timezone in tqdm(coordinates):
    # Always set the altitude to zero since we dont have the information
    altitude = 0
    times = naive_times.tz_localize(timezone)
    system['surface_tilt'] = latitude
    solpos = pvlib.solarposition.get_solarposition(times, latitude, longitude)
    dni_extra = pvlib.irradiance.get_extra_radiation(times)
    airmass = pvlib.atmosphere.get_relative_airmass(solpos['apparent_zenith'])
    pressure = pvlib.atmosphere.alt2pres(altitude)
    am_abs = pvlib.atmosphere.get_absolute_airmass(airmass, pressure)
    tl = pvlib.clearsky.lookup_linke_turbidity(times, latitude, longitude)
    cs = pvlib.clearsky.ineichen(solpos['apparent_zenith'], am_abs, tl,
                                     dni_extra=dni_extra, altitude=altitude)
    aoi = pvlib.irradiance.aoi(system['surface_tilt'], system['surface_azimuth'],
                                   solpos['apparent_zenith'], solpos['azimuth'])
    total_irrad = pvlib.irradiance.get_total_irradiance(system['surface_tilt'],
                                                           system['surface_azimuth'],
                                                           solpos['apparent_zenith'],
                                                          solpos['azimuth'],
                                                           cs['dni'], cs['ghi'], cs['dhi'],
                                                           dni_extra=dni_extra,
                                                           model='haydavies')
    tcell = pvlib.temperature.sapm_cell(total_irrad['poa_global'],
                                         temp_air, wind_speed,
                                          **temperature_model_parameters)
    effective_irradiance = pvlib.pvsystem.sapm_effective_irradiance(
            total_irrad['poa_direct'], total_irrad['poa_diffuse'],
           am_abs, aoi, module)
    dc = pvlib.pvsystem.sapm(effective_irradiance, tcell, module)
    ac = pvlib.inverter.sandia(dc['v_mp'], dc['p_mp'], inverter)
    annual_energy = ac.sum()
    energies[name] = annual_energy

energies = pd.Series(energies)

  0%|          | 0/32634 [00:00<?, ?it/s]

In [4]:
display(energies)

1001     152117.987231
1002     152110.789759
1003     152001.388434
1005     152326.278829
1007     152081.411157
             ...      
99363    148115.556681
99371    146656.020351
99401    149991.771217
99402    148591.763811
99403    146417.259912
Length: 32634, dtype: float64

In [8]:
df_input['annual_output_w_hrs'] = energies.tolist()

In [11]:
df_input.to_csv('../Data/Processed/data_combined', index=False)