In [1]:
import calendar
import os
import sys
import time

import numpy as np
import pandas as pd

## Martijn's data collection notebook

Let's take the data for 2016 through 2020. The code to retrieve this data was taken from
https://developer.nrel.gov/docs/solar/nsrdb/python-examples/

In [2]:
def get_nsrdb_data(year, api_key='', lat=34.0522, lon=-118.243683):
    """
    Download one year of NSRDB PSM3 data for a single point into a pandas data frame.

    Most of the URL construction is copied from
    https://developer.nrel.gov/docs/solar/nsrdb/python-examples/

    Parameters
    ----------
    year : str or int
        Year to request, e.g. '2020'.
    api_key : str
        NREL developer API key, inserted verbatim into the request URL.
    lat, lon : float
        Latitude and longitude of the requested point. The defaults are the
        coordinates this notebook has always used.

    Returns
    -------
    pandas.DataFrame
        The CSV response parsed with its first two rows skipped. Those rows
        hold the site metadata (e.g. 'Local Time Zone', 'Elevation'), which
        this function does not return.

    Notes
    -----
    The URL is requested exactly once per call. All values are inserted into
    the query string as-is (no URL-encoding), so spaces must already be
    written as '+', i.e., 'John+Smith' instead of 'John Smith'.
    """
    # Attributes to extract (e.g., dhi, ghi, etc.), separated by commas.
    attributes = (
        'air_temperature,clearsky_dhi,clearsky_dni,clearsky_ghi,cloud_type,dew_point,'
        'dhi,dni,fill_flag,ghi,relative_humidity,solar_zenith_angle,'
        'surface_albedo,surface_pressure,total_precipitable_water,wind_direction,wind_speed'
    )

    # Use the full Gregorian rule: a plain `year % 4` test wrongly treats
    # century years such as 1900 or 2100 as leap years.
    leap_year = 'true' if calendar.isleap(int(year)) else 'false'

    # Time interval in minutes, i.e., '30' is half hour intervals. Valid intervals are 30 & 60.
    interval = '30'

    # 'true' returns timestamps in UTC, 'false' in the local time zone of the data.
    # NOTE: In order to use the NSRDB data in SAM, UTC must be 'false'; SAM requires
    # the data to be in the local time zone.
    utc = 'false'

    # Requester details required by the API ('+' instead of spaces).
    your_name = 'Martijn+deVries'
    reason_for_use = 'private+project'
    your_affiliation = 'General+Assembly'
    your_email = 'martijndevries91@gmail.com'
    mailing_list = 'false'

    # WKT points are written as "lon lat", hence the argument order below.
    url = (
        'https://developer.nrel.gov/api/nsrdb/v2/solar/psm3-download.csv'
        '?wkt=POINT({lon}%20{lat})&names={year}&leap_day={leap}&interval={interval}'
        '&utc={utc}&full_name={name}&email={email}&affiliation={affiliation}'
        '&mailing_list={mailing_list}&reason={reason}&api_key={api}&attributes={attr}'
    ).format(
        year=year, lat=lat, lon=lon, leap=leap_year, interval=interval, utc=utc,
        name=your_name, email=your_email, mailing_list=mailing_list,
        affiliation=your_affiliation, reason=reason_for_use, api=api_key,
        attr=attributes,
    )

    # Skip the two metadata rows so the third row becomes the column header.
    return pd.read_csv(url, skiprows=2)

In [12]:
# Download each year from 2016 through 2020 and stack them into one frame.
# NOTE(review): `api_key` is not defined in any cell visible in this notebook;
# it has to be set before this cell runs (preferably read from an environment
# variable rather than hard-coded).
yearly_frames = []
for year in range(2016, 2021):
    time.sleep(10) #polite
    print(year)
    yearly_frames.append(get_nsrdb_data(str(year), api_key=api_key))

# Concatenate once instead of re-copying the growing frame on every iteration.
# ignore_index=True gives the result a unique 0..N-1 index; without it every
# year would restart at 0 and the index labels would be duplicated.
df = pd.concat(yearly_frames, ignore_index=True)
print(df.shape)


2016
2017
2018
2019
2020
(87696, 22)


In [13]:
# Make sure the output folder exists first: to_csv does not create missing
# directories and would fail on a fresh checkout.
os.makedirs('./martijn', exist_ok=True)
df.to_csv('./martijn/martijn-nsrdb.csv', index=False)

In [14]:
# Preview the first rows of the combined frame as a quick sanity check.
df.head()

Unnamed: 0,Year,Month,Day,Hour,Minute,Temperature,Clearsky DHI,Clearsky DNI,Clearsky GHI,Cloud Type,...,DNI,Fill Flag,GHI,Relative Humidity,Solar Zenith Angle,Surface Albedo,Pressure,Precipitable Water,Wind Direction,Wind Speed
0,2016,1,1,0,0,5.0,0,0,0,0,...,0,0,0,49.0,168.95,0.128,990,0.472,55.7,4.0
1,2016,1,1,0,30,5.0,0,0,0,0,...,0,0,0,49.0,166.74,0.128,990,0.477,55.7,4.0
2,2016,1,1,1,0,5.0,0,0,0,0,...,0,0,0,48.97,162.23,0.128,990,0.482,55.7,4.1
3,2016,1,1,1,30,5.0,0,0,0,0,...,0,0,0,48.97,156.74,0.128,990,0.489,55.7,4.1
4,2016,1,1,2,0,5.0,0,0,0,0,...,0,0,0,48.98,150.83,0.128,990,0.496,56.0,4.2
