# Data Reader Class
I want to define a data reader class to read in the irradiance data for all of different sites. 
The class needs to take in a site and get the 
* lat
* long
* year interval

It also needs to set the 
* api_key
* attributes
* time interval
* utc falg
* name
* reason_for_use
* affiliation
* email

Read in the metadata

In [1]:
import json

with open('../../data/site_metadata.json', 'r') as file:
    s = file.read()
    
sites = json.loads(s)

In [2]:
import pandas as pd
import calendar
from datetime import datetime

with open('nsrdb_api_key.txt', 'r') as file:
    api_key = file.read()

class IrradianceDataReader:
    
    def __init__(self, irradiance_site):
        
        self.site = irradiance_site

        self.lat = irradiance_site['md']['Latitude']
        self.lon = irradiance_site['md']['Longitude']
        
        self.start_year = irradiance_site['start_year']
        self.end_year = irradiance_site['end_year']

        self.attributes = 'ghi,dhi,dni,wind_speed,air_temperature,solar_zenith_angle'

        self.interval = '30'
        self.utc = 'true'
        self.name = 'Richard+Timpson'
        self.reason_for_use = 'research'
        self.affiliation = 'University+of+Utah'
        self.email = 'richardtimpson80@gmail.com'
        self.mailing_list = 'true'
        self.url = ''

    def set_url(self, year, leap_year):
        self.url = f'http://developer.nrel.gov/api/solar/nsrdb_psm3_download.csv?wkt=POINT({self.lon}%20{self.lat})&names={year}&leap_day={leap_year}&interval={self.interval}&utc={self.utc}&full_name={self.name}&email={self.email}&affiliation={self.affiliation}&mailing_list={self.mailing_list}&reason={self.reason_for_use}&api_key={api_key}&attributes={self.attributes}'
        
    def get_year_interval(self,startDate, endDate):
        start_year = datetime.strptime(startDate, '%Y-%m-%d').year
        end_year = datetime.strptime(endDate, '%Y-%m-%d').year
        return start_year, end_year


    def get_location_metadata(self):
        self.set_url('2017', 'false')
        print(self.url)
        info = pd.read_csv(self.url, nrows=1)
        return info

    def get_location_data(self, year):
        leap_year = calendar.isleap(int(year))
        leap_str = 'true' if leap_year else 'false'
        self.set_url(str(year), leap_str)
        print(self.url)
        data = pd.read_csv(self.url, skiprows=2)
        return data


Read in the metadata and initialize all of the data readers

In [3]:
with open('../../data/site_metadata.json', 'r') as file:
    s = file.read()
sites = json.loads(s)

Read in the solar irradiance site metadata

In [4]:
with open('../../data/irradiance_metadata.json', 'r') as file:
    s = file.read()

irradiance_md = json.loads(s)
irradiance_md

{'113805': {'start_year': 2015,
  'end_year': 2018,
  'md': {'Source': 'NSRDB',
   'Location ID': 113805,
   'City': '-',
   'State': '-',
   'Country': '-',
   'Latitude': 36.97,
   'Longitude': -112.98,
   'Time Zone': 0,
   'Elevation': 1510,
   'Local Time Zone': -7,
   'Clearsky DHI Units': 'w/m2',
   'Clearsky DNI Units': 'w/m2',
   'Clearsky GHI Units': 'w/m2',
   'Dew Point Units': 'c',
   'DHI Units': 'w/m2',
   'DNI Units': 'w/m2',
   'GHI Units': 'w/m2',
   'Solar Zenith Angle Units': 'Degree',
   'Temperature Units': 'c',
   'Pressure Units': 'mbar',
   'Relative Humidity Units': '%',
   'Precipitable Water Units': 'cm',
   'Wind Direction Units': 'Degrees',
   'Wind Speed': 'm/s',
   'Cloud Type -15': None,
   'Cloud Type 0': 'Clear',
   'Cloud Type 1': 'Probably Clear',
   'Cloud Type 2': 'Fog',
   'Cloud Type 3': 'Water',
   'Cloud Type 4': 'Super-Cooled Water',
   'Cloud Type 5': 'Mixed',
   'Cloud Type 6': 'Opaque Ice',
   'Cloud Type 7': 'Cirrus',
   'Cloud Type 8': '

Now that we have the irradiance metadata, we can gather all of the irradiance site data

In [5]:
sample_id = '113805'
ir_site = irradiance_md[sample_id]
ir_dr = IrradianceDataReader(ir_site)
data = ir_dr.get_location_data(2018)
data

http://developer.nrel.gov/api/solar/nsrdb_psm3_download.csv?wkt=POINT(-112.98%2036.97)&names=2018&leap_day=false&interval=30&utc=true&full_name=Richard+Timpson&email=richardtimpson80@gmail.com&affiliation=University+of+Utah&mailing_list=true&reason=research&api_key=65XmZXKU1X6yqFKJ7e4Dab5x35seXfubayeQ4D5a&attributes=ghi,dhi,dni,wind_speed,air_temperature,solar_zenith_angle


Unnamed: 0,Year,Month,Day,Hour,Minute,GHI,DHI,DNI,Wind Speed,Temperature,Solar Zenith Angle
0,2018,1,1,0,0,32,11,328,0.5,9.7,86.28
1,2018,1,1,0,30,2,3,33,0.7,8.8,91.63
2,2018,1,1,1,0,0,0,0,0.8,7.9,97.02
3,2018,1,1,1,30,0,0,0,1.0,7.4,102.59
4,2018,1,1,2,0,0,0,0,1.1,6.9,108.31
...,...,...,...,...,...,...,...,...,...,...,...
17515,2018,12,31,21,30,354,119,574,5.5,-0.4,65.80
17516,2018,12,31,22,0,298,97,562,5.5,-0.7,69.00
17517,2018,12,31,22,30,256,70,625,5.3,-1.4,72.73
17518,2018,12,31,23,0,206,46,706,5.0,-2.0,76.92


In [6]:
from datetime import timedelta, datetime

ir_dfs = {}
for site_id, site in irradiance_md.items():    
    print(f'System ID: {site_id}')
    ir_rd = IrradianceDataReader(site)

    start_year = ir_rd.start_year 
    if (int(start_year) < 2019):
        end_year = ir_rd.end_year 
        # delta = end_date - start_date
        print(f'Start year: {start_year}')
        print(f'End year: {end_year}')
        data_dfs = []
        for year in range(int(start_year), int(end_year) + 1):
            # year = int(start_year) + i
            # year = start_date + timedelta(years=i)
            year_str = str(year)
            print(f'Year loop: {year_str}')
            print("Getting data ...")
            data_df = ir_rd.get_location_data(year_str)
            data_dfs.append(data_df)
        
        if len(data_dfs) > 1:
            data_df = pd.concat(data_dfs)
        else:
            data_df = data_dfs[0]

        ir_dfs[site_id] = data_df

    else:
        ir_dfs[site_id] = None

System ID: 113805
Start year: 2015
End year: 2018
Year loop: 2015
Getting data ...
http://developer.nrel.gov/api/solar/nsrdb_psm3_download.csv?wkt=POINT(-112.98%2036.97)&names=2015&leap_day=false&interval=30&utc=true&full_name=Richard+Timpson&email=richardtimpson80@gmail.com&affiliation=University+of+Utah&mailing_list=true&reason=research&api_key=65XmZXKU1X6yqFKJ7e4Dab5x35seXfubayeQ4D5a&attributes=ghi,dhi,dni,wind_speed,air_temperature,solar_zenith_angle
Year loop: 2016
Getting data ...
http://developer.nrel.gov/api/solar/nsrdb_psm3_download.csv?wkt=POINT(-112.98%2036.97)&names=2016&leap_day=true&interval=30&utc=true&full_name=Richard+Timpson&email=richardtimpson80@gmail.com&affiliation=University+of+Utah&mailing_list=true&reason=research&api_key=65XmZXKU1X6yqFKJ7e4Dab5x35seXfubayeQ4D5a&attributes=ghi,dhi,dni,wind_speed,air_temperature,solar_zenith_angle
Year loop: 2017
Getting data ...
http://developer.nrel.gov/api/solar/nsrdb_psm3_download.csv?wkt=POINT(-112.98%2036.97)&names=2017&le

In [11]:
import os

for key, frame in ir_dfs.items():
    folder_path = f'../../data/irradiance_data/{key}'
    if not os.path.exists(folder_path):
        os.mkdir(folder_path)
    
    if frame is not None:
        frame.to_csv(f'{folder_path}/irradiance_data.csv')
    print(key, type(frame))

113805 <class 'pandas.core.frame.DataFrame'>
116439 <class 'NoneType'>
114245 <class 'pandas.core.frame.DataFrame'>
119311 <class 'pandas.core.frame.DataFrame'>
89627 <class 'pandas.core.frame.DataFrame'>
151965 <class 'pandas.core.frame.DataFrame'>
114696 <class 'pandas.core.frame.DataFrame'>
80122 <class 'pandas.core.frame.DataFrame'>
113806 <class 'pandas.core.frame.DataFrame'>
115995 <class 'pandas.core.frame.DataFrame'>
157836 <class 'pandas.core.frame.DataFrame'>


# Random Data Manipulation

In [13]:
# irradiance_site_ranges = {
#     'id': {
#         'start_year': 
#         'end_year':
#     }
# }

irradiance_metadata = {}

max_end_year = 2018

for site_dr in site_drs:
    ir_site_id = site_dr.site['irradiance_md']['Location ID']

    # check if we have already set the year for the irradiance site
    if ir_site_id in irradiance_metadata:
        # set the start year
        start_cur = int(irradiance_metadata[ir_site_id]['start_year'])
        if site_dr.start_year < start_cur:
            site[ir_site_id]['start_year'] = site_dr.start_year        
    else:
        irradiance_metadata[ir_site_id] = {
            'start_year': site_dr.start_year,
            'end_year': max_end_year,
        }
irradiance_metadata

{113805: {'start_year': 2015, 'end_year': 2018},
 116439: {'start_year': 2019, 'end_year': 2018},
 114245: {'start_year': 2018, 'end_year': 2018},
 119311: {'start_year': 2016, 'end_year': 2018},
 89627: {'start_year': 2016, 'end_year': 2018},
 151965: {'start_year': 2017, 'end_year': 2018},
 114696: {'start_year': 2018, 'end_year': 2018},
 80122: {'start_year': 2018, 'end_year': 2018},
 113806: {'start_year': 2018, 'end_year': 2018},
 115995: {'start_year': 2017, 'end_year': 2018},
 157836: {'start_year': 2017, 'end_year': 2018}}

The following code was used to gather the irradiance site metadata. I'll no longer need it as I can simply read from the stored json file

In [15]:
site_drs = []
for site in sites:
    dr = IrradianceDataReader(site)
    location_md = dr.get_location_metadata()
    site_ir_md_dict = location_md.to_dict('records')
    dr.site['irradiance_md'] = site_ir_md_dict[0]
    site_drs.append(dr)

http://developer.nrel.gov/api/solar/nsrdb_psm3_download.csv?wkt=POINT(-112.9758274%2036.9587533)&names=2017&leap_day=false&interval=30&utc=true&full_name=Richard+Timpson&email=richardtimpson80@gmail.com&affiliation=University+of+Utah&mailing_list=true&reason=research&api_key=65XmZXKU1X6yqFKJ7e4Dab5x35seXfubayeQ4D5a&attributes=ghi,dhi,dni,wind_speed,air_temperature,solar_zenith_angle
http://developer.nrel.gov/api/solar/nsrdb_psm3_download.csv?wkt=POINT(-113.27477809999999%2037.2065196)&names=2017&leap_day=false&interval=30&utc=true&full_name=Richard+Timpson&email=richardtimpson80@gmail.com&affiliation=University+of+Utah&mailing_list=true&reason=research&api_key=65XmZXKU1X6yqFKJ7e4Dab5x35seXfubayeQ4D5a&attributes=ghi,dhi,dni,wind_speed,air_temperature,solar_zenith_angle
http://developer.nrel.gov/api/solar/nsrdb_psm3_download.csv?wkt=POINT(-112.9757702%2036.9902621)&names=2017&leap_day=false&interval=30&utc=true&full_name=Richard+Timpson&email=richardtimpson80@gmail.com&affiliation=Univer

In [20]:
import simplejson

with open('../../data/site_metadata.json', 'w') as file:
    s = simplejson.dumps(sites, file, ignore_nan=True)    
    file.write(s)


Storing the metadata in the site_metadata file is the wrong thing to do. 
What I would like to have is a solar irradience site metadata file, with the 
site_metadata file simply holding the id of it's corresponding irradiance site. 
I'll have to manipulate the sites again to make that work

In [11]:
with open('../../data/site_metadata.json', 'r') as file:
    s = file.read()
sites = json.loads(s)

In [17]:
for site in sites:
    md = site['irradiance_md']
    ir_id = md['Location ID']
    irradiance_metadata[ir_id]['md'] = md

irradiance_metadata

{113805: {'start_year': 2015,
  'end_year': 2018,
  'md': {'Source': 'NSRDB',
   'Location ID': 113805,
   'City': '-',
   'State': '-',
   'Country': '-',
   'Latitude': 36.97,
   'Longitude': -112.98,
   'Time Zone': 0,
   'Elevation': 1510,
   'Local Time Zone': -7,
   'Clearsky DHI Units': 'w/m2',
   'Clearsky DNI Units': 'w/m2',
   'Clearsky GHI Units': 'w/m2',
   'Dew Point Units': 'c',
   'DHI Units': 'w/m2',
   'DNI Units': 'w/m2',
   'GHI Units': 'w/m2',
   'Solar Zenith Angle Units': 'Degree',
   'Temperature Units': 'c',
   'Pressure Units': 'mbar',
   'Relative Humidity Units': '%',
   'Precipitable Water Units': 'cm',
   'Wind Direction Units': 'Degrees',
   'Wind Speed': 'm/s',
   'Cloud Type -15': None,
   'Cloud Type 0': 'Clear',
   'Cloud Type 1': 'Probably Clear',
   'Cloud Type 2': 'Fog',
   'Cloud Type 3': 'Water',
   'Cloud Type 4': 'Super-Cooled Water',
   'Cloud Type 5': 'Mixed',
   'Cloud Type 6': 'Opaque Ice',
   'Cloud Type 7': 'Cirrus',
   'Cloud Type 8': 'Ov

In [18]:
import simplejson

with open('../../data/irradiance_metadata.json', 'w') as file:
    s = simplejson.dumps(irradiance_metadata, file, ignore_nan=True)
    file.write(s)

Now I need to rewrite the site_metadata file to not have the metadata

In [19]:
for site in sites:
    ir_id = site['irradiance_md']['Location ID']
    site['irradiance_site_id'] = ir_id
    del site['irradiance_md']

In [20]:
import simplejson

with open('../../data/site_metadata.json', 'w') as file:
    s = simplejson.dumps(sites, file, ignore_nan=True)    
    file.write(s)