# Data Reader Class
I want to define a data reader class to read in the irradiance data for all of different sites. 
The class needs to take in a site and get the 
* lat
* long
* year interval

It also needs to set the 
* api_key
* attributes
* time interval
* utc falg
* name
* reason_for_use
* affiliation
* email

Read in the metadata

In [1]:
import json

with open('../../data/site_metadata.json', 'r') as file:
    s = file.read()
    
sites = json.loads(s)

In [2]:
import pandas as pd
from datetime import datetime

with open('nsrdb_api_key.txt', 'r') as file:
    api_key = file.read()

class IrradianceDataReader:
    
    def __init__(self, site):
        
        self.site = site

        self.lat = site['location']['lat']
        self.lon = site['location']['long']
        
        startDate = site['time_interval']['startDate']
        endDate = site['time_interval']['endDate']
        
        self.start_year, self.end_year = self.get_year_interval(startDate, endDate)

        self.attributes = 'ghi,dhi,dni,wind_speed,air_temperature,solar_zenith_angle'

        self.interval = '30'
        self.utc = 'true'
        self.name = 'Richard+Timpson'
        self.reason_for_use = 'research'
        self.affiliation = 'University+of+Utah'
        self.email = 'richardtimpson80@gmail.com'
        self.mailing_list = 'true'
        self.url = ''

    def set_url(self, year, leap_year):
        self.url = f'http://developer.nrel.gov/api/solar/nsrdb_psm3_download.csv?wkt=POINT({self.lon}%20{self.lat})&names={year}&leap_day={leap_year}&interval={self.interval}&utc={self.utc}&full_name={self.name}&email={self.email}&affiliation={self.affiliation}&mailing_list={self.mailing_list}&reason={self.reason_for_use}&api_key={api_key}&attributes={self.attributes}'
        
    def get_year_interval(self,startDate, endDate):
        start_year = datetime.strptime(startDate, '%Y-%m-%d').year
        end_year = datetime.strptime(endDate, '%Y-%m-%d').year
        return start_year, end_year


    def get_location_metadata(self):
        self.set_url('2017', 'false')
        print(self.url)
        info = pd.read_csv(self.url, nrows=1)
        return info


Read in the metadata and initialize all of the data readers

In [6]:
with open('../../data/site_metadata.json', 'r') as file:
    s = file.read()
sites = json.loads(s)

site_drs = [IrradianceDataReader(site) for site in sites]

Once we have the metadata we can find the ranges for each individual irradiance site
The max year will always be capped at 2018

Now we'll want to gather all of the solar irradiance information for each site. 
This is going to be a little funky, because the metadata information is stored with each 
solar site. 

# Random Data Manipulation

In [13]:
# irradiance_site_ranges = {
#     'id': {
#         'start_year': 
#         'end_year':
#     }
# }

irradiance_metadata = {}

max_end_year = 2018

for site_dr in site_drs:
    ir_site_id = site_dr.site['irradiance_md']['Location ID']

    # check if we have already set the year for the irradiance site
    if ir_site_id in irradiance_metadata:
        # set the start year
        start_cur = int(irradiance_metadata[ir_site_id]['start_year'])
        if site_dr.start_year < start_cur:
            site[ir_site_id]['start_year'] = site_dr.start_year        
    else:
        irradiance_metadata[ir_site_id] = {
            'start_year': site_dr.start_year,
            'end_year': max_end_year,
        }
irradiance_metadata

{113805: {'start_year': 2015, 'end_year': 2018},
 116439: {'start_year': 2019, 'end_year': 2018},
 114245: {'start_year': 2018, 'end_year': 2018},
 119311: {'start_year': 2016, 'end_year': 2018},
 89627: {'start_year': 2016, 'end_year': 2018},
 151965: {'start_year': 2017, 'end_year': 2018},
 114696: {'start_year': 2018, 'end_year': 2018},
 80122: {'start_year': 2018, 'end_year': 2018},
 113806: {'start_year': 2018, 'end_year': 2018},
 115995: {'start_year': 2017, 'end_year': 2018},
 157836: {'start_year': 2017, 'end_year': 2018}}

The following code was used to gather the irradiance site metadata. I'll no longer need it as I can simply read from the stored json file

In [15]:
site_drs = []
for site in sites:
    dr = IrradianceDataReader(site)
    location_md = dr.get_location_metadata()
    site_ir_md_dict = location_md.to_dict('records')
    dr.site['irradiance_md'] = site_ir_md_dict[0]
    site_drs.append(dr)

http://developer.nrel.gov/api/solar/nsrdb_psm3_download.csv?wkt=POINT(-112.9758274%2036.9587533)&names=2017&leap_day=false&interval=30&utc=true&full_name=Richard+Timpson&email=richardtimpson80@gmail.com&affiliation=University+of+Utah&mailing_list=true&reason=research&api_key=65XmZXKU1X6yqFKJ7e4Dab5x35seXfubayeQ4D5a&attributes=ghi,dhi,dni,wind_speed,air_temperature,solar_zenith_angle
http://developer.nrel.gov/api/solar/nsrdb_psm3_download.csv?wkt=POINT(-113.27477809999999%2037.2065196)&names=2017&leap_day=false&interval=30&utc=true&full_name=Richard+Timpson&email=richardtimpson80@gmail.com&affiliation=University+of+Utah&mailing_list=true&reason=research&api_key=65XmZXKU1X6yqFKJ7e4Dab5x35seXfubayeQ4D5a&attributes=ghi,dhi,dni,wind_speed,air_temperature,solar_zenith_angle
http://developer.nrel.gov/api/solar/nsrdb_psm3_download.csv?wkt=POINT(-112.9757702%2036.9902621)&names=2017&leap_day=false&interval=30&utc=true&full_name=Richard+Timpson&email=richardtimpson80@gmail.com&affiliation=Univer

In [20]:
import simplejson

with open('../../data/site_metadata.json', 'w') as file:
    s = simplejson.dumps(sites, file, ignore_nan=True)    
    file.write(s)


Storing the metadata in the site_metadata file is the wrong thing to do. 
What I would like to have is a solar irradience site metadata file, with the 
site_metadata file simply holding the id of it's corresponding irradiance site. 
I'll have to manipulate the sites again to make that work

In [11]:
with open('../../data/site_metadata.json', 'r') as file:
    s = file.read()
sites = json.loads(s)

In [17]:
for site in sites:
    md = site['irradiance_md']
    ir_id = md['Location ID']
    irradiance_metadata[ir_id]['md'] = md

irradiance_metadata

{113805: {'start_year': 2015,
  'end_year': 2018,
  'md': {'Source': 'NSRDB',
   'Location ID': 113805,
   'City': '-',
   'State': '-',
   'Country': '-',
   'Latitude': 36.97,
   'Longitude': -112.98,
   'Time Zone': 0,
   'Elevation': 1510,
   'Local Time Zone': -7,
   'Clearsky DHI Units': 'w/m2',
   'Clearsky DNI Units': 'w/m2',
   'Clearsky GHI Units': 'w/m2',
   'Dew Point Units': 'c',
   'DHI Units': 'w/m2',
   'DNI Units': 'w/m2',
   'GHI Units': 'w/m2',
   'Solar Zenith Angle Units': 'Degree',
   'Temperature Units': 'c',
   'Pressure Units': 'mbar',
   'Relative Humidity Units': '%',
   'Precipitable Water Units': 'cm',
   'Wind Direction Units': 'Degrees',
   'Wind Speed': 'm/s',
   'Cloud Type -15': None,
   'Cloud Type 0': 'Clear',
   'Cloud Type 1': 'Probably Clear',
   'Cloud Type 2': 'Fog',
   'Cloud Type 3': 'Water',
   'Cloud Type 4': 'Super-Cooled Water',
   'Cloud Type 5': 'Mixed',
   'Cloud Type 6': 'Opaque Ice',
   'Cloud Type 7': 'Cirrus',
   'Cloud Type 8': 'Ov

In [18]:
import simplejson

with open('../../data/irradiance_metadata.json', 'w') as file:
    s = simplejson.dumps(irradiance_metadata, file, ignore_nan=True)
    file.write(s)

Now I need to rewrite the site_metadata file to not have the metadata

In [19]:
for site in sites:
    ir_id = site['irradiance_md']['Location ID']
    site['irradiance_site_id'] = ir_id
    del site['irradiance_md']

In [20]:
import simplejson

with open('../../data/site_metadata.json', 'w') as file:
    s = simplejson.dumps(sites, file, ignore_nan=True)    
    file.write(s)