In [None]:
import pprint
from pyowm.uvindexapi30 import parsers, uv_client, uvindex
from pyowm.commons.http_client import HttpClient
from pyowm.caches.nullcache import NullCache
from pyowm.utils import timeformatutils
import pickle

# API key handed to the UV HTTP client created below. It is intentionally
# blank here; fill it in before running the data-collection cells.
owm_api_key = ""

import pytz
from pytz.exceptions import UnknownTimeZoneError
from tzwhere import tzwhere
from datetime import date, datetime, timedelta, timezone

import pandas as pd
import time

In [None]:
# Single-observation parser; not referenced again in the visible cells.
parser = parsers.UVIndexParser()
# Parser applied to each history response in clean_result().
list_parser = parsers.UVIndexListParser()
# Cache object handed to the HTTP client (presumably a no-op cache - confirm
# against the pyowm version in use).
cache = NullCache()
# Client used by the fetch loop to call get_uvi_history().
client = uv_client.UltraVioletHttpClient(owm_api_key,
                                         HttpClient(cache=cache))

### Adjust Data Collection Settings:

Dataset to collect UV data for:

In [None]:
# Dataset name. It is the prefix of the "<dataset>_template.xlsx" and
# "<dataset>_timezones.xlsx" input files and of the output file name.
# Switch datasets by (un)commenting one of the lines below.
#dataset = "US"
dataset = "global"

Set the start and end dates as (year, month, day) tuples:

In [None]:
# (year, month, day) bounds of the collection window.
# get_start_and_end_for_location() turns each into a local-noon timestamp for
# the history request; study_date_range treats the end date as exclusive.
start_date_triple = (2019, 1, 23)
end_date_triple = (2020, 4, 22)

### Loading Template and Timezone Files:

In [None]:
# Input workbooks are selected by dataset name.
template_filename = dataset + "_template.xlsx"
timezones_filename = dataset + "_timezones.xlsx"
# Template frame: supplies the "Location", "Lat" and "Long" columns read below
# and is the frame that is filled in and written out at the end.
# Presumably one row per location - confirm against the workbook.
locations = pd.read_excel(template_filename)
# Location names; not referenced again in the visible cells.
locs_list = locations["Location"]

In [None]:
# Per-location UTC offsets. They are later passed to timedelta(hours=...),
# so each entry is interpreted as an offset in hours.
# NOTE(review): offsets are paired with the rows of `locations` purely by
# position (zip in get_queries) - confirm both workbooks share the same row
# order and length.
timezones_df = pd.read_excel(timezones_filename)
utc_offsets = list(timezones_df["UTC Offsets"])

In [None]:
def perform_delta(start, end, delta):
    """Yield values from ``start`` up to, but not including, ``end``.

    Works for any types supporting ``<`` and ``+`` (e.g. ``date`` with
    ``timedelta``, or plain numbers).

    Args:
        start: First value yielded (if it is below ``end``).
        end: Exclusive upper bound.
        delta: Step added after each yielded value; must move ``start``
            forward whenever there is something to iterate over.

    Yields:
        ``start``, ``start + delta``, ``start + 2 * delta``, ... while the
        value is strictly less than ``end``.

    Raises:
        ValueError: On first iteration, if ``start < end`` but adding
            ``delta`` does not increase ``start`` (zero or negative step),
            which would otherwise loop without ever reaching ``end``.
    """
    # Only validate when the loop would actually run, so an empty range keeps
    # yielding nothing regardless of the step.
    if start < end and not (start + delta > start):
        raise ValueError("delta must be positive to advance from start to end")

    curr = start
    while curr < end:
        yield curr
        curr += delta

In [None]:
# ISO "YYYY-MM-DD" strings for every day from the start date up to, but not
# including, the end date (perform_delta stops before `end`).
study_date_range = [
    day.isoformat()
    for day in perform_delta(
        date(*start_date_triple),
        date(*end_date_triple),
        timedelta(days=1),
    )
]

In [None]:
def get_query_template():
    """Return a new, empty query dict for one UV-history request.

    Returns:
        dict: Keys ``'lat'``, ``'lon'``, ``'start'`` and ``'end'`` (in that
        order), each set to ``None``. A fresh dict is built on every call.
    """
    return dict.fromkeys(('lat', 'lon', 'start', 'end'))

In [None]:
# Coordinates from the template, in row order; zipped with utc_offsets in
# get_queries().
lats=list(locations["Lat"])
longs = list(locations["Long"])
# Display the number of locations as a quick sanity check.
len(lats)

In [None]:
def get_start_and_end_for_location(lat, long, utc_offset):
    """Return the query window as timestamps of local noon on the bound dates.

    The dates come from the notebook-level ``start_date_triple`` and
    ``end_date_triple``; "local" means a fixed offset of ``utc_offset`` hours
    from UTC.

    Args:
        lat: Accepted for call-site symmetry; not used in the computation.
        long: Accepted for call-site symmetry; not used in the computation.
        utc_offset: Offset from UTC in hours, passed to ``timedelta(hours=...)``.

    Returns:
        tuple[int, int]: ``(start, end)`` as integer POSIX timestamps
        (``datetime.timestamp()`` truncated with ``int``).
    """
    local_zone = timezone(timedelta(hours=utc_offset))

    def noon_epoch(triple):
        # 12:00 on the given (year, month, day) in the location's fixed-offset zone.
        local_noon = datetime(triple[0], triple[1], triple[2], 12, tzinfo=local_zone)
        return int(local_noon.timestamp())

    return noon_epoch(start_date_triple), noon_epoch(end_date_triple)

In [None]:
def get_queries():
    """Build one UV-history query dict per location.

    Walks the notebook-level ``utc_offsets``, ``lats`` and ``longs`` lists in
    parallel (``zip`` stops at the shortest of the three).

    Returns:
        list[dict]: One dict per location with ``'lat'``, ``'lon'``,
        ``'start'`` and ``'end'`` filled in, in input order.
    """
    def build_query(offset, latitude, longitude):
        # Start from the blank template so the key order stays lat/lon/start/end.
        request = get_query_template()
        window_start, window_end = get_start_and_end_for_location(
            latitude, longitude, offset
        )
        request.update(lat=latitude, lon=longitude,
                       start=window_start, end=window_end)
        return request

    return [
        build_query(offset, latitude, longitude)
        for offset, latitude, longitude in zip(utc_offsets, lats, longs)
    ]

In [None]:
# One request dict per (utc_offset, lat, long) triple, in input order.
queries = get_queries()

In [None]:
#pickle.dump(queries, open("backup_queries.p", "wb"))

In [None]:
# Accumulators filled by the fetch loop below. Re-run this cell before
# re-running the loop, otherwise the loop appends to the previous run's data.
results = []      # one entry per query: the API response, or "N/A" on failure
bad_indices = []  # positions in `queries` whose request raised

In [None]:
# Fetch the UV history for every query, keeping `results` aligned
# index-for-index with `queries`: a failed request is recorded in
# `bad_indices` and stored as the placeholder "N/A".
for i, q in enumerate(queries):
    try:
        results.append(client.get_uvi_history(q))
    except Exception as e:
        bad_indices.append(i)
        print(e)
        print("Exception arose for query ", q)
        results.append("N/A")
    # Pause after every attempt, including failed ones, to prevent rate of
    # access exceptions: a failure must not let the next request fire
    # immediately. Original note: takes 10 min for 600 locs.
    time.sleep(0.5)
        

In [None]:
# Positions in `queries` whose request raised in the fetch loop.
bad_indices

In [None]:
#pickle.dump(results, open("backup_UVdata.p", "wb"))

In [None]:
def clean_result(result):
    """Convert one raw UV-history response into a ``{date: value}`` mapping.

    Args:
        result: A response as returned by ``client.get_uvi_history``, or the
            string ``"N/A"`` that the fetch loop stores for a failed query.

    Returns:
        dict: Maps the first 10 characters of each observation's ISO
        reference time (the ``YYYY-MM-DD`` date) to its UV value. If several
        observations share a date, the last one wins. The ``"N/A"``
        placeholder yields an empty dict, so that location simply has no
        data for any date.
    """
    # The failure placeholder is not a response; passing it to the parser
    # would abort the whole cleaning step because of one failed query.
    if isinstance(result, str) and result == "N/A":
        return {}

    result_list = list_parser.parse_JSON(result)
    values = {}
    for r in result_list:
        date_string = r.get_reference_time(timeformat = 'iso')[:10]
        values[date_string] = r.get_value()

    return values

In [None]:
# One {date: UV value} dict per entry of `results`, in the same order.
clean_results = [clean_result(raw) for raw in results]

print(len(clean_results))

In [None]:
# Preview the template frame before its date columns are filled in below.
locations.head()

In [None]:
def get_date_data(date_string):
    """Collect the UV value for one date across all locations.

    Args:
        date_string: Key to look up in each per-location dict of the
            notebook-level ``clean_results``.

    Returns:
        list: One entry per element of ``clean_results``, in order: the
        stored value for ``date_string``, or ``""`` when that location has no
        entry for it.
    """
    return [per_location.get(date_string, "") for per_location in clean_results]

In [None]:
# Every column from the fourth onward is treated as a date column to fill.
# Assumes the first three template columns are non-date metadata - TODO confirm.
# NOTE(review): get_date_data() looks these labels up against "YYYY-MM-DD"
# string keys; confirm the template headers are read as strings in that
# format (not as datetimes), otherwise every cell is filled with "".
date_columns = list(locations.columns[3:])
# Peek at the first few labels.
date_columns[:5]

In [None]:
# Overwrite each date column of the template with that day's UV values,
# one per location ("" where a location has no reading for the day).
for column_label in date_columns:
    locations[column_label] = get_date_data(column_label)

In [None]:
# Output name: "<dataset> UV Data <start> to <end>", with each date written as
# its triple joined by "-" (no zero padding), e.g. "2019-1-23".
output_filename = "{} UV Data {} to {}".format(
    dataset,
    "-".join(str(part) for part in start_date_triple),
    "-".join(str(part) for part in end_date_triple),
)

output_filename

In [None]:
# Write the filled-in frame to "<output_filename>.xlsx" (path relative to the
# current working directory); index=False leaves the row index out of the sheet.
locations.to_excel(output_filename + ".xlsx", index = False)