In [None]:
import os
import pickle

import numpy as np
import pandas as pd
from wwo_hist import retrieve_hist_data

In [None]:
# Workbook listing the locations to query. Later cells read its "lat" and
# "long" columns and re-read the same file as the template for the output.
template_file_name = "NAME.xlsx"
locations = pd.read_excel(template_file_name)

In [None]:
def query_from_coords(tup):
    """Format a coordinate pair as a "lat,long" query string.

    Only the first two items of ``tup`` are used; each is converted with
    ``str`` and the two are joined by a comma with no surrounding spaces.
    """
    first, second = tup[0], tup[1]
    return ",".join((str(first), str(second)))

In [None]:
# Run configuration, passed unchanged to retrieve_hist_data by the cells below.
frequency = 24  # hrs
start = "23-JAN-2019"
end = "22-APR-2020"

# Keep the credential out of the notebook: read it from the WWO_API_KEY
# environment variable. Falls back to an empty string when the variable is
# not set, which matches the previous placeholder value.
api_key = os.environ.get("WWO_API_KEY", "")

In [None]:
# Build one "lat,long" query string per row of the locations sheet.
# Rows with missing coordinates become the string "nan,nan".
lats = locations["lat"].tolist()
longs = locations["long"].tolist()
queries = list(map(query_from_coords, zip(lats, longs)))

In [None]:
# Spot-check the first generated query string.
queries[0]

In [None]:
# Number of queries; one per row of the locations sheet.
len(queries)

In [None]:
# Fetch weather history for every query, keeping all_results positionally
# aligned with queries:
#   * 'N/A'           - the row had no coordinates ("nan,nan")
#   * DataFrame       - first element of what retrieve_hist_data returned
#   * (index, query)  - the lookup raised; the same tuple goes to bad_queries
all_results = []
bad_queries = []

for index, q in enumerate(queries):
    if q != 'nan,nan':
        q_list = [q]
        try:
            result = retrieve_hist_data(
                api_key,
                q_list,
                start,
                end,
                frequency,
                location_label=False,
                export_csv=True,
                store_df=True,
            )
            all_results.append(result[0])
        except Exception as e:
            # Best effort: report the error and keep going with the next query.
            print(e)
            tup = (index, q)
            bad_queries.append(tup)
            all_results.append(tup)
    else:
        all_results.append('N/A')

In [None]:
# (index, query) pairs for every lookup that raised during the fetch loop.
bad_queries

In [None]:
# Persist the raw results so the download does not have to be repeated.
# The `with` block closes the file even if pickling fails part-way through.
with open("all_results_save.p", "wb") as results_file:
    pickle.dump(all_results, results_file)

In [None]:
# One entry per query (placeholders included), so this matches len(queries).
len(all_results)

## Export Weather Data to Excel

In [None]:
# Inspect the first result; this is 'N/A' or an (index, query) tuple when the
# first location was skipped or its lookup failed.
all_results[0]

In [None]:
# Show which column labels of the second result are repeats of an earlier one.
# NOTE(review): assumes all_results[1] is a DataFrame; a skipped or failed
# location stores a str/tuple there and this line would raise AttributeError.
test = all_results[1]
test.columns.duplicated()

In [None]:
# Keep only the first occurrence of each column label in every result table.
# Entries that are not DataFrames ('N/A' placeholders and the (index, query)
# tuples stored for failed lookups) are left untouched. Checking for DataFrame
# rather than "not a str" prevents a failure tuple from aborting the loop and
# leaving all later tables with their duplicate columns.
for i, r in enumerate(all_results):
    if isinstance(r, pd.DataFrame):
        all_results[i] = r.loc[:, ~r.columns.duplicated()]

In [None]:
# Sanity check: de-duplicating columns must not change the number of entries.
len(all_results)

In [None]:
# Positions in all_results that hold no weather table. This covers both the
# 'N/A' strings (rows without coordinates) and the (index, query) tuples
# recorded for failed lookups, so later cells skip both kinds of placeholder.
NA_indices = [
    i for i, entry in enumerate(all_results)
    if not isinstance(entry, pd.DataFrame)
]
NA_indices

In [None]:
# Add an avgtempC column (mean of the daily max and min) to every result table.
# new_results stays positionally aligned with all_results.
new_results = []
for entry in all_results:
    # Placeholders ('N/A' strings and (index, query) failure tuples) carry no
    # weather data; pass them through unchanged instead of indexing into them.
    if not isinstance(entry, pd.DataFrame):
        new_results.append(entry)
        continue

    # Convert each temperature with int() before averaging.
    max_temp = entry['maxtempC'].apply(int)
    min_temp = entry['mintempC'].apply(int)

    # assign() returns a new frame, so the tables stored in all_results (and
    # displayed by earlier cells) are not modified by this cell.
    new_results.append(
        entry.assign(
            maxtempC=max_temp,
            mintempC=min_temp,
            avgtempC=(max_temp + min_temp) / 2,
        )
    )

In [None]:
# Re-read the workbook as the output template. `index` is not a read_excel
# parameter (current pandas raises TypeError for it), so it is not passed.
template = pd.read_excel(template_file_name)

# The last 10 columns of the template are the ones filled with weather values.
# NOTE(review): presumably these are the per-date columns - confirm against the workbook.
date_columns = list(template.columns[-10:])
date_columns

In [None]:
def generate_excel_tabs(feature_names, results, template_df=None, dates=None,
                        skip_indices=None):
    """Build one filled-in copy of the template per weather feature.

    For every DataFrame ``results[i]`` and every feature, the first
    ``len(dates)`` values of that feature column are written into row ``i`` of
    the feature's sheet, one value per date column, in order.

    Parameters
    ----------
    feature_names : sequence of str
        Column names to extract from each result table; one sheet is produced
        per name, in this order.
    results : sequence
        Per-location results, positionally aligned with the template rows.
        Entries that are not DataFrames are skipped.
    template_df : pandas.DataFrame, optional
        Template to copy for each sheet. Defaults to reading the notebook-level
        ``template_file_name`` workbook.
    dates : sequence, optional
        Labels of the template columns to fill. Defaults to the notebook-level
        ``date_columns``.
    skip_indices : iterable of int, optional
        Positions in ``results`` to leave blank. Defaults to the notebook-level
        ``NA_indices``.

    Returns
    -------
    list of pandas.DataFrame
        One sheet per entry of ``feature_names``.
    """
    if template_df is None:
        template_df = pd.read_excel(template_file_name)
    if dates is None:
        dates = date_columns
    if skip_indices is None:
        skip_indices = NA_indices

    dates = list(dates)
    skipped = set(skip_indices)

    # The fill columns are cast to object so numbers and strings (e.g. sunrise
    # times) can be stored without relying on implicit dtype upcasting.
    # astype returns a new frame, so the template itself is never modified.
    fill_dtypes = {date: object for date in dates}
    output = [template_df.astype(fill_dtypes) for _ in feature_names]

    for i, df in enumerate(results):
        if i in skipped or not isinstance(df, pd.DataFrame):
            continue
        for k, feature_name in enumerate(feature_names):
            values = list(df.loc[:, feature_name])
            for j, date in enumerate(dates):
                try:
                    value = values[j]
                except IndexError as e:
                    # The result has fewer rows than there are date columns;
                    # report it and leave the cell as it is in the template.
                    print(feature_name)
                    print ("j:",j, " date:",date, "i:", i)
                    print(e)
                    continue
                # Write straight into the sheet. Chained indexing
                # (sheet[date][i] = ...) may assign to a temporary copy.
                output[k].at[i, date] = value

    return output

In [None]:
results = new_results

# Result-table columns to export; each becomes one sheet of the workbook.
features = [
    "maxtempC",
    "mintempC",
    "avgtempC",
    "cloudcover",
    "humidity",
    "precipMM",
    "pressure",
    "windspeedKmph",
    "totalSnow_cm",
    "sunHour",
    "moon_illumination",
    "moonrise",
    "moonset",
    "sunrise",
    "sunset",
    "DewPointC",
    "FeelsLikeC",
    "WindChillC",
    "WindGustKmph",
    "visibility",
    "winddirDegree",
]
# Not the last expression of the cell, so this value is not displayed.
len(features)

excel_output = generate_excel_tabs(features, results)

In [None]:
# Write one sheet per feature into a single workbook. The context manager
# saves and closes the file on exit, including when a sheet write raises;
# ExcelWriter.save() no longer exists in pandas 2.x. Creating, filling and
# closing the writer in one cell also avoids a half-written workbook when the
# cells are run out of order.
with pd.ExcelWriter('weather data.xlsx', engine='xlsxwriter') as writer:
    for sheet_name, sheet in zip(features, excel_output):
        sheet.to_excel(writer, sheet_name=sheet_name, index=False)

## Testing

In [None]:
# Smoke test: fetch two days of history for a single coordinate pair without
# writing a CSV.
# NOTE: this rebinds the notebook-level `start` and `end`; re-run the
# configuration cell before repeating the full download.
LA_coords = "34.05,-118.25"
start = "11-APR-2020"
end = "12-APR-2020"
# Use the `api_key` defined in the configuration cell; `api_keys` is not
# defined anywhere in this notebook and raised NameError.
result = retrieve_hist_data(api_key,
                                [LA_coords],
                                start,
                                end,
                                frequency,
                                location_label = False,
                                export_csv = False,
                                store_df = True)

In [None]:
# First element of what the smoke-test call returned.
result[0]