In [1]:
import numpy as np
import pandas as pd
import glob
from math import radians, cos, sin, asin, sqrt, atan2

In [None]:
# CREATE LIMITED DATASET WITH REDUCED TIMESTAMPS FOR PYTHON CLUB
#
# Each "./data/<name>.csv" is thinned down to the rows whose timestamp falls in
# hour 0 on day 1 of a month, then saved as "./data/<name>_limited.csv".

filenames = ["precip_all", "temp_all", "rh_all", "solrad_all"]

for fname in filenames:
    df = pd.read_csv("./data/"+fname+".csv", index_col=0)
    df.index = pd.to_datetime(df.index)
    # Single boolean mask: midnight hour AND first day of the month.
    keep = (df.index.hour == 0) & (df.index.day == 1)
    df = df[keep]
    df.to_csv("./data/"+fname+"_limited.csv")

In [2]:
# MELT ALL LIMITED DATASETS
#
# Reshape each wide "./data/<key>_all_limited.csv" into long form with the
# columns (time, site, <key>) and write it to "./data/<key>_melt.csv".
# The original column headers become the values of the "site" column.

for key in ["precip", "temp", "rh", "solrad"]:
    df = pd.read_csv("./data/"+key+"_all_limited.csv", index_col=0)
    df.index = pd.to_datetime(df.index)
    # Copy the datetime index into a regular column so melt can keep it as an id.
    df["time"] = df.index
    # Every column except the freshly added "time" is melted into rows.
    v = [x for x in df.columns.values if x != "time"]
    df = df.melt(id_vars=["time"], value_vars=v, value_name=key, var_name="site")
    df.to_csv("./data/"+key+"_melt.csv", index=False)

In [3]:
# MERGE ALL DATASETS TOGETHER

p_df = pd.read_csv("./data/precip_melt.csv")
r_df = pd.read_csv("./data/rh_melt.csv")
s_df = pd.read_csv("./data/solrad_melt.csv")
t_df = pd.read_csv("./data/temp_melt.csv")
print(len(p_df), len(r_df), len(s_df), len(t_df))

# Inner joins on (time, site): a row survives only if that key pair occurs in
# every table. validate="one_to_one" makes pandas raise MergeError when a key
# pair is duplicated on either side, which would otherwise silently multiply
# rows and could mask dropped ones in the length check below.
m_df = pd.merge(p_df, r_df, on=["time", "site"], validate="one_to_one")
m_df = pd.merge(m_df, s_df, on=["time", "site"], validate="one_to_one")
m_df = pd.merge(m_df, t_df, on=["time", "site"], validate="one_to_one")
# If this matches the input lengths, no (time, site) pair was dropped by the
# joins. It says nothing about NaN measurements: those are kept as-is.
print(len(m_df))

# index=False (as in the melt step) so the positional index is not written.
m_df.to_csv("./data/merged_all_data.csv", index=False)

85320 85320 85320 85320
85320


In [4]:
# FUNCTION TO GET VALUES FROM SITE DF INTO MAIN DF

# Site metadata table; the "site" column is cast to str so it can be compared
# against str(site) during lookups.
site_df = (
    pd.read_csv("./data/site_info.csv")
    .assign(site=lambda frame: frame["site"].astype(str))
)
    
def get_site_loc(site, site_df=site_df):
    """Look up the location fields of one site in ``site_df``.

    Parameters
    ----------
    site : any
        Site identifier; it is converted with ``str(site)`` before being
        compared against the ``"site"`` column.
    site_df : pandas.DataFrame, optional
        Table with the columns ``site``, ``latitude``, ``longitude``, ``city``
        and ``state``. Defaults to the module-level ``site_df`` as it existed
        when this function was defined.

    Returns
    -------
    tuple
        ``(latitude, longitude, city, state)`` of the matching row, or a tuple
        of four ``np.nan`` when no row matches.

    Raises
    ------
    AssertionError
        If more than one row matches ``site``.
    """
    matches = site_df[site_df["site"] == str(site)]
    assert len(matches) <= 1, "site id matches more than one row in site_df"
    if len(matches) == 0:
        # Same container type as the success path so callers always get a tuple.
        return (np.nan, np.nan, np.nan, np.nan)
    lat = matches["latitude"].values[0]
    lon = matches["longitude"].values[0]
    city = matches["city"].values[0]
    state = matches["state"].values[0]
    return lat, lon, city, state

In [5]:
# ATTACH SITE VALUES TO MAIN DF

df = pd.read_csv("./data/merged_all_data.csv")
# One (lat, lon, city, state) tuple per row, looked up from the row's site id.
site_locations = df['site'].map(get_site_loc)
# Transpose the per-row tuples into four per-column tuples.
lat_col, lon_col, city_col, state_col = zip(*site_locations)
df['lat'] = lat_col
df['lon'] = lon_col
df['city'] = city_col
df['state'] = state_col
df.to_csv("./data/final_loc_data.csv", index=False)
df.head()

Unnamed: 0,time,site,precip,rh,solrad,temp,lat,lon,city,state
0,1961-01-01,3103,,,,,35.08,111.4,FLAGSTAFF,AZ
1,1961-02-01,3103,,,,,35.08,111.4,FLAGSTAFF,AZ
2,1961-03-01,3103,,,,,35.08,111.4,FLAGSTAFF,AZ
3,1961-04-01,3103,,,,,35.08,111.4,FLAGSTAFF,AZ
4,1961-05-01,3103,,,,,35.08,111.4,FLAGSTAFF,AZ
