<a href="https://colab.research.google.com/github/tr3nt-tayl0r/CIMIS/blob/main/CIMIS_API_raw_intake_script.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Clone the CIMIS project repository into ./CIMIS; the cells below change into
# this directory and write their output files under it.
!git clone https://github.com/tr3nt-tayl0r/CIMIS.git

Cloning into 'CIMIS'...
remote: Enumerating objects: 10857, done.[K
remote: Counting objects: 100% (396/396), done.[K
remote: Compressing objects: 100% (364/364), done.[K
remote: Total 10857 (delta 35), reused 386 (delta 32), pack-reused 10461 (from 1)[K
Receiving objects: 100% (10857/10857), 654.94 MiB | 11.22 MiB/s, done.
Resolving deltas: 100% (2214/2214), done.
Updating files: 100% (4030/4030), done.


In [2]:
import glob
import os
import sys
import datetime
import numpy as np
import pandas as pd
import json
import requests
import matplotlib.pyplot as plt
from google.colab import userdata

In [3]:
def fix_col_names(df):
  '''Rename raw CIMIS column headers to short names.

  Every measurement arrives as a "<Name>.Value" / "<Name>.Qc" column pair;
  these become "<short>" / "<short>_Qc". "Julian" becomes "Doy".
  Columns that are not in the mapping are left untouched.

  The frame is modified in place and the same object is returned.
  '''
  # CIMIS measurement prefix -> short variable name
  short_names = {
      'DayAirTmpMin': 'Tmin',
      'DayAirTmpMax': 'Tmax',
      'DayDewPnt': 'Tdew',
      'DayAirTmpAvg': 'Tavg',
      'DayEto': 'ETo',
      'DayRelHumMin': 'RHmin',
      'DayRelHumMax': 'RHmax',
      'DayRelHumAvg': 'RHavg',
      'DayPrecip': 'Pr',
      'DaySolRadAvg': 'Rs',
      'DayVapPresAvg': 'Ea',
      'DayWindSpdAvg': 'u2',
  }

  column_map = {'Julian': 'Doy'}
  for cimis_name, short in short_names.items():
    column_map[f'{cimis_name}.Value'] = short
    column_map[f'{cimis_name}.Qc'] = f'{short}_Qc'

  df.rename(columns=column_map, inplace=True)
  return df

In [4]:
# Download the CIMIS station list, keep decimal-degree coordinates, and write
# two CSVs under /content/CIMIS/stations/:
#   stations.csv       - every station returned by the API
#   stations-meta.csv  - only stations flagged both IsActive and IsEtoStation
# `station_ids` (the StationNbr values of that subset) is used by later cells.

station_url = 'http://et.water.ca.gov/api/station'

# A timeout keeps the cell from blocking forever if the service stalls, and
# raise_for_status() reports an HTTP error directly instead of letting it show
# up later as a JSON decoding or KeyError failure.
res = requests.get(station_url, timeout=60)
res.raise_for_status()
payload = res.json()['Stations']
df = pd.json_normalize(payload)

rename_dict = {'HmsLatitude':'Lat',
               'HmsLongitude':'Long'}
df.rename(columns=rename_dict, inplace=True)

def split_and_convert(row):
  '''Return the decimal-degree part of a coordinate string.

  The value looks like "-120º6'46W / -120.112910"; the text after " / " is
  parsed as a float.
  '''
  return float(row.split(' / ')[1].strip())

df['Lat'] = df['Lat'].apply(split_and_convert)
df['Long'] = df['Long'].apply(split_and_convert)

if os.getcwd() != '/content/CIMIS':
  os.chdir("CIMIS")
# exist_ok avoids the separate existence check and makes re-runs safe
os.makedirs("./stations", exist_ok=True)

stations_dir = '/content/CIMIS/stations/'
stations_csv = f'{stations_dir}stations.csv'
df.to_csv(stations_csv, index=False)

# NOTE(review): the frame is deliberately re-read from disk before filtering.
# Presumably this lets read_csv infer real dtypes (booleans for the Is* flags,
# integers for StationNbr) from the API's values - confirm before removing
# this round trip.
df = pd.read_csv(stations_csv)
df_meta = df[(df['IsActive'] == True) & (df['IsEtoStation'] == True)]

station_ids = df_meta['StationNbr'].tolist()
print(station_ids)

df_meta.to_csv(f'{stations_dir}stations-meta.csv', index=False)


[2, 6, 7, 12, 13, 15, 35, 39, 41, 43, 44, 47, 52, 64, 68, 70, 71, 75, 77, 78, 80, 83, 84, 87, 90, 91, 99, 103, 104, 105, 106, 107, 113, 114, 117, 124, 125, 126, 129, 131, 139, 140, 144, 146, 147, 150, 151, 152, 153, 157, 158, 160, 163, 165, 170, 171, 173, 174, 175, 178, 179, 181, 182, 184, 187, 191, 192, 193, 194, 195, 197, 199, 200, 202, 204, 206, 207, 208, 209, 210, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 235, 236, 237, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 256, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268]


In [7]:
# Download daily CIMIS records for every station in `station_ids` and write one
# CSV per station and year under CIMIS_Project_Data/CIMIS_daily_raw_data/.
print(os.getcwd())
if os.getcwd() != '/content/CIMIS':
  os.chdir("CIMIS")
raw_dir = "./CIMIS_Project_Data/CIMIS_daily_raw_data/"

# NOTE(review): the appKey travels in the query string of a plain-http URL;
# confirm whether the service accepts https for /api and switch if it does.
cimis_api = "http://et.water.ca.gov/api"
api_key = userdata.get('cimis_key')

# list of possible data items found here: https://et.water.ca.gov/Rest/Index
data_items = '''day-eto,day-precip,day-sol-rad-avg,day-vap-pres-avg,day-air-tmp-max,day-air-tmp-min,day-air-tmp-avg,day-rel-hum-max,day-rel-hum-min,day-rel-hum-avg,day-dew-pnt,day-wind-spd-avg,day-wind-run,day-soil-tmp-avg'''

# Seconds to wait on a single request before giving up on it. Without a
# timeout a stalled response blocks the whole run until it is interrupted.
REQUEST_TIMEOUT = 120

#it may be necessary to manually get stations 125, 208, & 251, because the API seems to get hung up on these stations
#station_ids = [125, 208, 251] #remember to reset station_ids to active eto stations from station-meta.csv or output of previous cell

def fetch_daily_records(station_id, start_date, end_date):
  '''Request daily records for one station and date range.

  Returns a DataFrame of the flattened records with every column whose name
  matches "Unit" removed. Raises a requests exception on timeout, connection
  or HTTP errors, and KeyError/IndexError/TypeError/ValueError when the
  response body does not have the expected Data -> Providers[0] -> Records
  layout.
  '''
  rest_url = f'{cimis_api}/data?appKey={api_key}&targets={station_id}&startDate={start_date}&endDate={end_date}&dataItems={data_items}&unitOfMeasure=M'
  res = requests.get(rest_url, timeout=REQUEST_TIMEOUT)
  res.raise_for_status()
  payload = res.json()['Data']['Providers'][0]['Records']
  records = pd.json_normalize(payload)
  return records[records.columns.drop(list(records.filter(regex='Unit')))]

today = pd.Timestamp.now().date()

# (file label, startDate, endDate): one window per complete year, then the
# running window that starts on 2024-01-01 and ends today.
# NOTE(review): the last window is labelled 2024 but ends at today's date, so
# once this runs after 2024 that file spans more than one calendar year.
# Extending the per-year loop would also require widening the year range used
# by the post-processing cell.
windows = [(year, f'{year}-01-01', f'{year}-12-31') for year in range(2003,2024)]
windows.append((2024, '2024-01-01', f'{today}'))

failed = []
for station_id in station_ids:
  station_path = f'{raw_dir}station{station_id}'
  os.makedirs(station_path, exist_ok=True)
  for year, start_date, end_date in windows:
    filename = f'{station_path}/station_id{station_id}_cimis_daily_raw{year}.csv'
    try:
      df = fetch_daily_records(station_id, start_date, end_date)
    except (requests.exceptions.RequestException, ValueError, KeyError, IndexError, TypeError) as err:
      # Report only the exception type: the text of requests errors includes
      # the request URL, which carries the appKey.
      failed.append((station_id, year))
      print(f'FAILED {filename} ({type(err).__name__})')
      continue
    print(filename)
    df.to_csv(filename, index=False)

if failed:
  print(f'{len(failed)} download(s) failed as (station, year): {failed}')


/content/CIMIS


KeyboardInterrupt: 

In [None]:
# Rewrite each downloaded raw CSV in place: shorten the CIMIS column names and
# add the station's Elev / Lat / Long taken from stations.csv.
raw_dir = "/content/CIMIS/CIMIS_Project_Data/CIMIS_daily_raw_data/"
station_dir = "/content/CIMIS/stations/"

df_stations = pd.read_csv(f'{station_dir}stations.csv')

for station_id in station_ids:
  # The metadata lookup depends only on the station, so do it once per
  # station rather than once per yearly file.
  df_id = df_stations[df_stations['StationNbr'] == station_id]
  if df_id.empty:
    # Without a metadata row .values[0] would raise IndexError.
    print(f'station {station_id}: not found in stations.csv, skipping')
    continue

  elev = df_id['Elevation'].values[0]
  lat = df_id['Lat'].values[0]
  longtd = df_id['Long'].values[0]

  for year in range(2003,2025):
    raw_file = f'{raw_dir}station{station_id}/station_id{station_id}_cimis_daily_raw{year}.csv'
    # Skip years that were never downloaded and files of at most one byte
    # (nothing to parse).
    if not os.path.exists(raw_file) or os.stat(raw_file).st_size <= 1:
      continue

    df = pd.read_csv(raw_file)
    fix_col_names(df)  # renames the columns in place

    df['Elev'] = elev
    df['Lat'] = lat
    df['Long'] = longtd

    df.to_csv(raw_file, index=False)


In [None]:
if os.getcwd() != '/content/CIMIS':
  os.chdir("CIMIS")
token = userdata.get('github_token')
!git --version
!git config --global user.email "tretaylor@csumb.edu"
!git config --global user.name "tr3nt-tayl0r"
!git add -A
!git commit -m "Commiting raw files"
!git remote rm origin
!git remote add origin https://tr3nt-tayl0r:{token}@github.com/tr3nt-tayl0r/CIMIS.git
!git push --set-upstream origin main