---

## Imports

In [1]:
import pandas as pd
import numpy as np

import wget, os
import time
import glob

---

## Function Definitions

In [2]:
def index_to_datetime(df):
    """Return a copy of ``df`` indexed by its hour-rounded ``datetime`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``datetime`` column that ``pd.to_datetime`` can
        parse.

    Returns
    -------
    pandas.DataFrame
        A new frame whose index (named ``datetime``) holds the parsed
        timestamps rounded to the nearest hour, sorted ascending. The
        ``datetime`` column is consumed by the index.

    Raises
    ------
    KeyError
        If ``df`` has no ``datetime`` column.

    Notes
    -----
    The input frame is not modified, so the calling cell can be re-run
    against the same raw frame. Rounding can map several rows onto the same
    hour, so the returned index is not guaranteed to be unique.
    """
    # Lowercase 'h' is the hourly alias; uppercase 'H' is deprecated in
    # recent pandas releases.
    hourly = pd.to_datetime(df['datetime']).dt.round('h')
    return (
        df.assign(datetime=hourly)  # assign() works on a copy of df
        .set_index('datetime')
        .sort_index()
    )

In [3]:
def shapes_nulls():
    """Print the row count and total null count of each station frame.

    Reads the notebook-level frames ``sand_df``, ``rive_df``, ``redd_df``
    and ``fres_df`` and prints one summary line per frame, in that order.
    Returns ``None``.
    """
    def report(label, frame):
        # Rows come from the frame's shape; nulls are summed over all cells.
        n_rows = frame.shape[0]
        n_nulls = frame.isna().sum().sum()
        print(f'{label}: {n_rows} rows, {n_nulls} nulls')

    report('sand', sand_df)
    report('rive', rive_df)
    report('redd', redd_df)
    report('fres', fres_df)

---

## Read in Four Individual Weather Station DataFrames

In [6]:
# Load the four intermediate station files (San Diego, Riverside, Redding,
# Fresno) in one pass; the path order matches the target names on the left.
sand_df, rive_df, redd_df, fres_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/intermediate_stages/san_diego_weather2.csv',
        '../data/intermediate_stages/riverside_weather2.csv',
        '../data/intermediate_stages/redding_weather2.csv',
        '../data/intermediate_stages/fresno_weather2.csv',
    )
)

In [7]:
# Sanity check: print row and null counts for the four freshly loaded frames.
shapes_nulls()

sand: 24007 rows, 0 nulls
rive: 17142 rows, 0 nulls
redd: 22201 rows, 0 nulls
fres: 21711 rows, 0 nulls


In [8]:
# Re-index every station frame by its hour-rounded timestamp, keeping the
# same four notebook-level names.
sand_df, rive_df, redd_df, fres_df = map(
    index_to_datetime,
    (sand_df, rive_df, redd_df, fres_df),
)

In [9]:
# Preview the San Diego frame, now indexed by hour-rounded datetime.
sand_df.head()

Unnamed: 0_level_0,sand_temp,sand_wind,sand_vis,sand_ceil
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016-01-01 01:00:00,156,26,16093,22000
2016-01-01 02:00:00,144,21,16093,22000
2016-01-01 03:00:00,139,0,16093,22000
2016-01-01 04:00:00,133,0,16093,22000
2016-01-01 05:00:00,122,0,16093,22000


In [10]:
# Preview the Riverside frame, now indexed by hour-rounded datetime.
rive_df.head()

Unnamed: 0_level_0,rive_temp,rive_wind,rive_vis,rive_ceil
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016-01-01 01:00:00,133,51,16093,22000
2016-01-01 02:00:00,128,46,16093,22000
2016-01-01 03:00:00,122,46,16093,22000
2016-01-01 04:00:00,117,41,16093,22000
2016-01-01 05:00:00,111,51,16093,22000


In [11]:
# Preview the Redding frame, now indexed by hour-rounded datetime.
redd_df.head()

Unnamed: 0_level_0,redd_temp,redd_wind,redd_vis,redd_ceil
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016-01-01 01:00:00,94,72,16093,22000
2016-01-01 02:00:00,83,93,16093,22000
2016-01-01 03:00:00,78,67,16093,22000
2016-01-01 04:00:00,67,72,16093,22000
2016-01-01 06:00:00,56,72,16093,22000


In [12]:
# Preview the Fresno frame, now indexed by hour-rounded datetime.
fres_df.head()

Unnamed: 0_level_0,fres_temp,fres_wind,fres_vis,fres_ceil
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016-01-01 01:00:00,100,0,11265,22000
2016-01-01 02:00:00,78,0,11265,22000
2016-01-01 03:00:00,67,0,9656,22000
2016-01-01 04:00:00,56,15,8047,22000
2016-01-01 05:00:00,39,0,6437,22000


---

## Join DataFrames

In [13]:
# Start the combined frame from the San Diego data. This binds a second name
# to the same object (no copy is made); the merges below return new frames.
weather_df = sand_df
weather_df.shape

(24007, 4)

In [14]:
# Outer-join the Fresno columns on the datetime index so that hours present
# in either frame are kept (the missing side is filled with NaN).
# NOTE(review): hour-rounding may leave repeated timestamps in an index, and
# an index join pairs repeated keys combinatorially -- confirm that the later
# de-duplication cells are the intended remedy.
weather_df = pd.merge(
    left=weather_df,
    right=fres_df,
    how='outer',
    left_index=True,
    right_index=True,
)
weather_df.shape

(33488, 8)

In [15]:
# Outer-join the Riverside columns on the datetime index so that hours
# present in either frame are kept (the missing side is filled with NaN).
weather_df = pd.merge(
    left=weather_df,
    right=rive_df,
    how='outer',
    left_index=True,
    right_index=True,
)
weather_df.shape

(39859, 12)

In [16]:
# Outer-join the Redding columns on the datetime index so that hours present
# in either frame are kept (the missing side is filled with NaN).
weather_df = pd.merge(
    left=weather_df,
    right=redd_df,
    how='outer',
    left_index=True,
    right_index=True,
)
weather_df.shape

(51727, 16)

In [17]:
# Remove rows whose column values are all identical to an earlier row
# (the index is not part of this comparison).
weather_df = weather_df.drop_duplicates()
weather_df.shape

(49932, 16)

In [18]:
# Keep only the first row encountered for each timestamp so that the index
# becomes unique (duplicated() marks every repeat after the first by default).
weather_df = weather_df.loc[~weather_df.index.duplicated()]
weather_df.shape

(26190, 16)

In [19]:
# Build the complete hourly timeline (both endpoints inclusive) that the
# combined frame is re-indexed onto in the next step.
# Lowercase 'h' is the hourly alias; uppercase 'H' is deprecated in recent
# pandas releases.
datetime_index = pd.date_range(start='2016-01-01 01:00',
                               end='2019-04-24 07:00',
                               freq='h')
len(datetime_index)

29023

In [20]:
# Align the combined frame to the full hourly timeline: hours with no row
# become all-NaN rows, then every NaN takes the most recent earlier value.
# .ffill() replaces fillna(method='ffill'), which is deprecated in recent
# pandas releases; chaining also avoids the in-place mutation.
# Leading NaNs (before any observation) cannot be forward-filled and would
# remain NaN.
weather_df = weather_df.reindex(datetime_index).ffill()

In [21]:
# Attach the America/Los_Angeles time zone to the naive hourly index.
#   ambiguous=True              -> wall-clock hours repeated at the autumn
#                                  clock change are treated as DST.
#   nonexistent='shift_forward' -> wall-clock hours skipped at the spring
#                                  clock change are moved to the next valid time.
# NOTE(review): this assumes the source timestamps are Pacific local time --
# confirm against the upstream data. Shifted hours may also coincide with an
# hour already in the index -- verify index uniqueness afterwards.
weather_df = weather_df.tz_localize('America/Los_Angeles',
                                    ambiguous=True,
                                    nonexistent='shift_forward')

In [22]:
# Total number of missing cells across the whole frame after the fill.
weather_df.isna().sum().sum()

0

In [23]:
# Cast every column to integer; this relies on no NaN remaining in the frame.
weather_df = weather_df.astype(int)

In [24]:
# Final dimensions: one row per hour of the timeline, four columns per station.
weather_df.shape

(29023, 16)

In [25]:
# Preview the combined, time-zone-aware frame.
weather_df.head()

Unnamed: 0,sand_temp,sand_wind,sand_vis,sand_ceil,fres_temp,fres_wind,fres_vis,fres_ceil,rive_temp,rive_wind,rive_vis,rive_ceil,redd_temp,redd_wind,redd_vis,redd_ceil
2016-01-01 01:00:00-08:00,156,26,16093,22000,100,0,11265,22000,133,51,16093,22000,94,72,16093,22000
2016-01-01 02:00:00-08:00,144,21,16093,22000,78,0,11265,22000,128,46,16093,22000,83,93,16093,22000
2016-01-01 03:00:00-08:00,139,0,16093,22000,67,0,9656,22000,122,46,16093,22000,78,67,16093,22000
2016-01-01 04:00:00-08:00,133,0,16093,22000,56,15,8047,22000,117,41,16093,22000,67,72,16093,22000
2016-01-01 05:00:00-08:00,122,0,16093,22000,39,0,6437,22000,111,51,16093,22000,67,72,16093,22000


In [26]:
# Persist the combined frame; the datetime index is written as the first
# column (to_csv writes the index by default).
weather_df.to_csv('../data/ca_weather.csv')