In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.vector_ar.vecm import coint_johansen
from statsmodels.tsa.vector_ar.var_model import VAR
from sklearn.metrics import mean_squared_error 
import math
%matplotlib inline

In [2]:
# Location of the beach weather station sensor export (relative to the
# notebook's working directory).
csv_path = "drive/Colab Notebooks/Beach_Weather_Stations_-_Automated_Sensors.csv"

# Read the raw export into a DataFrame and preview the first five rows.
data = pd.read_csv(csv_path)
data.head(5)

Unnamed: 0,Station Name,Measurement Timestamp,Air Temperature,Wet Bulb Temperature,Humidity,Rain Intensity,Interval Rain,Total Rain,Precipitation Type,Wind Direction,Wind Speed,Maximum Wind Speed,Barometric Pressure,Solar Radiation,Heading,Battery Life,Measurement Timestamp Label,Measurement ID
0,63rd Street Weather Station,06/09/2019 04:00:00 PM,16.6,16.6,100,0.0,0.0,70.7,0.0,354,1.4,3.4,992.4,143,356.0,11.9,06/09/2019 4:00 PM,63rdStreetWeatherStation201906091600
1,Foster Weather Station,06/09/2019 04:00:00 PM,15.67,,89,,0.0,,,336,1.3,1.5,991.9,0,,15.1,06/09/2019 4:00 PM,FosterWeatherStation201906091600
2,Oak Street Weather Station,06/09/2019 04:00:00 PM,18.5,16.6,83,0.0,0.0,28.8,0.0,63,0.6,0.9,993.2,103,320.0,12.0,06/09/2019 4:00 PM,OakStreetWeatherStation201906091600
3,63rd Street Weather Station,06/09/2019 03:00:00 PM,16.8,16.8,100,0.0,0.0,70.7,0.0,30,2.8,3.9,992.3,184,356.0,11.9,06/09/2019 3:00 PM,63rdStreetWeatherStation201906091500
4,Foster Weather Station,06/09/2019 03:00:00 PM,16.56,,89,,0.0,,,45,0.9,1.6,991.5,0,,15.1,06/09/2019 3:00 PM,FosterWeatherStation201906091500


In [3]:
# Inspect the inferred column types; 'Measurement Timestamp' is still a plain
# object (string) column at this point and has to be parsed before it can be
# used as a time index.
data.dtypes

Station Name                    object
Measurement Timestamp           object
Air Temperature                float64
Wet Bulb Temperature           float64
Humidity                         int64
Rain Intensity                 float64
Interval Rain                  float64
Total Rain                     float64
Precipitation Type             float64
Wind Direction                   int64
Wind Speed                     float64
Maximum Wind Speed             float64
Barometric Pressure            float64
Solar Radiation                  int64
Heading                        float64
Battery Life                   float64
Measurement Timestamp Label     object
Measurement ID                  object
dtype: object

## Creating a DateTime index for the dataset using Measurement Timestamp column

In [0]:
# Identifier/label columns that carry no numeric signal for the model.
cols = ['Station Name', 'Measurement Timestamp Label', 'Measurement ID']

# Parse the 12-hour-clock timestamp strings (e.g. "06/09/2019 04:00:00 PM")
# into real datetimes so the column can serve as a time index.
data['Measurement Timestamp'] = pd.to_datetime(
    data['Measurement Timestamp'],
    format='%m/%d/%Y %I:%M:%S %p',
)

# Promote the timestamp to the index and discard the identifier columns.
# Once 'Station Name' is gone, rows from different stations can no longer be
# told apart.
data = data.set_index('Measurement Timestamp').drop(columns=cols)

In [5]:
# Confirm which columns remain after the timestamp became the index and the
# identifier columns were dropped.
data.columns

Index(['Air Temperature', 'Wet Bulb Temperature', 'Humidity', 'Rain Intensity',
       'Interval Rain', 'Total Rain', 'Precipitation Type', 'Wind Direction',
       'Wind Speed', 'Maximum Wind Speed', 'Barometric Pressure',
       'Solar Radiation', 'Heading', 'Battery Life'],
      dtype='object')

In [6]:
# Summary statistics for every remaining numeric column.
# NOTE(review): the displayed extremes look like sensor error sentinels rather
# than real readings (Solar Radiation min of -100000, Wind Speed / Maximum Wind
# Speed max of 999.9) -- confirm against the data dictionary and consider
# masking them before modelling, since they feed straight into the hourly means.
data.describe()

Unnamed: 0,Air Temperature,Wet Bulb Temperature,Humidity,Rain Intensity,Interval Rain,Total Rain,Precipitation Type,Wind Direction,Wind Speed,Maximum Wind Speed,Barometric Pressure,Solar Radiation,Heading,Battery Life
count,90163.0,56638.0,90238.0,56638.0,90238.0,56638.0,56638.0,90238.0,90238.0,90238.0,90092.0,90238.0,56638.0,90238.0
mean,11.379151,9.173843,69.53473,0.191048,0.168073,154.769805,4.765264,180.528314,3.109517,4.594583,994.233139,95.267537,204.319626,13.131933
std,10.592629,9.572233,15.761854,2.056301,1.246283,211.89305,16.41943,108.924007,6.947825,7.228748,7.199156,388.733265,174.542049,1.518133
min,-29.78,-28.9,14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,964.4,-100000.0,0.0,11.6
25%,3.3,2.2,59.0,0.0,0.0,13.0,0.0,84.0,1.5,2.4,989.9,0.0,1.0,11.9
50%,11.5,9.6,71.0,0.0,0.0,48.4,0.0,199.0,2.6,3.9,994.2,2.0,354.0,12.1
75%,20.56,17.4,82.0,0.0,0.0,231.5,0.0,272.0,4.1,6.0,998.7,68.0,355.0,15.1
max,35.0,28.4,100.0,183.6,63.42,1056.1,70.0,359.0,999.9,999.9,1022.7,1277.0,359.0,15.3


In [7]:
# Collapse the readings into one row per hour. All rows that fall in the same
# hour are averaged together; because the station identifier was dropped
# earlier, this mean is taken across stations.
#
# * pd.offsets.Hour() is the offset object behind the 'H' alias; using it
#   directly avoids the alias deprecation in recent pandas releases while
#   still working on older ones.
# * .ffill() replaces the deprecated fillna(method="ffill") and carries the
#   last known value forward over gaps.
# * .dropna() removes any leading rows that have nothing to fill from.
#
# Built as a single non-mutating chain so re-running the cell always produces
# the same `df` from `data`.
df = (
    data
    .resample(pd.offsets.Hour())
    .mean()
    .ffill()
    .dropna()
)
df.shape

(36152, 14)

### Creating train and validation split

In [0]:
# Chronological 80/20 split: the first 80% of the hourly rows are used for
# fitting, the remaining rows are held out for validation. No shuffling, so
# the validation period lies strictly after the training period.
split_at = int(0.8 * len(df))
train = df.iloc[:split_at]
valid = df.iloc[split_at:]

### Training the model on the training dataset

In [0]:
# Vector autoregression over all columns of the training frame; every column
# is treated as an endogenous variable.
model = VAR(train)

# Fit with the library defaults (no explicit lag order or information
# criterion is requested here).
model_fit = model.fit()

In [10]:
# Show the fitted VAR's regression summary: information criteria followed by
# one coefficient table per equation (one equation per column).
model_fit.summary()

  Summary of Regression Results   
Model:                         VAR
Method:                        OLS
Date:           Sun, 28, Jul, 2019
Time:                     00:47:22
--------------------------------------------------------------------
No. of Equations:         14.0000    BIC:                    34.1286
Nobs:                     28920.0    HQIC:                   34.0878
Log likelihood:      -1.06692e+06    FPE:                6.24842e+14
AIC:                      34.0685    Det(Omega_mle):     6.20322e+14
--------------------------------------------------------------------
Results for equation Air Temperature
                             coefficient       std. error           t-stat            prob
------------------------------------------------------------------------------------------
const                          -6.259800         0.911925           -6.864           0.000
L1.Air Temperature              0.936245         0.007341          127.533           0.000
L1.Wet Bul

### Make predictions on the validation set

In [0]:
# Forecast as many steps ahead as there are validation rows.
# The forecast only needs the last `k_ar` (lag order) observations as its
# starting values, so pass exactly those. `model_fit.endog` is used instead of
# `model_fit.y`, which is a deprecated alias that newer statsmodels releases no
# longer provide.
prediction = model_fit.forecast(model_fit.endog[-model_fit.k_ar:], steps=len(valid))

### Converting predictions to a DataFrame

In [0]:
cols = df.columns

# Wrap the 2-D forecast array (one row per step, one column per variable) in a
# DataFrame in a single call.
#
# This replaces an element-by-element copy that wrote through chained indexing
# (`pred.iloc[i][j] = ...`). pandas does not guarantee that such writes reach
# the original frame, and with copy-on-write they never do, leaving it all NaN.
# It also hard-coded the number of columns and, via `columns=[cols]`, produced
# accidental MultiIndex columns with object dtype.
#
# The index is the default 0..n-1 range, same as before; use
# `index=valid.index` instead if the forecasts need to line up with timestamps.
pred = pd.DataFrame(prediction, columns=cols)

### Check the MSE for each variable

In [13]:
# Report the mean squared error of the forecast against the held-out
# validation data, one line per variable.
for column in cols:
    column_mse = mean_squared_error(pred[column], valid[column])
    print('mse value for', column, 'is : ', column_mse)

mse value for Air Temperature is :  121.22346890915637
mse value for Wet Bulb Temperature is :  99.42818805431799
mse value for Humidity is :  189.84508325316594
mse value for Rain Intensity is :  5.302345891656254
mse value for Interval Rain is :  1.1268225236293972
mse value for Total Rain is :  25410.072251030466
mse value for Precipitation Type is :  305.2423581220381
mse value for Wind Direction is :  7937.462488807512
mse value for Wind Speed is :  39.06445127743078
mse value for Maximum Wind Speed is :  41.93975022880157
mse value for Barometric Pressure is :  54.768567757424016
mse value for Solar Radiation is :  12681.4296263369
mse value for Heading is :  27051.97138614105
mse value for Battery Life is :  0.1985342580521253


### Make final predictions

In [14]:
# Refit on the complete hourly series (training + validation) so the final
# forecast starts from the most recent observations available.
model = VAR(endog=df)
model_fit = model.fit()

# Seed the forecast with the last `k_ar` observations. `model_fit.endog`
# replaces the deprecated `model_fit.y` alias.
# `steps` counts rows of `df`, i.e. hourly intervals (not days): steps=24
# forecasts one day ahead.
yhat = model_fit.forecast(model_fit.endog[-model_fit.k_ar:], steps=1)
print(yhat)

[[1.69731814e+01 1.65090314e+01 8.94531124e+01 1.37445221e-01
  1.11039547e-01 4.99038654e+01 2.28992246e+00 2.17458949e+02
  1.34937586e+00 2.29092940e+00 9.92699223e+02 8.09535583e+01
  3.24927924e+02 1.30890484e+01]]
