In [None]:
import netCDF4 as nc

import datetime
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.dates as mdates

from scipy import stats
from sklearn.svm import SVR

%matplotlib inline

## Exploring dataset

In [None]:
# Open the 2020 file with netCDF4 so its variables can be inspected below.
ds = nc.Dataset('data/air.2020.nc')

In [None]:
# Display the 'air' variable (the cell's last expression is rendered as output).
ds['air']

In [None]:
def get_noaa_lat_lng(coords):
    """Return the grid indices of the NOAA cell nearest to ``coords``.

    The nearest cell is looked up with the k-NN model stored in
    ``knn_labels.npy``. The flat index it returns is unravelled into a
    (row, column) pair on the 73 x 144 grid: latitude runs from 90 down to
    -90 (North to South) and longitude from 0 to 357.5, both in 2.5 degree
    steps, with latitude as the outer (slow) axis.

    Parameters
    ----------
    coords : sequence of two numbers
        Queried as ``[coords[1], coords[0]]``. Presumably
        (latitude, longitude) in degrees -- TODO confirm against how the
        model was fitted.
        NOTE(review): the grid longitudes are 0..357.5; a negative
        longitude is passed through unchanged -- confirm the model was
        fitted with matching conventions.

    Returns
    -------
    tuple
        ``(lat, lng)`` as zero-based indices along the latitude and
        longitude axes, directly usable for array indexing, or
        ``(None, None)`` when the model returns an index outside the grid.
    """
    n_lat, n_lng = 73, 144

    # NOTE(security): allow_pickle=True unpickles arbitrary Python objects;
    # only load this file from a trusted source.
    knn_labels = np.load('knn_labels.npy', allow_pickle=True).item()

    query = np.array([coords[1], coords[0]]).reshape(1, -1)
    coord_idx = int(knn_labels.kneighbors(query, return_distance=False)[0, 0])

    # An index that does not belong to the grid has no matching cell.
    if not 0 <= coord_idx < n_lat * n_lng:
        return None, None

    # Row-major layout: the quotient is the latitude row, the remainder the
    # longitude column. Both are zero-based so they index arrays correctly
    # (returning 1-based positions would select the neighbouring cell).
    lat, lng = divmod(coord_idx, n_lng)
    return lat, lng

In [None]:
# Location of interest; presumably (latitude, longitude) in decimal degrees -- TODO confirm.
coords = (10.96854, -74.78132)
# Grid position returned by the lookup; later cells use it to index ds['air'].
lat, lng = get_noaa_lat_lng(coords)

In [None]:
# Series for the selected grid cell: first 304 entries of the first axis,
# index 0 on the second axis.
cell_series = ds['air'][:304, 0, lat, lng]
data = cell_series.data

# Summary statistics of that series, shown as the cell output.
stats.describe(data)

## Parsing data (2015-2020)

In [None]:
# Build one [date, air] record per entry of every yearly file (2015-2020).
# Assumes each file holds one record per day, starting on January 1st --
# TODO confirm against the files' own time variable.
data = []
for year in range(2015, 2021):
    # The context manager closes each yearly file once its values are read;
    # a separate name leaves the `ds` opened earlier in the notebook untouched.
    with nc.Dataset(f"data/air.{year}.nc") as year_ds:
        yearly_air = year_ds['air'][:, 0, lat, lng].data

    first_day = datetime.datetime(year, 1, 1)
    for day, air in enumerate(yearly_air):
        # enumerate() starts at 0, so offset 0 must map to January 1st;
        # subtracting one would shift every date back by a day.
        date = first_day + datetime.timedelta(days=day)
        data.append([date, air])

In [None]:
# Build the frame in one chain. drop_duplicates() returns a new frame, so its
# result has to be kept for the call to have any effect. Indexing directly by
# 'date' avoids carrying the old positional index along as an extra column.
df = (
    pd.DataFrame(data=data, columns=['date', 'air'])
    .drop_duplicates()
    .set_index('date')
)

## Plotting rolling mean

In [None]:
# First figure: the raw series on its own.
fig_raw, ax_raw = plt.subplots(figsize=(22, 8))
ax_raw.plot(df['air'])
ax_raw.set_xlabel('Date')
ax_raw.set_ylabel('Air temp')
plt.show()

# 5-sample rolling mean of the same series.
close_px = df['air']
mavg = close_px.rolling(window=5).mean()

# Second figure: raw series with the rolling mean drawn on the same axes.
fig_avg, ax_avg = plt.subplots(figsize=(22, 8))
close_px.plot(ax=ax_avg, label='real data')
mavg.plot(ax=ax_avg, label='mavg')
ax_avg.set_xlabel('Date')
ax_avg.set_ylabel('Air temp')
ax_avg.legend();

## Converting dates

In [None]:
# Work on a separate frame with 'date' as a regular column; df is left untouched.
dates_df = df.reset_index()

# Store the original dates for plotting the predictions. They are kept under
# `org_dates`, a name that is not reassigned afterwards, because `dates` is
# rebound to the numeric values in the regression cell.
org_dates = dates_df['date']
dates = org_dates

# Convert the dates to Matplotlib date numbers so they can be used as a
# numeric feature.
dates_df['date'] = dates_df['date'].map(mdates.date2num)

dates_df.head()

In [None]:
# Sanity check: shape of the numeric date array before it is reshaped for the model.
dates_df['date'].values.shape

## Support Vector Regression

In [None]:
# Feature (numeric dates) and target (air values) as NumPy arrays.
dates = dates_df['date'].to_numpy()
air = df['air'].to_numpy()

# Reshape to column vectors of shape (n_samples, 1); the regressor takes a
# 2-D feature matrix.
dates = dates.reshape(-1, 1)
prices = air.reshape(-1, 1)  # column-vector form of `air`; not used by the fit below

# RBF-kernel support vector regressor fitted on dates -> air.
svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)
svr_rbf.fit(dates, air)

In [None]:
# Plot the observations and the fitted model against the original calendar
# dates (the index of `df`), so both curves share one date axis. `dates`
# holds the numeric column-vector form and is only used as model input here.
plt.figure(figsize=(22, 8))
plt.plot(df.index, air, color='black', label='Data')
plt.plot(df.index, svr_rbf.predict(dates), color='red', label='RBF model')
plt.xlabel('Date')
plt.ylabel('Air temp')
plt.legend()
plt.show()