In [1]:
from datetime import datetime

import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures

try:
    # Standalone joblib: scikit-learn deprecated its vendored copy in 0.21
    # and removed it in 0.23.
    import joblib
except ImportError:
    # Fallback for old environments that only ship the vendored copy.
    from sklearn.externals import joblib

### Prepare dataframe

In [2]:
# Load the raw observations (first CSV row is the header), discard the two
# columns that are not used below, then discard every row that still has a
# missing value.
filename = 'data_2011_2017.csv'
df = (
    pd.read_csv(filename, header=0)
    .drop(columns=['PRCP', 'SNWD'])
    .dropna()
)
# Replace each calendar date by its day-of-year number (1-366). The vectorised
# .dt accessor yields the same values as calling timetuple().tm_yday on every
# row, without a Python-level lambda per row.
df['DATE'] = pd.to_datetime(df['DATE']).dt.dayofyear

In [3]:
# The DATE column now holds a day-of-year number rather than a date, so give
# it a name that says so. Rebinding (instead of inplace=True) keeps the cell
# safe to re-run.
df = df.rename(columns={'DATE': 'YDAY'})

### Train Test Split

In [4]:
# Drop the station identifier columns, then hold out a random test subset
# using train_test_split's default proportions. The fixed random_state makes
# the split -- and therefore the scores and saved models below -- reproducible
# after a kernel restart.
train, test = train_test_split(
    df.drop(columns=['NAME', 'STATION']),
    random_state=42,
)

### TAVG Regressor

In [5]:
# Average-temperature model: ridge regression on a degree-2 polynomial
# expansion of the day-of-year feature.
tavg_rgr = make_pipeline(
    PolynomialFeatures(degree=2),
    Ridge(),
)

In [6]:
# Single-column selections with double brackets give (n_samples, 1) arrays,
# the same shape the explicit reshape(-1, 1) produces.
tavg_x_train = train[['YDAY']].values
tavg_y_train = train[['TAVG']].values
tavg_x_test = test[['YDAY']].values
tavg_y_test = test[['TAVG']].values

In [7]:
# Fit on the training split, then display the score on the held-out split.
# fit() returns the pipeline itself, so the two calls can be chained.
tavg_rgr.fit(tavg_x_train, tavg_y_train).score(tavg_x_test, tavg_y_test)

0.8894440216610064

In [8]:
# Persist the fitted average-temperature pipeline to disk; the cell displays
# the list of file names that joblib wrote.
tavg_rgr_filename = 'tavg_regressor.rgr'
joblib.dump(value=tavg_rgr, filename=tavg_rgr_filename)

['tavg_regressor.rgr']

### TMIN Regressor

In [9]:
# Minimum-temperature model: ridge regression on a degree-2 polynomial
# expansion of the day-of-year feature.
tmin_rgr = make_pipeline(
    PolynomialFeatures(degree=2),
    Ridge(),
)

In [10]:
# Single-column selections with double brackets give (n_samples, 1) arrays,
# the same shape the explicit reshape(-1, 1) produces.
tmin_x_train = train[['YDAY']].values
tmin_y_train = train[['TMIN']].values
tmin_x_test = test[['YDAY']].values
tmin_y_test = test[['TMIN']].values

In [11]:
# Fit on the training split, then display the score on the held-out split.
# fit() returns the pipeline itself, so the two calls can be chained.
tmin_rgr.fit(tmin_x_train, tmin_y_train).score(tmin_x_test, tmin_y_test)

0.8981898712539317

In [12]:
# Persist the fitted minimum-temperature pipeline to disk; the cell displays
# the list of file names that joblib wrote.
tmin_rgr_filename = 'tmin_regressor.rgr'
joblib.dump(value=tmin_rgr, filename=tmin_rgr_filename)

['tmin_regressor.rgr']

### TMAX Regressor

In [13]:
# Maximum-temperature model: ridge regression on a degree-2 polynomial
# expansion of the day-of-year feature.
tmax_rgr = make_pipeline(
    PolynomialFeatures(degree=2),
    Ridge(),
)

In [14]:
# Single-column selections with double brackets give (n_samples, 1) arrays,
# the same shape the explicit reshape(-1, 1) produces.
tmax_x_train = train[['YDAY']].values
tmax_y_train = train[['TMAX']].values
tmax_x_test = test[['YDAY']].values
tmax_y_test = test[['TMAX']].values

In [15]:
# Fit on the training split, then display the score on the held-out split.
# fit() returns the pipeline itself, so the two calls can be chained.
tmax_rgr.fit(tmax_x_train, tmax_y_train).score(tmax_x_test, tmax_y_test)

0.8037318452087875

In [16]:
# Persist the fitted maximum-temperature pipeline to disk; the cell displays
# the list of file names that joblib wrote.
tmax_rgr_filename = 'tmax_regressor.rgr'
joblib.dump(value=tmax_rgr, filename=tmax_rgr_filename)

['tmax_regressor.rgr']

#### And that's all for training :)

#### Now let's load the saved regressors and make predictions.

### Parse the date string and convert it to a day-of-year number

In [17]:
# Turn an ISO-formatted date string into the model input: a (1, 1) integer
# array holding the day of the year (one sample, one feature).
date_str = '2019-04-05'
date = datetime.strptime(date_str, '%Y-%m-%d')
yday = np.array([[date.timetuple().tm_yday]], dtype=np.int64)
yday

array([[95]])

### TAVG Prediction

In [18]:
# Restore the average-temperature pipeline from disk.
# NOTE: joblib.load unpickles the file, so only load files you trust.
tavg_rgr_loaded = joblib.load(filename='tavg_regressor.rgr')

In [19]:
# Predicted average temperature for the parsed day of year.
tavg_rgr_loaded.predict(X=yday)

array([[27.01403801]])

### TMIN Prediction

In [20]:
# Restore the minimum-temperature pipeline from disk.
# NOTE: joblib.load unpickles the file, so only load files you trust.
tmin_rgr_loaded = joblib.load(filename='tmin_regressor.rgr')

In [21]:
# Predicted minimum temperature for the parsed day of year.
tmin_rgr_loaded.predict(X=yday)

array([[20.74581885]])

### TMAX Prediction

In [22]:
# Restore the maximum-temperature pipeline from disk.
# NOTE: joblib.load unpickles the file, so only load files you trust.
tmax_rgr_loaded = joblib.load(filename='tmax_regressor.rgr')

In [24]:
# Predicted maximum temperature for the parsed day of year.
tmax_rgr_loaded.predict(X=yday)