# Using Betas to Predict from a Given Input

Katrina Wheelan, Updated 6.2.20

### Importing the necessary packages

In [3]:
import warnings
warnings.filterwarnings('ignore')
import xarray as xr
import sklearn
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression, Lasso, LinearRegression
from sklearn.model_selection import ShuffleSplit, cross_val_score, GridSearchCV
from sklearn.metrics import log_loss
import matplotlib.pyplot as plt
import os

## Input Coordinates

In [4]:
# Coordinates of the single grid cell to predict for; adjust if needed
lat, lon = 38.125, -101.875

# Label of the observed variable
obs_var = 'tmax'

## Import Data

In [6]:
# Input (predictor) data
X_all = xr.open_dataset(
    '/glade/work/kwheelan/linear_data/X_all_temp_bymonth.nc'
)

# Observed data, restricted to 1979-2014
obs_file = "/glade/p/cisl/risc/narccap/obs/gridMET/common/DCA/tmax.gridMET.NAM-22i.SGP.nc"
obs = xr.open_mfdataset(obs_file)
obs = obs.sel(time=slice('1979-01-01', '2014-12-31'))

# Observations at the grid cell nearest to (lat, lon), restricted to 1980-2014
Y_all = (
    obs
    .sel(lat=lat, lon=lon, method='nearest')
    .sel(time=slice('1980-01-01', '2014-12-31'))
)

## Import Betas

In [53]:
# Directory that holds the saved coefficient (beta) files
ROOT = '/glade/work/kwheelan/Linear_downscaling/betas'


def filepath(lat, lon, betas):
    """Build the path of a coefficient file under ROOT.

    The file name has the form ``<betas>_tmax_<lat>_<lon>.nc``.

    lat, lon -- coordinates, converted with str() for the file name
    betas    -- file-name prefix identifying which coefficient table to use
    """
    lat_label, lon_label = str(lat), str(lon)
    filename = '{}_tmax_{}_{}.nc'.format(betas, lat_label, lon_label)
    return os.path.join(ROOT, filename)
    
def _read_betas(kind):
    """Read one coefficient table for the current (lat, lon) cell.

    The file located by ``filepath(lat, lon, kind)`` is parsed as CSV
    exactly once (despite the ``.nc`` suffix in its name) and split into:

    * the row labels held in the unnamed first column, which pandas
      reads as ``"Unnamed: 0"``, returned as a list, and
    * the remaining columns, returned as a DataFrame.
    """
    table = pd.read_csv(filepath(lat, lon, kind))
    return list(table["Unnamed: 0"]), table.drop("Unnamed: 0", axis=1)


preds_to_keep, linear_betas = _read_betas("coefMatrix")
lasso_preds, LASSO_month_betas = _read_betas('betas_LASSO_list')
lasso_preds_annual, LASSO_annual_betas = _read_betas('betas_LASSO_annual_list')

# Replace the last row label of each LASSO table with 'constant'
# (presumably the intercept row is stored under a different label -- TODO confirm).
lasso_preds = lasso_preds[:-1] + ['constant']
lasso_preds_annual = lasso_preds_annual[:-1] + ['constant']

## Make predictions

In [56]:
#Builds the per-month predictor matrices for all three models
#(Takes a while to run)

# Work on a real (shallow) copy: re-labelling the time coordinate below must
# not overwrite the time coordinate of the dataset loaded into X_all.
X_all_cp = X_all.copy()
# Label every time step with its calendar month so that a whole month can be
# picked out with .sel(time=month); the dates are read from 'time-copy'.
X_all_cp['time'] = X_all_cp['time-copy'].dt.month


def _monthly_matrices(keys):
    """Return a list of 12 predictor matrices, one per month (January first).

    For each month the variables named in ``keys`` are read from the matching
    slice of ``X_all_cp`` and stacked as the rows of an ``np.matrix``, which
    is then transposed so that each predictor ends up in its own column.
    """
    matrices = []
    for month in range(1, 13):
        # Select the month once instead of once per predictor.
        X_month = X_all_cp.sel(time=month)
        matrices.append(
            np.matrix([X_month[key].values for key in keys]).transpose()
        )
    return matrices


x_all_lasso = _monthly_matrices(lasso_preds)
x_all_hand = _monthly_matrices(preds_to_keep)
x_all_lasso_annual = _monthly_matrices(lasso_preds_annual)

def knit_data(x_all, betas):
    """Predict for every month and stitch the results into one dataset.

    Parameters
    ----------
    x_all : sequence of 12 matrices
        Predictor matrix for each month, January first (``x_all[month-1]``).
    betas : DataFrame-like
        Indexed by full month name (the entries of ``monthsFull``); each
        entry holds that month's coefficients.

    Returns
    -------
    The monthly slices of ``X_all_cp``, each with a ``"preds"`` variable
    added and its time values restored from ``'time-copy'``, concatenated
    along ``"time"`` and sorted by time.

    Notes
    -----
    Reads the globals ``X_all_cp`` and ``monthsFull``; both must be defined
    before this function is called.
    """
    monthly = []
    for month in range(1, 13):
        X_month = X_all_cp.sel(time=month)
        product = np.matmul(x_all[month-1], betas[monthsFull[month-1]])
        # Keep the first row of the product as the prediction series
        # (presumably a single 1 x n_times row because x_all holds np.matrix
        # objects -- TODO confirm).
        X_month["preds"] = ('time', pd.DataFrame(product).values[0])
        # Put the original time values back in place of the month labels.
        X_month['time'] = X_month['time-copy']
        monthly.append(X_month)

    # Concatenate once at the end rather than growing the result in the loop.
    return xr.concat(monthly, dim="time").sortby('time')

In [57]:
# Full month names, January first
monthsFull = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
]
# Three-letter abbreviations, taken from the start of each full name
monthsAbrev = [name[:3] for name in monthsFull]

# Build the prediction dataset for each of the three sets of betas
X_preds_lin, X_preds_lasso, X_preds_lasso_annual = [
    knit_data(matrices, coefs)
    for matrices, coefs in (
        (x_all_hand, linear_betas),
        (x_all_lasso, LASSO_month_betas),
        (x_all_lasso_annual, LASSO_annual_betas),
    )
]

In [59]:
#save location
location = '/glade/work/kwheelan/datasets'
# NOTE(review): `location` is not used by the loop below; the files are
# written under /glade/work/kwheelan/linear_data. Confirm which directory
# is intended.

# Prediction datasets keyed by the model label used in the output file name
# (an explicit mapping instead of looking variables up by name with eval).
preds_by_model = {
    "lin": X_preds_lin,
    'lasso': X_preds_lasso,
    'lasso_annual': X_preds_lasso_annual,
}

#saves the original predictions (no stochastic component) as netCDF files
for model, preds in preds_by_model.items():
    fp = '/glade/work/kwheelan/linear_data/finalPreds_{}_tmax_{}_{}.nc'.format(model, str(lat),str(lon))
    # Remove any previous output first; a missing file is the expected case
    # and is ignored, while other errors are no longer silently swallowed.
    try:
        os.remove(fp)
    except FileNotFoundError:
        pass
    preds.to_netcdf(fp)