# WINTER PREDICTOR SCAN

Uses the object-oriented `winter_predictor` module.

This code:
* scans all stations in the selected countries
* fits a preliminary regression on z90 and sea-ice in September and October
* if the fit is not too bad, applies a lasso regularization
* if the R2 is high, enough years are available, and the anomaly is remarkable, THEN:
* enters the next bet in the winter_pred database

In [1]:
%load_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt
from winter_predictor import Predictor, StationPrediction
import numpy as np
import pymongo
import pandas as pd
from scipy.stats import norm

# PART 1 : GET PREDICTORS

The **SAME** predictor set will then be used for any target station to predict.

In [2]:
# Build the shared predictor container. getPredictorsDF() presumably fills
# PRED.X_df, which the later cells pass to the station model — confirm in
# the winter_predictor module.
PRED = Predictor()
PRED.getPredictorsDF()

# PART 2 : GET STATION DATA

In [3]:
# Gather the ids of all relevant stations, i.e. those located in one of the
# countries listed in the `name` column of input.csv.
mongo_host_local = 'mongodb://localhost:27017/'
mg = pymongo.MongoClient(mongo_host_local)
db = mg.GHCN
countries = pd.read_csv('input.csv').name.values
# .tolist() converts NumPy scalars to native Python values: PyMongo cannot
# BSON-encode numpy.int64, so ids kept as NumPy scalars would fail as soon as
# they are passed back into a query or stored in a document.
sta_df = pd.DataFrame(list(db.stations.find(filter={'country': {'$in': countries.tolist()}})))
all_ids = sta_df['station_id'].tolist()
len(all_ids)

3244

In [4]:
# Station to analyse; swap in the commented-out id to run on Zürich instead.
#this_id = 64606660000 # Zürich
this_id = 12263612000 # First station tried
STA = StationPrediction(station_id=this_id, 
                        target_months=['12', '1'],  # presumably December and January — confirm
                        X_df=PRED.X_df)
# Presumably loads the station's observations and then derives the anomaly
# table shown in the next cell (STA.anom_df) — confirm in winter_predictor.
STA.queryData()
STA.getAnomalies()

In [26]:
# Display the anomaly table (columns wyear, x, xbar, anom in the output below).
STA.anom_df

Unnamed: 0,wyear,x,xbar,anom
0,1980,29.1,28.820658,0.279342
1,1981,27.9,28.823235,-0.923235
2,1982,28.85,28.825811,0.024189
3,1983,28.2,28.828388,-0.628388
4,1984,29.0,28.830965,0.169035
5,1985,28.85,28.833542,0.016458
6,1986,28.65,28.836119,-0.186119
7,1987,29.35,28.838695,0.511305
8,1988,29.95,28.841272,1.108728
9,1989,28.8,28.843849,-0.043849


# PART 3 : FIT ANOMALIES

In [5]:
## Quick fit first
# Preliminary fit against the shared predictors; the cell then displays its
# R2 and the number of years that entered the fit.
STA.quickfitAnomalies(X_df=PRED.X_df)
STA.R2_prelim, STA.nyears_used
# INPUT: intended gate before the full fit (not enforced in this cell),
# e.g. R2_prelim > 0.5 and nyears_used > 25

(0.044182199257446375, 15)

In [6]:
# Full fit (described in the notebook intro as the lasso-regularised step);
# STA.R2 and STA.importance_df are read in the following cells.
STA.fitAnomalies(X_df=PRED.X_df)

In [7]:
# Collect the headline diagnostics of the full fit and print them.
res = {
    'R2': STA.R2,
    'nyears_used': STA.nyears_used,
}
print(res)

{'R2': 0.0021788605506251058, 'nyears_used': 15}


In [8]:
# Display the predictor-importance table (columns coef, pred, absCoef below).
STA.importance_df

Unnamed: 0,coef,pred,absCoef
0,-0.000921,PC2_ci_10,0.000921


# PART 4 : PREDICT FUTURE ANOMALIES

In [9]:
wyear_new = 2017 # !!! INPUT: the winter-year to predict
# Select the predictor row(s) for that winter-year; the `@` prefix makes
# DataFrame.query read the Python variable wyear_new.
newX_df = PRED.X_df.query('wyear==@wyear_new') # for instance
# Run the fitted model on the new predictors and show the predicted anomaly.
STA.predictFutureAnomalies(newX_df)
pred_anomaly = STA.predictedAnomaly
pred_anomaly

array([ 0.00123845])

# PART 5 : EXTREME ANOMALY ?

In [10]:
# Long-term trend
ltt = STA.detrend_fit
# NOTE(review): predict() receives a bare scalar here. If detrend_fit is a
# scikit-learn estimator, recent releases require 2-D input such as
# [[wyear_new]] — confirm against the winter_predictor module.
ltt_fit = ltt.predict(wyear_new)
# Predicted wyear value : trend value for that year plus the predicted anomaly
T_pred = pred_anomaly + ltt_fit
T_pred

array([[ 28.91723845]])

In [32]:
# SwissRe-style anomaly: departure from the plain mean of the last 10 rows
# of the anomaly table (NaNs are ignored when averaging).
SwissRe_df = STA.anom_df.tail(10)
SwissRe_ltt = np.nanmean(SwissRe_df['x'].values)
SwissRe_anom = T_pred - SwissRe_ltt
# Attach each year's departure from that baseline, then discard rows that
# contain missing values.
SwissRe_df = SwissRe_df.assign(anom_SwissRe=SwissRe_df['x'] - SwissRe_ltt).dropna()

In [34]:
# Fit Normal distribution to the recent SwissRe anomalies; only the fitted
# standard deviation is used below (mu is not read again in this cell).
mu, std = norm.fit(SwissRe_df.anom_SwissRe)
# quantl is the normal CDF evaluated at 0 for a distribution centred on the
# predicted anomaly, i.e. 1 - Phi(SwissRe_anom / std).
# NOTE(review): this is the probability of falling below 0, not the quantile
# of the predicted anomaly itself — confirm that this is what the stored
# 'quantile_anom' field is meant to hold.
quantl = norm.cdf(0, loc=SwissRe_anom, scale=std).ravel()[0]
quantl

0.4405217731106621

In [13]:
# Is the prediction extreme, i.e. outside the central 0.25-0.75 band?
# If yes, then the result is meant to be stored. A NaN quantl evaluates to
# False here because both comparisons are False for NaN.
quantl > 0.75 or quantl < 0.25

False

In [14]:
# Summary document describing this prediction, built in the field order
# used for storage: target year, station identity, fit quality, prediction.
pred_doc = {'wyear': wyear_new}
pred_doc.update(
    (field, STA.metadata[field]) for field in ('station_id', 'country', 'name')
)
pred_doc.update({
    'R2': STA.R2,
    'nyears_used': STA.nyears_used,
    # ravel()[0] unwraps the single value held in the array results.
    'SwissRe_anom': SwissRe_anom.ravel()[0],
    'pred_T': T_pred.ravel()[0],
    'quantile_anom': quantl,
})

In [15]:
# Display the summary document before it is written to the database.
pred_doc

{'R2': 0.0021788605506251058,
 'SwissRe_anom': 0.077238454627135411,
 'country': 'KENYA',
 'name': 'LODWAR',
 'nyears_used': 15,
 'pred_T': 28.917238454627132,
 'quantile_anom': nan,
 'station_id': 12263612000,
 'wyear': 2017}

In [16]:
# List the databases available on the server.
# MongoClient.database_names() was deprecated in PyMongo 3.6 and removed in
# 4.0; list_database_names() is its replacement.
mg.list_database_names()

['test', 'ECMWF', 'local', 'GHCN', 'winter_pred']

In [17]:
# Switch to the database that stores the predictions and list its collections.
# Database.collection_names() was deprecated in PyMongo 3.6 and removed in
# 4.0; list_collection_names() is its replacement.
db = mg.winter_pred
db.list_collection_names()

['system.indexes', 'prediction']

In [18]:
# Persist the summary document in the `prediction` collection of winter_pred.
# NOTE(review): the insert runs unconditionally; it is not gated on the
# extreme-anomaly check evaluated earlier — confirm this is intended.
db.prediction.insert_one(pred_doc)

<pymongo.results.InsertOneResult at 0x7fcb480529d8>

In [24]:
# Count the stored prediction documents.
# Collection.count() was deprecated in PyMongo 3.7 and removed in 4.0;
# count_documents({}) with an empty filter counts every document.
pred_col = db.prediction
pred_col.count_documents({})

0

In [23]:
#db.prediction.delete_many({})

<pymongo.results.DeleteResult at 0x7fcb438bef30>

In [20]:
import psutil

# Number of physical cores (logical=False).
psutil.cpu_count(logical=False)

2