# Prepare station values to be predicted

* For Moe

In [1]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymongo
from scipy.stats import norm

from winter_predictor import Predictor, StationPrediction

# Get Station Data

In [2]:
# Gather all relevant station ids
mongo_host_local = 'mongodb://localhost:27017/'
mg = pymongo.MongoClient(mongo_host_local)
db = mg['GHCN']

# Country names of interest are read from the `name` column of input.csv.
countries = pd.read_csv('input.csv')['name'].values

# Pull every station document whose country is one of those names.
country_filter = {'country': {'$in': list(countries)}}
station_docs = list(db.stations.find(filter=country_filter))
sta_df = pd.DataFrame(station_docs)

# Flat list of the station ids, and how many stations were found.
all_ids = list(sta_df['station_id'].values)
len(all_ids)

3244

In [3]:
# Preview the first five station records.
sta_df.head(n=5)

Unnamed: 0,_id,station_id,loc,country,country_id,wmo_id,elev,name,landcover,popclass
0,5abe7d8523c8cceb0c3f61b9,12263612000,"{'type': 'Point', 'coordinates': [5.62, 3.12]}",KENYA,122,63612,515.0,LODWAR,WARM GRASS/SHRUB,A
1,5abe7d8523c8cceb0c3f61bb,12263619000,"{'type': 'Point', 'coordinates': [9.05, 3.53]}",KENYA,122,63619,1097.0,MOYALE,SUCCULENT THORNS,B
2,5abe7d8523c8cceb0c3f61bd,12263624000,"{'type': 'Point', 'coordinates': [1.87, 3.93]}",KENYA,122,63624,231.0,MANDERA,WARM GRASS/SHRUB,A
3,5abe7d8523c8cceb0c3f61bf,12263661000,"{'type': 'Point', 'coordinates': [5.0, 1.02]}",KENYA,122,63661,1875.0,KITALE,WARM GRASS/SHRUB,C
4,5abe7d8523c8cceb0c3f61c1,12263671000,"{'type': 'Point', 'coordinates': [0.07, 1.75]}",KENYA,122,63671,244.0,WAJIR,SUCCULENT THORNS,A


In [6]:
def getStationAnom(id, target_months=['12', '1']):
    '''
    Queries station data and returns the average over the target_months,
    then detrends the time series and calculates the yearly anomalies.
    (The averaging/detrending itself is done by StationPrediction.)
    ---
    id -- int, station id (the name shadows the builtin `id`; it is kept
          because callers pass it by keyword)
    target_months -- list of months, each an int written as a string,
                     e.g. ['12', '1']

    Returns the station's `anom_df` with an extra `station_id` column, or
    None when `getAnomalies()` raises. In that case a warning naming the
    station and the error is emitted, so skipped stations are visible.
    '''
    STA = StationPrediction(station_id=id,
                            target_months=target_months,
                            X_df=None)
    STA.queryData()
    try:
        STA.getAnomalies()
    except Exception as e:
        # Best-effort: a station whose anomalies cannot be computed is
        # skipped rather than aborting the whole run, but report it instead
        # of swallowing the error silently.
        warnings.warn(
            'getStationAnom: no anomalies for station {}: {!r}'.format(id, e)
        )
        return None
    # Tag every row with its station so frames can be concatenated later.
    return STA.anom_df.assign(station_id=id)

# Example: December/January anomalies for a single station.
df = getStationAnom(id=12263671000, target_months=['12', '1'])
df.head()

Unnamed: 0,wyear,x,xbar,anom,station_id
0,1980,28.1,28.915909,-0.815909,12263671000
1,1981,30.05,28.914545,1.135455,12263671000
2,1982,29.55,28.913182,0.636818,12263671000
3,1983,28.15,28.911818,-0.761818,12263671000
4,1984,29.3,28.910455,0.389545,12263671000


In [7]:
# One anomaly frame per station, in the order of sta_df; a station whose
# anomalies could not be computed contributes None.
dfList = [
    getStationAnom(id=station_id, target_months=['12', '1'])
    for station_id in sta_df['station_id']
]

In [8]:
# Stack the per-station frames into one long table with a fresh 0..n-1 index.
# pd.concat silently drops None entries (it raises only if all are None).
res = pd.concat(dfList, ignore_index=True)
res

Unnamed: 0,wyear,x,xbar,anom,station_id
0,1980,29.10,28.820658,0.279342,12263612000
1,1981,27.90,28.823235,-0.923235,12263612000
2,1982,28.85,28.825811,0.024189,12263612000
3,1983,28.20,28.828388,-0.628388,12263612000
4,1984,29.00,28.830965,0.169035,12263612000
...,...,...,...,...,...
77020,2014,5.80,5.151974,0.648026,65103917000
77021,2015,4.50,5.179798,-0.679798,65103917000
77022,2016,6.75,5.207621,1.542379,65103917000
77023,2017,6.50,5.235445,1.264555,65103917000


In [9]:
# Persist the stacked anomalies; the row index is written as the first column.
res.to_csv('/home/dmasson/temp/station_anoms.csv', index=True)

In [10]:
# Persist the station metadata next to the anomalies (row index included).
sta_df.to_csv('/home/dmasson/temp/station_ids.csv', index=True)