# Winter Predictor Based on ERA-int

[Database Exploration](#db)

[Indexes](#indexes)

In [1]:
from netCDF4 import Dataset
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap, addcyclic, shiftgrid
import pymongo
from pprint import pprint
from datetime import datetime, timedelta, date

## Database exploration <a id='db'></a>

In [2]:
mongo_host_local = 'mongodb://localhost:27017/'
mg = pymongo.MongoClient(mongo_host_local)

In [3]:
pprint(mg.database_names())

['ECMWF', 'local']


In [23]:
db = mg.ECMWF
db.collection_names()

['ERAINT_grid', 'ERAINT_monthly', 'system.indexes']

In [5]:
con = db.ERAINT_grid
fo = con.find_one()

In [6]:
pprint(fo)

{'_id': ObjectId('5a088e938cb6b80d9b4c7f93'),
 'id_grid': 1,
 'loc': {'coordinates': [-180.0, 90.0], 'type': 'Point'}}


In [7]:
con.count()

115680

In [8]:
#con.drop()

In [9]:
ind = con.index_information()
pprint(ind)

{'_id_': {'key': [('_id', 1)], 'ns': 'ECMWF.ERAINT_grid', 'v': 1},
 'loc_2dsphere_id_grid_1': {'2dsphereIndexVersion': 2,
                            'key': [('loc', '2dsphere'), ('id_grid', 1)],
                            'ns': 'ECMWF.ERAINT_grid',
                            'v': 1}}


In [10]:
con_data = db.ERAINT_monthly
fo = con_data.find_one()
pprint(fo)

{'_id': ObjectId('5a089e0f8cb6b81914062592'),
 'al': 0.07,
 'ci': 1.0,
 'd2m': 235.27,
 'date': datetime.datetime(1979, 2, 1, 0, 0),
 'hcc': 0.2,
 'id_grid': 1,
 'istl1': 238.26,
 'lcc': 0.58,
 'mcc': 0.26,
 'msl': 101948.67,
 'si10': 6.1,
 'skt': 238.46,
 'sp': 101948.57,
 'sst': 271.46,
 'stl1': 238.47,
 't2m': 238.56,
 'u10': 1.18,
 'v10': -0.01,
 'year': 1979}


In [11]:
this_day = datetime(1995,1 ,1)
for doc in con_data.find({'date': this_day}).limit(2):
    pprint(doc)

{'_id': ObjectId('5a08a9bf8cb6b81914771dd2'),
 'al': 0.07,
 'ci': 1.0,
 'd2m': 239.33,
 'date': datetime.datetime(1995, 1, 1, 0, 0),
 'hcc': 0.29,
 'id_grid': 1,
 'istl1': 242.21,
 'lcc': 0.52,
 'mcc': 0.19,
 'msl': 102341.94,
 'si10': 4.96,
 'skt': 242.22,
 'sp': 102341.87,
 'sst': 271.46,
 'stl1': 242.24,
 't2m': 242.46,
 'u10': -0.5,
 'v10': 2.28,
 'year': 1995}
{'_id': ObjectId('5a08a9bf8cb6b81914771dd3'),
 'al': 0.07,
 'ci': 1.0,
 'd2m': 239.33,
 'date': datetime.datetime(1995, 1, 1, 0, 0),
 'hcc': 0.29,
 'id_grid': 2,
 'istl1': 242.21,
 'lcc': 0.52,
 'mcc': 0.19,
 'msl': 102341.94,
 'si10': 4.96,
 'skt': 242.22,
 'sp': 102341.87,
 'sst': 271.46,
 'stl1': 242.24,
 't2m': 242.46,
 'u10': -0.46,
 'v10': 2.27,
 'year': 1995}


In [27]:
# Drop the monthly data collection 
#con_data.drop()

In [12]:
fo = con_data.find_one()
pprint(fo)

{'_id': ObjectId('5a089e0f8cb6b81914062592'),
 'al': 0.07,
 'ci': 1.0,
 'd2m': 235.27,
 'date': datetime.datetime(1979, 2, 1, 0, 0),
 'hcc': 0.2,
 'id_grid': 1,
 'istl1': 238.26,
 'lcc': 0.58,
 'mcc': 0.26,
 'msl': 101948.67,
 'si10': 6.1,
 'skt': 238.46,
 'sp': 101948.57,
 'sst': 271.46,
 'stl1': 238.47,
 't2m': 238.56,
 'u10': 1.18,
 'v10': -0.01,
 'year': 1979}


In [13]:
# How many documents are in the daily data collection
con_data.count()

53675520

In [25]:
alldays = con_data.distinct(key='date')
alldays

[datetime.datetime(2017, 8, 1, 0, 0),
 datetime.datetime(2017, 7, 1, 0, 0),
 datetime.datetime(2017, 6, 1, 0, 0),
 datetime.datetime(2017, 5, 1, 0, 0),
 datetime.datetime(2017, 4, 1, 0, 0),
 datetime.datetime(2017, 3, 1, 0, 0),
 datetime.datetime(2017, 2, 1, 0, 0),
 datetime.datetime(2017, 1, 1, 0, 0),
 datetime.datetime(2016, 12, 1, 0, 0),
 datetime.datetime(2016, 11, 1, 0, 0),
 datetime.datetime(2016, 10, 1, 0, 0),
 datetime.datetime(2016, 9, 1, 0, 0),
 datetime.datetime(2016, 8, 1, 0, 0),
 datetime.datetime(2016, 7, 1, 0, 0),
 datetime.datetime(2016, 6, 1, 0, 0),
 datetime.datetime(2016, 5, 1, 0, 0),
 datetime.datetime(2016, 4, 1, 0, 0),
 datetime.datetime(2016, 3, 1, 0, 0),
 datetime.datetime(2016, 2, 1, 0, 0),
 datetime.datetime(2016, 1, 1, 0, 0),
 datetime.datetime(2015, 12, 1, 0, 0),
 datetime.datetime(2015, 11, 1, 0, 0),
 datetime.datetime(2015, 10, 1, 0, 0),
 datetime.datetime(2015, 9, 1, 0, 0),
 datetime.datetime(2015, 8, 1, 0, 0),
 datetime.datetime(2015, 7, 1, 0, 0),
 datet

## Data collection indexes <a id='indexes'></a>

In [24]:
con_data.index_information()

{'_id_': {'key': [('_id', 1)], 'ns': 'ECMWF.ERAINT_monthly', 'v': 1},
 'date_-1': {'key': [('date', -1)], 'ns': 'ECMWF.ERAINT_monthly', 'v': 1},
 'id_grid_1_date_-1': {'key': [('id_grid', 1), ('date', -1)],
  'ns': 'ECMWF.ERAINT_monthly',
  'v': 1},
 'year_1_id_grid_1': {'key': [('year', 1), ('id_grid', 1)],
  'ns': 'ECMWF.ERAINT_monthly',
  'v': 1}}

In [32]:
con_grid.find_one()

{'_id': ObjectId('5a088e938cb6b80d9b4c7f93'),
 'id_grid': 1,
 'loc': {'coordinates': [-180.0, 90.0], 'type': 'Point'}}

- The domain north of 20°N is chosen for EOF decomposition for Z70 hPa and SIC to focus on the extra-tropical variability 
- The area north of 20°S is chosen for SST to also include key regions of tropical SST variability

In [47]:
#slp_poly = [[-180,20],[-180,0],[180,0],[-180,0],[-180,20]]
slp_poly = [[-179.9,20],[-179.9,89.9],[179.9,89.9],[179.9,20],[-179.9,20]]

In [48]:
this_polygon = slp_poly
geo_qry = {"loc": 
           {"$geoWithin": {
               "$geometry": {
                   "type": "Polygon","coordinates": this_polygon,
                   "crs": {"type": "name", "properties": { "name": "urn:x-mongodb:crs:strictwinding:EPSG:4326" }}}}}}
res = con_grid.find(filter = geo_qry)
res.count()

OperationFailure: Only one simple loop is allowed in a big polygon: [ [ -179.9, 20 ], [ -179.9, 89.90000000000001 ], [ 179.9, 89.90000000000001 ], [ 179.9, 20 ], [ -179.9, 20 ] ]