# Start
This notebook displays the spatial patterns of pressure and temperature recorded by the stations.

In [None]:
# Imports
import os

import matplotlib.pyplot as plt
import pandas as pd

import domain.mongodb_engine as mongodb
from domain.base import Station
from helpers.utils import (
    get_station_coordinates,
    select_near,
    add_alias
)
%matplotlib inline

First, let's load the station data from the MongoDB instance running in the background.

In [None]:
# Load data from database
# Modules of interest and the variables each one carries.
module_list = {'thermo_module': ['temperature', 'pressure']}

# Fetch every station document and wrap each one in a Station object.
db_connector = mongodb.MongoDBConnector()
stations = [
    Station.from_dict(record)
    for record in db_connector.db.stations.find()
]
print("Found %d stations in area." % len(stations))

Next, convert each station's data to a DataFrame:

In [None]:
from joblib import Parallel, delayed

def convert_module(station, attribute):
    """Replace ``station.<attribute>`` with a regular 10-minute DataFrame.

    The raw module data is turned into a DataFrame, sorted by
    'valid_datetime', de-duplicated and indexed by that column. Non-empty
    frames are then averaged into 10-minute bins, and gaps of at most two
    consecutive bins are filled by time-weighted interpolation.

    Parameters
    ----------
    station : object
        Object carrying the module data under ``attribute``; it is modified
        in place.
    attribute : str
        Name of the attribute holding the module data.

    Returns
    -------
    object
        The same ``station`` instance, with the attribute replaced.
    """
    df = pd.DataFrame(getattr(station, attribute))

    # A module without any records yields a frame with no columns at all;
    # sorting on 'valid_datetime' would raise KeyError, so leave it as is.
    if not (df.empty and 'valid_datetime' not in df.columns):
        df = (
            df.sort_values('valid_datetime')
            .drop_duplicates()
            .set_index('valid_datetime', drop=True)
        )

    if not df.empty:
        # '10min' is the non-deprecated spelling of the former '10T' alias.
        df = df.resample('10min').mean().interpolate(method='time', limit=2)
    setattr(station, attribute, df)
    return station
    
    
def valid_module(station, attribute):
    """Return True when ``station`` has ``attribute`` and it is not None."""
    # A missing attribute falls back to None, so one lookup covers both cases.
    value = getattr(station, attribute, None)
    return value is not None


def create_dataframe_of_attribute(data, attribute, n_jobs=8):
    """Convert one module of every station to a DataFrame, in parallel.

    Parameters
    ----------
    data : list
        Station objects to process.
    attribute : str
        Name of the module attribute to convert (e.g. 'thermo_module').
    n_jobs : int, optional
        Number of joblib workers; defaults to 8, the previously fixed value.

    Returns
    -------
    list
        The converted stations as returned by ``convert_module``. Stations
        for which ``valid_module`` is False are left out of the result.
    """
    assert isinstance(data, list)

    # Filter first so that only stations with usable data reach the workers.
    eligible = [
        station for station in data if valid_module(station, attribute)
    ]
    return Parallel(n_jobs=n_jobs)(
        delayed(convert_module)(station, attribute) for station in eligible
    )

# Rebind `stations` to the converted list: each kept station now carries a
# DataFrame in 'thermo_module'; stations failing valid_module() are dropped.
stations = create_dataframe_of_attribute(stations, 'thermo_module')

In order to do large scale analysis and plotting, we need to convert all the separate station dataframes into a single large dataframe. We will do that now.

In [None]:
def create_dataframe_of_stations(stations, module):
    """Join one module of all stations into a single wide DataFrame.

    Every column of a station's frame gets the suffix '_<position>', where
    the position is the station's index in ``stations``. Stations failing
    ``valid_module`` are skipped, but still count towards the numbering.
    Frames are aligned on their index with an outer join.
    """
    frames = []
    for position, station in enumerate(stations):
        if not valid_module(station, module):
            continue
        suffix = '_' + str(position)
        frames.append(getattr(station, module).add_suffix(suffix))
    return pd.concat(frames, axis=1, join='outer')

# Wide frame with one '<column>_<station position>' column per station column.
data = create_dataframe_of_stations(stations, 'thermo_module')

Great! Now let's plot a few time series.

In [None]:
station_id = 101

# Pressure time series of a single station
fig, ax = plt.subplots(figsize=(12, 6))
data['pressure_' + str(station_id)].plot(ax=ax)
ax.set_title("Station {}".format(station_id))
ax.legend(loc='upper left')
plt.show()


In [None]:
# Temperature derivative: change between consecutive samples, smoothed with a
# 6-sample rolling mean (one hour if the data is on the 10-minute grid).
# Match on 'temperature_' so that 'temperature-gradient_*' columns left by an
# earlier run of this cell are not treated as temperature inputs.
old_columns = [c for c in data.columns.values if c.startswith('temperature_')]
new_columns = ['temperature-gradient_' + c[len('temperature_'):] for c in old_columns]
gradients = (
    data[old_columns]
    .diff()
    .rolling(window=6)
    .mean()
    .rename(columns=dict(zip(old_columns, new_columns)))
)
# Drop gradient columns from a previous run first, so that re-running the cell
# replaces them instead of appending duplicates.
data = pd.concat(
    [data.drop(columns=new_columns, errors='ignore'), gradients],
    axis=1, join='inner',
)

The next part is displaying the variables on a map.

In [None]:
def make_map(data, stations, element, index):
    """Draw the values of one element at one time step on a lon/lat scatter map.

    Parameters
    ----------
    data : pandas.DataFrame
        Wide frame whose columns are named '<element>_<station position>'.
    stations : list
        Station objects exposing ``latitude`` and ``longitude``. The numeric
        column suffix is used as the position in this list, which is the
        numbering applied by ``create_dataframe_of_stations``.
    element : str
        'pressure', 'temperature' or 'temperature-gradient'. Any other value
        produces a titled but otherwise empty figure.
    index : label
        Row label of ``data`` to plot.

    Returns
    -------
    matplotlib.figure.Figure
        The newly created figure.
    """
    assert isinstance(data, pd.DataFrame)

    prefix = element + '_'
    # Keep only '<element>_<digits>' columns, so every match maps to a station.
    columns = [
        c for c in data.columns.values
        if c.startswith(prefix) and c[len(prefix):].isdigit()
    ]
    vals = data.loc[index, columns]

    # Take the coordinates of exactly those stations that have a column.
    # Using all stations would misalign (or mismatch in length with) the
    # values whenever a station lacks this element.
    positions = [int(c[len(prefix):]) for c in columns]
    lats = [stations[i].latitude for i in positions]
    lons = [stations[i].longitude for i in positions]

    f = plt.figure(figsize=(15, 10))
    plt.title(element + ' - ' + str(index))
    if element == 'pressure':
        make_pressure_map(plt.gca(), lats, lons, vals)
    elif element == 'temperature':
        make_temperature_map(plt.gca(), lats, lons, vals)
    elif element == 'temperature-gradient':
        make_gradient_map(plt.gca(), lats, lons, vals)
    return f


def make_gradient_map(ax, lats, lons, vals):
    """Scatter temperature-gradient values with a colour scale of -0.15 to 0.15."""
    scatter(ax, lats, lons, vals, vmin=-0.15, vmax=0.15)
    
def make_temperature_map(ax, lats, lons, vals):
    """Scatter temperature values with a colour scale of 0 to 15."""
    scatter(ax, lats, lons, vals, vmin=0, vmax=15)
    
def make_pressure_map(ax, lats, lons, vals):
    """Scatter pressure values with a colour scale of 950 to 1050."""
    scatter(ax, lats, lons, vals, vmin=950, vmax=1050)

def scatter(ax, lats, lons, vals, vmin=None, vmax=None):
    """Plot coloured points on ``ax`` and attach a colour bar to it.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Axes to draw on.
    lats, lons : sequence
        Point coordinates; longitude goes on the x axis, latitude on the y axis.
    vals : sequence
        Values that determine the colour of each point.
    vmin, vmax : float, optional
        Limits of the colour scale, passed through to ``ax.scatter``.
    """
    sc = ax.scatter(
        lons, lats,
        c=vals,
        linewidths=0,
        vmin=vmin, vmax=vmax
    )
    # Attach the colour bar through the axes' own figure rather than pyplot's
    # current figure, so it works even when ``ax`` is not the active axes.
    ax.figure.colorbar(sc, ax=ax)
    
# Time step (row label looked up with data.loc) and element for a single map.
index = '2016-12-14 23:40'
element = 'temperature'
# Keep a handle on the returned figure.
f = make_map(data, stations, element, index)


Lastly, create a series of maps to use as the frames of an animation.

In [None]:
from datetime import datetime, timedelta

def date_range(start_date, end_date, minute_increment=30):
    """Yield timestamps from ``start_date`` up to and including ``end_date``.

    Parameters
    ----------
    start_date, end_date : datetime-like
        Bounds of the range. Nothing is yielded when ``start_date`` is later
        than ``end_date``.
    minute_increment : int or float, optional
        Step between consecutive timestamps in minutes; must be positive.

    Raises
    ------
    ValueError
        If ``minute_increment`` is not positive. As this is a generator, the
        error is raised when iteration starts.
    """
    # A zero or negative step never gets past end_date and would loop forever.
    if minute_increment <= 0:
        raise ValueError("minute_increment must be positive")

    step = timedelta(minutes=minute_increment)
    d = start_date
    while d <= end_date:
        yield d
        d += step


# Render one map per 10-minute step across the whole period and save each as
# a PNG frame.
start_date = data.index[0]
end_date = data.index[-1]
element = 'temperature'

# savefig does not create missing folders, so make sure the target exists.
output_dir = 'img/gif_images/' + element
os.makedirs(output_dir, exist_ok=True)

for index in date_range(start_date, end_date, 10):
    f = make_map(data, stations, element, index)
    fig_name = output_dir + '/' + element + '_' + str(index) + '.png'
    f.savefig(fig_name)
    # Close each figure once saved so they do not pile up in memory.
    plt.close(f)
    print(fig_name)
