# Map

This notebook is dedicated to the data processing required by the map visualisation. 

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
%matplotlib inline

In [2]:
# Directory holding the source CSV files. File names are appended to it by
# plain string concatenation, so the trailing slash is required.
BASE_DIR = '../data/climate-change-earth-surface-temperature-data/'

## Extracting data for a month

We first extract data for a given month in order to facilitate early stages of the map design.

In [3]:
# Read the per-city temperature CSV into a DataFrame
FILENAME = 'GlobalLandTemperaturesByCity.csv'
df = pd.read_csv('{}{}'.format(BASE_DIR, FILENAME))

We extract data for a given month:

In [6]:
# Month to extract, identified by its value in the `dt` column
DATE = '1900-01-01'
# Keep only that month's rows and renumber them from zero
df_m = df.loc[df['dt'] == DATE].reset_index(drop=True)
df_m.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude
0,1900-01-01,-0.989,0.588,Århus,Denmark,57.05N,10.33E
1,1900-01-01,5.763,1.008,Çorlu,Turkey,40.99N,27.69E
2,1900-01-01,0.947,0.771,Çorum,Turkey,40.99N,34.08E
3,1900-01-01,-25.819,1.058,Öskemen,Kazakhstan,50.63N,82.39E
4,1900-01-01,-20.97,1.505,Ürümqi,China,44.20N,87.20E


Coordinates are stored as strings with a hemisphere suffix (e.g. `57.05N`, `10.33E`); we convert them to signed values, negative for south and west:

In [4]:
def map_coordinate(coo):
    """
    Convert a hemisphere-suffixed coordinate string to a signed string.

    The trailing hemisphere letter is removed; southern ('S') and western
    ('W') coordinates get a leading minus sign, e.g. '57.05N' -> '57.05'
    and '70.67W' -> '-70.67'.

    A value without a trailing N/S/E/W letter is returned unchanged, so
    applying the function to an already converted value (for instance when
    a cell is re-run) does not strip a digit from it.

    The result is still a string; callers convert it to float if needed.
    """
    # Slicing (rather than indexing) also tolerates an empty string
    hemisphere = coo[-1:]
    if hemisphere not in ('N', 'S', 'E', 'W'):
        return coo

    value = coo[:-1]
    if hemisphere in ('S', 'W'):
        return '-' + value
    return value

In [6]:
# Replace both coordinate columns with their converted counterparts
df_m = df_m.assign(
    Latitude=df_m['Latitude'].map(map_coordinate),
    Longitude=df_m['Longitude'].map(map_coordinate),
)

Finally, we export the `json` data:

In [7]:
# The date and the uncertainty are not part of the exported records
unused_columns = ['dt', 'AverageTemperatureUncertainty']
df_m = df_m.drop(unused_columns, axis='columns')

In [8]:
# Wrap the rows as [{date: [row, ...]}].
# Rows with a missing value are skipped first: `json.dump` would otherwise
# write them as the bare token `NaN`, which is not valid JSON and is rejected
# by strict parsers.
records = [{DATE: df_m.dropna().to_dict(orient='records')}]

In [9]:
OUTNAME = 'temp_city_{}_.json'.format(DATE)
OUTPATH = '../app/data/'
# `ensure_ascii=False` writes non-ASCII characters (e.g. in city names) as-is,
# so the file encoding is pinned to UTF-8 rather than left to the platform
# default, which may be unable to encode them.
with open(OUTPATH + OUTNAME, 'w', encoding='utf-8') as f:
    json.dump(records, f, ensure_ascii=False)

## Extracting all years temperatures

We now generate the file containing, for every year from 1850 onwards, the yearly average temperature of each city.

In [5]:
# Parse the `dt` column into pandas datetimes so its year can be extracted
df['dt'] = pd.to_datetime(df['dt'])

In [6]:
# Display the column labels of the loaded frame
df.columns

Index(['dt', 'AverageTemperature', 'AverageTemperatureUncertainty', 'City',
       'Country', 'Latitude', 'Longitude'],
      dtype='object')

In [7]:

# Yearly mean temperature per city, from 1850 onward.
recent = df[df['dt'].dt.year >= 1850]

# A city name alone does not necessarily identify a place (the data also
# carries a country and coordinates), so the coordinates are part of the
# grouping key: grouping on the name only would average the temperatures
# *and* the coordinates of any distinct places sharing that name.
remain = (
    recent
    .assign(dt=recent['dt'].dt.year)   # replace the timestamp by its year
    .groupby(['City', 'Latitude', 'Longitude', 'dt'])['AverageTemperature']
    .mean()        # missing months are ignored by the mean
    .dropna()      # drop city/years without any measurement
    .reset_index()
)

# Convert the coordinates on the aggregated rows only, which are far fewer
# than the monthly rows they were computed from.
remain['Latitude'] = remain['Latitude'].map(map_coordinate).astype(float)
remain['Longitude'] = remain['Longitude'].map(map_coordinate).astype(float)

# Fixed column order for the exported records
remain = remain[['City', 'dt', 'AverageTemperature', 'Latitude', 'Longitude']]


In [8]:
# Temperature extremes over every city and year
temperatures = remain['AverageTemperature']
min_temp, max_temp = temperatures.min(), temperatures.max()

In [9]:
# Map each year to the list of that year's city records:
#   year -> [{City, dt, AverageTemperature, ...}, ...]
# The groups are iterated explicitly instead of going through
# `groupby.apply`, whose handling of the grouping column (`dt`) depends on
# the installed pandas version; this way every record keeps its `dt` field.
by_year = {
    int(year): yearly.to_dict(orient='records')
    for year, yearly in remain.groupby('dt')
}

# Bundle the temperature extremes with the per-year records
dataset = {
    'min': min_temp,
    'max': max_temp,
    'data': by_year
}

In [10]:
OUTNAME = 'temp_city_all.json'
OUTPATH = '../app/data/'
# `ensure_ascii=False` writes non-ASCII characters (e.g. in city names) as-is,
# so the file encoding is pinned to UTF-8 rather than left to the platform
# default, which may be unable to encode them.
with open(OUTPATH + OUTNAME, 'w', encoding='utf-8') as f:
    json.dump(dataset, f, ensure_ascii=False)