In [10]:
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last one
# (this is why cells below can show several `.head(1)` tables at once).
InteractiveShell.ast_node_interactivity = "all"

import os
import json
import pandas as pd
import numpy as np
from tqdm import tqdm_notebook

## Load Previous Choropleth Map Data

In [7]:

# Root of the data directory, resolved relative to the current working directory.
data_dir = os.path.join('..','..','data')

# The GeoJSON properties contain Thai text, so decode the file explicitly as UTF-8
# instead of relying on the platform's default encoding.
with open(os.path.join(data_dir,'maps','Nakhon-Si-Thammarat-choropleth-each-class.geojson'), encoding='utf-8') as f:
    shapefile = json.load(f)
# Preview the properties attached to the first feature.
shapefile['features'][0]['properties']

{'AP_EN': 'Phrom Khiri',
 'AP_IDN': '8002',
 'AP_TN': 'พรหมคีรี',
 'Area': 1070,
 'Avg_BI': 20,
 'Breeding_site_classes': {'bin': 1,
  'bowl': 0,
  'bucket': 314,
  'cup': 37,
  'jar': 177,
  'pottedplant': 147,
  'tire': 2,
  'vase': 0},
 'Breeding_site_counts': 678,
 'Breteau_index': {'2011': 0,
  '2012': 0,
  '2013': 0,
  '2014': 50,
  '2015': 33,
  '2016': 0,
  '2017': 0},
 'Dengue_cases': 25,
 'Density': 6,
 'Density_population': 8,
 'PV_EN': 'Nakhon Si Thammarat',
 'PV_IDN': '80',
 'PV_TN': 'นครศรีธรรมราช',
 'Population': 8387,
 'TB_EN': 'Thon Hong',
 'TB_IDN': '800204',
 'TB_TN': 'ทอนหงส์'}

## Load Survey Data

In [23]:
# Load Survey Data
def filter_survey(df_survey, index='bi'):
    """Drop outlying survey rows, judged separately for each address code.

    Rows are grouped by the 'addrcode' column (groups keep the order in which
    each code first appears). Within a group:

    * a single-row group is kept only if its `index` value is below 100;
    * a multi-row group keeps the rows whose `index` value lies within one
      sample standard deviation of the group's mean.

    Parameters
    ----------
    df_survey : pandas.DataFrame
        Survey table containing an 'addrcode' column and the `index` column.
    index : str, default 'bi'
        Name of the column whose values are screened.

    Returns
    -------
    pandas.DataFrame
        The surviving rows, concatenated group by group. An empty frame with
        the input's columns is returned when nothing survives or the input is
        empty (instead of letting ``pd.concat`` raise on an empty list).
    """
    kept = []
    # sort=False preserves first-appearance order of the address codes.
    for _, group in df_survey.groupby('addrcode', sort=False):
        values = group[index]
        if len(group) == 1:
            # A lone observation has no spread to compare against (its std is
            # NaN), so it is judged against a fixed ceiling instead.
            if values.iloc[0] < 100:
                kept.append(group.copy())
            continue
        within_one_std = (values - values.mean()).abs() <= values.std()
        kept.append(group[within_one_std].copy())

    if not kept:
        # Nothing to concatenate: return an empty frame with the same schema.
        return df_survey.iloc[0:0].copy()
    return pd.concat(kept, axis=0)

# Read the larval survey table, turn zeros into missing values and drop every
# row that has any missing value, then renumber the rows.
survey_csv = os.path.join(data_dir,'breeding-sites','csv','addrcode-index','larval-survey.csv')
df_survey = (
    pd.read_csv(survey_csv)
    .replace(0, np.nan)
    .dropna(axis=0, how='any')
    .reset_index(drop=True)
)

df_survey['addrcode'] = df_survey['addrcode'].astype('int')
df_survey['date'] = pd.to_datetime(df_survey['date'], format='%Y-%m')

# Index by date, order chronologically and keep only 2015 through 2017.
df_survey = df_survey.set_index('date').sort_index()
df_survey = df_survey.loc['2015':'2017']

# Remove per-address-code outliers in the 'bi' column.
df_survey = filter_survey(df_survey, index='bi')

df_survey.head(1)
# print('Total data points:',len(df_survey))

# Load Detected Breeding Sites
detect_csv = os.path.join(data_dir,'breeding-sites','csv','addrcode-index','detection.csv')
df_detect = pd.read_csv(detect_csv)
df_detect['date'] = pd.to_datetime(df_detect['date'], format='%Y-%m')
df_detect['addrcode'] = df_detect['addrcode'].astype('int')
df_detect = df_detect.set_index('date').sort_index()

df_detect.head(1)

Unnamed: 0_level_0,addrcode,hi,ci,bi
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015-01-01,810405,18.0,3.68,24.0


Unnamed: 0_level_0,addrcode,bin,bowl,bucket,cup,jar,pottedplant,tire,vase,total
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2016-02-01,810602,270,1,0,35,38,24,2,139,509


## Helper Functions

In [24]:
def get_detect(df_detect, addrcode, columns=None):
    """Look up detection data for one address code.

    Parameters
    ----------
    df_detect : pandas.DataFrame
        Detection table with an 'addrcode' column and, when `columns` is
        None, a 'total' column.
    addrcode : int
        Address code whose rows are selected.
    columns : column label or list of labels, optional
        When given, the matching rows restricted to these columns are
        returned instead of an average.

    Returns
    -------
    float or pandas.DataFrame/Series
        With `columns` omitted: the mean of 'total' over the matching rows,
        rounded to 2 decimals (NaN when no row matches). Otherwise: a copy of
        the requested columns for the matching rows.
    """
    rows = df_detect.loc[df_detect['addrcode'] == addrcode]
    if columns is None:
        # Average only the 'total' column: averaging the whole frame first is
        # wasted work and fails if any other column is non-numeric.
        return round(rows['total'].mean(), 2)
    # Breeding Site Feature
    return rows[columns].copy()

def get_survey(df_survey, dengue_season, addrcode):
    """Return the mean 'bi' value for one address code, rounded to 2 decimals.

    Parameters
    ----------
    df_survey : pandas.DataFrame
        Survey table with 'addrcode' and 'bi' columns; its index must expose
        a `.month` attribute (e.g. a DatetimeIndex).
    dengue_season : bool
        If truthy, only rows from months 6 through 11 are averaged;
        otherwise rows from all twelve months are used.
    addrcode : int
        Address code whose rows are averaged.

    Returns
    -------
    float
        Rounded mean of 'bi' over the selected rows (NaN when none match).
    """
    first_month, last_month = (6, 11) if dengue_season else (1, 12)
    months = list(range(first_month, last_month + 1))

    code_matches = df_survey['addrcode'] == addrcode
    month_matches = df_survey.index.month.isin(months)

    mean_bi = df_survey.loc[code_matches & month_matches, 'bi'].mean()
    return round(mean_bi, 2)

## Processing

In [27]:
x = []
cnt = 0

# Mean and standard deviation of the 'total' column across all detection rows;
# they define the band outside of which a feature's values are zeroed below.
detect_totals = df_detect['total']
mean_det, std_det = detect_totals.mean(), detect_totals.std()

for ind, feature in enumerate(shapefile['features']):

    prop = feature['properties']
    addrcode = int(prop['TB_IDN'])

    detect = get_detect(df_detect, addrcode)
    survey_entire_year = get_survey(df_survey, dengue_season=False, addrcode=addrcode)
    survey_dengue_season = get_survey(df_survey, dengue_season=True, addrcode=addrcode)

    # Address codes without matching rows produce NaN; store 0 instead.
    detect = 0 if np.isnan(detect) else detect
    survey_entire_year = 0 if np.isnan(survey_entire_year) else survey_entire_year
    survey_dengue_season = 0 if np.isnan(survey_dengue_season) else survey_dengue_season

    # Zero out everything when the mean detection lies more than one standard
    # deviation above or below the overall mean.
    above_band = detect > mean_det + std_det
    below_band = detect < mean_det - std_det
    if above_band or below_band:
        detect = survey_entire_year = survey_dengue_season = 0

    # `prop` is the feature's own properties dict, so this updates the shapefile in place.
    prop['BI_entire_year'] = survey_entire_year
    prop['BI_dengue_season'] = survey_dengue_season

In [28]:
# ensure_ascii=False writes the Thai property values as raw characters, so the
# file is opened explicitly as UTF-8; the platform's default encoding may be
# unable to encode them.
with open(os.path.join(data_dir, 'maps', 'Nakhon-Si-Thammarat-choropleth-dengue-season.geojson'), 'w', encoding='utf-8') as FILE:
    json.dump(shapefile, FILE, indent=4, ensure_ascii=False, sort_keys=True)