# Transform GeoJSON Properties into Records

In [1]:
# Display the value of every top-level expression in a cell, not only the last
# one (a later cell relies on this to show both df.head() and df.tail()).
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# NOTE(review): the wildcard import places every plotly.graph_objs name in the
# notebook namespace; keep the statement order as-is so shadowing is unchanged.
from plotly.graph_objs import *
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Enable plotly's offline notebook rendering (connected=True presumably loads
# plotly.js from the CDN rather than embedding it -- confirm against plotly docs).
init_notebook_mode(connected=True)

from collections import Counter

import plotly.graph_objs as go
import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os, json
import urllib.request 

# Breeding-site class names, each mapped to a zero count. Later cells iterate
# over these keys, so the key order is kept exactly as originally written.
brd_count = dict.fromkeys(
    ('bin', 'bowl', 'bucket', 'cup', 'tire', 'pottedplant', 'jar', 'vase'),
    0,
)

## Sample data in GeoJSON file

In [2]:
# Load the Bangkok detection results. The file contains Thai text, so the
# encoding is pinned to UTF-8 instead of relying on the platform default
# (which is not UTF-8 on every OS/locale).
detection_path = os.path.join('..','data','breeding-sites','geojson','Bangkok-detection.geojson')
with open(detection_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

# Print the first feature as a schema sample; ensure_ascii=False keeps the
# Thai strings readable instead of \u-escaped.
print(json.dumps(data['features'][0], ensure_ascii=False, indent=4))

{
    "geometry": {
        "coordinates": [
            100.84767539,
            13.68547656
        ],
        "type": "Point"
    },
    "type": "Feature",
    "properties": {
        "brd_sites": {
            "144": {
                "count": {
                    "bin": 1
                },
                "sum_each": 1
            }
        },
        "sum": 1,
        "province": "กรุงเทพมหานคร",
        "image_name": "13.68547656_100.84767539",
        "subdist": "แขวงขุมทอง",
        "district": "เขตลาดกระบัง",
        "date": {
            "year": "2012",
            "month": "04"
        }
    }
}


In [3]:
# Flatten every GeoJSON feature into one flat record: the feature's properties
# plus lng/lat, a 'YYYY-MM' date string, and one summed count per class listed
# in `brd_count`.
rows = []
for feature in data['features']:

    # Shallow copy so the keys added/removed below do not alter `data`.
    row = feature['properties'].copy()

    # The coordinate pair is unpacked as (lng, lat), matching the sample
    # feature printed above (100.8..., 13.6...).
    lng, lat = feature['geometry']['coordinates']
    row['lng'], row['lat'] = lng, lat

    row['date'] = row['date']['year']+'-'+row['date']['month']

    # Seed every class counter before looking at 'brd_sites'. Previously the
    # counters were only created inside the loop below, so a feature with an
    # empty 'brd_sites' produced missing values (NaN, float columns) in the
    # DataFrame instead of integer zeros.
    for cls in brd_count:
        row.setdefault(cls, 0)

    # 'brd_sites' maps a key (called "degree" in the original code) to a dict
    # holding per-class counts; sum those counts over all entries.
    for site in row['brd_sites'].values():
        detected_brd = site['count']
        for cls in brd_count:
            row[cls] += detected_brd.get(cls, 0)

    # Drop the fields that should not become DataFrame columns.
    _ = row.pop('brd_sites')
#     _= row.pop('directory')
    _ = row.pop('image_name')

    rows.append(row)

# Build the frame in one pass: drop the 'province' column, parse the
# 'YYYY-MM' strings into timestamps and use them as a sorted index.
df = (
    pd.DataFrame(rows)
    .drop('province', axis=1)
    .assign(date=lambda frame: pd.to_datetime(frame['date'], format='%Y-%m'))
    .set_index('date')
    .sort_index()
)
print('Most common year:', Counter(df.index.year).most_common())

# Keep 2011-2017 only (both bounds inclusive). The year tally printed above
# also shows records dated 1970 and 2018, which this slice removes.
df = df.loc['2011':'2017']
df.head()
df.tail()
print('Total:',len(df))

Most common year: [(2017, 35745), (2012, 23587), (2011, 9784), (2016, 9108), (2014, 293), (2015, 273), (1970, 168), (2013, 84), (2018, 61)]


Unnamed: 0_level_0,bin,bowl,bucket,cup,district,jar,lat,lng,pottedplant,subdist,sum,tire,vase
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2011-08-01,0,0,3,0,เขตบึงกุ่ม,0,13.7765,100.666093,0,แขวงคลองกุ่ม,3,0,0
2011-08-01,0,0,3,0,เขตบางกะปิ,0,13.7765,100.666093,0,แขวงหัวหมาก,3,0,0
2011-08-01,0,0,3,0,เขตคันนายาว,0,13.7765,100.666093,0,แขวงรามอินทรา,3,0,0
2011-08-01,0,0,3,0,เขตคันนายาว,0,13.7765,100.666093,0,แขวงคันนายาว,3,0,0
2011-09-01,0,0,0,0,เขตตลิ่งชัน,0,13.770305,100.427176,1,แขวงบางระมาด,1,0,0


Unnamed: 0_level_0,bin,bowl,bucket,cup,district,jar,lat,lng,pottedplant,subdist,sum,tire,vase
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2017-12-01,0,0,0,0,เขตพระนคร,0,13.75477,100.493617,2,แขวงพระบรมมหาราชวัง,2,0,0
2017-12-01,0,0,0,1,เขตสาทร,0,13.717253,100.520327,1,แขวงยานนาวา,3,0,1
2017-12-01,0,0,0,0,เขตพระนคร,0,13.754846,100.492285,0,แขวงพระบรมมหาราชวัง,1,0,1
2017-12-01,0,0,0,0,เขตคันนายาว,0,13.802812,100.673727,1,แขวงรามอินทรา,1,0,0
2017-12-01,0,0,0,0,เขตทุ่งครุ,0,13.651933,100.52057,0,แขวงบางมด,1,1,0


Total: 78874


## Load subdistrict polygons

In [13]:
# Load the Bangkok subdistrict polygons. The properties include Thai names, so
# the encoding is pinned to UTF-8 instead of relying on the platform default.
polygon_path = os.path.join('..','data','shapefiles', 'Bangkok-subdistricts.geojson')
with open(polygon_path, 'r', encoding='utf-8') as file:
    data_polygon = json.load(file)

In [14]:
# Show the properties of the first polygon feature to learn which attribute
# names the subdistrict file provides.
first_polygon = data_polygon['features'][0]
first_polygon['properties']

{'AREA_': 22922521.274935,
 'DCODE': '1005',
 'DNAME': 'เขตบางเขน',
 'OBJECTID': 52.0,
 'PCODE': '10',
 'PERIMETER': 24078.875,
 'PNAME': 'กรุงเทพมหานคร',
 'SCODE': '100508',
 'SCODE_BMA': '100502',
 'SNAME': 'แขวงท่าแร้ง',
 'SUBDIST_': 65,
 'SUBDIST_ID': 64,
 'SUB_CODE': '0508',
 'water': 3007}

## Sum number of breeding sites in each subdistrict

In [15]:
# NOTE(review): this whole cell is disabled (commented out) and does not run.
# Before re-enabling it, check the following:
#   * It reads the polygon properties 'TB_IDN', 'PV_TN', 'AP_TN', 'TB_TN',
#     'PV_EN', 'AP_EN' and 'TB_EN', but the first Bangkok polygon printed
#     earlier in this notebook exposes 'PNAME', 'DNAME', 'SNAME', 'SCODE', ...
#     instead, so those lookups would presumably raise KeyError.
#   * tmp_sum is filled in brd_count's key order (bin, bowl, bucket, cup,
#     tire, pottedplant, jar, vase) while the column labels assigned below are
#     ordered (..., cup, jar, pottedplant, tire, vase); with an
#     insertion-ordered dict the jar/tire/... totals would be mislabelled.
#   * The output file is named 'Krabi-detection.csv' although the data loaded
#     in this notebook is for Bangkok.
# df_sum = []
# polygons = []
# province = 'กรุงเทพมหานคร'
# for i, feature in enumerate(data_polygon['features']):
#     prop = feature['properties']
    
#     addrcode = prop['TB_IDN']
#     province, district, subdist = prop['PV_TN'], prop['AP_TN'], prop['TB_TN']
#     province_en, district_en, subdist_en = prop['PV_EN'], prop['AP_EN'], prop['TB_EN']
#     value = df[(df.district == district) & (df.subdist == subdist)].copy()
    
#     if len(value) == 0:
#         continue
    
#     total = 0
#     tmp_sum = []
#     for cls in brd_count:
#         tmp_sum.append(value[cls].sum())
#         total += value[cls].sum()

#     df_sum.append([Counter(list(value.index)).most_common(1)[0][0], addrcode,
#                    province, district, subdist,
#                    province_en, district_en, subdist_en,
#                   ]+tmp_sum+[total])
    
# df_sum = pd.DataFrame.from_records(df_sum)
# df_sum.columns = ['date','addrcode','province','district','subdist',
#                   'province_en','district_en','subdist_en',
#                   'bin','bowl', 'bucket','cup','jar',
#                   'pottedplant','tire','vase','total']
# df_sum = df_sum.set_index('date')
# df_sum.drop_duplicates(inplace=True)

# df_sum.head()
# df_sum.tail()
# print('Total:',len(df_sum))

# df_sum.to_csv(os.path.join('..','data','breeding-sites','csv','Krabi-detection.csv'))