In [1]:
import json
import os
from copy import deepcopy

import numpy as np
import pandas as pd
from scipy.stats import pearsonr, spearmanr
from shapely.geometry import Point, Polygon, shape
from tqdm import tqdm

# Data folder, located two directory levels above the current working directory.
data_dir = os.path.join(os.pardir, os.pardir, 'data')

In [31]:
def get_gsv_coverage(gsv_meta_filename, polygon_filename, out_filename, gsv_radius=50):
    """Estimate how much of each subdistrict is covered by street-view images.

    Image locations are read from ``<data_dir>/shapefiles/csv/<gsv_meta_filename>``
    and subdistrict geometries from
    ``<data_dir>/shapefiles/geojson/<polygon_filename>``. Every image located
    inside a subdistrict adds the area of one disc of radius ``gsv_radius`` to
    that subdistrict's ``image_area``. Overlapping discs are counted once per
    image, so ``image_area`` may exceed ``land_area``.

    The result (columns ``addrcode``, ``land_area``, ``image_area``) is written
    to ``<data_dir>/shapefiles/csv/<out_filename>`` without an index column, and
    the metadata length, coverage length and Pearson correlation are printed.

    Units: areas are computed in squared degrees and multiplied by a single
    factor of 111111 (the same factor used to convert ``gsv_radius`` to
    degrees), so they are NOT square metres. ``land_area`` and ``image_area``
    share the same scale, so their ratio and correlation are unaffected. The
    scale is kept as-is so that previously generated coverage files remain
    comparable.

    Parameters
    ----------
    gsv_meta_filename : str
        CSV with one row per image. It must contain ``date`` and ``degree``
        columns (both dropped) and exactly two further columns, read
        positionally as ``(lat, lng)``.
    polygon_filename : str
        GeoJSON file whose features carry an ``addrcode`` property and a
        Polygon or MultiPolygon geometry.
    out_filename : str
        Name of the CSV file to write.
    gsv_radius : float, default 50
        Radius of the disc attributed to each image, in the length unit that
        111111 per degree corresponds to (presumably metres).

    Raises
    ------
    ValueError
        If the metadata does not reduce to exactly two columns.
    """
    # One linear degree <-> length factor, used for the radius and (as in the
    # original output files) for both areas. See "Units" in the docstring.
    deg_scale = 111111

    csv_dir = os.path.join(data_dir, 'shapefiles', 'csv')
    geojson_dir = os.path.join(data_dir, 'shapefiles', 'geojson')

    # The same location can appear once per date/degree; keep unique locations.
    df_gsv = pd.read_csv(os.path.join(csv_dir, gsv_meta_filename))
    df_gsv = df_gsv.drop(['date', 'degree'], axis=1).drop_duplicates()

    coords = np.asarray(df_gsv.values, dtype=float)
    if coords.shape[1] != 2:
        raise ValueError(
            'expected exactly two columns (lat, lng) after dropping '
            'date/degree, got {}'.format(coords.shape[1])
        )
    lats, lngs = coords[:, 0], coords[:, 1]

    # GeoJSON is UTF-8; do not depend on the platform's default encoding.
    with open(os.path.join(geojson_dir, polygon_filename), encoding='utf-8') as f:
        data_polygon = json.load(f)

    # The disc around an image has the same area wherever the image is, so
    # compute it once instead of buffering every contained point.
    footprint_area = Point(0, 0).buffer(gsv_radius / deg_scale).area * deg_scale

    gsv_coverage = []
    for feature in tqdm(data_polygon['features']):
        addrcode = int(feature['properties']['addrcode'])

        # shape() builds the full geometry: every part of a MultiPolygon and
        # any interior holes, rather than only the first ring.
        region = shape(feature['geometry'])
        if region.is_empty:
            continue

        # Cheap vectorised prefilter: a point outside the bounding box cannot
        # be contained, so only the candidates get the exact (slow) test.
        min_lng, min_lat, max_lng, max_lat = region.bounds
        in_bbox = (
            (lngs >= min_lng) & (lngs <= max_lng)
            & (lats >= min_lat) & (lats <= max_lat)
        )
        n_inside = int(sum(
            region.contains(Point(lng, lat))
            for lat, lng in zip(lats[in_bbox], lngs[in_bbox])
        ))

        gsv_area = n_inside * footprint_area
        # Subdistricts with (almost) no imagery are left out of the table.
        if gsv_area < 1:
            continue
        gsv_coverage.append([addrcode, region.area * deg_scale, gsv_area])

    df_coverage = pd.DataFrame.from_records(
        gsv_coverage, columns=['addrcode', 'land_area', 'image_area']
    )
    # Explicit dtypes keep an empty result numeric instead of object-typed.
    df_coverage = df_coverage.astype(
        {'addrcode': 'int64', 'land_area': float, 'image_area': float}
    )
    df_coverage['image_area'] = df_coverage['image_area'].round(4)

    print('Matadata length:', len(df_gsv))
    print('Coverage length:', len(df_coverage))
    if len(df_coverage) >= 2:
        r_value, p_value = pearsonr(df_coverage.land_area, df_coverage.image_area)
        print('Pearsonr:', np.round([r_value, p_value], 4))
    else:
        # pearsonr needs at least two observations; still write the CSV below.
        print('Pearsonr: n/a (fewer than 2 subdistricts with coverage)')

    df_coverage.to_csv(os.path.join(csv_dir, out_filename), index=False)

In [5]:
# Build the coverage table from the Nakhon metadata and subdistrict files.
get_gsv_coverage(
    gsv_meta_filename='Nakhon-gsv-meta.csv',
    polygon_filename='Nakhon-subdistricts.geojson',
    out_filename='Nakhon-gsv-coverage.csv',
)

100%|██████████| 170/170 [06:04<00:00,  2.14s/it]

Matadata length: 108280
Coverage length: 170
Pearsonr: [0.2528 0.0009]





In [32]:
# Build the coverage table from the Krabi metadata and subdistrict files.
get_gsv_coverage(
    gsv_meta_filename='Krabi-gsv-meta.csv',
    polygon_filename='Krabi-subdistricts.geojson',
    out_filename='Krabi-gsv-coverage.csv',
)

100%|██████████| 112/112 [01:57<00:00,  1.05s/it]

Matadata length: 50089
Coverage length: 52
Pearsonr: [0.5413 0.    ]





In [10]:
# Build the coverage table from the Bangkok metadata and subdistrict files.
get_gsv_coverage(
    gsv_meta_filename='Bangkok-gsv-meta.csv',
    polygon_filename='Bangkok-subdistricts.geojson',
    out_filename='Bangkok-gsv-coverage.csv',
)

100%|██████████| 169/169 [07:18<00:00,  2.59s/it]

Matadata length: 125750
Coverage length: 169
Pearsonr: [0.5806 0.    ]





#### Merge gsv-coverages

In [47]:
# Coverage tables to merge, one CSV per region.
files = ['Bangkok-gsv-coverage.csv','Nakhon-gsv-coverage.csv', 'Krabi-gsv-coverage.csv']

coverage_frames = [
    pd.read_csv(os.path.join(data_dir, 'shapefiles', 'csv', name))
    for name in files
]

# ignore_index=True: each file is read with its own 0..n-1 index, so a plain
# concat would leave duplicate row labels in the merged frame.
df_concat = pd.concat(coverage_frames, axis=0, ignore_index=True)
df_concat = df_concat.round(2)
df_concat.head(3)

Unnamed: 0,addrcode,land_area,image_area
0,100508,212.97,194.85
1,104605,161.16,40.51
2,104604,98.9,28.09


In [48]:
# Report the number of merged rows, then write the combined table without the index.
merged_path = os.path.join(data_dir, 'shapefiles', 'csv', 'gsv-coverage.csv')
print('length:', len(df_concat), '\n')
df_concat.to_csv(merged_path, index=False)

length: 389 

