###### To define the home range of H5 AIV for each month, we combined the virus diffusion areas across all HPD values in that month. The corresponding utilization distribution of the virus was the virus diffusion area at each HPD value in each month. For each monthly slice across a year, we sampled 1000 points at random geographical coordinates within the home range of the viruses.

In [1]:
import json
import pandas as pd
import os
import shutil
import random
import numpy as np
#from geojson_utils import point_in_multipolygon
from shapely import geometry as gt
from shapely import ops
from shapely.prepared import prep

In [2]:
## Load a land polygon used to test whether the random points fall on land.
land = r"../data/world.json"
# The context manager closes the file even if parsing or geometry
# construction raises.
with open(land) as f1:
    # Only the geometry of the first feature is used; it is wrapped in a
    # prepared geometry because it is queried with contains() many times.
    WORLD_LAND = prep(gt.shape(json.load(f1)['features'][0]['geometry']))

In [2]:
def json2df(file):
    '''
    Convert the highest posterior density (HPD) areas of the inferred virus
    diffusion into a DataFrame of merged polygons.

    file: path to a JSON file whose ``layers[0]['areas']`` entries each hold
          a ``startTime``, an ``attributes`` mapping with an ``HPD`` value,
          and a ``polygon`` with a list of ``xCoordinate``/``yCoordinate``
          vertices.
    out:  DataFrame indexed by (startTime, attributes) with one column,
          ``polygon``, holding the union of all areas in that group.
    '''
    # Close the file even if json.load raises.
    with open(file) as f:
        test = json.load(f)

    df = pd.DataFrame(test['layers'][0]['areas'])
    # Replace the attribute mapping with its numeric HPD level. Explicit
    # item assignment is used: attribute-style assignment only works when
    # the column already exists and silently sets a plain attribute otherwise.
    df['attributes'] = df['attributes'].apply(lambda x: float(x['HPD']))

    def _to_shape(cord):
        # Each area is a single ring of vertices, wrapped as a one-polygon
        # MultiPolygon (ring -> polygon -> multipolygon nesting).
        ring = [[x['xCoordinate'], x['yCoordinate']] for x in cord['coordinates']]
        return gt.shape({"type": "MultiPolygon", "coordinates": [[ring]]})

    df['polygon'] = df['polygon'].apply(_to_shape)

    # Merge all areas that share a time slice and an HPD level.
    groups = df.groupby(['startTime', 'attributes'])
    out_df = pd.DataFrame(
        groups.apply(lambda x: ops.unary_union(list(x.polygon))),
        columns=['polygon'],
    )
    return out_df

In [3]:
def get_random(convex_hull,num):
    '''
    Sample random points that lie inside one time slice's home range and on land.

    convex_hull: one row whose 'convex_hull' entry is the home-range geometry
                 for a time slice (month); the row's name is the start time.
                 The month is parsed as the second '/'-separated field of
                 that name (presumably 'YYYY/MM/...'; confirm against the data).
    num:         number of accepted points to return.
    out:         DataFrame with columns startTime, point, lat, lon, month,
                 one row per accepted point.

    NOTE: this is rejection sampling over the bounding box; the loop does not
    terminate if no point can satisfy both the hull and the land test.
    '''
    polygon_shp = convex_hull['convex_hull']
    prep_shp = prep(polygon_shp)
    stime = str(convex_hull.name)
    month = int(stime.split('/')[1])
    minx, miny, maxx, maxy = polygon_shp.bounds

    tlist_of_points = []
    # Use the `num` argument as the target count (the original read an
    # undefined global named `random_num`).
    while len(tlist_of_points) < num:
        pnt = gt.Point(random.uniform(minx, maxx), random.uniform(miny, maxy))
        if prep_shp.contains(pnt) and WORLD_LAND.contains(pnt):
            # y is latitude, x is longitude.
            tlist_of_points.append([stime, pnt, pnt.y, pnt.x, month])
    out = pd.DataFrame(tlist_of_points,columns=['startTime','point','lat','lon','month'])
    return out

In [4]:
def inclusion_relation(point,polygons):
    '''
    Test which polygons of a point's time slice contain that point.

    point:    one row with a 'startTime' label and a 'point' geometry.
    polygons: DataFrame indexed by start time with a 'polygon' column; all
              rows sharing the point's start time are tested, in row order
              (the caller relies on that order matching the HPD levels).
    out:      list with one contains() result per matching polygon row.
    '''
    # Selecting with a list key always yields a Series, even when only one
    # row matches; a scalar key would collapse a single match to a bare
    # geometry, which is then iterated instead of tested.
    matching = polygons.loc[[point['startTime']], 'polygon']
    target = point['point']
    return [poly.contains(target) for poly in matching]

In [7]:
def stream(virus_path,num,out_path):
    '''
    Sample random points per time slice and record which HPD polygons contain them.

    virus_path: path to the HPD JSON file read by json2df.
    num:        number of random points to draw for each time slice.
    out_path:   path of the CSV file to write.

    The CSV is indexed by startTime and holds the sampled point columns
    followed by one boolean column per HPD level (0.10 to 0.95 in steps of
    0.05). This assumes every time slice has exactly those 18 HPD levels,
    in ascending order.
    '''
    df_v = json2df(virus_path)
    df_v = df_v.reset_index(level='attributes')

    # Home range of each time slice: convex hull of the union of all its HPD polygons.
    time_group = df_v.groupby('startTime')
    time_hull = pd.DataFrame(
        time_group.apply(lambda x: ops.unary_union(list(x.polygon)).convex_hull),
        columns=['convex_hull'],
    )

    # Draw `num` points per time slice (the original passed an undefined
    # name, `random_num`, here instead of the `num` parameter).
    random_df = pd.concat(
        time_hull.apply(get_random, axis=1, num=num).to_list()
    ).reset_index(drop=True)

    # The spatial relationship between random points and each HPD
    spatial_relate = random_df.apply(inclusion_relation, axis=1, polygons=df_v)
    hpd_l = list(np.arange(10,100,5)/100)
    spatial_relate = pd.DataFrame(spatial_relate.tolist(), columns=hpd_l)
    random_relate = pd.concat([random_df,spatial_relate],axis=1).set_index('startTime')

    random_relate.to_csv(out_path)

In [11]:
# Run the sampling pipeline on the 2.3.2.1 HPD file, 1000 points per time slice.
random_num1 = 1000
virus_file1 = r'../data/HPD_10_95_2.3.2.1.json'
outfile1 = r'../data/random_df_2.3.2.1.csv'
stream(virus_file1, random_num1, outfile1)

In [13]:
# Run the sampling pipeline on the 2.3.4.4 HPD file, 1000 points per time slice.
random_num2 = 1000
virus_file2 = r'../data/HPD_10_95_2.3.4.4.json'
outfile2 = r'../data/random_df_2.3.4.4.csv'
stream(virus_file2, random_num2, outfile2)