In [80]:
import json
import ast
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from zipfile import ZipFile

In [81]:
type_of_data = '_synthetic_data'
data = pd.read_csv('./data/test' + type_of_data + '.csv').head(1000)

In [82]:
data = data[['TAXI_ID', 'POLYLINE']]

In [83]:
print('Total Path Trail Records used', len(data))

Total Path Trail Records used 1000


In [84]:
taxi_id = np.unique(data['TAXI_ID'])

In [85]:
print('Total People', len(taxi_id))

Total People 309


In [86]:
coordinates = {}
trips = np.array(data)
for i in range(len(trips)):
    point = trips[i]
    keys = list(coordinates.keys())
    if point[0] in keys:
        appended_coordinate = ast.literal_eval(point[1]) + coordinates[point[0]]
        coordinates[point[0]] = appended_coordinate
    else:
        coordinates[point[0]] = ast.literal_eval(point[1])

In [87]:
taxi_records = [[key, value, 0] for key, value in coordinates.items()]

In [88]:
coronavirus_cases_count = 5
coronavirus_cases = random.sample(range(1, len(taxi_records)), coronavirus_cases_count) 
random_records = [taxi_records[i][0] for i in coronavirus_cases]

In [89]:
random_records

[20000235, 20000653, 20000180, 20000161, 20000574]

In [90]:
from math import sin, cos, sqrt, atan2, radians
R = 6373.0

def calculate_distance(lat2, lon2, lat1, lon1):
    lat1 = radians(lat1)
    lon1 = radians(lon1)
    lat2 = radians(lat2)
    lon2 = radians(lon2)
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = (sin(dlat/2))**2 + cos(lat1) * cos(lat2) * (sin(dlon/2))**2
    c = 2 * atan2(sqrt(a), sqrt(1-a))
    distance = R * c
    return(abs(distance))

In [91]:
threshold = 0.010

def check_if_contact(lat2, lon2, lat1, lon1):
    calculated_value = calculate_distance(lat1, lon1, lat2, lon2)
    if(calculated_value<=threshold):
        return True
    else:
        return False

In [92]:
lat1 = 52.2296756
lon1 = 21.0122287
lat2 = 52.2307756
lon2 = 21.0122287
check_if_contact(lat1, lon1, lat2, lon2)

False

In [93]:
distances = []
for i in taxi_records:
    person_travel_history = i[1]
    max_distance = 0
    for j in range(len(person_travel_history)-1):
        one = person_travel_history[j]
        two = person_travel_history[j+1]
        flag = calculate_distance(one[0], one[1], two[0], two[1])
        if(flag > max_distance):
            max_distance = flag
    distances.append(max_distance)
print('Max distance between any two coordinates', max(distances))

Max distance between any two coordinates 17.403047835066882


In [94]:
def calculate_total_encounters(history_one, history_two):
    count = 0
    for i in history_one:
        for j in history_two:
            if(check_if_contact(i[0], i[1], j[0], j[1])):
                count +=1 
    return count

In [95]:
chances = []
for i in taxi_records:
    chance = 0
    if i[0] not in random_records:
        person_travel_history = i[1]
        for coronavirus_case in coronavirus_cases:
            coronavirus_patient_travel_history = taxi_records[coronavirus_case][1]
            chance += calculate_total_encounters(person_travel_history, coronavirus_patient_travel_history)
    chances.append(chance)
    print(i[0], chance)

20000589 18
20000596 98
20000320 54
20000520 79
20000337 96
20000231 33
20000456 4
20000011 53
20000403 220
20000233 40
20000571 81
20000497 22
20000570 15
20000005 68
20000089 15
20000423 23
20000657 22
20000309 0
20000161 0
20000178 88
20000235 0
20000653 0
20000009 79
20000648 43
20000424 16
20000010 29
20000372 70
20000686 38
20000435 62
20000154 94
20000060 41
20000167 30
20000503 91
20000621 111
20000463 36
20000612 4
20000360 24
20000574 0
20000173 1
20000560 21
20000492 18
20000112 65
20000305 29
20000004 20
20000620 60
20000671 19
20000341 39
20000015 21
20000307 171
20000171 3
20000201 32
20000007 57
20000199 44
20000611 25
20000398 151
20000067 12
20000569 397
20000473 4
20000367 138
20000672 36
20000190 95
20000600 3
20000496 163
20000013 23
20000454 39
20000534 143
20000195 9
20000406 3
20000325 100
20000101 118
20000632 33
20000308 31
20000333 32
20000481 28
20000486 9
20000450 79
20000545 0
20000665 25
20000012 106
20000688 31
20000540 94
20000436 82
20000517 32
20000153

In [96]:
total_chance_record = []
for i in range(len(taxi_records)):
    taxi_record = taxi_records[i]
    record = [
        taxi_record[0], # taxi_id
        taxi_record[1], # path or trail
        chances[i] # total chances
    ]
    total_chance_record.append(record)

In [97]:
pd.DataFrame(total_chance_record)

Unnamed: 0,0,1,2
0,20000589,"[[-8.649639, 41.1541515], [-8.6502105, 41.1539...",18
1,20000596,"[[-8.64405, 41.1588855], [-8.6440365, 41.15893...",98
2,20000320,"[[-8.606529, 41.1479325], [-8.606403, 41.14800...",54
3,20000520,"[[-8.642146499999999, 41.1650145], [-8.6421555...",79
4,20000337,"[[-8.6603985, 41.15871], [-8.659701, 41.159758...",96
5,20000231,"[[-8.647038, 41.175985499999996], [-8.648088, ...",33
6,20000456,"[[-8.580231000000001, 41.1454935], [-8.5818240...",4
7,20000011,"[[-8.6307165, 41.154606], [-8.631414, 41.15389...",53
8,20000403,"[[-8.606529, 41.144643], [-8.606952, 41.144985...",220
9,20000233,"[[-8.61984, 41.148036], [-8.61984, 41.148027],...",40


In [98]:
total_encounter = pd.DataFrame(total_chance_record)
total_encounter = total_encounter.sort_values(by=2, ascending=False)
total_encounter.to_csv('./data/chances' + type_of_data + '.csv')

In [99]:
space_compatible = []
for i in taxi_records:
    taxi_id = i[0]
    trail = i[1]
    chance = total_encounter[2][list(total_encounter[0]).index(taxi_id)]
    for j in trail:
        space_compatible.append([j[0], j[1], chance])

In [100]:
pd.DataFrame(space_compatible).to_csv('./data/space_plotting' + type_of_data + '.csv', index=False)