In [1]:
import json
import ast
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from zipfile import ZipFile

In [2]:
data = pd.read_csv('./data/test.csv').head(1000)

In [3]:
data = data[['TAXI_ID', 'POLYLINE']]

In [4]:
print('Total Path Trail Records used', len(data))

Total Path Trail Records used 1000


In [5]:
taxi_id = np.unique(data['TAXI_ID'])

In [6]:
print('Total People', len(taxi_id))

Total People 309


In [7]:
coordinates = {}
trips = np.array(data)
for i in range(len(trips)):
    point = trips[i]
    keys = list(coordinates.keys())
    if point[0] in keys:
        appended_coordinate = ast.literal_eval(point[1]) + coordinates[point[0]]
        coordinates[point[0]] = appended_coordinate
    else:
        coordinates[point[0]] = ast.literal_eval(point[1])

In [8]:
taxi_records = [[key, value, 0] for key, value in coordinates.items()]

In [9]:
coronavirus_cases_count = 5
coronavirus_cases = random.sample(range(1, len(taxi_records)), coronavirus_cases_count) 
random_records = [taxi_records[i][0] for i in coronavirus_cases]

In [10]:
random_records

[20000473, 20000271, 20000338, 20000651, 20000423]

In [11]:
from math import sin, cos, sqrt, atan2, radians
R = 6373.0

def calculate_distance(lat2, lon2, lat1, lon1):
    lat1 = radians(lat1)
    lon1 = radians(lon1)
    lat2 = radians(lat2)
    lon2 = radians(lon2)
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = (sin(dlat/2))**2 + cos(lat1) * cos(lat2) * (sin(dlon/2))**2
    c = 2 * atan2(sqrt(a), sqrt(1-a))
    distance = R * c
    return(abs(distance))

In [12]:
threshold = 0.010

def check_if_contact(lat2, lon2, lat1, lon1):
    calculated_value = calculate_distance(lat1, lon1, lat2, lon2)
    if(calculated_value<=threshold):
        return True
    else:
        return False

In [13]:
lat1 = 52.2296756
lon1 = 21.0122287
lat2 = 52.2307756
lon2 = 21.0122287
check_if_contact(lat1, lon1, lat2, lon2)

False

In [14]:
distances = []
for i in taxi_records:
    person_travel_history = i[1]
    max_distance = 0
    for j in range(len(person_travel_history)-1):
        one = person_travel_history[j]
        two = person_travel_history[j+1]
        flag = calculate_distance(one[0], one[1], two[0], two[1])
        if(flag > max_distance):
            max_distance = flag
    distances.append(max_distance)
print('Max distance between any two coordinates', max(distances))

Max distance between any two coordinates 17.43246474762607


In [15]:
def calculate_total_encounters(history_one, history_two):
    count = 0
    for i in history_one:
        for j in history_two:
            if(check_if_contact(i[0], i[1], j[0], j[1])):
                count +=1 
    return count

In [16]:
chances = []
for i in taxi_records:
    chance = 0
    if i[0] not in random_records:
        person_travel_history = i[1]
        for coronavirus_case in coronavirus_cases:
            coronavirus_patient_travel_history = taxi_records[coronavirus_case][1]
            chance += calculate_total_encounters(person_travel_history, coronavirus_patient_travel_history)
    chances.append(chance)
    print(i[0], chance)

20000589 1
20000596 12
20000320 27
20000520 22
20000337 40
20000231 6
20000456 3
20000011 13
20000403 14
20000233 18
20000571 36
20000497 6
20000570 4
20000005 29
20000089 21
20000423 0
20000657 2
20000309 1
20000161 1
20000178 26
20000235 25
20000653 17
20000009 27
20000648 27
20000424 0
20000010 5
20000372 15
20000686 10
20000435 115
20000154 15
20000060 0
20000167 5
20000503 6
20000621 19
20000463 2
20000612 8
20000360 7
20000574 8
20000173 5
20000560 6
20000492 9
20000112 11
20000305 6
20000004 10
20000620 12
20000671 7
20000341 13
20000015 7
20000307 60
20000171 1
20000201 9
20000007 23
20000199 23
20000611 0
20000398 47
20000067 3
20000569 4
20000473 0
20000367 26
20000672 4
20000190 56
20000600 13
20000496 74
20000013 10
20000454 23
20000534 19
20000195 2
20000406 10
20000325 15
20000101 33
20000632 4
20000308 11
20000333 5
20000481 38
20000486 5
20000450 32
20000545 3
20000665 19
20000012 7
20000688 0
20000540 16
20000436 5
20000517 40
20000153 67
20000458 4
20000577 11
2000010

In [17]:
total_chance_record = []
for i in range(len(taxi_records)):
    taxi_record = taxi_records[i]
    record = [
        taxi_record[0], # taxi_id
        taxi_record[1], # path or trail
        chances[i] # total chances
    ]
    total_chance_record.append(record)

In [18]:
pd.DataFrame(total_chance_record)

Unnamed: 0,0,1,2
0,20000589,"[[-8.649342, 41.154102], [-8.649936, 41.154201...",1
1,20000596,"[[-8.644077, 41.158845], [-8.644023, 41.158926...",12
2,20000320,"[[-8.606601, 41.147955], [-8.606457, 41.14791]...",27
3,20000520,"[[-8.642142, 41.164992], [-8.642151, 41.165037...",22
4,20000337,"[[-8.660592, 41.158251], [-8.660205, 41.159169...",40
5,20000231,"[[-8.646642, 41.175711], [-8.647434, 41.17626]...",6
6,20000456,"[[-8.57952, 41.145948], [-8.580942, 41.145039]...",3
7,20000011,"[[-8.630433, 41.154831], [-8.631, 41.154381], ...",13
8,20000403,"[[-8.606484, 41.144625], [-8.606574, 41.144661...",14
9,20000233,"[[-8.61984, 41.148036], [-8.61984, 41.148036],...",18


In [19]:
total_encounter = pd.DataFrame(total_chance_record)
total_encounter = total_encounter.sort_values(by=2, ascending=False)
total_encounter.to_csv('./data/chances.csv')