In [1]:
import pandas as pd
import numpy as np
import seaborn as sns

import datetime
from datetime import date
import random



In [2]:
def random_dates(start, end, n=10):

    start_u = start.value//10**9
    end_u = end.value//10**9

    return pd.to_datetime(np.random.randint(start_u, end_u, n), unit='s')

In [3]:
vax_desc = {'pcr':'PCR Test',
               'rec':'Proof Of Recovery',
               'jj':'Johnson & Johnson',
               'az1':'Astra-Zeneca Dose 1',
               'az2':'Astra-Zeneca Dose 2'}

vax_id_type = {'pcr':'test',
               'rec':'rec',
               'jj':'vax',
               'az1':'vax1',
               'az2':'vax2'}
vax_duration = {'test':3,'vax1':0,'vax2':365,'vax':365,'rec':180}
immunization_ids = [key for key,value in vax_id_type.items()]

patient_id_unique = random.sample(range(10000, 99999), 80)
patient_ids = random.choices(patient_id_unique, k=100) # patients can get vaxed multiple times
patient_immunization_ids = random.choices(immunization_ids, k=100)

cities = {'San Francisco':'USA',
          'New York':'USA',
          'Barcelona':'Spain',
          'Paris':'France',
          'London':'UK',
          'Milan':'Italy'
         }
med_center_id_unique = random.sample(range(1000, 9999), 30)
med_center_cities = random.choices([c for c in cities.keys()], k=30)
med_center_countries = [cities[c] for c in med_center_cities]

# generate random vax dates 
start = pd.to_datetime('2021-01-01')
end = pd.to_datetime(date.today())
immunization_dates = random_dates(start, end,n=100)

med_cen_ids = random.sample(range(10000, 90000), 100)



In [4]:
# Immunization dimension table
vax_dims = {'id':immunization_ids,
            'name':[vax_desc[vid] for vid in immunization_ids],
            'type':[value for key,value in vax_id_type.items()],
            'immune_days':[vax_duration[key] for key in vax_id_type.values()]}

vax_dim_df = pd.DataFrame.from_dict(vax_dims)
vax_dim_df.head()

Unnamed: 0,id,name,type,immune_days
0,pcr,PCR Test,test,3
1,rec,Proof Of Recovery,rec,180
2,jj,Johnson & Johnson,vax,365
3,az1,Astra-Zeneca Dose 1,vax1,0
4,az2,Astra-Zeneca Dose 2,vax2,365


In [5]:
# fact table

vax_records = {'patient_id':patient_ids,
               'immunization_id': patient_immunization_ids,
               'immunization_date': immunization_dates,
               'medical_center_id': med_cen_ids}

vax_rec_df = pd.DataFrame.from_dict(vax_records)
vax_rec_df['immunization_validity_date']=vax_rec_df.apply(
    lambda x: x['immunization_date']+ datetime.timedelta(days=vax_duration[vax_id_type['az2']]),axis=1)
vax_rec_df.head()

Unnamed: 0,patient_id,immunization_id,immunization_date,medical_center_id,immunization_validity_date
0,63517,az1,2021-03-04 18:07:31,79786,2022-03-04 18:07:31
1,99890,jj,2021-06-02 19:47:26,56229,2022-06-02 19:47:26
2,10898,az2,2021-01-12 10:29:44,17153,2022-01-12 10:29:44
3,72994,jj,2021-03-01 15:30:07,52507,2022-03-01 15:30:07
4,23384,pcr,2021-04-11 03:58:13,15768,2022-04-11 03:58:13


In [6]:
med_cen_records = {'med_center_id':med_center_id_unique,
                   'name':[f'Dummy Name {i}' for i in med_center_id_unique],
                   'city': med_center_cities,
                   'country':med_center_countries}
med_cen_df = pd.DataFrame.from_dict(med_cen_records)
med_cen_df.head()


Unnamed: 0,med_center_id,name,city,country
0,6517,Dummy Name 6517,New York,USA
1,3617,Dummy Name 3617,London,UK
2,6008,Dummy Name 6008,Milan,Italy
3,5638,Dummy Name 5638,New York,USA
4,3903,Dummy Name 3903,Milan,Italy


In [7]:
vax_rec_df[vax_rec_df.patient_id == 81886].immunization_validity_date.max()

NaT

In [11]:

from_to_combos = [[i,j] for j in cities for i in cities if i!=j]
from_to = random.choices(from_to_combos, k=50)

'''
for immunity status business logic:
get most latest immunity status of customer
see if the travel date is in range
'''

travel_records = {'customer_id':random.choices(patient_ids, k=50),
                  'travel_date':random_dates(start, end,n=50),
                  'from_city':[e[0] for e in from_to],
                  'to_city':[e[1] for e in from_to]
                 }

travel_df = pd.DataFrame.from_dict(travel_records)


travel_df['immunity_end_date'] = travel_df.apply(
    lambda x: vax_rec_df[vax_rec_df.patient_id == x.customer_id].immunization_validity_date.max(),axis=1)
travel_df['immunity_status'] = travel_df.apply(lambda x: True if x.immunity_end_date > x.travel_date else False, axis=1)
travel_df.head()


Unnamed: 0,customer_id,travel_date,from_city,to_city,immunity_end_date,immunity_status
0,16133,2021-02-26 12:51:24,San Francisco,London,2022-03-16 05:03:14,True
1,87115,2021-01-21 16:30:52,San Francisco,Milan,2022-05-08 19:08:37,True
2,53287,2021-05-02 04:57:32,Milan,Paris,2022-02-01 03:52:54,True
3,34631,2021-05-05 16:45:42,Milan,Barcelona,2022-02-18 05:23:26,True
4,39375,2021-01-28 20:23:11,London,Paris,2022-05-19 10:32:43,True
