In [2]:
import pandas as pd
import numpy as np



# Load the gzip-compressed CSV exports from the working directory.
# Tables loaded with `.iloc[:, 1:]` discard their first column
# (presumably a saved index column -- TODO confirm against the raw files).
va_activity_loc_assign = pd.read_csv(
    'va_activity_location_assignment.csv.gz', compression='gzip'
).iloc[:, 1:]
va_activity_locations = pd.read_csv(
    'va_activity_locations.csv.gz', compression='gzip'
).iloc[:, 1:]
va_disease_outcome_target = pd.read_csv(
    'va_disease_outcome_target.csv.gz', compression='gzip'
).iloc[:, 1:]
# Loaded with every column kept.
va_disease_outcome_training = pd.read_csv(
    'va_disease_outcome_training.csv.gz', compression='gzip'
)
va_household = pd.read_csv(
    'va_household.csv.gz', compression='gzip'
).iloc[:, 1:]
# Loaded with every column kept.
va_person = pd.read_csv('va_person.csv.gz', compression='gzip')
va_population_network = pd.read_csv(
    'va_population_network.csv.gz', compression='gzip'
)
va_residence_locations = pd.read_csv(
    'va_residence_locations.csv.gz', compression='gzip'
).iloc[:, 1:]
# va_residence_locations.head()

In [24]:
##### Locations they have been to in the past week - and how frequently they visited each
##### Spread stats for the last week at those locations
##### Age, sex
##### Household number
##### Raw duration of time spent with infected neighbors
##### One-hot encoding of the types of activities participated in, or a vector weighted by how long they spent on each
#####

class GraphStats:
    """Per-person feature lookups over the population, contact and activity tables.

    The constructor only stores references to the tables it is given; every
    method reads from those tables and returns a freshly computed value
    without modifying them.
    """

    def __init__(self, va_activity_loc_assign, va_activity_locations, va_disease_outcome_target, va_disease_outcome_training, va_household, va_person, va_population_network, va_residence_locations) -> None:
        """Store the input tables as attributes of the same names.

        Columns read by the methods of this class:
          * va_person: 'pid', 'hid', 'age', 'sex'
          * va_population_network: 'pid1', 'pid2', 'duration'
          * va_disease_outcome_training: 'pid', 'day', 'state'
          * va_activity_loc_assign: 'pid', 'activity_type', 'duration', 'lid'
        The remaining tables are stored but not read by any method here.
        """
        self.va_activity_loc_assign = va_activity_loc_assign
        self.va_activity_locations = va_activity_locations
        self.va_disease_outcome_target = va_disease_outcome_target
        self.va_disease_outcome_training = va_disease_outcome_training
        self.va_household = va_household
        self.va_person = va_person
        self.va_population_network = va_population_network
        self.va_residence_locations = va_residence_locations

    def get_age_sex(self, pid):
        """Return the age and sex recorded for a person.

        Args:
            pid: Person id to look up in the 'pid' column of ``va_person``.

        Returns:
            A tuple ``(age, sex)`` of pandas Series holding the 'age' and
            'sex' values of every ``va_person`` row whose 'pid' equals
            ``pid`` (both Series are empty when the id is not present).
        """
        # Build the mask from the 'pid' *column*; `.loc['pid']` would look for
        # a row labelled 'pid' and raise KeyError on a default integer index.
        person = self.va_person
        res = person.loc[person['pid'] == pid]
        return (res['age'], res['sex'])

    def household_members(self, pid):
        """Return the ids of everyone sharing a household with ``pid``.

        The result includes ``pid`` itself.

        Args:
            pid: Person id to look up in ``va_person``.

        Returns:
            A numpy array with the 'pid' of every ``va_person`` row whose
            'hid' equals the 'hid' of the first row matching ``pid``.

        Raises:
            KeyError: If ``pid`` does not appear in ``va_person``.
        """
        person = self.va_person
        hids = person.loc[person['pid'] == pid, 'hid']
        if hids.empty:
            raise KeyError(f'pid {pid!r} not found in va_person')
        # Positional access: the filtered Series keeps the original row
        # labels, so `[0]` only works for the person stored at label 0.
        hid = hids.iloc[0]
        return person.loc[person['hid'] == hid, 'pid'].to_numpy()

    def get_raw_time_with_infected_day(self, pid, day):
        """Total contact duration between ``pid`` and neighbours infected on ``day``.

        A neighbour is the other endpoint of any ``va_population_network``
        row that has ``pid`` as 'pid1' or 'pid2'. A neighbour counts as
        infected when ``va_disease_outcome_training`` has a row for that
        neighbour with the given 'day' and 'state' equal to "I".

        Args:
            pid: Person id whose contacts are examined.
            day: Value matched against the 'day' column of the outcome table.

        Returns:
            The sum of 'duration' over all contact rows between ``pid`` and
            an infected neighbour; 0 when there are none. Each contact row is
            counted once, even if the outcome table lists the neighbour more
            than once for that day.
        """
        network = self.va_population_network
        is_first = network['pid1'] == pid
        interactions = network.loc[is_first | (network['pid2'] == pid)]

        # The neighbour is whichever endpoint is not `pid`.
        neighbor_pids = interactions['pid2'].where(
            interactions['pid1'] == pid, interactions['pid1']
        )
        # Drop self-contacts so `pid` is never treated as its own neighbour.
        candidate_pids = neighbor_pids[neighbor_pids != pid].unique()

        outcomes = self.va_disease_outcome_training
        infected_rows = (
            (outcomes['day'] == day)
            & (outcomes['state'] == 'I')
            & outcomes['pid'].isin(candidate_pids)
        )
        infected_pids = outcomes.loc[infected_rows, 'pid'].unique()

        return interactions.loc[neighbor_pids.isin(infected_pids), 'duration'].sum()

    def get_activity_vector(self, pid, num_activity_types=6):
        """Return the total duration ``pid`` spent on each activity type.

        Args:
            pid: Person id to look up in ``va_activity_loc_assign``.
            num_activity_types: Number of activity types, assumed to be coded
                1..num_activity_types in the 'activity_type' column.
                Defaults to 6.

        Returns:
            A float numpy array of length ``num_activity_types`` whose entry
            ``i - 1`` is the summed 'duration' of the person's rows with
            'activity_type' equal to ``i`` (0 when there are none). Rows with
            any other activity type are ignored.
        """
        assignments = self.va_activity_loc_assign
        user_activities = assignments.loc[assignments['pid'] == pid]
        totals = user_activities.groupby('activity_type')['duration'].sum()
        # Align onto the full 1..N range so missing types become 0.
        wanted_types = range(1, num_activity_types + 1)
        return totals.reindex(wanted_types, fill_value=0).to_numpy(dtype=float)

    def get_location_durations(self, pid):
        """Return the total duration ``pid`` spent at each location.

        Args:
            pid: Person id to look up in ``va_activity_loc_assign``.

        Returns:
            A dict mapping each 'lid' among the person's rows to the summed
            'duration' of those rows, with keys in order of first appearance.
            Empty when the person has no rows. Rows with a missing 'lid' are
            skipped (pandas groupby default).
        """
        assignments = self.va_activity_loc_assign
        user_activities = assignments.loc[assignments['pid'] == pid]
        # sort=False keeps locations in the order they first occur.
        return user_activities.groupby('lid', sort=False)['duration'].sum().to_dict()


In [28]:
# Build the feature helper from the tables loaded earlier in the notebook.
test = GraphStats(
    va_activity_loc_assign=va_activity_loc_assign,
    va_activity_locations=va_activity_locations,
    va_disease_outcome_target=va_disease_outcome_target,
    va_disease_outcome_training=va_disease_outcome_training,
    va_household=va_household,
    va_person=va_person,
    va_population_network=va_population_network,
    va_residence_locations=va_residence_locations,
)

# Spot-check: first 20 activity rows, then per-location totals for one person.
print(va_activity_loc_assign.head(20))
print(test.get_location_durations(4543749))
# Last expression of the cell, so the notebook shows its value as the output.
test.get_raw_time_with_infected_day(4543889, 21)

        hid      pid  activity_number  activity_type  start_time  duration   
0   1781818  4543749               19              1           0     14400  \
1   1781818  4543749               20              1       14400     10080   
2   1781818  4543749               22              2       25680     13020   
3   1781818  4543749               24              1       39900      6900   
4   1781818  4543749               26              3       47220       840   
5   1781818  4543749               28              1       48300     38099   
6   1781818  4543752               15              1           0     14400   
7   1781818  4543752               16              1       14400     16200   
8   1781818  4543752               18              2       32040     29160   
9   1781818  4543752               20              3       61620      1500   
10  1781818  4543752               22              1       64740     21659   
11  1781819  4543753               11              1           0

4860

In [5]:
# Show the contact-network table; as the cell's last expression it is rendered as the cell output.
va_population_network

Unnamed: 0.1,Unnamed: 0,pid1,pid2,lid,start_time,duration,activity1,activity2
0,15672,4544738,5223465,546,56400,300,3,3
1,15804,4546550,4494588,546,62700,1200,3,3
2,15805,4546550,4330194,546,62700,1200,3,3
3,15806,4546550,1426144,546,62700,600,3,3
4,15816,4361966,4546550,546,63840,60,3,3
...,...,...,...,...,...,...,...,...
228983,185116023,4548589,4548593,1002466962,83100,3299,1,1
228984,185116024,4548595,4548594,1002466963,0,14400,1,1
228985,185116025,4548595,4548594,1002466963,14400,9900,1,1
228986,185116026,4548595,4548594,1002466963,44100,2400,1,1
