In [1]:
import pandas as pd
import numpy as np

import warnings
# Silence every warning for the rest of the notebook.
# NOTE(review): this presumably also hides the pandas warnings about assigning to slices - confirm
# that this is intended.
warnings.filterwarnings("ignore")

# Folder containing the input contact files: the cells below read ROOT + name + '.csv'
ROOT = 'SPData/'

# Content


In this notebook we load the contact data collected in the schools from the folder `Data/SPData` and then generate a series of temporal graphs, each containing all the interactions among the individuals of two given classes on a given day. The output is saved in the folder `Data/SP_Classes_2_per_day`, with one file per pair of classes and per day; the file name identifies the school, the two classes and the day.

In [2]:
def PreProcessDFT(df, tres):
    '''This function preprocesses a dataframe of time-stamped contacts and returns it in the aggregated
    (i, j, t, τ) format. The input dataframe is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Contacts with the columns `DateTime` (datetime-like), `i` and `j` (node labels). The index and
        any other column are discarded.
    tres : float
        Time resolution in seconds: the time stamps are binned in windows of length `tres`.

    Returns
    -------
    pandas.DataFrame
        Columns `i`, `j`, `t`, `τ`, one row per distinct (i, j, t). The nodes are relabelled with the
        integers 0, ..., n-1 following the sorted order of the original labels, `t` is the index of the
        time window counted from the earliest contact of `df` and `τ` is the number of input rows that
        fall in that (i, j, t) cell.
    '''

    # work on a fresh frame so that the helper columns t and τ are not added to the caller's dataframe
    df = df.reset_index(drop = True)

    # convert time to epochs
    t0 = pd.to_datetime('1970-01-01 00:00:00')
    seconds = (df['DateTime'] - t0).dt.total_seconds()

    # shift t0 = 0 and set the time resolution
    df['t'] = ((seconds - seconds.min())/tres).astype(int)

    # every input row counts as one contact: the sum below turns this into the number of contacts
    df['τ'] = 1

    # keep only the relevant columns
    df = df[['i', 'j', 't', 'τ']]

    # rename the nodes
    all_nodes = np.unique(df[['i', 'j']].values)
    mapper = dict(zip(all_nodes, np.arange(len(all_nodes))))
    df = df.assign(i = df['i'].map(mapper), j = df['j'].map(mapper))

    # move to the (i,j,t,τ) format
    df = df.groupby(['i', 'j', 't']).sum().reset_index()

    return df

def ConvertDateFormat(x):
    '''Rewrite a date given as dd/mm/YY in the form dd-mm-YY.

    The string `x` must be made of exactly three fields separated by '/', otherwise the unpacking
    below raises a ValueError.'''
    day, month, year = x.split('/')
    return '-'.join([day, month, year])

def GenerateData(df, name, tres = 60, out_dir = 'SP_Classes_2_per_day/'):
    '''This function takes a dataframe of contacts, splits it by day, pre-processes each day with
    PreProcessDFT and saves one csv file per day. The input dataframe is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Contacts with the columns `t` (time stamp in seconds, converted with
        `pd.to_datetime(..., unit = 's')`), `i` and `j` (node labels).
    name : str
        Prefix of the output files: each day is saved as `<out_dir>/<name>-<day>.csv`, where `<day>`
        is the calendar date of the converted time stamps.
    tres : float, optional
        Time resolution in seconds passed to PreProcessDFT (default 60).
    out_dir : str, optional
        Output folder (default 'SP_Classes_2_per_day/'). It is created if it does not exist.

    Returns
    -------
    None
    '''
    import os

    # build new frames instead of adding columns to the input (or to a slice of it)
    contacts = df[['i', 'j']].assign(DateTime = pd.to_datetime(df['t'], unit = 's'))
    contacts = contacts.assign(Date = contacts['DateTime'].dt.date.astype(str))
    all_days = contacts['Date'].unique()

    # PreProcessDFT receives frames indexed by Date with the columns DateTime, i, j
    contacts = contacts.set_index('Date')

    # make sure that the output folder exists before writing
    if out_dir:
        os.makedirs(out_dir, exist_ok = True)

    # split by day, preprocess the graphs and save them
    for day in all_days:
        dft = PreProcessDFT(contacts.loc[[day]], tres)
        dft.to_csv(os.path.join(out_dir, name + '-' + day + '.csv'))

    return

In [3]:
name = 'primaryschool'

# columns: time stamp, the two nodes in contact and the class of each of the two nodes
df = pd.read_csv(ROOT + name + '.csv', sep = '\t', header = None, names = ['t', 'i', 'j', 'C1', 'C2'])
all_classes = ['1A', '1B', '2A', '2B', '3A', '3B', '4A', '4B', '5A', '5B']

# visit every unordered pair of distinct classes exactly once
for i, a in enumerate(all_classes):
    for b in all_classes[i + 1:]:

        # consider only the contacts in which both nodes belong to one of the two given classes
        idx = np.isin(df.C1, [a, b]) & np.isin(df.C2, [a, b])
        dfAB = df[idx]

        # pre-process and save the data
        GenerateData(dfAB[['t', 'i', 'j']], name + '-' + a + '_' + b)

In [4]:
name = 'highschool_2011'

# columns: time stamp, the two nodes in contact and the class of each of the two nodes
df = pd.read_csv(ROOT + name + '.csv', sep = '\t', header = None, names = ['t', 'i', 'j', 'C1', 'C2'])
all_classes = ['PC', 'PC*', 'PSI*']

# visit every unordered pair of distinct classes exactly once
for i, a in enumerate(all_classes):
    for b in all_classes[i + 1:]:

        # consider only the contacts in which both nodes belong to one of the two given classes
        idx = np.isin(df.C1, [a, b]) & np.isin(df.C2, [a, b])
        dfAB = df[idx]

        # pre-process and save the data
        GenerateData(dfAB[['t', 'i', 'j']], name + '-' + a + '_' + b)

In [5]:
name = 'highschool_2012'

# columns: time stamp, the two nodes in contact and the class of each of the two nodes
df = pd.read_csv(ROOT + name + '.csv', sep = '\t', header = None, names = ['t', 'i', 'j', 'C1', 'C2'])
all_classes = ['MP*1', 'MP*2', 'PC', 'PC*', 'PSI*']

# visit every unordered pair of distinct classes exactly once
for i, a in enumerate(all_classes):
    for b in all_classes[i + 1:]:

        # consider only the contacts in which both nodes belong to one of the two given classes
        idx = np.isin(df.C1, [a, b]) & np.isin(df.C2, [a, b])
        dfAB = df[idx]

        # pre-process and save the data
        GenerateData(dfAB[['t', 'i', 'j']], name + '-' + a + '_' + b)

In [6]:
name = 'highschool_2013'

# columns: time stamp, the two nodes in contact and the class of each of the two nodes
# (this file is separated by spaces, not by tabs)
df = pd.read_csv(ROOT + name + '.csv', sep = ' ', header = None, names = ['t', 'i', 'j', 'C1', 'C2'])
all_classes = ['2BIO1', '2BIO2', '2BIO3', 'MP', 'MP*1', 'MP*2', 'PC', 'PC*','PSI*']

# visit every unordered pair of distinct classes exactly once
for i, a in enumerate(all_classes):
    for b in all_classes[i + 1:]:

        # consider only the contacts in which both nodes belong to one of the two given classes
        idx = np.isin(df.C1, [a, b]) & np.isin(df.C2, [a, b])
        dfAB = df[idx]

        # pre-process and save the data
        GenerateData(dfAB[['t', 'i', 'j']], name + '-' + a + '_' + b)