In [None]:
from pathlib import Path
import os

import pandas as pd
import seaborn as sns

import src
from src.sim.agent import Agent
from src.sim.sim import Sim


def get_population_df(state, n_agents, n_internal_runs, save=True):
    
    hh_counter = 0

    dfs = []
    for rep in range(n_internal_runs):
        agent_data = []

        print(round((rep/n_internal_runs)*100),"%")
        
        model = Sim(state)

        population = model.create_soep_population(N=n_agents, agent_class=Agent)

        for i, hh in enumerate(population):
            hh_size = len(hh)
            
            hh_id = hh_counter
            hh_counter += 1
            
            for agent in hh:
                agent.hh_id = hh_id
                agent.hh_size = hh_size
                agent.rep = rep
                agent_data.append(vars(agent))
        
        df = pd.DataFrame(agent_data)
        
        dfs.append(df)
    
    df_total = pd.concat(dfs)
    df_total = df_total.reset_index(drop=True)
    df_total["state"] = state

    if save:
        population_data_folder = Path.joinpath(src.PATH, "important_outputs", "population")
        if not os.path.exists(population_data_folder):
            os.mkdir(population_data_folder)
        
        df_total.to_csv(
            Path.joinpath(population_data_folder, "population_" + str(state) + "_" + str(n_agents) + "_" + str(n_internal_runs) + ".csv"), 
            index=False,
            )

pygame 2.0.1 (SDL 2.0.14, Python 3.8.10)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [None]:
#get_population_df(state=2, n_agents=100000, n_internal_runs=60)
#get_population_df(state=8, n_agents=100000, n_internal_runs=60)
#get_population_df(state=9, n_agents=100000, n_internal_runs=60)
#get_population_df(state=10, n_agents=100000, n_internal_runs=60)

In [3]:
dfs = [pd.read_csv(Path.joinpath(src.PATH, "important_outputs", "population", f"population_{state}_100000_60.csv")) for state in ("2", "8", "9", "10")]
df = pd.concat(dfs)
del(dfs)

In [4]:
df.columns

Index(['name', 'residence_cell', 'x_grid_pos', 'y_grid_pos', 'population',
       'gender', 'age', 'infection', 'tick_of_exposure', 'tick_of_recovery',
       'tick_of_symptom_onset', 'home_cell', 'work_place', 'school',
       'kindergarten', 'fav_supermarkets', 'cell_of_infection', 'target_cell',
       'activity', 'activity_len_in_ticks', 'ticks_doing_this_activity',
       'group_dict', 'activities_done_today', 'stay_at_home', 'quarantine',
       'household_members', 'duration_s', 'duration_i', 'duration_r_a',
       'duration_r_m', 'nace2', 'nace2_short', 'work_hours_day_in_ticks',
       'hours_at_supermarket_in_ticks', 'student', 'hid', 'pid',
       'federal_state', 'hh_id', 'hh_size', 'rep', 'state'],
      dtype='object')

In [5]:
df.head()

Unnamed: 0,name,residence_cell,x_grid_pos,y_grid_pos,population,gender,age,infection,tick_of_exposure,tick_of_recovery,...,work_hours_day_in_ticks,hours_at_supermarket_in_ticks,student,hid,pid,federal_state,hh_id,hh_size,rep,state
0,2022343066048,,,,<class 'list'>,2,43,s,,,...,7.0,0.952701,0,260916,2609102,2,0,4,0,2
1,2022343066192,,,,<class 'list'>,2,4,s,,,...,0.0,0.952701,0,260916,2609104,2,0,4,0,2
2,2022343066240,,,,<class 'list'>,2,7,s,,,...,0.0,0.952701,0,260916,2609103,2,0,4,0,2
3,2022343065904,,,,<class 'list'>,1,42,s,,,...,7.0,1.0,0,260916,2609101,2,0,4,0,2
4,2022343065808,,,,<class 'list'>,2,76,s,,,...,0.0,2.0,0,3100617,31006101,2,1,1,0,2


In [6]:
df["kindergarten"] = df["age"].apply(lambda age: (1 if age <= 5 else 0))
df["school"] = df["age"].apply(lambda age: (1 if 5 < age < 20  else 0))

df["working"] = 0
df.loc[(df["student"] == 0) & (df["work_hours_day_in_ticks"] > 0), "working"] = 1

df["none"] = 0
df.loc[(df["kindergarten"] == 0) & (df["school"] == 0) & (df["student"] == 0) & (df["working"] == 0), "none"] = 1

In [7]:
df["job_status"] = "None"

df.loc[(df["kindergarten"] == 1), "job_status"] = "kindergarten_kid"
df.loc[(df["school"] == 1), "job_status"] = "pupil"

df.loc[(df["student"] == 1), "job_status"] = "student"

df.loc[(df["student"] == 1) & (df["work_hours_day_in_ticks"] == 0), "job_status"] = "student"
#df.loc[(df["student"] == 1) & (df["work_hours_day_in_ticks"] > 0), "job_status"] = "working_student"

df.loc[(df["job_status"] == "None") & (df["work_hours_day_in_ticks"] > 0), "job_status"] = "working"

## Share of status

In [18]:
table = df.groupby("state")["job_status"].value_counts() / df.groupby("state")["job_status"].count()
table = table.round(3)
table

state  job_status      
2      None                0.432
       working             0.340
       pupil               0.143
       kindergarten_kid    0.051
       student             0.035
8      working             0.435
       None                0.361
       pupil               0.131
       kindergarten_kid    0.050
       student             0.023
9      working             0.446
       None                0.360
       pupil               0.125
       kindergarten_kid    0.048
       student             0.021
10     working             0.437
       None                0.377
       pupil               0.126
       kindergarten_kid    0.045
       student             0.014
Name: job_status, dtype: float64

In [19]:
print(table.to_latex())

\begin{tabular}{llr}
\toprule
   &         &  job\_status \\
state & job\_status &             \\
\midrule
2  & None &       0.432 \\
   & working &       0.340 \\
   & pupil &       0.143 \\
   & kindergarten\_kid &       0.051 \\
   & student &       0.035 \\
8  & working &       0.435 \\
   & None &       0.361 \\
   & pupil &       0.131 \\
   & kindergarten\_kid &       0.050 \\
   & student &       0.023 \\
9  & working &       0.446 \\
   & None &       0.360 \\
   & pupil &       0.125 \\
   & kindergarten\_kid &       0.048 \\
   & student &       0.021 \\
10 & working &       0.437 \\
   & None &       0.377 \\
   & pupil &       0.126 \\
   & kindergarten\_kid &       0.045 \\
   & student &       0.014 \\
\bottomrule
\end{tabular}



## Average houshold size by status

In [10]:
table = df.groupby(["state", "job_status"])["hh_size"].mean()
table

state  job_status      
2      None                2.398013
       kindergarten_kid    3.999386
       pupil               4.157018
       student             2.188056
       working             2.428195
8      None                2.207290
       kindergarten_kid    4.169087
       pupil               4.282583
       student             2.779872
       working             2.668880
9      None                2.142372
       kindergarten_kid    3.667491
       pupil               3.971149
       student             2.547139
       working             2.490422
10     None                2.285616
       kindergarten_kid    3.511309
       pupil               3.910949
       student             2.132892
       working             2.219677
Name: hh_size, dtype: float64

In [11]:
print(table.to_latex())

\begin{tabular}{llr}
\toprule
   &         &   hh\_size \\
state & job\_status &           \\
\midrule
2  & None &  2.398013 \\
   & kindergarten\_kid &  3.999386 \\
   & pupil &  4.157018 \\
   & student &  2.188056 \\
   & working &  2.428195 \\
8  & None &  2.207290 \\
   & kindergarten\_kid &  4.169087 \\
   & pupil &  4.282583 \\
   & student &  2.779872 \\
   & working &  2.668880 \\
9  & None &  2.142372 \\
   & kindergarten\_kid &  3.667491 \\
   & pupil &  3.971149 \\
   & student &  2.547139 \\
   & working &  2.490422 \\
10 & None &  2.285616 \\
   & kindergarten\_kid &  3.511309 \\
   & pupil &  3.910949 \\
   & student &  2.132892 \\
   & working &  2.219677 \\
\bottomrule
\end{tabular}



## Household members by status

In [12]:
# sum up statuses by households and save it for each agent
df["hh_n_kindergarten"] = df.groupby(["state", "hh_id"])["kindergarten"].transform(sum)
df["hh_n_school"] = df.groupby(["state", "hh_id"])["school"].transform(sum)
df["hh_n_uni"] = df.groupby(["state", "hh_id"])["student"].transform(sum)
df["hh_n_working"] = df.groupby(["state", "hh_id"])["working"].transform(sum)
df["hh_n_none"] = df.groupby(["state", "hh_id"])["none"].transform(sum)


df["hh_n_total"] = df["hh_n_kindergarten"] + df["hh_n_school"] + df["hh_n_uni"] + df["hh_n_working"] + df["hh_n_none"]

# calculate average number of statuses in household by status
df_household_state = df.groupby(["state", "job_status"])[["hh_n_kindergarten", "hh_n_school", "hh_n_uni", "hh_n_working", "hh_n_none", "hh_n_total"]].mean()
df_household_state = df_household_state.round(2)
df_household_state

Unnamed: 0_level_0,Unnamed: 1_level_0,hh_n_kindergarten,hh_n_school,hh_n_uni,hh_n_working,hh_n_none,hh_n_total
state,job_status,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2,,0.08,0.35,0.02,0.21,1.74,2.4
2,kindergarten_kid,1.35,0.59,0.08,1.3,0.68,4.0
2,pupil,0.21,1.94,0.03,0.97,1.03,4.18
2,student,0.12,0.16,1.0,0.67,0.26,2.22
2,working,0.19,0.41,0.07,1.49,0.27,2.43
8,,0.08,0.2,0.02,0.35,1.56,2.21
8,kindergarten_kid,1.53,0.63,0.03,1.4,0.58,4.17
8,pupil,0.24,2.01,0.03,1.47,0.53,4.29
8,student,0.06,0.29,1.19,1.07,0.3,2.9
8,working,0.16,0.45,0.06,1.71,0.29,2.68


In [13]:
print(df_household.to_latex())

\begin{tabular}{llrrrrrr}
\toprule
   &         &  hh\_n\_kindergarten &  hh\_n\_school &  hh\_n\_uni &  hh\_n\_working &  hh\_n\_none &  hh\_n\_total \\
state & job\_status &                    &              &           &               &            &             \\
\midrule
2  & None &               0.08 &         0.35 &      0.02 &          0.21 &       1.74 &        2.40 \\
   & kindergarten\_kid &               1.35 &         0.59 &      0.08 &          1.30 &       0.68 &        4.00 \\
   & pupil &               0.21 &         1.94 &      0.03 &          0.97 &       1.03 &        4.18 \\
   & student &               0.12 &         0.16 &      1.00 &          0.67 &       0.26 &        2.22 \\
   & working &               0.19 &         0.41 &      0.07 &          1.49 &       0.27 &        2.43 \\
8  & None &               0.08 &         0.20 &      0.02 &          0.35 &       1.56 &        2.21 \\
   & kindergarten\_kid &               1.53 &         0.63 &      0.03 &       

In [17]:
df_household = df.groupby(["job_status"])[["hh_n_total", "hh_n_kindergarten", "hh_n_school", "hh_n_uni", "hh_n_working", "hh_n_none", ]].mean()
df_household = df_household.round(2)

job_status,None,kindergarten_kid,pupil,student,working
hh_n_total,2.27,3.85,4.09,2.47,2.46
hh_n_kindergarten,0.08,1.34,0.22,0.07,0.15
hh_n_school,0.24,0.6,1.88,0.24,0.4
hh_n_uni,0.02,0.03,0.03,1.1,0.04
hh_n_working,0.34,1.24,1.27,0.76,1.55
hh_n_none,1.59,0.64,0.69,0.3,0.31


In [16]:
print(df_household.to_latex())

\begin{tabular}{lrrrrrr}
\toprule
{} &  hh\_n\_kindergarten &  hh\_n\_school &  hh\_n\_uni &  hh\_n\_working &  hh\_n\_none &  hh\_n\_total \\
job\_status       &                    &              &           &               &            &             \\
\midrule
None             &               0.08 &         0.24 &      0.02 &          0.34 &       1.59 &        2.27 \\
kindergarten\_kid &               1.34 &         0.60 &      0.03 &          1.24 &       0.64 &        3.85 \\
pupil            &               0.22 &         1.88 &      0.03 &          1.27 &       0.69 &        4.09 \\
student          &               0.07 &         0.24 &      1.10 &          0.76 &       0.30 &        2.47 \\
working          &               0.15 &         0.40 &      0.04 &          1.55 &       0.31 &        2.46 \\
\bottomrule
\end{tabular}



In [None]:
df.groupby(["state", "student"])["hh_sum_work_hours"].mean()

state  student
2      0           6.099447
       1           8.174459
8      0           8.053618
       1          10.058744
9      0           7.554268
       1           8.974123
10     0           6.908731
       1           6.394527
Name: hh_sum_work_hours, dtype: float64

In [None]:
df.groupby(["state", "school"])["hh_sum_school"].mean()

state  school
2      0         0.370787
       1         1.946818
8      0         0.346102
       1         1.997435
9      0         0.303203
       1         1.828299
10     0         0.313653
       1         1.713296
Name: hh_sum_school, dtype: float64