In [None]:
import pandas as pd
import random
from collections import defaultdict

# --- simulation parameters ---
SIM_POPULATION_SIZE = 10_000  # total number of agents in the model
MAX_SIMULATION_TIME = 50      # how many years a run simulates
ADULT_START_AGE = 10          # age from which an agent counts as sexually active
MAX_LIVING_AGE = 100          # oldest age an agent may reach; older agents are replaced by a younger one (logic likely to change)

def map_age_group_to_age(age_group, max_live_age=MAX_LIVING_AGE):
    """Draw a random integer age that lies inside an age-group label.

    Args:
        age_group: Label such as ``'Under 5 years'``, ``'10 to 14 years'``
            or ``'85 years and over'``. Surrounding whitespace is ignored.
        max_live_age: Upper bound (inclusive) used for the open-ended
            ``'85 years and over'`` group.

    Returns:
        A random age within the group, both ends inclusive. Labels that
        cannot be parsed fall back to 30.
    """
    # Tolerate padded labels; non-string values (e.g. NaN from a CSV) are
    # left alone and end up in the fallback below.
    label = age_group.strip() if isinstance(age_group, str) else age_group

    if label == 'Under 5 years':
        return random.randint(0, 4)
    if label == '85 years and over':
        return random.randint(85, max_live_age)

    # Handles "X to Y years" format
    try:
        parts = label.split(' to ')
        start_age = int(parts[0])
        end_age = int(parts[1].replace(' years', ''))
        return random.randint(start_age, end_age)
    except (AttributeError, IndexError, TypeError, ValueError):
        # Only parsing problems are treated as "unknown label"; anything else
        # (e.g. KeyboardInterrupt) must propagate instead of being swallowed.
        return 30  # Default if parsing fails

# Read the age-by-sex population estimates.
df_pop = pd.read_csv("/Users/haileytoeppner/Desktop/ga_age_sex_cleaned.csv") #change to accurate file path

# Reshape to one row per (age group, sex) so the rows can be used for
# weighted sampling.
df_pop_male = (
    df_pop[['Age Group', 'Male_Estimate']]
    .rename(columns={'Male_Estimate': 'Population'})
    .assign(Gender='Male')
)
df_pop_female = (
    df_pop[['Age Group', 'Female_Estimate']]
    .rename(columns={'Female_Estimate': 'Population'})
    .assign(Gender='Female')
)
df_pop_combined = pd.concat([df_pop_male, df_pop_female])

# Each row's weight is its share of the total population.
df_pop_combined['Weight'] = df_pop_combined['Population'].div(
    df_pop_combined['Population'].sum()
)
# (age group, sex) pairs are the items that get sampled.
df_pop_combined['Age_Sex_Tuple'] = list(
    df_pop_combined[['Age Group', 'Gender']].itertuples(index=False, name=None)
)

# defining the agent class
class Agent:
    """One simulated person carrying demographic and HPV state.

    Args:
        agent_id: Identifier stored on the agent as ``id``.
        age_group: Age-group label; the concrete ``age`` is drawn from it
            with ``map_age_group_to_age``.
        sex: Sex label stored as given.
    """

    # Agents of this age or older are no longer counted as sexually active.
    ACTIVITY_END_AGE = 60

    def __init__(self, agent_id, age_group, sex):
        self.id = agent_id
        self.age_group = age_group
        self.sex = sex
        self.age = map_age_group_to_age(age_group)
        self.hpv_status = 'Susceptible' # S, I, C are the core states
        self.partners = set()
        ### CHANGE in phase 2 ###
        self.infection_duration = 0
        # Apply the same activity rule that is used on every later update, so
        # that a newly created agent aged ACTIVITY_END_AGE or more does not
        # start out as sexually active.
        self.check_activity()

    def infect(self):
        """Move a susceptible agent to 'Infected' and restart its counter.

        Agents in any other state are left unchanged.
        """
        if self.hpv_status == 'Susceptible':
            self.hpv_status = 'Infected'
            self.infection_duration = 0

    def update_status(self):
        """Advance the infection counter by one step for infected agents."""
        # ### CHANGE in phase 2 ### placeholder for disease progression
        # Only an ongoing infection accumulates duration.
        if self.hpv_status == 'Infected':
            self.infection_duration += 1

    def check_activity(self):
        """Recompute ``is_sexually_active`` from the agent's current age."""
        self.is_sexually_active = (
            ADULT_START_AGE <= self.age < self.ACTIVITY_END_AGE
        )

#model class
class HPVSimModel:
    """Agent-based HPV model over a fixed-size population.

    Args:
        pop_size: Number of agents to create and keep in the model.
        pop_data: DataFrame with an ``'Age_Sex_Tuple'`` column of
            (age group, sex) pairs and a matching ``'Weight'`` column used
            as sampling weights.
        initial_prevalence: Probability that a sexually active agent starts
            out infected (default 1%).
    """

    # Agents at or above this age face a yearly chance of dying.
    HIGH_RISK_AGE = 80
    # 5% chance of death per year for agents aged HIGH_RISK_AGE or older.
    HIGH_RISK_DEATH_PROB = 0.05
    # Age-group label used for agents that replace the dead.
    NEWBORN_AGE_GROUP = 'Under 5 years'

    def __init__(self, pop_size, pop_data, initial_prevalence=0.01):
        self.pop_size = pop_size
        self.schedule = []
        self.pop_data = pop_data
        self.time = 0
        self.results = defaultdict(list)
        self.initial_prevalence = initial_prevalence
        self._initialize_population(pop_data)

    def _initialize_population(self, pop_data):
        """Create ``pop_size`` agents sampled from the weighted age/sex table."""
        age_sex_samples = random.choices(
            pop_data['Age_Sex_Tuple'].tolist(),
            weights=pop_data['Weight'].tolist(),
            k=self.pop_size,
        )

        for i, (age_group, sex) in enumerate(age_sex_samples):
            agent = Agent(i, age_group, sex)
            self.schedule.append(agent)

            # A share of sexually active agents start infected - will need to
            # change later on by age group. Going through infect() keeps the
            # state change in one place.
            if agent.is_sexually_active and random.random() < self.initial_prevalence:
                agent.infect()

    def _track_results(self):
        """Append the current time and per-status head counts to ``results``."""
        status_counts = defaultdict(int)
        for agent in self.schedule:
            status_counts[agent.hpv_status] += 1

        self.results['Time'].append(self.time)
        self.results['Total_Population'].append(self.pop_size)
        for status in ('Susceptible', 'Infected', 'Cleared'):
            self.results[status].append(status_counts.get(status, 0))

    def _update_demographics(self):
        """Age every agent by one year and replace those who die.

        An agent dies when it exceeds ``MAX_LIVING_AGE``, or with probability
        ``HIGH_RISK_DEATH_PROB`` per year once it is ``HIGH_RISK_AGE`` or
        older. A dead agent's slot and id are reused by a new young agent, so
        the population size stays constant.
        """
        # Built once per step rather than once per death.
        options = self.pop_data['Age_Sex_Tuple'].tolist()
        weights = self.pop_data['Weight'].tolist()

        for i, agent in enumerate(self.schedule):
            agent.age += 1
            agent.check_activity()

            died = agent.age > MAX_LIVING_AGE or (
                agent.age >= self.HIGH_RISK_AGE
                and random.random() < self.HIGH_RISK_DEATH_PROB
            )
            if not died:
                continue

            # Only the sex of the sampled pair is used; the replacement is
            # created in the youngest age group so that its age (0-4) and its
            # age_group label agree.
            _, sex = random.choices(options, weights=weights, k=1)[0]
            newborn = Agent(agent.id, self.NEWBORN_AGE_GROUP, sex)
            newborn.is_sexually_active = False
            self.schedule[i] = newborn

    def step(self):
        """Advance the model by one year and record the resulting state."""
        self.time += 1

        # PHASE 1: Aging and Replacement
        self._update_demographics()

        # PHASE 2 Placeholder: Network and Transmission
        # (This is where the infection will spread)

        # PHASE 3 Placeholder: Disease Progression
        # (This is where infection will clear)

        self._track_results()

    def run_model(self, max_time=None):
        """Run the simulation and return the recorded results.

        Args:
            max_time: Number of yearly steps to run. Defaults to the
                module-level ``MAX_SIMULATION_TIME``.

        Returns:
            A DataFrame with one row for the initial state plus one row per
            step.
        """
        if max_time is None:
            max_time = MAX_SIMULATION_TIME

        self._track_results()
        for _ in range(max_time):
            self.step()

        return pd.DataFrame(self.results)

if __name__ == '__main__':
    model = HPVSimModel(pop_size=SIM_POPULATION_SIZE, pop_data=df_pop_combined)
    results_df = model.run_model()

    print("phase 1 complete")
    # run_model() records the initial state plus one row per simulated year
    # (MAX_SIMULATION_TIME + 1 rows), so print the whole frame;
    # head(MAX_SIMULATION_TIME) would leave out the final year.
    print(results_df.to_markdown(index=False))

# Notebook cell: echo the configured number of simulated years.
MAX_SIMULATION_TIME

phase 1 complete
|   Time |   Total_Population |   Susceptible |   Infected |   Cleared |
|-------:|-------------------:|--------------:|-----------:|----------:|
|      0 |              10000 |          9919 |         81 |         0 |
|      1 |              10000 |          9919 |         81 |         0 |
|      2 |              10000 |          9920 |         80 |         0 |
|      3 |              10000 |          9920 |         80 |         0 |
|      4 |              10000 |          9920 |         80 |         0 |
|      5 |              10000 |          9921 |         79 |         0 |
|      6 |              10000 |          9921 |         79 |         0 |
|      7 |              10000 |          9921 |         79 |         0 |
|      8 |              10000 |          9922 |         78 |         0 |
|      9 |              10000 |          9922 |         78 |         0 |
|     10 |              10000 |          9922 |         78 |         0 |
|     11 |              10000 |   

50