### TB Data Processing

Divide into bins and count by Age group

In [10]:
library(openxlsx)
df <- read.xlsx("tests/bcdata.xlsx", startRow = 1)
breaks <- c(0, 1, 4, 14, 24, 34, 44, 54, 64, 74, Inf) 
labels <- c('Less than 1 year old', '1 to 4 years old', '5 to 14 years old', '15 to 24 years old', '25 to 34 years old', '35 to 44 years old', '45 to 54 years old', '55 to 64 years old', '65 to 74 years old', '75 years and older')
df$AgeGroup <- cut(df$Age, breaks = breaks, labels = labels, right = FALSE, include.lowest = TRUE)
age_counts <- table(df$AgeGroup)
age_counts <- age_counts[order(names(age_counts))]
case_counts <- as.data.frame(age_counts)
colnames(case_counts) <- c("Age.group", "ActiveCasesCount")
print(case_counts)

              Age.group ActiveCasesCount
1      1 to 4 years old                0
2    15 to 24 years old              129
3    25 to 34 years old              173
4    35 to 44 years old              151
5    45 to 54 years old              137
6     5 to 14 years old                0
7    55 to 64 years old              156
8    65 to 74 years old              143
9    75 years and older              111
10 Less than 1 year old                0


Using demographics data for rate of active cases per age group

In [11]:
total_counts = read.xlsx("tests/demographics_data.xlsx", startRow=1)
total_counts$Age.group <- sub("^\\d+ - ", "", total_counts$Age.group)

merged_data <- merge(case_counts, total_counts, by = "Age.group", all.x = TRUE, all.y = TRUE)

merged_data$IncidenceRate <- merged_data$ActiveCasesCount / merged_data$Population

print(merged_data)

              Age.group ActiveCasesCount     Population.group
1      1 to 4 years old                0 1 - Total Population
2    15 to 24 years old              129 1 - Total Population
3    25 to 34 years old              173 1 - Total Population
4    35 to 44 years old              151 1 - Total Population
5    45 to 54 years old              137 1 - Total Population
6     5 to 14 years old                0 1 - Total Population
7    55 to 64 years old              156 1 - Total Population
8    65 to 74 years old              143 1 - Total Population
9    75 years and older              111 1 - Total Population
10 Less than 1 year old                0 1 - Total Population
11             All ages               NA 1 - Total Population
   Place.of.residence            Sex        Date Population IncidenceRate
1          1 - Canada 1 - Both sexes July 1 2021      82741   0.000000000
2          1 - Canada 1 - Both sexes July 1 2021       1252   0.103035144
3          1 - Canada 1 - Both sex

Cross tabulate across gender and year of survey

In [12]:
crosstab <- xtabs( ~ Gender + Survey_Year, data = df)
print(crosstab)

      Survey_Year
Gender 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023
     F   32   34   28   28   29   18   26   30   31   21   35
     M   23   24   34   39   34   26   26   39   24   24   41
