In [59]:
import pandas as pd
import pdfplumber

In [60]:
def parse_projected_population_table(
    table: "list[list[str | None]] | None",
) -> pd.DataFrame:
    """Turn an extracted projected-population table into a typed DataFrame.

    Parameters
    ----------
    table
        Rows of cell strings as passed in from ``Page.extract_table()``.
        The first row is used as the header; every following row is data.
        The header must contain ``"Year"`` and ``"Population"`` columns.

    Returns
    -------
    pd.DataFrame
        One row per data row, with ``Year`` and ``Population`` cast to
        ``int``. All other columns keep the values that were extracted.

    Raises
    ------
    ValueError
        If ``table`` is ``None`` or empty, i.e. no table was extracted.
    """
    # A missing table would otherwise surface as an opaque TypeError or
    # IndexError further down, so fail early with an actionable message.
    if not table:
        raise ValueError(
            "no projected population table was extracted from the page"
        )

    header, *rows = table
    # Header cells wrapped over several lines in the PDF contain "\n";
    # flatten them so column lookups by name work.
    columns = [
        name.replace("\n", " ") if isinstance(name, str) else name
        for name in header
    ]

    pops = pd.DataFrame(rows, columns=columns)
    # Strip thousands separators ("29,863" -> "29863") before the int cast.
    pops["Population"] = pops["Population"].str.replace(",", "", regex=False)
    return pops.astype({"Year": "int", "Population": "int"})


def parse_projected_population_comp_table(
    table: "list[list[str | None]] | None",
) -> pd.DataFrame:
    """Turn an extracted projected-vs-actual table into a typed DataFrame.

    Parameters
    ----------
    table
        Rows of cell strings as passed in from ``Page.extract_table()``.
        The first row is used as the header; every following row is data.
        After newlines in the header are replaced by spaces it must contain
        ``"Year"``, ``"Actual Population"``, ``"Population Projection"``,
        ``"Difference"`` and ``"Percent Difference"``. The input list is
        not modified.

    Returns
    -------
    pd.DataFrame
        One row per data row, with ``Year``, ``Actual Population``,
        ``Population Projection`` and ``Difference`` cast to ``int`` and
        ``Percent Difference`` cast to ``float`` (the ``%`` sign removed,
        the value itself not rescaled).

    Raises
    ------
    ValueError
        If ``table`` is ``None`` or empty, i.e. no table was extracted.
    """
    # A missing table would otherwise surface as an opaque TypeError or
    # IndexError further down, so fail early with an actionable message.
    if not table:
        raise ValueError(
            "no projected population comparison table was extracted "
            "from the page"
        )

    header, *rows = table
    # Build a new header list instead of assigning into table[0], so the
    # caller's data is left untouched.
    columns = [
        name.replace("\n", " ") if isinstance(name, str) else name
        for name in header
    ]

    pop_comp = pd.DataFrame(rows, columns=columns)

    # Strip thousands separators before the int cast.
    for column in ("Actual Population", "Population Projection", "Difference"):
        pop_comp[column] = pop_comp[column].str.replace(",", "", regex=False)

    # "0.08%" -> "0.08"
    pop_comp["Percent Difference"] = pop_comp["Percent Difference"].str.replace(
        "%", "", regex=False
    )

    return pop_comp.astype(
        {
            "Year": "int",
            "Actual Population": "int",
            "Population Projection": "int",
            "Difference": "int",
            "Percent Difference": "float",
        }
    )

In [61]:
# January 2024 quarterly report.
# `pdf.pages` is zero-indexed: index 41 is parsed as the projected population
# table and index 42 as the projected-vs-actual comparison table.
with pdfplumber.open("./data/IDOC-Quarterly-Report-January-2024.pdf") as pdf:
    projection_rows = pdf.pages[41].extract_table()
    projected_pop = parse_projected_population_table(projection_rows)
    print(projected_pop.head())

    comparison_rows = pdf.pages[42].extract_table()
    projected_pop_comp = parse_projected_population_comp_table(comparison_rows)
    print(projected_pop_comp.head())

      Month  Year  Population
0   January  2024       29863
1  February  2024       29846
2     March  2024       29830
3     April  2024       29813
4       May  2024       29796
       Month  Year  Actual Population  Population Projection  Difference  \
0  September  2022              29647                  29625          22   
1    October  2022              29772                  29749          23   
2   November  2022              29698                  29675          23   
3   December  2022              29634                  29612          22   
4    January  2023              29667                  29647          20   

   Percent Difference  
0                0.08  
1                0.08  
2                0.08  
3                0.08  
4                0.07  


In [62]:
# April 2024 quarterly report.
# `pdf.pages` is zero-indexed: index 41 is parsed as the projected population
# table and index 42 as the projected-vs-actual comparison table.
with pdfplumber.open("./data/IDOC-Quarterly-Report-April-2024.pdf") as pdf:
    projection_rows = pdf.pages[41].extract_table()
    projected_pop = parse_projected_population_table(projection_rows)
    print(projected_pop.head())

    comparison_rows = pdf.pages[42].extract_table()
    projected_pop_comp = parse_projected_population_comp_table(comparison_rows)
    print(projected_pop_comp.head())

    Month  Year  Population
0   April  2024       29221
1     May  2024       29205
2    June  2024       29188
3    July  2024       29172
4  August  2024       29156
      Month  Year  Actual Population  Population Projection  Difference  \
0  December  2022              29634                  29612          22   
1   January  2023              29667                  29647          20   
2  February  2023              29620                  29600          20   
3     March  2023              29799                  29779          20   
4     April  2023              29824                  29804          20   

   Percent Difference  
0                0.08  
1                0.07  
2                0.07  
3                0.07  
4                0.07  


In [63]:
# July 2024 quarterly report.
# `pdf.pages` is zero-indexed: index 41 is parsed as the projected population
# table and index 42 as the projected-vs-actual comparison table.
with pdfplumber.open("./data/IDOC-Quarterly-Report-July-2024.pdf") as pdf:
    projection_rows = pdf.pages[41].extract_table()
    projected_pop = parse_projected_population_table(projection_rows)
    print(projected_pop.head())

    comparison_rows = pdf.pages[42].extract_table()
    projected_pop_comp = parse_projected_population_comp_table(comparison_rows)
    print(projected_pop_comp.head())

       Month  Year  Population
0       July  2024       28977
1     August  2024       28961
2  September  2024       28945
3    October  2024       28929
4   November  2024       28913
   Month  Year  Actual Population  Population Projection  Difference  \
0  March  2023              29799                  29779          20   
1  April  2023              29824                  29804          20   
2    May  2023              29910                  29890          20   
3   June  2023              30062                  30042          20   
4   July  2023              30034                  30014          20   

   Percent Difference  
0                0.07  
1                0.07  
2                0.07  
3                0.07  
4                0.07  


In [64]:
# October 2024 quarterly report.
# `pdf.pages` is zero-indexed: index 41 is parsed as the projected population
# table and index 42 as the projected-vs-actual comparison table.
with pdfplumber.open("./data/IDOC-Quarterly-Report-October-2024.pdf") as pdf:
    projection_rows = pdf.pages[41].extract_table()
    projected_pop = parse_projected_population_table(projection_rows)
    print(projected_pop.head())

    comparison_rows = pdf.pages[42].extract_table()
    projected_pop_comp = parse_projected_population_comp_table(comparison_rows)
    print(projected_pop_comp.head())

      Month  Year  Population
0   October  2024       28957
1  November  2024       28941
2  December  2024       28925
3   January  2025       28912
4   Feruary  2025       28899
       Month  Year  Actual Population  Population Projection  Difference  \
0       June  2023              30062                  30042          20   
1       July  2023              30034                  30014          20   
2     August  2023              30204                  30184          20   
3  September  2023              30278                  30258          20   
4    October  2023              30084                  30064          20   

   Percent Difference  
0                0.07  
1                0.07  
2                0.07  
3                0.07  
4                0.07  
