In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf

# For inline plots in Jupyter
%matplotlib inline
import pandas as pd
import matplotlib as plt
import glob
import os
import xml.etree.ElementTree as ET
from datetime import datetime, timezone
from tqdm import tqdm
import codecs
import csv
import openpyxl

In [2]:
data_directory_census = "/home/paulharford/college/project/project_data/processed/WEATHERED_census_pop_age_grp_gender_region.csv"
full_path_census = os.path.abspath(data_directory_census)

In [3]:
df_census = pd.read_csv(full_path_census)

In [4]:
df_census.head()

Unnamed: 0,region,date,age_group,female,male,total,year
0,HSE Dublin and Midlands,2014-01-01,60 - 64,22999,23073,46072,2014
1,HSE Dublin and Midlands,2014-01-02,60 - 64,22999,23073,46072,2014
2,HSE Dublin and Midlands,2014-01-03,60 - 64,22999,23073,46072,2014
3,HSE Dublin and Midlands,2014-01-04,60 - 64,22999,23073,46072,2014
4,HSE Dublin and Midlands,2014-01-05,60 - 64,22999,23073,46072,2014


In [5]:
df_census.rename(columns={"Male": "male"}, inplace=True)

In [6]:
df_census.rename(columns={"Female": "female"}, inplace=True)

In [7]:
import pandas as pd
import plotly.express as px
from shiny import App, ui, reactive, render
from shiny import App, ui, reactive
from shinywidgets import output_widget, render_widget  #

In [8]:
# 1) Define a user interface
app_ui = ui.page_fluid(
    ui.h2("Census data in Ireland per HSE health region"),
    ui.layout_sidebar(
        ui.sidebar(
            ui.input_select(
                "region_select",
                "Choose a region:",
                choices=sorted(df_census["region"].unique()),
                selected=sorted(df_census["region"].unique())[0]
            ),
            ui.input_slider(
                "year_range",
                "Select Year Range:",
                min=df_census["year"].min(),
                max=df_census["year"].max(),
                value=(df_census["year"].min(), df_census["year"].max()),
                step=1
            ),
            # Add filters for Age_Range
            ui.input_checkbox_group(
                "age_range_select",
                "Select Age Ranges:",
                choices=sorted(df_census["age_group"].unique()),
                selected=sorted(df_census["age_group"].unique())
            ),
            # Add filter for Gender
            ui.input_checkbox_group(
                "gender_select",
                "Select Gender:",
                choices=["male", "female", "total"],
                selected=["total"]
            ),
            # Add a visualization type selector
            ui.input_radio_buttons(
                "viz_type",
                "Visualization Type:",
                choices=["Total Population", "By Age Range", "By Gender"],
                selected="Total Population"
            )
        ),
        output_widget("census_plot")
    )
)

# 2) Define the server setup
def server(input, output, session):
    @reactive_Calc
    def filtered_data():
        selected_region = input.region_select()
        year_min, year_max = input.year_range()
        selected_age_ranges = input.age_range_select()
        
        # Filter by region, year, and age group
        dff = df_census[
            (df_census["region"] == selected_region) &
            (df_census["year"] >= year_min) &
            (df_census["year"] <= year_max) &
            (df_census["age_group"].isin(selected_age_ranges))
        ]
        
        return dff
    
    @render_widget
    def census_plot():
        dff = filtered_data()
        viz_type = input.viz_type()
        selected_genders = input.gender_select()
        
        if dff.empty:
            # Return a blank figure or indicate no data
            fig = px.line(title="No data available for selection.")
            return fig
        
        if viz_type == "Total Population":
            # Prepare data based on gender selection
            if "total" in selected_genders and len(selected_genders) == 1:
                # Use the total column
                pop_by_year = dff.groupby("year")["total"].sum().reset_index(name="Population")
            else:
                # Use specific gender columns
                gender_cols = []
                if "male" in selected_genders:
                    gender_cols.append("male")
                if "female" in selected_genders:
                    gender_cols.append("female")
                
                # Sum the selected gender columns
                pop_by_year = dff.groupby("year")[gender_cols].sum().reset_index()
                pop_by_year["Population"] = pop_by_year[gender_cols].sum(axis=1)
            
            fig = px.line(
                pop_by_year,
                x="year",
                y="Population",
                title=f"Population in {input.region_select()}",
                markers=True
            )
            
        elif viz_type == "By Age Range":
            # Prepare data for age range visualization
            if "total" in selected_genders and len(selected_genders) == 1:
                # Use total population by age group
                age_counts = dff.groupby(["year", "age_group"])["total"].sum().reset_index(name="Population")
            else:
                # Use specific gender columns
                gender_cols = []
                if "male" in selected_genders:
                    gender_cols.append("male")
                if "female" in selected_genders:
                    gender_cols.append("female")
                
                # Sum the selected gender columns for each age group
                age_counts = dff.groupby(["year", "age_group"])[gender_cols].sum().reset_index()
                age_counts["Population"] = age_counts[gender_cols].sum(axis=1)
            
            fig = px.line(
                age_counts,
                x="year",
                y="Population",
                color="age_group",
                title=f"Population by Age Range in {input.region_select()}",
                markers=True
            )
            
        elif viz_type == "By Gender":
            # Only proceed if there are multiple gender selections
            if len(selected_genders) > 1 and "total" not in selected_genders:
                # Create a long-form dataframe for gender comparison
                gender_data = []
                
                for gender in selected_genders:
                    yearly_data = dff.groupby("year")[gender].sum().reset_index()
                    yearly_data["Gender"] = gender.capitalize()  # Capitalize for display
                    yearly_data.rename(columns={gender: "Population"}, inplace=True)
                    gender_data.append(yearly_data)
                
                gender_df = pd.concat(gender_data)
                
                fig = px.line(
                    gender_df,
                    x="year",
                    y="Population",
                    color="Gender",
                    title=f"Population by Gender in {input.region_select()}",
                    markers=True
                )
            else:
                # Default to total population when Total is selected with other genders
                pop_by_year = dff.groupby("year")["total"].sum().reset_index(name="Population")
                
                fig = px.line(
                    pop_by_year,
                    x="year",
                    y="Population",
                    title=f"Total Population in {input.region_select()}",
                    markers=True
                )
        
        # Improve the figure layout
        fig.update_layout(
            xaxis_title="Year",
            yaxis_title="Population",
            legend_title_text="",
            template="plotly_white"
        )
            
        return fig

# 3) Create App object
app = App(app_ui, server)

In [9]:
import shiny
print(shiny.__version__)

1.2.1


In [None]:
import nest_asyncio
nest_asyncio.apply()

# Now you can run the app
app.run()


INFO:     Started server process [170791]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)


INFO:     127.0.0.1:43114 - "GET / HTTP/1.1" 200 OK


INFO:     ('127.0.0.1', 43128) - "WebSocket /websocket/" [accepted]
INFO:     connection open
Traceback (most recent call last):
  File "/home/paulharford/anaconda3/envs/msc/lib/python3.12/site-packages/shiny/session/_session.py", line 636, in _run
    self.app.server(self.input, self.output, self)
  File "/tmp/ipykernel_170791/4137266279.py", line 48, in server
    @reactive_Calc
     ^^^^^^^^^^^^^
NameError: name 'reactive_Calc' is not defined
name 'reactive_Calc' is not defined
INFO:     connection closed


INFO:     127.0.0.1:51090 - "GET / HTTP/1.1" 200 OK


INFO:     ('127.0.0.1', 51094) - "WebSocket /websocket/" [accepted]
INFO:     connection open
Traceback (most recent call last):
  File "/home/paulharford/anaconda3/envs/msc/lib/python3.12/site-packages/shiny/session/_session.py", line 636, in _run
    self.app.server(self.input, self.output, self)
  File "/tmp/ipykernel_170791/4137266279.py", line 48, in server
    @reactive_Calc
     ^^^^^^^^^^^^^
NameError: name 'reactive_Calc' is not defined
name 'reactive_Calc' is not defined
INFO:     connection closed
