In [15]:
# Global configuration for the MHA data pipeline

DATA_PATH = "..\\Proposal\\Sample_Data.xlsx"

# Example endpoint for a DHIS2 server (placeholder)
DHIS2_BASE_URL = "https://hmis.health.go.ug/"
DHIS2_USERNAME = "your_username"
DHIS2_PASSWORD = "your_password"

REPORT_OUTPUT_DIR = "reports/"

In [None]:
#Load immigration health assessment dataset from CSV or Excel.

import pandas as pd
#from config import DATA_PATH
#import DATA_PATH


def load_mha_data(path: str = DATA_PATH) -> pd.DataFrame:
    
    try:
        if path.endswith(".xlsx"):
            df = pd.read_excel(path)
            print("[INFO] Data loaded successfully.")
            #print(df.info())
        else:
            df = pd.read_csv(path)
            return df
    except Exception as e:
        print(f"[ERROR] Failed to load data: {e}")
        return pd.DataFrame()

In [19]:
#processing/Clean and standardize health assessment data.

import pandas as pd

def clean_mha_data(df: pd.DataFrame) -> pd.DataFrame:
    
    # Standardize column names
    df.columns = df.columns.str.strip().str.lower().str.replace(" ", "_")

    # Remove duplicates
    df = df.drop_duplicates(subset=["passport_number", "full_name"], keep="last")

    # Handle missing values
    df["age"] = df["age"].fillna(df["age"].median())
    df["gender"] = df["gender"].fillna("Unknown")

    # Convert dates
    date_cols = ["screening_date", "dob"]
    for col in date_cols:
        df[col] = pd.to_datetime(df[col], errors="coerce")

    # Normalize disease results
    disease_cols = ["tb_result", "hiv_result"]
    for col in disease_cols:
        df[col] = df[col].str.title().replace({"Positive": 1, "Negative": 0})

    print("[INFO] Data cleaned and standardized.")
    return df


In [21]:
#processing/analysis
 # Create automated analytics for MHA reporting.

import pandas as pd

def generate_health_indicators(df: pd.DataFrame) -> dict:
    
    indicators = {}

    indicators["total_screened"] = len(df)
    indicators["tb_positive_rate"] = df["tb_result"].mean()
    indicators["hiv_positive_rate"] = df["hiv_result"].mean()
    indicators["children_under_18"] = (df["age"] < 18).sum()

    # Country-specific breakdowns
    indicators["screened_by_nationality"] = (
        df.groupby("nationality")["passport_number"].count().to_dict()
    )

    # Gender-based prevalence
    indicators["tb_by_gender"] = (
        df.groupby("gender")["tb_result"].mean().to_dict()
    )

    print("[INFO] Analytics generated.")
    return indicators


In [23]:
#reporting/generate_reports
#Save computed indicators to a JSON report.

import json
import os
#from config import REPORT_OUTPUT_DIR

def save_report(indicators: dict, report_name: str = "mha_report.json"):

    os.makedirs(REPORT_OUTPUT_DIR, exist_ok=True)
    path = os.path.join(REPORT_OUTPUT_DIR, report_name)

    with open(path, "w") as f:
        json.dump(indicators, f, indent=4)

    print(f"[INFO] Report saved: {path}")


In [24]:
#integration/dhis2
#Mock function for integrating with Uganda national HIS (DHIS2 or similar).

import requests
from requests.auth import HTTPBasicAuth
#from config import DHIS2_BASE_URL, DHIS2_USERNAME, DHIS2_PASSWORD

def send_indicator_to_dhis2(indicators: dict):

    endpoint = DHIS2_BASE_URL + "iha-indicators"

    try:
        response = requests.post(
            endpoint,
            json=indicators,
            auth=HTTPBasicAuth(DHIS2_USERNAME, DHIS2_PASSWORD)
        )
        if response.status_code == 200:
            print("[INFO] Indicators successfully sent to DHIS2.")
        else:
            print(f"[ERROR] DHIS2 returned status: {response.status_code}")
            print(response.text)
    except Exception as e:
        print(f"[ERROR] Failed to send data to DHIS2: {e}")


In [26]:
#main
""" 
from ingestion.ingest import load_mha_data
from processing.cleaning import clean_mha_data
from processing.analysis import generate_health_indicators
from reporting.generate_reports import save_report
from integration.dhis2_api import send_indicator_to_dhis2

"""

def main():
    print("=== Migration Health Assessment Data Pipeline ===")

    # Step 1: Load data
    df = load_mha_data()

    # Step 2: Clean and process
    df_clean = clean_mha_data(df)

    # Step 3: Generate indicators
    indicators = generate_health_indicators(df_clean)

    # Step 4: Save report
    save_report(indicators)

    # Step 5: Integrate with national HIS
    send_indicator_to_dhis2(indicators)

    print("=== Pipeline complete ===")

if __name__ == "__main__":
    main()


=== Migration Health Assessment Data Pipeline ===
[INFO] Data loaded successfully.
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   passport_number  3 non-null      object        
 1   full_name        3 non-null      object        
 2   age              3 non-null      int64         
 3   gender           3 non-null      object        
 4   natinality       3 non-null      object        
 5   screening_date   3 non-null      datetime64[ns]
 6   dob              3 non-null      datetime64[ns]
 7   tb_results       3 non-null      object        
 8   hiv_result       3 non-null      object        
dtypes: datetime64[ns](2), int64(1), object(6)
memory usage: 348.0+ bytes
None


AttributeError: 'NoneType' object has no attribute 'columns'

#How This Implements the Proposal

| Proposal Objective                                         | Implementation in Code                          |
| ---------------------------------------------------------- | ----------------------------------------------- |
| **Data pipeline for collection, cleaning, and processing** | `ingest.py`, `cleaning.py`                      |
| **Automated reporting**                                    | `analysis.py`, `generate_reports.py`            |
| **Integration with national systems**                      | `dhis2_api.py` simulates DHIS2 interoperability |
| **Scalable, modular architecture**                         | Directory structure and modular functions       |


#Project Structure 

iha_pipeline/
│
├── config.py
├── main.py
│
├── data/
│   └── sample_iha_data.csv
│
├── ingestion/
│   └── ingest.py
│
├── processing/
│   ├── cleaning.py
│   └── analysis.py
│
├── reporting/
│   └── generate_reports.py
│
└── integration/
    └── dhis2_api.py
