In [1]:
!pip install pandas scikit-learn streamlit matplotlib

Collecting streamlit
  Using cached streamlit-1.50.0-py3-none-any.whl (10.1 MB)
Collecting protobuf<7,>=3.20
  Using cached protobuf-6.33.0-cp39-cp39-win_amd64.whl (436 kB)
Collecting typing-extensions<5,>=4.4.0
  Using cached typing_extensions-4.15.0-py3-none-any.whl (44 kB)
Collecting pydeck<1,>=0.8.0b4
  Using cached pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
Collecting tenacity<10,>=8.1.0
  Using cached tenacity-9.1.2-py3-none-any.whl (28 kB)
Collecting blinker<2,>=1.5.0
  Using cached blinker-1.9.0-py3-none-any.whl (8.5 kB)
Collecting numpy>=1.18.5
  Using cached numpy-2.0.2-cp39-cp39-win_amd64.whl (15.9 MB)
Collecting pyarrow>=7.0
  Using cached pyarrow-21.0.0-cp39-cp39-win_amd64.whl (26.2 MB)
Collecting gitpython!=3.1.19,<4,>=3.0.7
  Using cached gitpython-3.1.45-py3-none-any.whl (208 kB)
Collecting cachetools<7,>=4.0
  Using cached cachetools-6.2.1-py3-none-any.whl (11 kB)
Collecting altair!=5.4.0,!=5.4.1,<6,>=4.0
  Using cached altair-5.5.0-py3-none-any.whl (731 kB)
Collecting

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
daal4py 2021.6.0 requires daal==2021.4.0, which is not installed.
numba 0.55.1 requires numpy<1.22,>=1.18, but you have numpy 1.24.4 which is incompatible.


In [2]:
import pandas as pd

# Sample citizen service requests, stored column-wise: every key is a column
# and position i in each list belongs to the same ticket.
data = {
    "ticket_id": list(range(1, 11)),
    "department": [
        "Health", "Infrastructure", "Health", "Public Safety", "Infrastructure",
        "Health", "Public Safety", "Infrastructure", "Health", "Public Safety",
    ],
    "category": [
        "Hospital Delay", "Road Damage", "Clinic Shortage", "Street Light Failure",
        "Water Leakage", "Ambulance Delay", "Garbage Issue", "Bridge Crack",
        "Medical Supply Shortage", "Drain Blockage",
    ],
    "severity": [
        "High", "Medium", "High", "Low", "High",
        "High", "Medium", "High", "High", "Medium",
    ],
    "citizen_rating": [2, 4, 1, 5, 3, 2, 4, 3, 2, 3],
    "days_open": [10, 5, 15, 2, 8, 12, 6, 9, 11, 7],
    "resolved": [
        "No", "Yes", "No", "Yes", "No",
        "No", "Yes", "No", "No", "Yes",
    ],
    "ward": [3, 7, 5, 4, 3, 1, 2, 8, 5, 6],
}

# Turn the column dictionary into a DataFrame (one row per ticket)
df = pd.DataFrame(data)

# Last expression of the cell: renders the full 10-row table
df


Unnamed: 0,ticket_id,department,category,severity,citizen_rating,days_open,resolved,ward
0,1,Health,Hospital Delay,High,2,10,No,3
1,2,Infrastructure,Road Damage,Medium,4,5,Yes,7
2,3,Health,Clinic Shortage,High,1,15,No,5
3,4,Public Safety,Street Light Failure,Low,5,2,Yes,4
4,5,Infrastructure,Water Leakage,High,3,8,No,3
5,6,Health,Ambulance Delay,High,2,12,No,1
6,7,Public Safety,Garbage Issue,Medium,4,6,Yes,2
7,8,Infrastructure,Bridge Crack,High,3,9,No,8
8,9,Health,Medical Supply Shortage,High,2,11,No,5
9,10,Public Safety,Drain Blockage,Medium,3,7,Yes,6


Unnamed: 0,ticket_id,department,category,severity,citizen_rating,days_open,resolved,ward
0,1,Health,Hospital Delay,High,2,10,No,3
1,2,Infrastructure,Road Damage,Medium,4,5,Yes,7
2,3,Health,Clinic Shortage,High,1,15,No,5
3,4,Public Safety,Street Light Failure,Low,5,2,Yes,4
4,5,Infrastructure,Water Leakage,High,3,8,No,3
5,6,Health,Ambulance Delay,High,2,12,No,1
6,7,Public Safety,Garbage Issue,Medium,4,6,Yes,2
7,8,Infrastructure,Bridge Crack,High,3,9,No,8
8,9,Health,Medical Supply Shortage,High,2,11,No,5
9,10,Public Safety,Drain Blockage,Medium,3,7,Yes,6


In [4]:
# Write the request table to a CSV file in the working directory;
# index=False keeps the DataFrame's row index out of the file.
df.to_csv("citizen_services.csv", index=False)
print("✅ citizen_services.csv saved successfully in your current folder.")


âœ… citizen_services.csv saved successfully in your current folder.


In [5]:
# Step 3 — Predictive AI Model for Governance Platform

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import numpy as np

# Step 1: Load dataset
data = pd.read_csv("citizen_services.csv")
print("✅ Dataset loaded successfully!")
# Preview the first rows as loaded (ticket ids are still the raw integers here)
display(data.head())

# Step 2: Privacy — relabel ticket ids as zero-padded "ID###" strings.
# NOTE: this only reformats the id (ID001 is still ticket 1), so it stands in
# for real masking rather than anonymizing anything.
data['ticket_id'] = data['ticket_id'].apply("ID{:03}".format)

# Step 3: Build the model inputs and the label
# Predictor columns; ticket_id and the 'resolved' label itself are left out
feature_columns = ['department', 'category', 'severity', 'citizen_rating', 'days_open', 'ward']
features = data[feature_columns]
# Binary label: 1 where resolved == 'Yes', 0 for every other value
target = (data['resolved'] == 'Yes').astype('int64')

# One-hot encode the text columns; drop_first=True omits the first level of each
X = pd.get_dummies(features, drop_first=True)

# Step 4: Split into train/test sets (30% held out; fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, target, test_size=0.3, random_state=42)

# Step 5: Train Random Forest model (100 trees, seeded so refits give the same forest)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Step 6: Evaluate model on the held-out rows only
y_pred = model.predict(X_test)
print("\n📊 Model Evaluation Report:")
# zero_division=0 reports 0.0 instead of emitting an undefined-metric warning
# when a class has no predicted (or no true) samples in the hold-out set.
print(classification_report(y_test, y_pred, zero_division=0))

# Step 7: Predict on full dataset
# X contains the training rows as well, so these are not out-of-sample
# predictions for the rows the model was fitted on.
data['predicted_resolution'] = model.predict(X)
# Column 1 of predict_proba is the second entry of model.classes_
# (class 1, "resolved", assuming both classes appeared in y_train).
data['probability_resolved'] = model.predict_proba(X)[:, 1]

# Step 8: Create a Priority Score
# Tickets predicted as unresolved (0) are labelled 'High Priority', the rest 'Normal'.
data['priority_score'] = np.where(data['predicted_resolution'] == 0, 'High Priority', 'Normal')

# Step 9: Save predictions (index=False keeps the row index out of the file)
data.to_csv("predicted_priorities.csv", index=False)
print("\n✅ Predictions saved to predicted_priorities.csv")

# Step 10: Display results
display(data[['ticket_id', 'department', 'category', 'severity', 'resolved', 'priority_score']])


âœ… Dataset loaded successfully!


Unnamed: 0,ticket_id,department,category,severity,citizen_rating,days_open,resolved,ward
0,1,Health,Hospital Delay,High,2,10,No,3
1,2,Infrastructure,Road Damage,Medium,4,5,Yes,7
2,3,Health,Clinic Shortage,High,1,15,No,5
3,4,Public Safety,Street Light Failure,Low,5,2,Yes,4
4,5,Infrastructure,Water Leakage,High,3,8,No,3



ðŸ“Š Model Evaluation Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       1.00      1.00      1.00         1

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3


âœ… Predictions saved to predicted_priorities.csv


Unnamed: 0,ticket_id,department,category,severity,resolved,priority_score
0,ID001,Health,Hospital Delay,High,No,High Priority
1,ID002,Infrastructure,Road Damage,Medium,Yes,Normal
2,ID003,Health,Clinic Shortage,High,No,High Priority
3,ID004,Public Safety,Street Light Failure,Low,Yes,Normal
4,ID005,Infrastructure,Water Leakage,High,No,High Priority
5,ID006,Health,Ambulance Delay,High,No,High Priority
6,ID007,Public Safety,Garbage Issue,Medium,Yes,Normal
7,ID008,Infrastructure,Bridge Crack,High,No,High Priority
8,ID009,Health,Medical Supply Shortage,High,No,High Priority
9,ID010,Public Safety,Drain Blockage,Medium,Yes,Normal
