<a href="https://colab.research.google.com/github/kssandraeshwar/trucs-personnels/blob/main/Vaccination%20Insights%20%26%20Age%20Prediction%20System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import pickle

# Load Dataset
DATA_URL = "https://raw.githubusercontent.com/kssandraeshwar/trucs-personnels/refs/heads/main/opendata_covid19_vaccination_agegroup.csv"
TARGET_COLUMN = 'AgeGroup'
FEATURE_COLUMNS = ['VaccinationSeries', 'DailyCount', 'TotalCount', 'PopulationCoverage']

raw_df = pd.read_csv(DATA_URL)

# Select relevant columns for classification and drop rows with missing values.
# A fresh frame is built (no inplace=True) so the raw data stays available and
# the later column assignment never operates on a view of another frame.
df = raw_df[[TARGET_COLUMN] + FEATURE_COLUMNS].dropna().copy()

# Encode categorical variables
label_encoder = LabelEncoder()
df[TARGET_COLUMN] = label_encoder.fit_transform(df[TARGET_COLUMN])

# Define features and target variable
X = df[FEATURE_COLUMNS]
y = df[TARGET_COLUMN]

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the Random Forest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

# Save the model for web integration
with open("age_group_classifier.pkl", "wb") as f:
    pickle.dump(model, f)

# The classifier predicts the integer codes produced by `label_encoder`, so the
# fitted encoder is saved as well; without it a consumer cannot map a predicted
# code back to the original AgeGroup value.
with open("age_group_label_encoder.pkl", "wb") as f:
    pickle.dump(label_encoder, f)


Accuracy: 0.49700710811821924
Classification Report:
               precision    recall  f1-score   support

           0       0.25      0.66      0.37      2959
           1       0.36      0.51      0.42      2859
           2       0.33      0.50      0.39      2905
           3       0.75      0.48      0.59      2960
           4       0.91      0.47      0.62      2957
           5       0.88      0.47      0.61      3001
           6       0.73      0.33      0.46      2865
           7       0.83      0.47      0.60      2888
           8       0.84      0.45      0.59      2869
           9       0.38      0.65      0.48      2924
          10       0.94      0.48      0.63      2889

    accuracy                           0.50     32076
   macro avg       0.65      0.50      0.52     32076
weighted avg       0.65      0.50      0.52     32076



In [2]:
from sklearn.tree import DecisionTreeClassifier

# Depth-limited decision tree fitted on the training split.
dt_model = DecisionTreeClassifier(
    random_state=42,
    min_samples_split=5,
    max_depth=10,
)
dt_model.fit(X_train, y_train)


In [3]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with 200 depth-capped trees, fitted on the training split.
rf_model = RandomForestClassifier(
    random_state=42,
    max_depth=15,
    n_estimators=200,
)
rf_model.fit(X_train, y_train)


In [4]:
from xgboost import XGBClassifier

# Gradient-boosted trees fitted on the training split.
# random_state is set explicitly so this estimator is seeded the same way as
# the decision tree and random forest models in this notebook.
xgb_model = XGBClassifier(n_estimators=100, learning_rate=0.1, max_depth=10, random_state=42)
xgb_model.fit(X_train, y_train)


In [7]:
from imblearn.over_sampling import SMOTE

# Oversample the minority classes of the training split with SMOTE.
# SMOTE synthesises samples randomly, so it is seeded to make the resampled
# data reproducible across kernel restarts.
# NOTE(review): in the cells visible in this notebook the classifiers are fitted
# on X_train / y_train, not on X_resampled / y_resampled — confirm whether the
# resampled data is meant to be used for training.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train, y_train)
print(X_resampled, y_resampled)


        VaccinationSeries  DailyCount  TotalCount  PopulationCoverage
0                      10           0           0            0.000000
1                       2           0        1736            5.920000
2                       8           0           0            0.000000
3                       8           0           0            0.000000
4                       1           0       16199           55.220000
...                   ...         ...         ...                 ...
128926                  4           0           0            0.000000
128927                  4           0           2            0.000000
128928                  7           0           0            0.000000
128929                  4           9        1044            1.307917
128930                  8           0           0            0.000000

[128931 rows x 4 columns] 0          1
1          2
2         10
3          4
4          2
          ..
128926    10
128927    10
128928    10
128929    10
128

In [10]:
from sklearn.metrics import classification_report, accuracy_score

# Candidate classifiers to compare on the held-out test split.
models = {
    "Decision Tree": dt_model,
    "Random Forest": rf_model,
    "XGBoost": xgb_model
}

best_model_name = None
# Start below any possible accuracy so a model is always selected, even if
# every candidate scores 0.
best_accuracy = float("-inf")
best_model = None

# Each candidate is evaluated once: print its accuracy and per-class report,
# and remember the first model with the highest accuracy.
# The loop uses its own variable names so the `model` / `y_pred` objects from
# the first training cell are not overwritten.
# NOTE(review): the winner is chosen on the same test split used to report
# accuracy, so the reported best accuracy is optimistically biased.
for name, candidate in models.items():
    candidate_pred = candidate.predict(X_test)
    acc = accuracy_score(y_test, candidate_pred)
    print(f"{name} Accuracy: {acc:.4f}")
    print(classification_report(y_test, candidate_pred))

    if acc > best_accuracy:  # Select the highest accuracy model
        best_accuracy = acc
        best_model = candidate
        best_model_name = name

print(f"\nBest Model: {best_model_name} with Accuracy: {best_accuracy:.4f}")



Decision Tree Accuracy: 0.2927
              precision    recall  f1-score   support

           0       0.57      0.20      0.29      2959
           1       0.12      0.95      0.22      2859
           2       1.00      0.24      0.38      2905
           3       1.00      0.16      0.27      2960
           4       0.63      0.19      0.30      2957
           5       0.32      0.37      0.34      3001
           6       0.91      0.13      0.23      2865
           7       0.99      0.24      0.38      2888
           8       0.87      0.22      0.35      2869
           9       0.98      0.26      0.42      2924
          10       1.00      0.27      0.42      2889

    accuracy                           0.29     32076
   macro avg       0.76      0.29      0.33     32076
weighted avg       0.76      0.29      0.33     32076

Random Forest Accuracy: 0.4869
              precision    recall  f1-score   support

           0       0.25      0.66      0.37      2959
           1    

In [13]:
import pickle

# Save the best model (written once; the file is later read by the Flask cell).
with open("best_model.pkl", "wb") as f:
    pickle.dump(best_model, f)

print("Model saved as best_model.pkl")



Model saved as best_model.pkl


In [14]:
!pip install flask flask-ngrok


Collecting flask-ngrok
  Downloading flask_ngrok-0.0.25-py3-none-any.whl.metadata (1.8 kB)
Downloading flask_ngrok-0.0.25-py3-none-any.whl (3.1 kB)
Installing collected packages: flask-ngrok
Successfully installed flask-ngrok-0.0.25


In [None]:
from flask import Flask, request, jsonify
import pickle
import numpy as np
from flask_ngrok import run_with_ngrok

# Restore the pickled classifier written by the model-selection step.
# Only unpickle files you produced yourself: pickle can execute arbitrary code.
with open("best_model.pkl", "rb") as model_file:
    model = pickle.load(model_file)

# Create the Flask application and hook it up to ngrok,
# which starts a tunnel when the app is run.
app = Flask(__name__)
run_with_ngrok(app)

# Order of the input features; matches the column order used to fit the models.
FEATURE_NAMES = ("VaccinationSeries", "DailyCount", "TotalCount", "PopulationCoverage")


@app.route("/predict", methods=["POST"])
def predict():
    """Predict the encoded age group for one observation.

    Expects a JSON object with the numeric fields listed in ``FEATURE_NAMES``.

    Returns:
        200 with ``{"Predicted Age Group": <int>}`` on success. The integer is
        the label-encoded class, not the original AgeGroup value.
        400 with ``{"error": <message>}`` when the body is not a JSON object,
        a field is missing, or a value is not a finite number.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON body,
    # so every malformed request gets the same JSON error shape.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    missing = [name for name in FEATURE_NAMES if name not in data]
    if missing:
        return jsonify({"error": "Missing fields: " + ", ".join(missing)}), 400

    try:
        values = [float(data[name]) for name in FEATURE_NAMES]
    except (TypeError, ValueError):
        return jsonify({"error": "All feature values must be numeric"}), 400

    features = np.array(values).reshape(1, -1)
    # Reject NaN/inf (e.g. sent as the strings "nan"/"inf") before they reach the model.
    if not np.isfinite(features).all():
        return jsonify({"error": "All feature values must be finite numbers"}), 400

    prediction = model.predict(features)[0]

    return jsonify({"Predicted Age Group": int(prediction)})

# Start the Flask server only when this code runs as the main module.
# NOTE(review): the recorded output of this cell shows a background thread
# failing with ConnectionRefusedError right after the server starts, so the
# ngrok tunnel was presumably not established — confirm the ngrok setup
# before relying on a public URL.
if __name__ == "__main__":
    app.run()


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
Exception in thread Thread-8:
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/urllib3/connection.py", line 198, in _new_conn
    sock = connection.create_connection(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/urllib3/util/connection.py", line 85, in create_connection
    raise err
  File "/usr/local/lib/python3.11/dist-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 111] Connection refused

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/urllib3/connectionpool.py", line 787, in urlopen
    response = self._make_request(
               ^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/urllib3/connectionpool.py", line 493, in _make_reques