**MILESTONE-1**

In [2]:
import easyocr
import cv2
import pandas as pd
from IPython.display import display

In [5]:
# Location of the diabetes CSV (relative path, resolved against the working directory)
csv_path = "diabetes.csv"
df = pd.read_csv(filepath_or_buffer=csv_path)

In [4]:
# Caption first, then a rich (HTML) preview of the first five rows
print("Diabetes Dataset (First 5 Rows):")
display(df.head(n=5))

Diabetes Dataset (First 5 Rows):


Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [5]:
# Load the scanned medical report from disk
image_path = "medical_report.png"
image = cv2.imread(image_path)

# cv2.imread does not raise on a missing or unreadable file; it returns None.
# Fail here with a clear message instead of letting the OCR step crash later.
if image is None:
    raise FileNotFoundError(f"Could not read image file: {image_path!r}")

In [6]:
# Build the EasyOCR reader, configured for English ('en') only
reader = easyocr.Reader(lang_list=['en'])

Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


In [8]:
# Run OCR on the loaded image; each result is later unpacked as (bbox, text, confidence)
results = reader.readtext(image=image)

In [9]:
# Header, then one line per detection: the recognised text and its confidence
# (two decimals). The bounding box is not needed for this listing.
print("\nExtracted Text from Medical Report:\n")
report_lines = (
    f"{text} (Confidence: {score:.2f})" for _box, text, score in results
)
for line in report_lines:
    print(line)


Extracted Text from Medical Report:

AIG (Confidence: 0.98)
HOSPITALS (Confidence: 0.90)
VAA H (Confidence: 0.13)
LABORATORY INVESTIGATION REPORT (Confidence: 0.91)
Patientl (Confidence: 0.60)
UHID (Confidence: 0.60)
AIGG,20825010 (Confidence: 0.77)
O0;17 (Confidence: 0.30)
Episode (Confidence: 1.00)
IP (Confidence: 0.96)
113160 (Confidence: 0.85)
806 (Confidence: 0.57)
Ref: Doctor (Confidence: 0.91)
Dr; ARABIND PANDA (Confidence: 0.82)
Facility (Confidence: 1.00)
AIG Hospitals; Gachibowl (Confidence: 0.63)
Haematology (Confidence: 1.00)
TEST (Confidence: 0.85)
RESULT (Confidence: 1.00)
UNIT (Confidence: 0.97)
BIOLOGICAL PEF INTERVAL (Confidence: 0.85)
Sumple %0 (Confidence: 0.37)
AG103291J4 (Confidence: 0.21)
Ccllectlon (Confidence: 0.85)
18/04,25 04 53 (Confidence: 0.59)
Ack Date (Confidence: 0.89)
18,0412025 (Confidence: 0.14)
10.59 (Confidence: 0.68)
Ruport Date (Confidence: 0.30)
18/04,25 1J*42 (Confidence: 0.22)
CBP (COMPLETE BLOOD PICTURE) (Confidence: 0.86)
Sarple Typo - (Conf

**MILESTONE-2**

**XGBoost**

In [15]:
import pandas as pd
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report

In [16]:
# Reload the dataset for modelling. The CSV carries a stray 'Unnamed: 0'
# column (the row index written out when the file was saved); drop it so it
# cannot end up as a model feature. errors='ignore' keeps this working on a
# copy of the file that lacks the column.
df = pd.read_csv('diabetes.csv').drop(columns=['Unnamed: 0'], errors='ignore')
print("Diabetes Dataset (First 5 Rows):")
display(df.head())

# In these columns the code treats a 0 as a missing measurement and replaces
# it with the median of the column's non-zero values.
# NOTE(review): the medians are computed over the whole dataset, i.e. before
# the train/test split, so held-out rows contribute to the imputed values.
cols_to_fix = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']
for col in cols_to_fix:
    # .loc selects rows and column in one step (no chained indexing)
    median_val = df.loc[df[col] != 0, col].median()
    df[col] = df[col].replace(0, median_val)

Diabetes Dataset (First 5 Rows):


Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [17]:
# Separate features from the target. 'Unnamed: 0' is the row index that was
# saved into the CSV, not a measurement, so it is excluded from the features
# (errors='ignore' makes this a no-op when the column is already gone).
X = df.drop(columns=['Outcome', 'Unnamed: 0'], errors='ignore')
y = df['Outcome']

# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# held-out rows influence the mean/variance used to transform the training
# data. The split itself depends only on y, the row count and random_state.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Learn the scaling parameters from the training rows only, then apply the
# same transform to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept for any later cell that still expects the fully scaled feature matrix
X_scaled = scaler.transform(X)

In [18]:
# Gradient-boosted tree classifier with shallow trees, a small learning rate,
# L1/L2 regularisation and row/column subsampling.
# `use_label_encoder` was removed from the arguments: the installed XGBoost
# ignores it and only emits a "Parameters ... are not used" warning.
model = XGBClassifier(
    n_estimators=100,
    max_depth=3,
    learning_rate=0.05,
    reg_alpha=1,
    reg_lambda=1.5,
    subsample=0.7,
    colsample_bytree=0.7,
    eval_metric='logloss',
    random_state=42
)

# fit() is left as the cell's last expression so the fitted model is displayed
model.fit(X_train, y_train)

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


0,1,2
,objective,'binary:logistic'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,0.7
,device,
,early_stopping_rounds,
,enable_categorical,False


In [20]:
# Predicted labels for both partitions
train_preds = model.predict(X_train)
test_preds = model.predict(X_test)

# Accuracy as a fraction in [0, 1]; converted to a percentage when printed.
# Comparing the two numbers shows how far the model overfits the training set.
train_acc = accuracy_score(y_train, train_preds)
test_acc = accuracy_score(y_test, test_preds)
print(f"Training Accuracy: {train_acc * 100:.2f}%")
print(f"Testing Accuracy:  {test_acc * 100:.2f}%")

# Per-class precision / recall / F1 on the held-out rows
print("\n--- Classification Report ---")
test_report = classification_report(y_test, test_preds)
print(test_report)

Training Accuracy: 84.36%
Testing Accuracy:  75.32%

--- Classification Report ---
              precision    recall  f1-score   support

           0       0.79      0.84      0.82       100
           1       0.67      0.59      0.63        54

    accuracy                           0.75       154
   macro avg       0.73      0.72      0.72       154
weighted avg       0.75      0.75      0.75       154



In [None]:
import ipywidgets as widgets
from IPython.display import display, clear_output

# Description style shared by every input widget
style = {'description_width': 'initial'}

# One row per input field: (dict key, widget class, default value, label)
_field_specs = [
    ('Pregnancies', widgets.IntText, 0, "Pregnancies:"),
    ('Glucose', widgets.IntText, 100, "Glucose (mg/dL):"),
    ('BloodPressure', widgets.IntText, 70, "Blood Pressure:"),
    ('SkinThickness', widgets.IntText, 20, "Skin Thickness:"),
    ('Insulin', widgets.IntText, 80, "Insulin:"),
    ('BMI', widgets.FloatText, 25.0, "BMI:"),
    ('DPF', widgets.FloatText, 0.47, "Pedigree Function:"),
    ('Age', widgets.IntText, 30, "Age:"),
]

# Inputs for all 8 features, keyed by field name (insertion order is kept,
# which is also the order the widgets are laid out in)
inputs = {
    key: widget_cls(value=default, description=label, style=style)
    for key, widget_cls, default, label in _field_specs
}

# Trigger button and the output area the alert report is written into
btn = widgets.Button(description="Check Alerts", button_style='warning')
output = widgets.Output()

# THRESHOLD ALERT FUNCTION
def _collect_alerts(glucose, bmi, blood_pressure):
    """Return the alert lines for the three screened readings.

    Args:
        glucose: Glucose reading taken from the 'Glucose' input.
        bmi: Body-mass index taken from the 'BMI' input.
        blood_pressure: Reading taken from the 'BloodPressure' input.

    Returns:
        A list of message strings, in display order; empty when nothing was
        flagged.
    """
    alerts = []

    # A reading of 0 or below cannot be a real measurement (the preprocessing
    # cell treats 0 in these columns as missing), so report it as invalid
    # instead of letting it pass as "normal".
    for label, reading in (
        ("Glucose", glucose),
        ("BMI", bmi),
        ("Blood Pressure", blood_pressure),
    ):
        if reading <= 0:
            alerts.append(f"⚠️ INVALID: {label} must be greater than 0")

    # Glucose Logic
    if glucose >= 126:
        alerts.append("❌ ALERT: High Glucose (Diabetic)")
    elif glucose >= 100:
        alerts.append("⚠️ WARNING: Elevated Glucose (Pre-diabetic)")

    # BMI Logic
    if bmi >= 30:
        alerts.append("❌ ALERT: Obesity Detected")
    elif bmi >= 25:
        alerts.append("⚠️ WARNING: Overweight")

    # Blood Pressure Logic
    if blood_pressure >= 90:
        alerts.append("❌ ALERT: High Blood Pressure")

    return alerts


def check_thresholds(b):
    """Button callback: print threshold alerts for the current input values.

    Reads Glucose, BMI and BloodPressure from the module-level `inputs`
    widgets and writes the report into the module-level `output` widget,
    replacing whatever was shown before. The other inputs are not screened.

    Args:
        b: The clicked button, passed by `on_click`; unused.
    """
    with output:
        clear_output()
        print("="*35)
        print("       MEDICAL RISK ALERTS      ")
        print("="*35)

        alerts = _collect_alerts(
            inputs['Glucose'].value,
            inputs['BMI'].value,
            inputs['BloodPressure'].value,
        )

        if alerts:
            for message in alerts:
                print(message)
        else:
            print("✅ Clinical values are within normal limits.")

        print("="*35)
        print("Note: Click again after updating values.")


# Register the click handler, stack the input widgets with the button below
# them, and render the form followed by the output area.
btn.on_click(check_thresholds)
ui_layout = widgets.VBox(children=[*inputs.values(), btn])
display(ui_layout, output)

VBox(children=(IntText(value=0, description='Pregnancies:', style=DescriptionStyle(description_width='initial'…

Output()