In [8]:

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.impute import KNNImputer
import gradio as gr
import joblib

In [9]:
# Remote copy of the Pima Indians diabetes CSV; the file has no header row,
# so the column names are supplied explicitly.
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/refs/heads/master/pima-indians-diabetes.data.csv"
column_names = [
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
    'Outcome',
]

df = pd.read_csv(url, header=None, names=column_names)

# Subset of measurements used as model features, plus the label column.
medical_col = [
    'Glucose',
    'BloodPressure',
    'DiabetesPedigreeFunction',
    'Insulin',
    'BMI',
]
target_col = 'Outcome'
df_medical = df[[*medical_col, target_col]].copy()

# Keep a local copy of the raw feature columns (label not included).
df.loc[:, medical_col].to_csv('diabetes_data.csv', index=False)

# Quick look at size, class balance, per-class means and the working frame.
print(df.shape)
print("value count", df['Outcome'].value_counts())
print("mean outcome", df.groupby('Outcome').mean())
print("medical solumns", df_medical.head())

# Feature columns that contain at least one literal 0.
problem_col = [name for name in medical_col if df[name].eq(0).any()]
print(f"problem columns:{problem_col}")

(768, 9)
value count Outcome
0    500
1    268
Name: count, dtype: int64
mean outcome          Pregnancies     Glucose  BloodPressure  SkinThickness     Insulin  \
Outcome                                                                      
0           3.298000  109.980000      68.184000      19.664000   68.792000   
1           4.865672  141.257463      70.824627      22.164179  100.335821   

               BMI  DiabetesPedigreeFunction        Age  
Outcome                                                  
0        30.304200                  0.429734  31.190000  
1        35.142537                  0.550500  37.067164  
medical solumns    Glucose  BloodPressure  DiabetesPedigreeFunction  Insulin   BMI  Outcome
0      148             72                     0.627        0  33.6        1
1       85             66                     0.351        0  26.6        0
2      183             64                     0.672        0  23.3        1
3       89             66                     0.1

In [10]:

# Rebuild the working frame from the raw data so this cell starts from the
# same state every time it is run.
df_medical = df[[*medical_col, target_col]].copy()

# Treat zeros in the feature columns as missing values.
df_medical[medical_col] = df_medical[medical_col].replace(0, np.nan)

# Fill each missing value from the 5 nearest rows.
# NOTE(review): the imputer is fitted on every row before the train/test split
# done in a later cell, so test rows influence the imputed training values.
imputer = KNNImputer(n_neighbors=5)
print("missing values before imputation:", df_medical[medical_col].isna().to_numpy().sum())
df_medical[medical_col] = imputer.fit_transform(df_medical[medical_col])
print("missing values after imputation:", df_medical[medical_col].isna().to_numpy().sum())

missing values before imputation: 425
missing values after imputation: 0


In [11]:
# Feature matrix and label vector taken from the imputed frame.
x = df_medical.loc[:, medical_col]
y = df_medical[target_col]

# 80/20 split, stratified on the label and seeded for repeatability.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Learn the scaling statistics from the training rows only, then apply them
# to both partitions.
scaler = StandardScaler().fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)
print(f"training set: {x_train.shape}, test set:{x_test.shape}")

training set: (614, 5), test set:(154, 5)


In [12]:
# Candidate classifiers, each trained on the standardized training features.
models={
    'Logistic Regression': LogisticRegression(),
    'KNN':KNeighborsClassifier(n_neighbors=5),
    # probability=True makes SVC expose predict_proba, which the prediction
    # function relies on; without it, an SVM winner would be saved but fail at
    # prediction time. random_state fixes the internal calibration shuffling.
    'svm':svm.SVC(probability=True, random_state=42)
}

# Test-set accuracy per model name.
results={}
for name,model in models.items():
    model.fit(x_train_scaled,y_train)
    y_pred=model.predict(x_test_scaled)
    accuracy=accuracy_score(y_test,y_pred)
    results[name]=accuracy
    print(f"{name}:{accuracy:.4f}")

# `best_model` is the *name* of the winner; on ties max() keeps the first
# entry in dict insertion order.
# NOTE(review): the winner is chosen by accuracy on the same test split that is
# reported, so the reported figure is likely optimistic - consider selecting
# with cross-validation on the training split.
best_model=max(results,key=results.get)
print(f"\nbest model:{best_model} with accuracy:{results[best_model]:.4f}")

# Persist the fitted winner and the fitted scaler for the prediction step.
best_algo=models[best_model]
joblib.dump(best_algo,'best_diabetes_model.pkl')
print(" Best model saved as 'best_diabetes_model.pkl'")

joblib.dump(scaler,'scaler.pkl')
print("Scaler saved as 'scaler.pkl'")



Logistic Regression:0.7273
KNN:0.7078
svm:0.7273

best model:Logistic Regression with accuracy:0.7273
 Best model saved as 'best_diabetes_model.pkl'
Scaler saved as 'scaler.pkl'


In [13]:
# Write the imputed frame to disk. The path lives in one variable so the
# confirmation message always names the file that was actually written
# (the old message used a hyphen where the filename has an underscore).
cleaned_csv_path = 'diabetes_medical_cleaned.csv'
df_medical.to_csv(cleaned_csv_path, index=False)
print(f"cleaned medical data saved as '{cleaned_csv_path}'")

cleaned medical data saved as 'diabetes-medical_cleaned.csv'


In [14]:
import joblib
# Reload the persisted model and scaler (in that order) from the working
# directory. These files are unpickled, so only load artifacts you produced.
best_algo, scaler = (
    joblib.load(artifact)
    for artifact in ('best_diabetes_model.pkl', 'scaler.pkl')
)

def predict_diabetes(glucose, bp, dpf, insulin, bmi):
    """Classify one set of measurements with the loaded scaler and model.

    The arguments are passed to the scaler in the order
    glucose, bp, dpf, insulin, bmi.

    Args:
        glucose: Glucose value.
        bp: Blood pressure value.
        dpf: Diabetes pedigree function value.
        insulin: Insulin value.
        bmi: Body mass index value.

    Returns:
        A string starting with " DIABETIC" when the model predicts class 1,
        otherwise " HEALTHY". When the model provides ``predict_proba`` the
        probability of the predicted class is appended as
        "(<pct>% confidence)"; otherwise only the label is returned.

    NOTE(review): training replaced zeros in the feature columns with imputed
    values, so a literal 0 input is outside what the model was fitted on.
    """
    values = [[glucose, bp, dpf, insulin, bmi]]

    # A scaler fitted on a DataFrame remembers its column names and warns when
    # given a bare array; hand it a matching frame when the names are known.
    feature_names = getattr(scaler, "feature_names_in_", None)
    if feature_names is not None and len(feature_names) == len(values[0]):
        features = pd.DataFrame(values, columns=list(feature_names))
    else:
        features = np.array(values, dtype=float)

    features_scaled = scaler.transform(features)
    prediction = best_algo.predict(features_scaled)[0]
    label = " DIABETIC" if prediction == 1 else " HEALTHY"

    # Some estimators (e.g. SVC without probability=True) have no
    # predict_proba; fall back to the bare label instead of crashing.
    predict_proba = getattr(best_algo, "predict_proba", None)
    if predict_proba is None:
        return label

    probability = predict_proba(features_scaled)[0]
    confidence = probability[1] if prediction == 1 else probability[0]
    return f"{label} ({confidence*100:.1f}% confidence)"
# Gradio front end: a start button reveals the input/result row, and the
# analyze button sends the slider values to predict_diabetes.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    
    gr.Markdown(
    """
     Diabetes Prediction test
    
     Welcome! Click the button below and enter your medical details to begin your diabetes prediction.
    
    """)
    
    start_btn = gr.Button(" Test Begin ", size="lg", variant="primary")
    
    # Created hidden; the start button's click handler below makes it visible.
    with gr.Row(visible=False) as assessment_section:
        
        # Left column: one slider per model input.
        with gr.Column():
            gr.Markdown("Medical Parameters")
            
            glucose = gr.Slider(70, 200, value=120, label="Glucose Level")
            bp = gr.Slider(60, 120, value=80, label="Blood Pressure")
            
            # Diabetes pedigree slider, set off by horizontal rules.
            gr.Markdown("---")
            gr.Markdown("Diabetes Pedigree Function")
            dpf = gr.Slider(0.0, 2.0, value=0.5, label="Genetic Risk Score")
            gr.Markdown("---")
            
            # NOTE(review): the slider allows 0, but training replaced zeros in
            # this column with imputed values - confirm 0 is a sensible input.
            insulin = gr.Slider(0, 300, value=100, label="Insulin Level")
            bmi = gr.Slider(15, 40, value=25, label="BMI")
        
        # Right column: trigger button and read-only result box.
        with gr.Column():
            gr.Markdown("Results")
            predict_btn = gr.Button("Analyze Diabetes Risk", size="lg", variant="secondary")
            result = gr.Textbox(label="Risk Assessment", interactive=False, lines=4)

    
        # Disclaimer column.
        # NOTE(review): this column is nested inside the hidden Row above, so
        # presumably it is not shown until "Test Begin" is clicked - confirm
        # that is intended.
        with gr.Column(visible=True) as note_section:
            gr.Markdown(
                """
                Important Note:
                -This test is for eductional and testing purpose only hence this tool provides bsic assessment based on machine learning, it doesnt replace experts for accurate diagnosis.Always consult with healthcare provider for accurate diagnosis.
                """)
            
    def show_assessment():
        """Return an update that sets the output component to visible."""
        return gr.update(visible=True)
    
    # Reveal the assessment row when the start button is clicked.
    start_btn.click(
        fn=show_assessment,
        outputs=assessment_section
    )
    
    # The input order here must match predict_diabetes' parameter order.
    predict_btn.click(
        fn=predict_diabetes,
        inputs=[glucose, bp, dpf, insulin, bmi],
        outputs=result
    )

# share=True also requests a public share link in addition to the local URL
# (the recorded output shows a gradio.live address).
demo.launch(share=True)


* Running on local URL:  http://127.0.0.1:7861
* Running on public URL: https://bedd7431a7576ccdba.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [1]:
import os
# Show the kernel's current working directory, where the files saved above land.
print(f"your proect is in: {os.getcwd()}")

your proect is in: C:\Users\Sarika
