In [87]:
! pip install xgboost



In [88]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from xgboost import XGBClassifier


In [89]:
# Load the dataset; the path is relative to the notebook's working directory.
df = pd.read_csv('diabetes.csv')
# Bare last expression so the notebook renders the frame as the cell output.
df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [90]:
# Count the missing values in each column.
df.isnull().sum()

Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64

In [91]:
# Separate the label column from the predictor columns.
y = df['Outcome']
X = df.drop(columns=['Outcome'])

In [92]:
# Hold out 20% of the rows for evaluation. The split is stratified on y and
# uses a fixed seed so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [93]:
# Configure the XGBoost classifier with a fixed seed, then train it on the
# training split.
xgb = XGBClassifier(
    max_depth=3,
    learning_rate=0.1,
    n_estimators=100,
    random_state=0,
)
xgb.fit(X_train, y_train)

In [94]:
# Predict class labels for the held-out test rows with the fitted XGBoost model.
y_pred = xgb.predict(X_test)

In [95]:
# Score the XGBoost predictions against the held-out labels.
xgb_accuracy = accuracy_score(y_test, y_pred)
xgb_confusion = confusion_matrix(y_test, y_pred)

print("Accuracy:", xgb_accuracy)
print("Confusion Matrix:\n", xgb_confusion)

Accuracy: 0.7532467532467533
Confusion Matrix:
 [[84 16]
 [22 32]]


In [96]:
# Compare the relative class frequencies of the two splits.
train_share = y_train.value_counts(normalize=True)
test_share = y_test.value_counts(normalize=True)

print("Train class distribution:\n", train_share)
print("Test class distribution:\n", test_share)

Train class distribution:
 Outcome
0    0.651466
1    0.348534
Name: proportion, dtype: float64
Test class distribution:
 Outcome
0    0.649351
1    0.350649
Name: proportion, dtype: float64


In [97]:
# Baselines for comparison: a single decision tree and a random forest

In [98]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

# Train both baselines on the same training split. fit() returns the
# estimator itself, so construction and training are chained.
dt = DecisionTreeClassifier(random_state=0).fit(X_train, y_train)
rf = RandomForestClassifier(n_estimators=100, random_state=0).fit(X_train, y_train)

# Predict the held-out rows with each baseline.
y_pred_dt = dt.predict(X_test)
y_pred_rf = rf.predict(X_test)

# Report test accuracy, decision tree first, then random forest.
print("Decision Tree Accuracy:", accuracy_score(y_test, y_pred_dt))
print("Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))

Decision Tree Accuracy: 0.7337662337662337
Random Forest Accuracy: 0.7467532467532467


In [99]:
import os

from joblib import dump, load  # dump writes a model to disk; load reads it back

# Create the target directory if it is missing; open() would otherwise raise
# FileNotFoundError when the notebook runs in a fresh checkout.
os.makedirs('models', exist_ok=True)

# Persist the trained XGBoost classifier. The file is named xgboost.pkl, so
# the object written must be `xgb`, not the random forest `rf`.
with open('models/xgboost.pkl', 'wb') as f:
    dump(xgb, f)

# Printed after the with-block, i.e. once the file has been closed.
print("Model saved successfully!")

Model saved successfully!


In [100]:
import pandas as pd
import gradio as gr

# Prediction function
def prediction(Pregnancies, Glucose, BloodPressure, SkinThickness,
            Insulin, BMI, DiabetesPedigreeFunction, Age):
    """Predict the diabetes outcome for one set of measurements.

    The eight arguments are placed in a one-row DataFrame whose column names
    equal the parameter names. The model stored at ``models/xgboost.pkl`` is
    loaded on every call and asked to predict that row.

    Returns:
        str: ``"Predicted Outcome: Diabetic (1)"`` when the first predicted
        value equals 1, otherwise ``"Predicted Outcome: Non-Diabetic (0)"``.
    """
    # Collect the inputs by column name, keeping the declaration order.
    row = {
        'Pregnancies': Pregnancies,
        'Glucose': Glucose,
        'BloodPressure': BloodPressure,
        'SkinThickness': SkinThickness,
        'Insulin': Insulin,
        'BMI': BMI,
        'DiabetesPedigreeFunction': DiabetesPedigreeFunction,
        'Age': Age,
    }
    # Each value becomes a one-element column, giving a single-row frame.
    features = pd.DataFrame({column: [value] for column, value in row.items()})

    # NOTE: joblib's load unpickles the file, so only point it at model files
    # that come from a trusted source.
    with open('models/xgboost.pkl', 'rb') as model_file:
        model = load(model_file)

    predicted = model.predict(features)

    # Translate the first (and only) predicted label into display text.
    if predicted[0] == 1:
        label = 'Diabetic (1)'
    else:
        label = 'Non-Diabetic (0)'
    return f"Predicted Outcome: {label}"

# Build Gradio interface
ui = gr.Interface(
    fn=prediction,
    title="Diabetes Outcome Prediction",
    # Eight numeric fields, one per argument of prediction(), in order.
    inputs=['number'] * 8,
    outputs=gr.Text(),
    examples=[[2, 197, 70, 45, 543, 30.5, 0.158, 53]],
)

# Start the app.
ui.launch()

* Running on local URL:  http://127.0.0.1:7867
* To create a public link, set `share=True` in `launch()`.


