<a href="https://colab.research.google.com/github/karmveer049/cancer-prediction/blob/main/diabetes_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
import io

import joblib
import numpy as np
import pandas as pd
from google.colab import files
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Upload the dataset.
# files.upload() opens Colab's file picker and returns a dict that maps each
# uploaded file name to the raw bytes of that file.
uploaded = files.upload()
if not uploaded:
    raise ValueError('No file was uploaded; upload diabetes.csv and re-run this cell.')

# Colab does not overwrite an existing file: a repeated upload is saved under a
# new name such as "diabetes (5).csv". Reading the hard-coded path from disk
# would therefore silently load an older copy, so parse the bytes that were
# just uploaded instead.
file_path = 'diabetes.csv'  # Expected name of the uploaded file
uploaded_name = file_path if file_path in uploaded else next(iter(uploaded))
df = pd.read_csv(io.BytesIO(uploaded[uploaded_name]))

# Assuming the last column is the target (Outcome)
X = df.iloc[:, :-1]  # Features
y = df.iloc[:, -1]   # Target

# Split the dataset into training and testing sets (80% / 20%, fixed seed so
# the reported accuracy is reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling: fit on the training split only so that no statistics from
# the test split leak into the model, then apply the same transform to both
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train the model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)

# Evaluate the model on the held-out test split
y_pred = model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
print(f'Model Accuracy: {accuracy * 100:.2f}%')

# Save the model and scaler; both are needed later because the model was
# trained on scaled features
joblib.dump(model, 'diabetes_model.pkl')
joblib.dump(scaler, 'scaler.pkl')

# Function for user input and prediction
def predict_diabetes():
    """Prompt for one value per feature and print the model's prediction.

    Reads the module-level ``X`` (for the feature names and their order),
    ``scaler`` and ``model``. Each feature is requested via ``input()``; an
    entry that is not a finite number is rejected and the same feature is
    asked for again instead of aborting the cell with a ``ValueError``.

    Returns:
        None. The result is printed as "The person is diabetic." when the
        predicted class is 1, otherwise "The person is not diabetic.".
    """
    print("Enter the values for the following features:")
    user_input = []
    for col in X.columns:
        # Keep asking until the entry parses as a finite float.
        while True:
            raw = input(f'{col}: ')
            try:
                val = float(raw)
            except ValueError:
                print(f"'{raw}' is not a number. Please enter a numeric value for {col}.")
                continue
            if not np.isfinite(val):
                # float() accepts "nan" and "inf", which the scaler cannot use.
                print(f"'{raw}' is not a finite number. Please enter a numeric value for {col}.")
                continue
            break
        user_input.append(val)

    # The scaler was fitted on a DataFrame, so hand it a one-row DataFrame with
    # the same column names; a bare ndarray triggers scikit-learn's
    # "X does not have valid feature names" warning.
    user_frame = pd.DataFrame([user_input], columns=X.columns)
    user_array_scaled = scaler.transform(user_frame)
    prediction = model.predict(user_array_scaled)

    if prediction[0] == 1:
        print("The person is diabetic.")
    else:
        print("The person is not diabetic.")

# Take user input immediately after training.
# NOTE: predict_diabetes() reads every feature through input(), so this cell
# waits here until a value has been entered for each column of X.
predict_diabetes()


Saving diabetes.csv to diabetes (5).csv
Model Accuracy: 72.08%
Enter the values for the following features:
Pregnancies: 34
Glucose: 768
BloodPressure: 35
SkinThickness: 78
Insulin: 98
BMI: 76
DiabetesPedigreeFunction: .9
Age: 78
The person is diabetic.


