# In [None]:
# Step 1: Setup and Data Loading

# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import numpy as np

# Load the dataset. The CSV has no header row, so the column names are supplied
# explicitly; the target variable ('Outcome') is the last column.
# To use your own data, point `url` at a CSV file with the same layout.
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv'
column_names = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome']
data = pd.read_csv(url, names=column_names)

# Display the first few rows of the dataset.
# A bare `data.head()` is only rendered when it is the last expression of a
# notebook cell, so print it explicitly to make it visible everywhere.
print(data.head())

# Step 2: Data Preprocessing

# NOTE(review): in this dataset missing measurements are conventionally recorded
# as 0 rather than NaN (a glucose level or BMI of 0 is not physiologically
# plausible) -- confirm against the dataset description. Convert those zeros to
# NaN so they are actually detected and imputed below; without this step the
# missing-value check reports nothing and the imputation is a no-op.
zero_means_missing = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']
data[zero_means_missing] = data[zero_means_missing].replace(0, np.nan)

# Check for missing values
print(data.isnull().sum())

# Split the data into features (X) and target (y)
X = data.iloc[:, :-1]  # Features
y = data.iloc[:, -1]   # Target variable (diagnosis)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Handle missing values: for simplicity, fill them with the column mean.
# The means are computed on the training split only and then reused for the
# test split, so no information from the test set leaks into training.
feature_means = X_train.mean()
X_train = X_train.fillna(feature_means)
X_test = X_test.fillna(feature_means)

# Step 3: Model Training

# Initialize the RandomForestClassifier (fixed seed for reproducible results)
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model
model.fit(X_train, y_train)

# Step 4: Evaluation

# Make predictions on the testing set
y_pred = model.predict(X_test)

# Evaluate the model's performance
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Print classification report
print(classification_report(y_test, y_pred))

# Step 5: Prediction

# Make predictions with new data (replace this with new patient data).
# Values are given in the same order as the feature columns.
new_patient_data = np.array([[5, 116, 74, 0, 0, 25.6, 0.201, 30]])

# Wrap the sample in a DataFrame carrying the training column names: the model
# was fitted on a DataFrame, and predicting on a bare array makes scikit-learn
# warn that X does not have valid feature names.
new_patient = pd.DataFrame(new_patient_data, columns=X_train.columns)

# Apply the same preprocessing as for the training data: zeros in the
# measurement columns are treated as missing and filled with the training means.
new_patient[zero_means_missing] = new_patient[zero_means_missing].replace(0, np.nan)
new_patient = new_patient.fillna(feature_means)

# Make prediction
new_prediction = model.predict(new_patient)
print(f"New patient diagnosis: {'Disease' if new_prediction[0] == 1 else 'No Disease'}")
