<a href="https://colab.research.google.com/github/sujalkumeriya59/Machine-Learning/blob/main/Practical_7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the Pima Indians diabetes CSV. The file is read with header=None,
# so the column names are supplied explicitly.
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
columns = [
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
    'Outcome',
]
df = pd.read_csv(url, names=columns, header=None)

# Target is the 'Outcome' column; every other column is a feature
y = df['Outcome']
X = df.drop(columns='Outcome')

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Standardize the features. The scaler is fitted on the training split only
# and then applied to both splits, so the test rows never influence the
# scaling statistics.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Bagging: an ensemble of decision trees
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier

# Base learner handed to the ensemble
dt = DecisionTreeClassifier(random_state=42)

# Build a 100-estimator bagging ensemble and train it in one step
# (fit() returns the fitted estimator itself).
# NOTE: this keyword is 'estimator' from scikit-learn 1.2 onward; earlier
# releases called it 'base_estimator'.
bagging_model = BaggingClassifier(
    estimator=dt,
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Evaluate on the held-out test split
y_pred_bagging = bagging_model.predict(X_test)
bagging_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_bagging)
print(f"Bagging Accuracy: {bagging_accuracy:.4f}")

In [None]:
# Implement Boosting
from sklearn.ensemble import AdaBoostClassifier

# Weak learner for boosting: a depth-1 decision tree (a "stump")
dt = DecisionTreeClassifier(max_depth=1, random_state=42)

# Initialize AdaBoostClassifier with 100 boosting rounds.
# The stump is passed explicitly via 'estimator' so that the base learner
# configured above is the one actually boosted, instead of leaving `dt`
# unused and silently relying on the library default.
# ('estimator' is the scikit-learn >= 1.2 keyword, as used for bagging.)
boosting_model = AdaBoostClassifier(
    estimator=dt,
    n_estimators=100,
    random_state=42,
)

# Train the model on the scaled training split
boosting_model.fit(X_train, y_train)

# Predict on the held-out test split and report accuracy
y_pred_boosting = boosting_model.predict(X_test)
boosting_accuracy = accuracy_score(y_test, y_pred_boosting)
print(f"Boosting Accuracy: {boosting_accuracy:.4f}")


In [None]:
# Side-by-side summary of the two ensemble accuracies, one per line
print(
    f"Bagging Accuracy: {bagging_accuracy:.4f}",
    f"Boosting Accuracy: {boosting_accuracy:.4f}",
    sep="\n",
)


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the dataset.
# The CSV is read with header=None, so every field needs a name here. Each
# record has nine fields, so all nine names (including 'Age') must be
# listed; otherwise the labels no longer line up with the data.
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
columns = [
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
    'Outcome',
]
df = pd.read_csv(url, header=None, names=columns)

# Create a copy of the DataFrame to avoid modifying the original
data = df.copy()

# Features and target variable (both taken from the copy)
X = data.drop('Outcome', axis=1)
y = data['Outcome']

# Split into training and testing sets (30% held out, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Feature scaling: fit on the training split only, then apply the same
# transformation to the test split
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Ensemble classifiers to compare, keyed by the label used in plots and reports
models = {
    "AdaBoost": AdaBoostClassifier(random_state=42),
    "Random Forest": RandomForestClassifier(random_state=42),
    "Gradient Boosting": GradientBoostingClassifier(random_state=42),
}

# Fit each model, record its test accuracy, and draw its confusion matrix
results = {}
for name, model in models.items():
    # fit() returns the fitted estimator, so prediction can be chained
    y_pred = model.fit(X_train, y_train).predict(X_test)
    results[name] = accuracy_score(y_test, y_pred)

    # Confusion-matrix heatmap annotated with integer counts
    conf_mat = confusion_matrix(y_test, y_pred)
    plt.figure(figsize=(5, 5))
    ax = sns.heatmap(conf_mat, annot=True, fmt="d", cmap="Blues", cbar=False)
    ax.set_title(f"{name} Confusion Matrix")
    ax.set_xlabel("Predicted Label")
    ax.set_ylabel("True Label")
    plt.show()

# Compare the recorded test accuracies in a single bar chart
model_names = list(results)
accuracies = list(results.values())

plt.figure(figsize=(8, 6))
ax = sns.barplot(x=model_names, y=accuracies)
ax.set(title="Model Accuracy Comparison", xlabel="Model", ylabel="Accuracy")
plt.show()

# Per-model classification report on the test split
# (the models were already fitted in the training loop)
for name, model in models.items():
    y_pred = model.predict(X_test)
    report = classification_report(y_test, y_pred)
    print(f"\n{name} Classification Report:")
    print(report)