# Import Necessary Libraries


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import (
    GradientBoostingClassifier,
    RandomForestClassifier,
    StackingClassifier,
)
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [None]:
# Read the MonkeyPox CSV from the Kaggle input directory into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer='/kaggle/input/dataset-monkeypox/MonkeyPox.csv',
)

# Define Base Models

In [None]:
# Meta model (final decision maker): a logistic regression that is handed to
# the stacking classifier as its final estimator.
meta_model = LogisticRegression()

# Base models for the stack, as (name, estimator) pairs:
# a Random Forest and a Gradient Boosting classifier, both seeded for
# reproducible results.
base_models = [
    (
        'rf',
        RandomForestClassifier(n_estimators=200, random_state=42),
    ),
    (
        'gb',
        GradientBoostingClassifier(n_estimators=100, random_state=42),
    ),
]

In [None]:
# Build the stacking ensemble from the base models and the meta model,
# using 5-fold cross-validation (cv=5) inside the stack.
# NOTE: X_train / y_train are produced by the "Train-Test Split" and
# "Feature Scaling" cells that appear further down in this file, so those
# cells have to be executed before this one.
stack_model = StackingClassifier(
    cv=5,
    final_estimator=meta_model,
    estimators=base_models,
)

# Fit the whole ensemble on the training split.
stack_model.fit(X_train, y_train)

In [None]:
# Predict labels for the held-out test split with the fitted stacked model.
y_pred_stack = stack_model.predict(X=X_test)

In [None]:
# Share of test samples the stacked model labelled correctly, shown as a
# percentage with two decimals.
accuracy_stack = accuracy_score(y_true=y_test, y_pred=y_pred_stack)
print(f"Stacked Model Accuracy: {accuracy_stack * 100:.2f}%")

# Per-class precision / recall / F1 summary for the same predictions.
print(classification_report(y_true=y_test, y_pred=y_pred_stack))

# Handle Missing Values

In [None]:
# Fill missing values in the numeric columns with each column's mean.
# numeric_only=True limits the mean to numeric columns: without it,
# pandas >= 2.0 raises a TypeError when the frame still contains non-numeric
# (object) columns, and any such columns are only label-encoded in a later
# cell. Non-numeric columns are left untouched by this step.
df.fillna(df.mean(numeric_only=True), inplace=True)

In [None]:
# Print the number of missing entries remaining in each column.
print(df.isna().sum())

# Encode Categorical Variables

In [None]:
# Replace every object-dtype column with integer codes, one LabelEncoder per
# column. The fitted encoders are kept in `label_encoders`, keyed by column
# name, so they can be reused later.
label_encoders = {}
for column in df.select_dtypes(include='object'):
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le

In [None]:
# Show the first five rows to inspect the encoded columns.
print(df.head(5))

# Feature Selection & Target Variable

In [None]:
# The target is taken by position: whatever column comes last in the frame.
# NOTE(review): this assumes the label is the final column of the CSV —
# confirm against the dataset.
target_column = df.columns[-1]
print("Target Column:", target_column)

In [None]:
# Target vector: the single target column.
y = df[target_column]
# Feature matrix: every remaining column.
X = df.drop(columns=target_column)

In [None]:
# Report the dimensions of the feature matrix and the target vector.
print(f"Feature Matrix Shape: {X.shape}")
print(f"Target Variable Shape: {y.shape}")

# Train-Test Split

In [None]:
# Hold out 20% of the rows for testing and train on the remaining 80%.
# The fixed random_state makes the shuffle reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Report the dimensions of the training and testing feature matrices.
print(f"Training Data Shape: {X_train.shape}")
print(f"Testing Data Shape: {X_test.shape}")

# Feature Scaling

In [None]:
# Standardize the features. The scaler is fitted on the training split only
# and the same fitted scaler is then applied to both splits, so the test data
# does not influence the scaling statistics.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Progress message only: this prints a fixed string and does not inspect the
# scaled data, so it confirms the cell ran rather than verifying the result.
print("Feature Scaling Done!")

# Train Machine Learning Model

In [None]:
# Baseline classifier: a Random Forest with 100 trees and a fixed seed,
# fitted on the scaled training split.
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)
model.fit(X=X_train, y=y_train)

In [None]:
# Progress message only; the fitted model itself is not checked here.
print("Model Training Completed!")

# Make Predictions

In [None]:
# Predict labels for the test split with the baseline Random Forest.
y_pred = model.predict(X=X_test)

In [None]:
# Show the first ten predicted labels as a quick sanity check.
print(f"Predictions: {y_pred[:10]}")

# Evaluate Model Performance

In [None]:
# Share of test samples the baseline model labelled correctly, shown as a
# percentage with two decimals.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")

In [None]:
# Per-class precision / recall / F1 summary for the baseline predictions.
print(classification_report(y_true=y_test, y_pred=y_pred))

In [None]:
# Hyper-parameter search for the Random Forest.
# The grid is defined in this cell because `param_grid` is not defined
# anywhere else in this file, so the GridSearchCV call below would otherwise
# fail with a NameError. The values are a starting point to tune as needed;
# each list includes the baseline model's setting (n_estimators=100 with
# scikit-learn's default max_depth=None and min_samples_split=2), so the
# baseline configuration is one of the candidates.
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
}

# Evaluate every parameter combination with 3-fold cross-validation on the
# training split, scored by accuracy, using all available cores (n_jobs=-1).
rf = RandomForestClassifier(random_state=42)
grid_search = GridSearchCV(rf, param_grid, cv=3, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)

In [None]:
# Best model
# best_estimator_ is the estimator GridSearchCV selected; with the default
# refit=True (no refit argument is passed to GridSearchCV above) it has
# already been refit on the training split with the best-scoring parameters.
best_model = grid_search.best_estimator_

In [None]:
# Fit the selected estimator on the training split.
# NOTE(review): GridSearchCV's default refit=True means this estimator was
# already fitted on the same data, so this call looks redundant — confirm
# before removing it.
best_model.fit(X=X_train, y=y_train)

In [None]:
# Predict labels for the test split with the tuned model. This overwrites the
# `y_pred` produced earlier by the baseline Random Forest.
y_pred = best_model.predict(X=X_test)

In [None]:
# Test-set accuracy of the tuned model, shown as a percentage with two
# decimals. This overwrites the baseline model's `accuracy` value.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Improved Model Accuracy: {accuracy * 100:.2f}%")

In [None]:
# Per-class precision / recall / F1 summary for the tuned model's predictions.
print(classification_report(y_true=y_test, y_pred=y_pred))