In [None]:
import pandas as pd  # A2
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Perceptron
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report
from scipy.stats import uniform, randint

# ---- Data preparation ----
# Read the feature table from disk.
data_path = '/mnt/data/image_datasets_1.csv'
df = pd.read_csv(data_path)

# Separate the target from the predictors.
# NOTE: 'label' is a placeholder; substitute the real name of the target column.
y = df['label']
X = df.drop(columns=['label'])

# Hold out 20% of the rows for the final evaluation (fixed seed for repeatability).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics on the training rows only, then apply the
# same transformation to both partitions.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Define parameter grids
# scipy's uniform(loc, scale) samples from [loc, loc + scale], so 'alpha' is
# drawn from [0.0001, 0.1001]; randint(low, high) excludes `high`, so
# 'max_iter' is drawn from 100..999.
perceptron_param_grid = {
    'penalty': [None, 'l2', 'l1', 'elasticnet'],
    'alpha': uniform(0.0001, 0.1),
    'max_iter': randint(100, 1000),
    'tol': [1e-4, 1e-3, 1e-2],
    'shuffle': [True, False]
}

mlp_param_grid = {
    'hidden_layer_sizes': [(50,), (100,), (50, 50), (100, 100)],
    'activation': ['tanh', 'relu'],
    'solver': ['adam', 'sgd'],
    'alpha': uniform(0.0001, 0.1),
    'learning_rate': ['constant', 'adaptive']
}

# Initialize models
# Seed the estimators themselves, not only the split and the searches:
# without it, weight initialisation and sample shuffling differ on every run
# and the reported "best" hyperparameters are not reproducible.
perceptron = Perceptron(random_state=42)
mlp = MLPClassifier(max_iter=1000, random_state=42)

# RandomizedSearchCV for Perceptron: 20 sampled candidates, 5-fold CV, all cores
random_search_perceptron = RandomizedSearchCV(
    perceptron, perceptron_param_grid, n_iter=20, cv=5, random_state=42, n_jobs=-1
)
random_search_perceptron.fit(X_train, y_train)

# RandomizedSearchCV for MLPClassifier: same search budget as above
random_search_mlp = RandomizedSearchCV(
    mlp, mlp_param_grid, n_iter=20, cv=5, random_state=42, n_jobs=-1
)
random_search_mlp.fit(X_train, y_train)

# Report the winning configuration found by each search
print("Best Hyperparameters for Perceptron:", random_search_perceptron.best_params_)
print("Best Hyperparameters for MLPClassifier:", random_search_mlp.best_params_)

# Score the held-out test set with the refitted best estimators
y_pred_perceptron = random_search_perceptron.predict(X_test)
y_pred_mlp = random_search_mlp.predict(X_test)

# One per-class precision/recall/F1 report per model, under its own heading
reports = (
    ("\nClassification Report for Perceptron:", y_pred_perceptron),
    ("\nClassification Report for MLPClassifier:", y_pred_mlp),
)
for heading, predictions in reports:
    print(heading)
    print(classification_report(y_test, predictions))


In [None]:
import pandas as pd  # A3
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.linear_model import Perceptron
from sklearn.neural_network import MLPClassifier

# Load the dataset into a DataFrame
data_path = '/mnt/data/image_datasets_1.csv'
df = pd.read_csv(data_path)

# Features are every column except the target.
# ('label' is a placeholder: replace it with the actual target column name.)
X, y = df.drop(columns='label'), df['label']

# 80/20 train/test partition with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize: statistics come from the training partition and are reused for the test partition
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Initialize models
# Every stochastic estimator receives the same seed so that the comparison
# table is reproducible from run to run (the train/test split above is
# already seeded). GaussianNB has no random component and takes no seed.
SEED = 42
models = {
    'SVM': SVC(probability=True, random_state=SEED),  # probability=True enables predict_proba
    'Decision Tree': DecisionTreeClassifier(random_state=SEED),
    'Random Forest': RandomForestClassifier(random_state=SEED),
    'AdaBoost': AdaBoostClassifier(random_state=SEED),
    'XGBoost': XGBClassifier(eval_metric='logloss', use_label_encoder=False, random_state=SEED),
    'CatBoost': CatBoostClassifier(verbose=0, random_seed=SEED),
    'Naive Bayes': GaussianNB(),
    'Perceptron': Perceptron(random_state=SEED),
    'MLP': MLPClassifier(max_iter=1000, random_state=SEED)
}

# Define a function to compute multiple performance metrics
def evaluate_model(y_test, y_pred, y_pred_proba=None):
    """Compute accuracy, weighted precision/recall/F1 and (optionally) ROC AUC.

    Parameters
    ----------
    y_test : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels.
    y_pred_proba : array-like or None, optional
        Class scores as returned by ``predict_proba``. When omitted, ROC AUC
        is not computed.

    Returns
    -------
    dict
        Keys ``'accuracy'``, ``'precision'``, ``'recall'``, ``'f1_score'`` and
        ``'roc_auc'`` (``None`` when no scores were supplied).
    """
    metrics = {
        'accuracy': accuracy_score(y_test, y_pred),
        # zero_division=0 is applied uniformly so classes that are never
        # predicted (or never present) score 0 without emitting warnings.
        'precision': precision_score(y_test, y_pred, average='weighted', zero_division=0),
        'recall': recall_score(y_test, y_pred, average='weighted', zero_division=0),
        'f1_score': f1_score(y_test, y_pred, average='weighted', zero_division=0),
        'roc_auc': None,
    }

    if y_pred_proba is not None:
        scores = np.asarray(y_pred_proba)
        # For a binary target roc_auc_score needs a 1-D score vector; handing
        # it the two-column predict_proba matrix raises a ValueError. Column 1
        # holds the probability of the greater (positive) class label.
        if scores.ndim == 2 and scores.shape[1] == 2:
            scores = scores[:, 1]
        metrics['roc_auc'] = roc_auc_score(y_test, scores, multi_class='ovr')

    return metrics

# Train, predict, and evaluate each model, collecting one record per model.
# The table is built once at the end: DataFrame.append was removed in
# pandas 2.0, and growing a frame row by row copies it on every iteration.
records = []
for model_name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Class probabilities feed ROC AUC; estimators without predict_proba
    # (Perceptron, for instance) are scored without it.
    y_pred_proba = model.predict_proba(X_test) if hasattr(model, 'predict_proba') else None
    metrics = evaluate_model(y_test, y_pred, y_pred_proba)

    records.append({
        'Model': model_name,
        'Accuracy': metrics['accuracy'],
        'Precision': metrics['precision'],
        'Recall': metrics['recall'],
        'F1 Score': metrics['f1_score'],
        'ROC AUC': metrics['roc_auc']
    })

# Explicit columns keep the layout stable even if no model was evaluated
results = pd.DataFrame(
    records,
    columns=['Model', 'Accuracy', 'Precision', 'Recall', 'F1 Score', 'ROC AUC']
)

# Display the results
print(results)


In [None]:
import shap  # 01
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
import matplotlib.pyplot as plt

# Read the CSV that holds the features and the target
data_path = '/mnt/data/image_datasets_1.csv'
df = pd.read_csv(data_path)

# Target column vs. feature matrix.
# 'label' is assumed to be the target; replace it with the actual column name.
y = df['label']
X = df.drop(labels='label', axis=1)

# Reserve one fifth of the samples for testing (seeded split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Fit the scaler on the training samples, then scale both sets with it
scaler = StandardScaler()
X_train = scaler.fit(X_train).transform(X_train)
X_test = scaler.transform(X_test)

# Initialize and train a RandomForestClassifier.
# The model is seeded so the fitted trees, and therefore the SHAP
# attributions computed from them, are identical on every run.
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

# Initialize and train an XGBClassifier (seeded for the same reason)
xgb_model = XGBClassifier(eval_metric='logloss', use_label_encoder=False, random_state=42)
xgb_model.fit(X_train, y_train)

# Column names are lost when StandardScaler returns plain arrays, so they are
# handed to the explainers explicitly and travel with the Explanation objects.
feature_names = list(X.columns)


def _as_single_output(explanation):
    """Return a 2-D (samples x features) view of a SHAP Explanation.

    Classifiers that report one attribution per class yield values of shape
    (samples, features, classes). The summary and bar plots need 2-D input,
    so the last class is selected in that case; 2-D explanations are returned
    unchanged. Slice the Explanation yourself to inspect another class.
    """
    if explanation.values.ndim == 3:
        last_class = explanation.values.shape[2] - 1
        return explanation[:, :, last_class]
    return explanation


# SHAP Explanation for RandomForestClassifier (X_train is the background data)
explainer_rf = shap.Explainer(rf_model, X_train, feature_names=feature_names)
shap_values_rf = explainer_rf(X_test)

# SHAP Explanation for XGBClassifier
explainer_xgb = shap.Explainer(xgb_model, X_train, feature_names=feature_names)
shap_values_xgb = explainer_xgb(X_test)

# 2-D explanations used by every plot below
rf_plot_values = _as_single_output(shap_values_rf)
xgb_plot_values = _as_single_output(shap_values_xgb)

# Plot SHAP summary plot for RandomForest
shap.summary_plot(rf_plot_values.values, X_test, feature_names=feature_names)

# Plot SHAP summary plot for XGBoost
shap.summary_plot(xgb_plot_values.values, X_test, feature_names=feature_names)

# SHAP feature importance bar plot for RandomForest.
# shap.plots.bar has no `feature_names` argument; the names come from the
# Explanation object itself.
shap.plots.bar(rf_plot_values)

# SHAP feature importance bar plot for XGBoost
shap.plots.bar(xgb_plot_values)


In [None]:
import pandas as pd  # 02
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from lime.lime_tabular import LimeTabularExplainer

# Dataset location and load
data_path = '/mnt/data/image_datasets_1.csv'
df = pd.read_csv(data_path)

# Split the frame into predictors (X) and target (y).
# Replace 'label' with the actual name of the target column if it differs.
X = df.drop(columns=['label'])
y = df['label']

# Seeded 80/20 split into training and test partitions
split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split

# Scale features using statistics estimated from the training partition only
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Train a RandomForestClassifier (seeded so the explained model is reproducible)
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

# Initialize LIME Explainer.
# - feature_names come from X, the frame the model was actually trained on;
#   df.columns[:-1] is only correct when the label happens to be the last column.
# - class_names follow rf_model.classes_, the order of predict_proba's columns.
# - random_state makes LIME's perturbation sampling repeatable.
explainer = LimeTabularExplainer(
    X_train,
    feature_names=list(X.columns),
    class_names=[str(cls) for cls in rf_model.classes_],
    mode='classification',
    discretize_continuous=True,
    random_state=42,
)

# Choose a specific instance from the test set to explain
instance = X_test[0].reshape(1, -1)

# Generate explanation for the chosen instance (LIME expects a 1-D row)
exp = explainer.explain_instance(instance.flatten(), rf_model.predict_proba, num_features=5)

# Show the explanation in the notebook, including the feature-value table
exp.show_in_notebook(show_table=True)

# Or you can display the explanation as a graph
exp.as_pyplot_figure()


In [None]:
import pandas as pd  # 03
import numpy as np
from sklearn.model_selection import train_test_split, RandomizedSearchCV, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score
import time

# Pull the dataset into memory
data_path = '/mnt/data/image_datasets_1.csv'
df = pd.read_csv(data_path)

# 'label' is treated as the target (replace with the actual column name);
# all remaining columns are used as features.
X = df.drop(columns='label')
y = df['label']

# Train/test partition: 20% held out, seed fixed at 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
)

# Standardize features; the scaler only ever sees the training partition when fitting
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Initialize a RandomForestClassifier.
# The estimator is seeded so that both searches score identical forests for
# identical hyperparameters; otherwise the comparison between the two search
# strategies is blurred by run-to-run noise.
rf = RandomForestClassifier(random_state=42)

# Hyperparameter grid shared by both searches (3*3*3*3*2 = 162 combinations)
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [5, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}

# RandomizedSearchCV: samples 10 of the combinations
random_search = RandomizedSearchCV(estimator=rf, param_distributions=param_grid, n_iter=10,
                                   cv=3, verbose=1, random_state=42, n_jobs=-1)

# perf_counter is monotonic, so the elapsed time cannot be distorted by
# system clock adjustments the way time.time() differences can.
start_time = time.perf_counter()
random_search.fit(X_train, y_train)
end_time = time.perf_counter()
random_search_time = end_time - start_time
random_best_params = random_search.best_params_
random_best_score = random_search.best_score_

# GridSearchCV: evaluates every combination in the grid
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=3, verbose=1, n_jobs=-1)

start_time = time.perf_counter()
grid_search.fit(X_train, y_train)
end_time = time.perf_counter()
grid_search_time = end_time - start_time
grid_best_params = grid_search.best_params_
grid_best_score = grid_search.best_score_

# Held-out performance of the best estimator found by the randomized search
y_pred_random = random_search.predict(X_test)
random_accuracy = accuracy_score(y_test, y_pred_random)
random_f1 = f1_score(y_test, y_pred_random, average='weighted')

# Held-out performance of the best estimator found by the exhaustive search
y_pred_grid = grid_search.predict(X_test)
grid_accuracy = accuracy_score(y_test, y_pred_grid)
grid_f1 = f1_score(y_test, y_pred_grid, average='weighted')

# Side-by-side summary: one row per search strategy
summary_columns = {
    'Search Method': ['RandomizedSearchCV', 'GridSearchCV'],
    'Best Score (CV)': [random_best_score, grid_best_score],
    'Best Params': [random_best_params, grid_best_params],
    'Test Accuracy': [random_accuracy, grid_accuracy],
    'Test F1-Score': [random_f1, grid_f1],
    'Search Time (seconds)': [random_search_time, grid_search_time],
}
results = pd.DataFrame(summary_columns)

print(results)
