A2.


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
import numpy as np

# Load dataset
df = pd.read_excel("/content/markrting campain.xlsx")  # Update with actual file path

# Handle missing values in 'Income' by filling them with the column median.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on df['Income']: that chained in-place form acts on an
# intermediate object, raises a FutureWarning, and will no longer modify df in
# pandas 3.0.
# NOTE(review): the median is computed on the full dataset, i.e. before the
# train/test split below.
df['Income'] = df['Income'].fillna(df['Income'].median())

# Encode categorical variables as integer codes.
# The same LabelEncoder instance is refit for every column, so only the encoded
# values are kept; the fitted label-to-code mappings are not retained.
categorical_features = ['Education', 'Marital_Status']
df[categorical_features] = df[categorical_features].apply(LabelEncoder().fit_transform)

# Define features and target: 'Response' is the label; the other dropped
# columns are excluded from the feature matrix.
X = df.drop(columns=['ID', 'Dt_Customer', 'Response', 'Z_CostContact', 'Z_Revenue'])
y = df['Response']

# Split data into train and test sets (80/20, stratified on the target so both
# parts keep the same class proportions; fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Define hyperparameter search space for RandomizedSearchCV
param_dist = {
    'n_estimators': [50, 100, 200, 300, 400],
    'max_depth': [10, 20, 30, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}

# Create Random Forest classifier
rf = RandomForestClassifier(random_state=42)

# Perform RandomizedSearchCV: 20 sampled parameter combinations, each evaluated
# with 5-fold cross-validation on the training set, using all available cores.
# NOTE(review): no `scoring` is given, so candidates are ranked by the
# estimator's default score (accuracy for classifiers) — confirm that this is
# the intended selection metric for this target.
tuner = RandomizedSearchCV(rf, param_distributions=param_dist, n_iter=20, cv=5, n_jobs=-1, verbose=1, random_state=42)
tuner.fit(X_train, y_train)

# Output best parameters and best mean cross-validated score
print("Best Parameters:", tuner.best_params_)
print("Best Score:", tuner.best_score_)


Fitting 5 folds for each of 20 candidates, totalling 100 fits


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Income'].fillna(df['Income'].median(), inplace=True)


Best Parameters: {'n_estimators': 400, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': True}
Best Score: 0.8805807566019824


A3.

In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np

# Load dataset
df = pd.read_excel("/content/markrting campain.xlsx")  # Update with actual file path

# Handle missing values in 'Income' by filling them with the column median.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on df['Income']: that chained in-place form acts on an
# intermediate object, raises a FutureWarning, and will no longer modify df in
# pandas 3.0.
# NOTE(review): the median is computed on the full dataset, i.e. before the
# train/test split below.
df['Income'] = df['Income'].fillna(df['Income'].median())

# Encode categorical variables as integer codes.
# The same LabelEncoder instance is refit for every column, so only the encoded
# values are kept; the fitted label-to-code mappings are not retained.
categorical_features = ['Education', 'Marital_Status']
df[categorical_features] = df[categorical_features].apply(LabelEncoder().fit_transform)

# Define features and target: 'Response' is the label; the other dropped
# columns are excluded from the feature matrix.
X = df.drop(columns=['ID', 'Dt_Customer', 'Response', 'Z_CostContact', 'Z_Revenue'])
y = df['Response']

# Split data into train and test sets (80/20, stratified on the target so both
# parts keep the same class proportions; fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Define classifiers (library defaults apart from the fixed random seeds).
# `use_label_encoder` is no longer passed to XGBClassifier: the installed
# xgboost reports it as an unused parameter.
# NOTE(review): the features are fed to every model unscaled; SVC and
# MLPClassifier are typically sensitive to feature scale — consider wrapping
# them in a scaling pipeline before drawing conclusions from their scores.
classifiers = {
    "Random Forest": RandomForestClassifier(random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "SVM": SVC(random_state=42),
    "Naïve Bayes": GaussianNB(),
    "XGBoost": XGBClassifier(eval_metric='logloss', random_state=42),
    "AdaBoost": AdaBoostClassifier(random_state=42),
    "MLP": MLPClassifier(random_state=42)
}

# Evaluate classifiers: fit each model on the training split, then score its
# predictions on both the training and the test split.
results = []
for name, model in classifiers.items():
    model.fit(X_train, y_train)
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    # zero_division=0 makes the "no positive predictions" case explicit: the
    # metric is reported as 0.0 (the same value as the default behaviour) but
    # without an undefined-metric warning per call.
    results.append({
        "Model": name,
        "Train Accuracy": accuracy_score(y_train, y_train_pred),
        "Test Accuracy": accuracy_score(y_test, y_test_pred),
        "Train Precision": precision_score(y_train, y_train_pred, zero_division=0),
        "Test Precision": precision_score(y_test, y_test_pred, zero_division=0),
        "Train Recall": recall_score(y_train, y_train_pred, zero_division=0),
        "Test Recall": recall_score(y_test, y_test_pred, zero_division=0),
        "Train F1 Score": f1_score(y_train, y_train_pred, zero_division=0),
        "Test F1 Score": f1_score(y_test, y_test_pred, zero_division=0)
    })

# Create a DataFrame for results (one row per model, one column per metric/split)
results_df = pd.DataFrame(results)
print(results_df)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Income'].fillna(df['Income'].median(), inplace=True)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Parameters: { "use_label_encoder" } are not used.



           Model  Train Accuracy  Test Accuracy  Train Precision  \
0  Random Forest        0.994978       0.881696         0.992366   
1  Decision Tree        0.994978       0.812500         1.000000   
2            SVM        0.851004       0.850446         0.000000   
3    Naïve Bayes        0.781250       0.774554         0.335092   
4        XGBoost        0.994420       0.868304         1.000000   
5       AdaBoost        0.889509       0.875000         0.725490   
6            MLP        0.851562       0.854911         0.513514   

   Test Precision  Train Recall  Test Recall  Train F1 Score  Test F1 Score  
0        0.818182      0.973783     0.268657        0.982987       0.404494  
1        0.380282      0.966292     0.402985        0.982857       0.391304  
2        0.000000      0.000000     0.000000        0.000000       0.000000  
3        0.339623      0.475655     0.537313        0.393189       0.416185  
4        0.617647      0.962547     0.313433        0.980916     