In [11]:
import pandas as pd
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from scipy.stats import uniform
from sklearn.metrics import accuracy_score

class Perceptron(BaseEstimator):
    def __init__(self, num_features, learning_rate=0.01, epochs=1000):
        self.num_features = num_features
        self.learning_rate = learning_rate
        self.epochs = epochs

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def predict(self, inputs):
        weighted_sum = np.dot(inputs, self.weights) + self.bias
        return self.sigmoid(weighted_sum)

    def train(self, inputs, labels):
        self.weights = np.random.rand(self.num_features)
        self.bias = np.random.rand()
        for _ in range(self.epochs):
            for x, y in zip(inputs, labels):
                prediction = self.predict(x)
                error = y - prediction
                self.weights += self.learning_rate * error * x
                self.bias += self.learning_rate * error

    def fit(self, X, y):
        self.train(X, y)
        return self

    def predict(self, X):
        return np.where(self.sigmoid(np.dot(X, self.weights) + self.bias) >= 0.5, 1, 0)

    def get_params(self, deep=True):
        return {
            'num_features': self.num_features,
            'learning_rate': self.learning_rate,
            'epochs': self.epochs
        }

# Load data from Excel sheet into DataFrame
df = pd.read_excel('customerdata.xlsx')

# Encoding labels to numeric values
label_encoding = {'Yes': 1, 'No': 0}
df['High Value Tx'] = df['High Value Tx'].map(label_encoding)

# Extracting features and labels
inputs = df.drop(columns=['Customer', 'High Value Tx']).values.astype(float)
labels = df['High Value Tx'].values

# Normalize inputs
inputs = inputs / inputs.max(axis=0)

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(inputs, labels, test_size=0.2, random_state=42)

# Define the parameter grid
param_grid = {
    'learning_rate': uniform(0.001, 0.1),
    'epochs': [100, 500, 1000, 2000]
}

# Create an instance of the Perceptron class
perceptron = Perceptron(num_features=X_train.shape[1])

# Create RandomizedSearchCV instance with a dummy scoring function
random_search = RandomizedSearchCV(estimator=perceptron, param_distributions=param_grid, n_iter=10, cv=5, scoring='accuracy')

# Fit RandomizedSearchCV to training data
random_search.fit(X_train, y_train)

# Get the best parameters
best_params = random_search.best_params_
print("Best Parameters:", best_params)

# Initialize perceptron with best parameters
best_perceptron = Perceptron(num_features=X_train.shape[1], **best_params)


# Train the perceptron with best parameters
best_perceptron.fit(X_train, y_train)

# Test the perceptron with best parameters on test data
y_pred = best_perceptron.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy of best perceptron on test data: {accuracy}")


Best Parameters: {'epochs': 100, 'learning_rate': 0.07917120491046367}
Accuracy of best perceptron on test data: 0.5


In [12]:
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from catboost import CatBoostClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def evaluate_classifier(classifier, X, y):
    accuracy = cross_val_score(classifier, X, y, cv=5, scoring='accuracy').mean()
    precision = cross_val_score(classifier, X, y, cv=5, scoring='precision').mean()
    recall = cross_val_score(classifier, X, y, cv=5, scoring='recall').mean()
    f1 = cross_val_score(classifier, X, y, cv=5, scoring='f1').mean()
    return accuracy, precision, recall, f1

# Load data from Excel sheet into DataFrame
df = pd.read_excel('customerdata.xlsx')

# Encoding labels to numeric values
label_encoding = {'Yes': 1, 'No': 0}
df['High Value Tx'] = df['High Value Tx'].map(label_encoding)

# Extracting features and labels
inputs = df.drop(columns=['Customer', 'High Value Tx']).values.astype(float)
labels = df['High Value Tx'].values

# Normalize inputs
inputs = inputs / inputs.max(axis=0)

# Define classifiers
classifiers = {
    "Support Vector Machine": SVC(),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "AdaBoost": AdaBoostClassifier(),
    "Naïve Bayes": GaussianNB(),
    "CatBoost": CatBoostClassifier(logging_level='Silent'),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss')
}

# Evaluate each classifier
results = {}
for clf_name, clf in classifiers.items():
    accuracy, precision, recall, f1 = evaluate_classifier(clf, inputs, labels)
    results[clf_name] = {'Accuracy': accuracy, 'Precision': precision, 'Recall': recall, 'F1 Score': f1}

# Display results in a tabular format
results_df = pd.DataFrame.from_dict(results, orient='index')
print(results_df)




                        Accuracy  Precision  Recall  F1 Score
Support Vector Machine       0.9        1.0     0.9  0.933333
Decision Tree                0.9        1.0     0.9  1.000000
Random Forest                1.0        1.0     0.9  1.000000
AdaBoost                     0.9        1.0     0.9  0.933333
Naïve Bayes                  0.8        0.9     0.9  0.866667
CatBoost                     0.9        1.0     0.9  0.933333
XGBoost                      0.5        0.6     0.9  0.666667
