In [13]:
import time
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score

In [14]:
# Step 1: Locations of the input CSV files (feature table first, target labels second)
dataset_path, class_labels_path = (
    "/content/sample_data/Dataset.csv",
    "/content/sample_data/Target_Labels.csv",
)

In [15]:
# Load the feature table from its CSV file
X = pd.read_csv(dataset_path)

# Step 2: Preprocess the data
# Read the class labels and flatten them straight into a 1D array
Y = pd.read_csv(class_labels_path).values.ravel()

# Encoder instance made available to later cells for categorical feature columns
label_encoder = LabelEncoder()

In [16]:
# Encode every non-numeric (object / string / categorical) column of X as integer codes.
#
# A separate LabelEncoder is fitted per column and kept in `feature_encoders`
# (column name -> fitted encoder). Re-fitting one shared encoder on each column
# would leave only the last column's mapping, making it impossible to decode a
# column or to apply the same mapping to new data later.
feature_encoders = {}
for column in X.columns:
    col_dtype = X[column].dtype
    # `dtype == object` alone misses pandas string-dtype and categorical columns,
    # which would then reach the models unencoded.
    needs_encoding = (
        pd.api.types.is_object_dtype(col_dtype)
        or pd.api.types.is_string_dtype(col_dtype)
        or isinstance(col_dtype, pd.CategoricalDtype)
    )
    if not needs_encoding:
        continue
    column_encoder = LabelEncoder()
    X[column] = column_encoder.fit_transform(X[column])
    feature_encoders[column] = column_encoder

In [17]:
# Step 3: Hold out a fraction of the rows for testing; the fixed seed keeps the split repeatable
test_size = 0.3
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    random_state=42,
    test_size=test_size,
)

In [18]:
# Step 4: Train and test with Neural Network
print("\nTraining with Neural Network...")
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations; time.time() is wall-clock time and can jump if the system clock changes.
start_time = time.perf_counter()

# One hidden layer of 100 units. "(100)" is just the integer 100, not a tuple;
# "(100,)" is the form documented for hidden_layer_sizes and states the intent explicitly.
nn_model = MLPClassifier(hidden_layer_sizes=(100,), activation='logistic', random_state=42)
nn_model.fit(X_train, y_train)
y_pred_nn = nn_model.predict(X_test)
# Stop the clock right after prediction so the reported runtime covers only
# fit + predict, not metric computation or console output.
runtime_nn = time.perf_counter() - start_time

# Metrics for Neural Network (F1 is computed for the class labelled 1)
accuracy_nn = accuracy_score(y_test, y_pred_nn)
confusion_mat_nn = confusion_matrix(y_test, y_pred_nn)
f1_nn = f1_score(y_test, y_pred_nn, pos_label=1)

print(f"Neural Network Accuracy: {round(accuracy_nn * 100, 2)}%")
print("Confusion Matrix (Neural Network):")
print(confusion_mat_nn)
print(f"F1 Score (Neural Network): {f1_nn}")
print(f"Runtime (Neural Network): {runtime_nn} seconds")


Training with Neural Network...
Neural Network Accuracy: 89.03%
Confusion Matrix (Neural Network):
[[1246  209]
 [ 155 1707]]
F1 Score (Neural Network): 0.9036527263102171
Runtime (Neural Network): 13.484713315963745 seconds




In [19]:
# Step 5: Train and test with Random Forests
print("\nTraining with Random Forests...")
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations; time.time() is wall-clock time and can jump if the system clock changes.
start_time = time.perf_counter()

# Small forest (5 trees) using entropy as the split criterion; seed fixed for repeatability.
rf_model = RandomForestClassifier(n_estimators=5, criterion='entropy', random_state=42)
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)
# Stop the clock right after prediction so the reported runtime covers only
# fit + predict, not metric computation or console output.
runtime_rf = time.perf_counter() - start_time

# Metrics for Random Forests (F1 is computed for the class labelled 1)
accuracy_rf = accuracy_score(y_test, y_pred_rf)
confusion_mat_rf = confusion_matrix(y_test, y_pred_rf)
f1_rf = f1_score(y_test, y_pred_rf, pos_label=1)

print(f"Random Forests Accuracy: {round(accuracy_rf * 100, 2)}%")
print("Confusion Matrix (Random Forests):")
print(confusion_mat_rf)
print(f"F1 Score (Random Forests): {f1_rf}")
print(f"Runtime (Random Forests): {runtime_rf} seconds")


Training with Random Forests...
Random Forests Accuracy: 89.63%
Confusion Matrix (Random Forests):
[[1293  162]
 [ 182 1680]]
F1 Score (Random Forests): 0.9071274298056156
Runtime (Random Forests): 0.05184650421142578 seconds


In [27]:
# Step 6: Train and test with Support Vector Machine
print("\nTraining with Support Vector Machine...")
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations; time.time() is wall-clock time and can jump if the system clock changes.
start_time = time.perf_counter()

# Linear-kernel SVC with regularization parameter C=2.0.
svm_model = svm.SVC(kernel='linear', C=2.0)
svm_model.fit(X_train, y_train)
y_pred_svm = svm_model.predict(X_test)
# Stop the clock right after prediction so the reported runtime covers only
# fit + predict, not metric computation or console output.
runtime_svm = time.perf_counter() - start_time

# Metrics for SVM (F1 is computed for the class labelled 1)
accuracy_svm = accuracy_score(y_test, y_pred_svm)
confusion_mat_svm = confusion_matrix(y_test, y_pred_svm)
f1_svm = f1_score(y_test, y_pred_svm, pos_label=1)

print(f"Support Vector Machine Accuracy: {round(accuracy_svm * 100, 2)}%")
print("Confusion Matrix (SVM):")
print(confusion_mat_svm)
print(f"F1 Score (SVM): {f1_svm}")
print(f"Runtime (SVM): {runtime_svm} seconds")


Training with Support Vector Machine...
Support Vector Machine Accuracy: 87.43%
Confusion Matrix (SVM):
[[1209  246]
 [ 171 1691]]
F1 Score (SVM): 0.8902342721768886
Runtime (SVM): 4.644014358520508 seconds


In [21]:
# After fitting the neural network model, inspect its learned weights.
# coefs_ is a list of weight arrays (scikit-learn documents one matrix per layer
# transition); as the last expression of the cell, the full list is displayed as
# its output, so expect a long dump.
nn_model.coefs_


[array([[-0.22285193,  0.07203352, -0.02436779, ...,  0.03820019,
         -0.20960371, -0.64068575],
        [-0.35722787,  0.07111934, -0.13040664, ...,  0.1228127 ,
          0.31711433, -0.53175581],
        [-0.44260491,  0.18004558,  0.37179717, ...,  0.27752293,
          0.54784388, -0.72967862],
        ...,
        [ 0.02880598,  0.06295194, -0.24522283, ..., -0.08113015,
         -0.27579689,  0.1711363 ],
        [-0.05980668, -0.02353248,  0.00840146, ...,  0.01617994,
         -0.1128962 ,  0.19945874],
        [ 0.11498063,  0.0272232 , -0.07810823, ..., -0.05637162,
         -0.36159462,  0.11086848]]),
 array([[ 0.48164025],
        [-0.21910735],
        [-0.3103154 ],
        [ 0.40498169],
        [ 0.78416145],
        [-0.44660042],
        [ 0.43760243],
        [-0.18115391],
        [ 0.48399996],
        [ 1.01762589],
        [-0.37562501],
        [-0.29145377],
        [ 0.39146017],
        [-1.29865376],
        [-0.37248126],
        [ 0.43128389],
     

In [25]:
# For logistic regression or a linear SVM, you would get the weights as:
# (svm_model is created with kernel='linear' in the SVM cell; scikit-learn documents
# coef_ as available only for the linear kernel. The displayed array holds the
# weights assigned to the input features.)
svm_model.coef_


array([[ 3.52073651e-01, -1.11356397e-01, -7.58895125e-01,
         1.11165761e-01,  4.06730416e-01,  1.99950755e+00,
         4.81614544e-01, -2.40994335e-01,  1.03045452e-04,
         3.51924594e-01,  2.70360244e+00,  7.03678912e-01,
         5.18319102e-01,  3.70331547e-02,  1.85824617e-01,
        -1.48186928e-01]])