# Neural Network 

In [297]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.activations import  relu, sigmoid
from sklearn.metrics import accuracy_score , precision_score , f1_score , recall_score

## Extracting the data and applying one hot encoding for required columns 

In [298]:
data = pd.read_csv('customer_behavior_train.csv')
data_test = pd.read_csv('customer_behavior_test.csv')
# implementing one hot encoding 
data_encoded = pd.get_dummies(data , columns=['Referral','Last_Ad_Seen'])
data_test_encoded = pd.get_dummies(data_test , columns=['Referral','Last_Ad_Seen'])

## Processing the data ƒor further use according to our own use 

### Scaling , Dividing the data into train and cross validation set , doing the same for the test data 

In [299]:
cols = ['Time_on_site','Pages_viewed','Clicked_ad','Cart_value','Browser_Refresh_Rate','Referral_Direct','Referral_Facebook','Referral_Google','Referral_Instagram','Last_Ad_Seen_A','Last_Ad_Seen_B','Last_Ad_Seen_C','Last_Ad_Seen_D']
x = data_encoded[cols].astype(np.float32)
y = data_encoded['Purchase'].astype(np.float32)

features_to_scale = ['Time_on_site','Pages_viewed','Cart_value','Browser_Refresh_Rate']
preprocessor = ColumnTransformer([
    ('scale', StandardScaler(), features_to_scale)
], remainder='passthrough')

x_processed = preprocessor.fit_transform(x)






x_train , x_cv , y_train , y_cv = train_test_split(x_processed,y,test_size=0.2,random_state=1)
x_unprocessed_test = data_test_encoded[cols]
x_test = preprocessor.fit_transform(x_unprocessed_test).astype(np.float32)

y_test = data_test_encoded['Purchase'].astype(np.float32)
x.head()

Unnamed: 0,Time_on_site,Pages_viewed,Clicked_ad,Cart_value,Browser_Refresh_Rate,Referral_Direct,Referral_Facebook,Referral_Google,Referral_Instagram,Last_Ad_Seen_A,Last_Ad_Seen_B,Last_Ad_Seen_C,Last_Ad_Seen_D
0,2.29,5.37,1.0,10.0,143.139999,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
1,7.49,16.360001,0.0,25.52,136.660004,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
2,5.71,13.69,0.0,10.0,67.129997,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
3,1.23,3.22,1.0,41.490002,92.120003,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
4,3.55,10.72,0.0,21.57,70.309998,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


## Making the model and setting the weights that were most appropriate 

In [300]:
model = Sequential([
    tf.keras.Input(shape=(13,)),
    Dense(4,activation='relu',name='layer_1'),
    Dense(2,activation='relu',name='layer_2'),
    Dense(1,activation='sigmoid',name='layer_3')
])

layer_1_weights = np.array([[-1.2465922 , -0.8412034 , -0.03011465,  1.0739251 ],
       [ 0.42299354,  0.04849945, -0.08570717,  0.80661786],
       [ 0.19528103, -0.3844517 ,  0.54315555, -0.00480488],
       [ 0.14708352,  0.6496642 , -0.4020387 ,  0.37356296],
       [ 2.3264956 ,  0.4342505 ,  1.8965672 , -0.09850361],
       [-0.29876235, -2.307847  ,  0.9208382 , -0.2614297 ],
       [-1.025613  , -0.9374569 ,  0.10518838,  0.6693792 ],
       [ 0.34839314,  0.7743884 , -0.2756555 ,  0.45653376],
       [ 0.43657258, -0.94597447, -0.2809164 ,  0.34282422],
       [-1.3866626 ,  0.0534031 ,  0.2094146 ,  0.802539  ],
       [-0.07642906,  0.30425742, -0.17437656,  0.712951  ],
       [ 0.19732405, -0.18622875,  0.30316842, -0.06109132],
       [ 0.24420382, -0.1712201 , -0.19226354,  0.35593337]])
layer_1_bias = np.array([-0.38850376, -0.01932234,  0.0260011 ,  0.6396868 ])
layer_2_weights = np.array([[-0.8180014 ,  0.79656917],
       [ 0.04195736, -0.56629217],
       [-0.07412225,  0.74968946],
       [-0.7258616 ,  1.0130503 ]])
layer_2_bias = np.array([-0.24327292, -0.01304914])
layer_3_weights = np.array([[0.30787802],
       [0.7897412 ]])
layer_3_bias = np.array([-3.3231096])

model.layers[0].set_weights([layer_1_weights,layer_1_bias])
model.layers[1].set_weights([layer_2_weights,layer_2_bias])
model.layers[2].set_weights([layer_3_weights,layer_3_bias])

## Performance Metrics 

In [301]:



# Get predictions and threshold
y_pred = model.predict(x_train)
y_pred_binary = (y_pred >= 0.4).astype(int).flatten()  # Convert to 0/1

# Calculate misclassifications
misclassified = np.sum(y_pred_binary != y_train)
print("Number of misclassified_train samples:", misclassified)

# Calculate performance metrics 
print("Accuracy train:", accuracy_score(y_train, y_pred_binary))
print("Precision train:", precision_score(y_train, y_pred_binary))
print("Recall train:", recall_score(y_train, y_pred_binary))
print("F1 Score train:", f1_score(y_train, y_pred_binary))


# Get predictions and threshold
y_pred_cv = model.predict(x_cv)
y_pred_binary_cv = (y_pred_cv >= 0.4).astype(int).flatten()  # Convert to 0/1

# Calculate misclassifications
misclassified_cv = np.sum(y_pred_binary_cv != y_cv)
print("Number of misclassified_cv samples:", misclassified_cv)

# Calculate accuracy
print("Accuracy cross validation:", accuracy_score(y_cv, y_pred_binary_cv))
print("Precision cross validation:", precision_score(y_cv, y_pred_binary_cv))
print("Recall cross validation:", recall_score(y_cv, y_pred_binary_cv))
print("F1 Score cross validation:", f1_score(y_cv, y_pred_binary_cv))

# Get predictions and threshold

y_pred_test = model.predict(x_test)
y_pred_binary_test = (y_pred_test >= 0.4).astype(int).flatten()  # Convert to 0/1

# Calculate misclassifications
misclassified_test = np.sum(y_pred_binary_test != y_test)
print("Number of misclassified_test samples:", misclassified_test)

# Calculate accuracy
print("Accuracy test:", accuracy_score(y_test, y_pred_binary_test))
print("Precision test:", precision_score(y_test, y_pred_binary_test))
print("Recall test:", recall_score(y_test, y_pred_binary_test))
print("F1 Score test:", f1_score(y_test, y_pred_binary_test))






[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 538us/step
Number of misclassified_train samples: 320
Accuracy train: 0.7777777777777778
Precision train: 0.6306532663316583
Recall train: 0.5919811320754716
F1 Score train: 0.610705596107056
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Number of misclassified_cv samples: 96
Accuracy cross validation: 0.7333333333333333
Precision cross validation: 0.5272727272727272
Recall cross validation: 0.5686274509803921
F1 Score cross validation: 0.5471698113207547
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Number of misclassified_test samples: 110
Accuracy test: 0.7555555555555555
Precision test: 0.5902777777777778
Recall test: 0.625
F1 Score test: 0.6071428571428571
