In [292]:
import numpy as np
import pandas as pd 
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint

In [293]:
df = pd.read_csv('dataset.csv')
df.head()

Unnamed: 0,Transaction_ID,Timestamp,Vehicle_Type,FastagID,TollBoothID,Lane_Type,Vehicle_Dimensions,Transaction_Amount,Amount_paid,Geographical_Location,Vehicle_Speed,Vehicle_Plate_Number,Fraud_indicator
0,1,1/6/2023 11:20,Bus,FTG-001-ABC-121,A-101,Express,Large,350,120,"13.059816123454882, 77.77068662374292",65,KA11AB1234,Fraud
1,2,1/7/2023 14:55,Car,FTG-002-XYZ-451,B-102,Regular,Small,120,100,"13.059816123454882, 77.77068662374292",78,KA66CD5678,Fraud
2,3,1/8/2023 18:25,Motorcycle,,D-104,Regular,Small,0,0,"13.059816123454882, 77.77068662374292",53,KA88EF9012,Not Fraud
3,4,1/9/2023 2:05,Truck,FTG-044-LMN-322,C-103,Regular,Large,350,120,"13.059816123454882, 77.77068662374292",92,KA11GH3456,Fraud
4,5,1/10/2023 6:35,Van,FTG-505-DEF-652,B-102,Express,Medium,140,100,"13.059816123454882, 77.77068662374292",60,KA44IJ6789,Fraud


In [294]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   Transaction_ID         5000 non-null   int64 
 1   Timestamp              5000 non-null   object
 2   Vehicle_Type           5000 non-null   object
 3   FastagID               4451 non-null   object
 4   TollBoothID            5000 non-null   object
 5   Lane_Type              5000 non-null   object
 6   Vehicle_Dimensions     5000 non-null   object
 7   Transaction_Amount     5000 non-null   int64 
 8   Amount_paid            5000 non-null   int64 
 9   Geographical_Location  5000 non-null   object
 10  Vehicle_Speed          5000 non-null   int64 
 11  Vehicle_Plate_Number   5000 non-null   object
 12  Fraud_indicator        5000 non-null   object
dtypes: int64(4), object(9)
memory usage: 507.9+ KB


In [295]:
df.shape

(5000, 13)

In [296]:
df.describe()

Unnamed: 0,Transaction_ID,Transaction_Amount,Amount_paid,Vehicle_Speed
count,5000.0,5000.0,5000.0,5000.0
mean,2500.5,161.062,141.261,67.8512
std,1443.520003,112.44995,106.480996,16.597547
min,1.0,0.0,0.0,10.0
25%,1250.75,100.0,90.0,54.0
50%,2500.5,130.0,120.0,67.0
75%,3750.25,290.0,160.0,82.0
max,5000.0,350.0,350.0,118.0


In [297]:
df['Timestamp'] = pd.to_datetime(df['Timestamp'])
df.head()

Unnamed: 0,Transaction_ID,Timestamp,Vehicle_Type,FastagID,TollBoothID,Lane_Type,Vehicle_Dimensions,Transaction_Amount,Amount_paid,Geographical_Location,Vehicle_Speed,Vehicle_Plate_Number,Fraud_indicator
0,1,2023-01-06 11:20:00,Bus,FTG-001-ABC-121,A-101,Express,Large,350,120,"13.059816123454882, 77.77068662374292",65,KA11AB1234,Fraud
1,2,2023-01-07 14:55:00,Car,FTG-002-XYZ-451,B-102,Regular,Small,120,100,"13.059816123454882, 77.77068662374292",78,KA66CD5678,Fraud
2,3,2023-01-08 18:25:00,Motorcycle,,D-104,Regular,Small,0,0,"13.059816123454882, 77.77068662374292",53,KA88EF9012,Not Fraud
3,4,2023-01-09 02:05:00,Truck,FTG-044-LMN-322,C-103,Regular,Large,350,120,"13.059816123454882, 77.77068662374292",92,KA11GH3456,Fraud
4,5,2023-01-10 06:35:00,Van,FTG-505-DEF-652,B-102,Express,Medium,140,100,"13.059816123454882, 77.77068662374292",60,KA44IJ6789,Fraud


In [298]:
df = df.dropna(subset=['FastagID'])
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4451 entries, 0 to 4999
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   Transaction_ID         4451 non-null   int64         
 1   Timestamp              4451 non-null   datetime64[ns]
 2   Vehicle_Type           4451 non-null   object        
 3   FastagID               4451 non-null   object        
 4   TollBoothID            4451 non-null   object        
 5   Lane_Type              4451 non-null   object        
 6   Vehicle_Dimensions     4451 non-null   object        
 7   Transaction_Amount     4451 non-null   int64         
 8   Amount_paid            4451 non-null   int64         
 9   Geographical_Location  4451 non-null   object        
 10  Vehicle_Speed          4451 non-null   int64         
 11  Vehicle_Plate_Number   4451 non-null   object        
 12  Fraud_indicator        4451 non-null   object        
dtypes: 

In [299]:
features = ['Transaction_Amount', 'Amount_paid']
X = df[features]
y = df['Fraud_indicator']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.75, random_state=42)

In [300]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [301]:
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

In [302]:
model = Sequential()
model.add(Dense(32, activation='relu', input_shape=(X_train_scaled.shape[1],)))
model.add(Dense(16, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.add(Dropout(0.2))

In [303]:
model.summary()

Model: "sequential_16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_50 (Dense)            (None, 32)                96        
                                                                 
 dense_51 (Dense)            (None, 16)                528       
                                                                 
 dense_52 (Dense)            (None, 1)                 17        
                                                                 
 dropout_15 (Dropout)        (None, 1)                 0         
                                                                 
Total params: 641 (2.50 KB)
Trainable params: 641 (2.50 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [304]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [305]:
checkpoint = ModelCheckpoint("neural_networks.h5", monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint]

In [306]:
history = model.fit(X_train_scaled, y_train_encoded, epochs=10, batch_size=16, validation_split=0.25, callbacks=callbacks_list)

Epoch 1/10
 1/53 [..............................] - ETA: 7s - loss: 3.2616 - accuracy: 0.7500
Epoch 1: val_accuracy improved from -inf to 0.82014, saving model to neural_networks.h5
Epoch 2/10
 1/53 [..............................] - ETA: 0s - loss: 3.1604 - accuracy: 0.6875
Epoch 2: val_accuracy improved from 0.82014 to 0.87410, saving model to neural_networks.h5
Epoch 3/10
 1/53 [..............................] - ETA: 0s - loss: 3.2093 - accuracy: 0.5625
Epoch 3: val_accuracy improved from 0.87410 to 0.92086, saving model to neural_networks.h5
Epoch 4/10
 1/53 [..............................] - ETA: 0s - loss: 5.9528 - accuracy: 0.6250
Epoch 4: val_accuracy did not improve from 0.92086
Epoch 5/10
 1/53 [..............................] - ETA: 0s - loss: 4.9403 - accuracy: 0.6250
Epoch 5: val_accuracy improved from 0.92086 to 0.95683, saving model to neural_networks.h5
Epoch 6/10
 1/53 [..............................] - ETA: 0s - loss: 1.2431 - accuracy: 0.7500
Epoch 6: val_accuracy im

  saving_api.save_model(



Epoch 7: val_accuracy did not improve from 0.97842
Epoch 8/10
 1/53 [..............................] - ETA: 0s - loss: 4.9404 - accuracy: 0.6250
Epoch 8: val_accuracy improved from 0.97842 to 0.98561, saving model to neural_networks.h5
Epoch 9/10
 1/53 [..............................] - ETA: 0s - loss: 2.9423 - accuracy: 0.8125
Epoch 9: val_accuracy improved from 0.98561 to 0.98921, saving model to neural_networks.h5
Epoch 10/10
 1/53 [..............................] - ETA: 0s - loss: 3.1371 - accuracy: 0.6875
Epoch 10: val_accuracy did not improve from 0.98921


In [307]:
y_pred_prob = model.predict(X_test_scaled)
y_pred = np.round(y_pred_prob)



In [308]:
confusion = confusion_matrix(y_test_encoded, y_pred)
confusion

array([[ 668,   57],
       [   0, 2614]])

In [309]:
accuracy = accuracy_score(y_test_encoded, y_pred)
precision = precision_score(y_test_encoded, y_pred)
recall = recall_score(y_test_encoded, y_pred)
f1score = f1_score(y_test_encoded, y_pred)

In [310]:
print("Accuracy: ", accuracy)
print("Precision: ", precision)
print("Recall: ", recall)
print("F1 Score: ", f1score)

Accuracy:  0.9829290206648698
Precision:  0.9786596780232123
Recall:  1.0
F1 Score:  0.9892147587511827
