In [1]:
# Attach Google Drive to the Colab VM so files under MyDrive can be read
# through the local filesystem.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


In [2]:
# Source archive on the mounted Drive, and the directory on the VM's local
# disk that it is unpacked into.
zip_path = "/content/drive/MyDrive/CTS_dataset.zip"
extract_path = "/content/CTS_dataset/"

In [3]:
import os
import zipfile

# Unpack the whole archive into extract_path, creating the directory first
# if it does not exist yet. Re-running simply overwrites the extracted files.
os.makedirs(extract_path, exist_ok=True)
with zipfile.ZipFile(zip_path) as archive:
    archive.extractall(path=extract_path)

print("✅ Dataset extracted!")

✅ Dataset extracted!


In [4]:
# The archive unpacks into a doubly nested "Insurance-Fraud-Detection" folder;
# the train/test splits sit side by side inside it.
dataset_root = "/content/CTS_dataset/Insurance-Fraud-Detection/Insurance-Fraud-Detection"
train_dir = f"{dataset_root}/train"
test_dir = f"{dataset_root}/test"

In [13]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Input settings shared by both generators. IMG_SIZE has to agree with the
# (150, 150, 3) input shape the CNN is built with.
IMG_SIZE = (150, 150)
BATCH_SIZE = 32
# Fixes the shuffle order and the random augmentation draws of the training
# generator so that repeated runs see the same batches.
SEED = 42

# Training images: scale pixels to [0, 1] and apply random geometric
# augmentation (rotation, shifts, zoom, horizontal flip).
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=25,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.25,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Test images: rescaling only, no augmentation.
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    seed=SEED
)

# shuffle=False keeps the batch order aligned with test_generator.classes and
# test_generator.filepaths, which per-image error analysis relies on.
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    shuffle=False
)

Found 5200 images belonging to 2 classes.
Found 1416 images belonging to 2 classes.


In [14]:
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

# 'balanced' weighting gives the rarer class a proportionally larger weight,
# computed from the label of every training image.
classes = np.array([0, 1])
weights = compute_class_weight(
    'balanced',
    classes=classes,
    y=train_generator.classes,
)

# Keras expects a {class_index: weight} mapping.
class_weights = dict(zip(classes.tolist(), weights))
print("✅ Class weights:", class_weights)

✅ Class weights: {0: np.float64(13.0), 1: np.float64(0.52)}


In [15]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input

# Four Conv -> BatchNorm -> MaxPool stages (32/64/128/256 filters), then global
# average pooling and a small dense head ending in one sigmoid unit.
model = Sequential([
    # Declare the input with an explicit Input layer; passing input_shape= to
    # the first Conv2D of a Sequential model triggers a Keras UserWarning.
    Input(shape=(150, 150, 3)),

    Conv2D(32, (3,3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2,2),

    Conv2D(64, (3,3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2,2),

    Conv2D(128, (3,3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2,2),

    Conv2D(256, (3,3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2,2),

    GlobalAveragePooling2D(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')  # Binary classification
])

# Only 'accuracy' is tracked here: the evaluation cell unpacks exactly
# (loss, accuracy) from model.evaluate(), so adding metrics needs that cell
# updated as well.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [16]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

checkpoint_path = "/content/best_model.keras"

# Stop once val_loss has not improved for 5 epochs and roll the model back to
# its best weights; separately keep the best-val_loss model on disk.
early_stopping = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)
best_checkpoint = ModelCheckpoint(checkpoint_path, monitor="val_loss", save_best_only=True)
callbacks = [early_stopping, best_checkpoint]

# NOTE(review): validation_data is the *test* generator, so early stopping and
# checkpoint selection are tuned on the test set. Any score later reported on
# test_generator is therefore optimistic; a split held out from train_dir
# should drive model selection instead.
history = model.fit(
    train_generator,
    epochs=30,
    validation_data=test_generator,
    callbacks=callbacks,
    class_weight=class_weights,
)


  self._warn_if_super_not_called()


Epoch 1/30
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 416ms/step - accuracy: 0.6447 - loss: 0.9467 - val_accuracy: 0.0692 - val_loss: 1.3338
Epoch 2/30
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 363ms/step - accuracy: 0.5807 - loss: 0.7887 - val_accuracy: 0.3623 - val_loss: 0.7438
Epoch 3/30
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 367ms/step - accuracy: 0.5509 - loss: 0.7536 - val_accuracy: 0.3962 - val_loss: 1.1880
Epoch 4/30
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 360ms/step - accuracy: 0.6149 - loss: 0.6924 - val_accuracy: 0.7945 - val_loss: 0.5029
Epoch 5/30
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 372ms/step - accuracy: 0.6146 - loss: 0.7426 - val_accuracy: 0.4710 - val_loss: 0.8675
Epoch 6/30
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 360ms/step - accuracy: 0.5700 - loss: 0.6762 - val_accuracy: 0.5833 - val_loss: 0.8140
Epoch 7/30

In [17]:
# Persist the model in its current state (architecture + weights) in the
# native .keras format.
final_model_path = "/content/fraud_detection_cnn.keras"
model.save(final_model_path)
print(f"✅ Model saved at {final_model_path}")

✅ Model saved at /content/fraud_detection_cnn.keras


In [18]:
# Score the model on the test generator. Metrics are read by name so this cell
# keeps working if more metrics are added to model.compile().
eval_metrics = model.evaluate(test_generator, return_dict=True)
loss, accuracy = eval_metrics["loss"], eval_metrics["accuracy"]
print(f"✅ Test Accuracy: {accuracy*100:.2f}%")

# The classes are heavily imbalanced, so accuracy only means something next to
# what a constant "always predict the most frequent class" model would score.
test_class_counts = np.bincount(test_generator.classes)
majority_baseline = test_class_counts.max() / test_class_counts.sum()
print(f"Majority-class baseline accuracy: {majority_baseline*100:.2f}%")

[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 144ms/step - accuracy: 0.7518 - loss: 0.5496
✅ Test Accuracy: 79.45%


In [19]:
import shutil

# Predict every test image and threshold the sigmoid output at 0.5.
# test_generator was created with shuffle=False, so row i of the predictions
# corresponds to test_generator.classes[i] and test_generator.filepaths[i].
pred_probs = model.predict(test_generator, verbose=1)
pred_classes = (pred_probs > 0.5).astype("int32").flatten()

true_classes = test_generator.classes
assert len(pred_classes) == len(true_classes), "prediction/label count mismatch"

# class_indices maps class name -> index; invert it explicitly instead of
# relying on the dict's key order lining up with the indices.
index_to_label = {index: name for name, index in test_generator.class_indices.items()}
class_labels = [index_to_label[index] for index in sorted(index_to_label)]

misclassified_idx = np.where(pred_classes != true_classes)[0]
print(f"❌ Total misclassified images: {len(misclassified_idx)}")

misclassified_dir = "/content/misclassified_images_improved"
# Start from an empty folder: otherwise images copied by an earlier run (with a
# different model) linger and the folder no longer matches the count above.
shutil.rmtree(misclassified_dir, ignore_errors=True)
os.makedirs(misclassified_dir, exist_ok=True)

# Copy each misclassified image into a True_<label>_Pred_<label> subfolder.
for idx in misclassified_idx:
    img_path = test_generator.filepaths[idx]
    true_label = class_labels[true_classes[idx]]
    pred_label = class_labels[pred_classes[idx]]

    subfolder = os.path.join(misclassified_dir, f"True_{true_label}_Pred_{pred_label}")
    os.makedirs(subfolder, exist_ok=True)

    shutil.copy(img_path, subfolder)

print(f"✅ Misclassified images saved at: {misclassified_dir}")

[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 167ms/step
❌ Total misclassified images: 291
✅ Misclassified images saved at: /content/misclassified_images_improved
