In [2]:
import csv
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np

In [3]:
import os

# Directory that holds creditcard.csv. The cells below open the file by its
# bare name, so the working directory has to point at it. Set the
# CREDITCARD_DATA_DIR environment variable to run on another machine without
# editing this cell; when it is unset the original location is used.
DATA_DIR = os.environ.get("CREDITCARD_DATA_DIR", "C:/Users/raja/Documents")
os.chdir(DATA_DIR)

In [4]:
# Load the CSV into a DataFrame and preview a few random rows.
df = pd.read_csv("creditcard.csv")
# Fixed seed so a fresh "Restart & Run All" displays the same five rows.
df.sample(5, random_state=0)

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
257720,158313.0,2.030381,-0.235965,-1.541734,-0.006188,0.629463,0.351813,-0.10015,0.002545,0.611721,...,0.066421,0.332036,-0.007687,-0.323679,0.2021,-0.265369,-0.005841,-0.059804,24.29,0
119028,75302.0,-1.176184,1.090122,0.755149,0.918193,0.175064,-0.867734,0.729134,-0.431768,-0.092617,...,0.115533,0.524623,0.022526,0.418536,-0.143252,-0.388129,-0.447202,0.303582,21.5,0
103512,68691.0,1.054088,-0.698552,1.0237,0.326059,-1.380431,-0.381728,-0.624121,0.100041,1.373616,...,-0.198504,-0.552381,0.022365,0.438922,0.067625,0.937732,-0.056733,0.025991,89.72,0
87154,61559.0,1.245674,0.166975,0.488306,0.635322,-0.562777,-1.011073,0.014953,-0.160211,0.170362,...,-0.262581,-0.816264,0.140304,0.357827,0.186423,0.096544,-0.035866,0.018495,8.99,0
85756,60928.0,0.956456,-1.228496,-0.057599,-0.376222,-1.316283,-1.084451,-0.081871,-0.287151,-0.799439,...,-0.259111,-0.838607,-0.146008,0.410292,0.153761,1.018919,-0.116562,0.042841,240.0,0


In [5]:
# Parse creditcard.csv into two NumPy arrays: `features` (every column but the
# last, float32) and `targets` (the last column as a one-element row, uint8).
all_features = []
all_targets = []
# newline="" is the csv-module convention: the reader handles line endings itself.
with open("creditcard.csv", newline="") as f:
    # The first line is the column header: echo it, do not parse it.
    print("HEADER:", f.readline().strip())
    # csv.reader removes the double quotes around quoted fields, so no manual
    # split(",") / replace('"', "") is needed.
    for fields in csv.reader(f):
        if not fields:
            continue  # Skip blank lines (e.g. an empty line at the end of the file)
        all_features.append([float(v) for v in fields[:-1]])
        all_targets.append([int(fields[-1])])
        if len(all_features) == 1:
            # Show the first parsed data row once as a sanity check.
            print("EXAMPLE FEATURES:", all_features[-1])

features = np.array(all_features, dtype="float32")
targets = np.array(all_targets, dtype="uint8")
print("features.shape:", features.shape)
print("targets.shape:", targets.shape)

HEADER: "Time","V1","V2","V3","V4","V5","V6","V7","V8","V9","V10","V11","V12","V13","V14","V15","V16","V17","V18","V19","V20","V21","V22","V23","V24","V25","V26","V27","V28","Amount","Class"
EXAMPLE FEATURES: [0.0, -1.3598071336738, -0.0727811733098497, 2.53634673796914, 1.37815522427443, -0.338320769942518, 0.462387777762292, 0.239598554061257, 0.0986979012610507, 0.363786969611213, 0.0907941719789316, -0.551599533260813, -0.617800855762348, -0.991389847235408, -0.311169353699879, 1.46817697209427, -0.470400525259478, 0.207971241929242, 0.0257905801985591, 0.403992960255733, 0.251412098239705, -0.018306777944153, 0.277837575558899, -0.110473910188767, 0.0669280749146731, 0.128539358273528, -0.189114843888824, 0.133558376740387, -0.0210530534538215, 149.62]
features.shape: (284807, 30)
targets.shape: (284807, 1)


In [6]:
# Hold out the last 20% of the rows for validation. There is no shuffling: the
# split follows the order of the rows in `features` / `targets`.
num_val_samples = int(len(features) * 0.2)
# Slice with an explicit boundary index. The negative-index form is wrong when
# num_val_samples is 0 (fewer than 5 rows): features[:-0] is empty and
# features[-0:] is the whole array, so the two splits would swap roles.
split_index = len(features) - num_val_samples
train_features = features[:split_index]
train_targets = targets[:split_index]
val_features = features[split_index:]
val_targets = targets[split_index:]

print("Number of training samples:", len(train_features))
print("Number of validation samples:", len(val_features))

Number of training samples: 227846
Number of validation samples: 56961


In [7]:
# Occurrences of each label value in the training targets
# (counts[0] -> label 0, counts[1] -> label 1, the positive class).
counts = np.bincount(train_targets[:, 0])
positive_share = 100 * float(counts[1]) / len(train_targets)
report = "Number of positive samples in training data: {} ({:.2f}% of total)"
print(report.format(counts[1], positive_share))

# Inverse-frequency weights: the rarer a class, the larger its weight.
weight_for_0, weight_for_1 = 1.0 / counts[0], 1.0 / counts[1]

Number of positive samples in training data: 417 (0.18% of total)


In [8]:
# Standardise every feature column using statistics of the training split only;
# the validation split is transformed with those same training statistics.
# The arrays are modified in place (-=, /=).
mean = np.mean(train_features, axis=0)
train_features -= mean
val_features -= mean
std = np.std(train_features, axis=0)
# A column that is constant in the training split has std == 0; dividing by it
# would fill the features with NaN/inf. Such a column is already all zeros
# after centring, so dividing by 1 leaves it unchanged.
std[std == 0] = 1.0
train_features /= std
val_features /= std

In [9]:
from tensorflow import keras

# Fully connected binary classifier: three 256-unit ReLU layers, dropout
# (rate 0.3) after the second and the third, and a single sigmoid output unit.
n_inputs = train_features.shape[-1]
layer_stack = [
    keras.layers.Dense(256, activation="relu", input_shape=(n_inputs,)),
    keras.layers.Dense(256, activation="relu"),
    keras.layers.Dropout(0.3),
    keras.layers.Dense(256, activation="relu"),
    keras.layers.Dropout(0.3),
    keras.layers.Dense(1, activation="sigmoid"),
]
model = keras.Sequential(layer_stack)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 256)               7936      
_________________________________________________________________
dense_1 (Dense)              (None, 256)               65792     
_________________________________________________________________
dropout (Dropout)            (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 256)               65792     
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 257       
Total params: 139,777
Trainable params: 139,777
Non-trainable params: 0
__________________________________________________

In [11]:
# Confusion-matrix counts plus precision and recall, each under a short name.
metrics = [
    metric_cls(name=label)
    for metric_cls, label in (
        (keras.metrics.FalseNegatives, "fn"),
        (keras.metrics.FalsePositives, "fp"),
        (keras.metrics.TrueNegatives, "tn"),
        (keras.metrics.TruePositives, "tp"),
        (keras.metrics.Precision, "precision"),
        (keras.metrics.Recall, "recall"),
    )
]

model.compile(
    optimizer=keras.optimizers.Adam(1e-2),
    loss="binary_crossentropy",
    metrics=metrics,
)

# Checkpoint callback; the filename template carries an {epoch} placeholder.
checkpoint = keras.callbacks.ModelCheckpoint("fraud_model_at_epoch_{epoch}.h5")
callbacks = [checkpoint]
# Per-class loss weights, keyed by label value.
class_weight = {0: weight_for_0, 1: weight_for_1}

fit_options = dict(
    batch_size=2048,
    epochs=5,
    verbose=2,
    callbacks=callbacks,
    validation_data=(val_features, val_targets),
    class_weight=class_weight,
)
model.fit(train_features, train_targets, **fit_options)

Epoch 1/5
112/112 - 23s - loss: 8.8661e-07 - fn: 26.0000 - fp: 7144.0000 - tn: 220285.0000 - tp: 391.0000 - precision: 0.0519 - recall: 0.9376 - val_loss: 0.0460 - val_fn: 7.0000 - val_fp: 935.0000 - val_tn: 55951.0000 - val_tp: 68.0000 - val_precision: 0.0678 - val_recall: 0.9067
Epoch 2/5
112/112 - 19s - loss: 5.9535e-07 - fn: 14.0000 - fp: 5951.0000 - tn: 221478.0000 - tp: 403.0000 - precision: 0.0634 - recall: 0.9664 - val_loss: 0.0236 - val_fn: 8.0000 - val_fp: 527.0000 - val_tn: 56359.0000 - val_tp: 67.0000 - val_precision: 0.1128 - val_recall: 0.8933
Epoch 3/5
112/112 - 19s - loss: 5.2796e-07 - fn: 9.0000 - fp: 6487.0000 - tn: 220942.0000 - tp: 408.0000 - precision: 0.0592 - recall: 0.9784 - val_loss: 0.0505 - val_fn: 7.0000 - val_fp: 1335.0000 - val_tn: 55551.0000 - val_tp: 68.0000 - val_precision: 0.0485 - val_recall: 0.9067
Epoch 4/5
112/112 - 20s - loss: 4.5622e-07 - fn: 7.0000 - fp: 5133.0000 - tn: 222296.0000 - tp: 410.0000 - precision: 0.0740 - recall: 0.9832 - val_loss: 

<keras.callbacks.History at 0x31ebd15970>