# Import

In [73]:
# Data processing
import numpy as np
import pandas as pd

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Model
from sklearn.mixture import GaussianMixture

# Load Dataset

In [74]:
# Read the credit-card transactions CSV from the hard-coded Drive path.
data = pd.read_csv(
    filepath_or_buffer='/content/drive/MyDrive/Dataset/Numerical/creditcard.csv',
)
# Preview the first rows to confirm the columns loaded as expected.
data.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


# Sampling Dataset

In [20]:

# Work on a 10% random subset of the rows; the fixed seed keeps the same rows
# selected on every run. (Bind `data_sample = data` instead to use every row.)
data_sample = data.sample(
    frac=0.1,
    random_state=1,
)

# Show (rows, columns) of the subset.
data_sample.shape

(28481, 31)

# Format Data

In [75]:
# Target labels: the `Class` column of the sampled frame.
y = data_sample['Class']

# Feature matrix: every column except `Time` and the label.
# `score` is excluded as well because a later cell writes that column into
# `data_sample` in place. If this cell is re-run after that (the execution
# counts show the sample was created long before this cell ran), the previous
# model's score would otherwise be fed back in as a feature.
# The second drop uses errors='ignore' so a fresh kernel, where `score` does
# not exist yet, still works; `Time`/`Class` stay strictly required.
X = (
    data_sample
    .drop(columns=['Time', 'Class'])
    .drop(columns=['score'], errors='ignore')
)

In [76]:
# Count the sampled rows labelled Class == 1.
int((data_sample['Class'] == 1).sum())

49

# Normalize and scale

In [80]:
from sklearn.preprocessing import StandardScaler

In [81]:
# Standardize the feature columns: learn the scaling statistics from X, then
# apply them to that same matrix.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

# Declare Model

In [82]:
# Three-component Gaussian mixture. n_init=5 runs five initializations and
# keeps the best result; random_state pins them so the fit is repeatable.
gmm = GaussianMixture(
    n_components=3,
    n_init=5,
    random_state=42,
)

# Fit the model to the data

In [83]:
# Fit the mixture on the standardized features. `fit` returns the estimator
# itself, so `y_gmm` is just another name for the fitted `gmm`; it does not
# hold cluster labels.
gmm.fit(X_scaled)
y_gmm = gmm

In [84]:
# Per-row log-likelihood under the fitted mixture; a lower value means the row
# is less typical of the data the model was fitted on.
score = gmm.score_samples(X=X_scaled)

In [85]:
# Attach each row's score to the sampled frame. `score` is a positional array
# in the same row order as `data_sample` (data_sample -> X -> X_scaled ->
# score), so plain column assignment lines up.
# NOTE(review): this mutates `data_sample` in place, so the frame carries a
# `score` column for any cell that is re-run afterwards.
data_sample['score'] = score

In [86]:
# Cut-off for "unusually low" scores: the 1st percentile of the sample scores.
threshold = np.percentile(a=score, q=1)

In [87]:
# Report the computed cut-off, formatted to two decimals.
print(f'The threshold of the score is {threshold:.2f}')

The threshold of the score is -70.45


In [88]:
# Number of sampled rows scoring below -70.
# NOTE(review): -70 is a hand-typed cut-off, not the computed `threshold`
# (printed as -70.45), so this count covers slightly more than the bottom 1%.
int((data_sample['score'] < -70).sum())

294

In [89]:
# Rows that are labelled Class == 1 and score below -55.
# NOTE(review): -55 is a hand-picked cut-off, looser than the computed
# `threshold`; consider deriving it from the score distribution instead.
data_sample.loc[
    lambda frame: (frame['Class'] == 1) & (frame['score'] < -55)
]

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V22,V23,V24,V25,V26,V27,V28,Amount,Class,score
235644,148479.0,-1.541678,3.8468,-7.604114,3.121459,-1.254924,-2.084875,-2.385027,1.47114,-2.530507,...,1.064222,0.06537,0.257209,-0.693654,-0.335702,0.577052,0.398348,122.68,1,-69.348766
192529,129741.0,-1.396204,2.618584,-6.03677,3.552454,1.030091,-2.950358,-1.528506,0.189319,-1.433554,...,-0.390176,0.356029,-0.762352,0.09651,-0.487861,0.062655,-0.240732,1.0,1,-87.768402
42769,41237.0,-10.281784,6.302385,-13.271718,8.925115,-9.975578,-2.832513,-12.703253,6.706846,-7.078424,...,0.366933,0.042805,0.478279,0.157771,0.329901,0.163504,-0.485552,118.3,1,-224.318182
23308,32686.0,0.287953,1.728735,-1.652173,3.813544,-1.090927,-0.984745,-2.202318,0.555088,-2.033892,...,-0.633528,0.092891,0.187613,0.368708,-0.132474,0.576561,0.309843,0.0,1,-56.071947
235616,148468.0,0.21881,2.715855,-5.111658,6.310661,-0.848345,-0.882446,-2.902079,0.939162,-3.627698,...,1.037324,0.062325,0.53249,-0.149145,0.63958,0.351568,-0.001817,0.76,1,-76.660852
191690,129371.0,1.183931,3.05725,-6.161997,5.543972,1.617041,-1.848006,-1.005508,0.339937,-2.959806,...,-0.931072,-0.064175,-0.007013,0.345419,0.064558,0.476629,0.32374,0.0,1,-99.609928
149600,91554.0,-5.100256,3.633442,-3.843919,0.183208,-1.183997,1.602139,-3.005953,-8.645038,1.285458,...,-2.79715,1.090707,-0.15926,0.532156,-0.497126,0.943622,0.553581,261.22,1,-78.249321
143334,85285.0,-7.030308,3.421991,-9.525072,5.270891,-4.02463,-2.865682,-6.989195,3.791551,-4.62273,...,-0.541855,0.036943,-0.355519,0.353634,1.042458,1.359516,-0.272188,0.0,1,-116.355278
120837,75978.0,-5.140723,3.568751,-5.896245,4.16472,-4.091193,-1.98996,-5.472436,2.422821,-2.909735,...,0.118022,-0.332704,0.139941,0.324758,-0.180769,0.17781,0.661555,99.9,1,-90.14667
128479,78725.0,-4.312479,1.886476,-2.338634,-0.475243,-1.185444,-2.112079,-2.122793,0.272565,0.290273,...,-0.06787,-1.114692,0.269069,-0.020572,-0.963489,-0.918888,0.001454,60.0,1,-57.750543
