In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score, precision_recall_curve
from sklearn.metrics import roc_auc_score, roc_curve, auc, average_precision_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from mlxtend.plotting import plot_confusion_matrix
import warnings
warnings.filterwarnings('ignore')

import os

In [2]:
def print_classification_report(y_test, y_predic):
    print('Classification report:')
    print(classification_report(y_test, y_predic))

    print('\nBinary F1 Score, Recall and Precision:')
    f = f1_score(y_test, y_predic, average = "binary")
    Recall = recall_score(y_test, y_predic, average = "binary")
    Precision = precision_score(y_test, y_predic, average = "binary")
    print('\tF1 Score %f' %f)
    print('\tRecall Score %f' %Recall)
    print('\tPrecision Score %f' %Precision)

    print('\nMicro F1 Score, Recall and Precision:')
    f = f1_score(y_test, y_predic, average = "micro")
    Recall = recall_score(y_test, y_predic, average = "micro")
    Precision = precision_score(y_test, y_predic, average = "micro")
    print('\tF1 Score %f' %f)
    print('\tRecall Score %f' %Recall)
    print('\tPrecision Score %f' %Precision)

In [3]:
data = pd.read_csv("../input/creditcard.csv")

No_of_frauds = len(data[data["Class"]==1])
No_of_normals = len(data[data["Class"]==0])
total = No_of_frauds + No_of_normals
Fraud_percent = (No_of_frauds / total)*100
Normal_percent = (No_of_normals / total)*100

# Print classes statistics
print("### Statistics:")
print("The number of normal transactions(Class 0) are: ", No_of_normals)
print("The number of fraudulent transactions(Class 1) are: ", No_of_frauds)
print("Class 0 percentage = ", Normal_percent)
print("Class 1 percentage = ", Fraud_percent)

# Standardize 'Amount' by removing the mean and scaling to unit variance
data['normAmount'] = StandardScaler().fit_transform(data['Amount'].values.reshape(-1,1))
X = data.drop(['Time','Amount'],axis=1)
y = data['Class']

# Split the data into training and testing subsets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 12)

# show the dimensions of the train/test data
print("X_train.shape: ", X_train.shape)
print("X_test.shape: ", X_test.shape)
print("y_train.shape: ", y_train.shape)
print("y_test.shape: ", y_test.shape)

# Applying SVM Algorithm
print("")
print("-----------------------------------------------------------------------------------")
print("                                Support Vector Machine                             ")              
print("-----------------------------------------------------------------------------------")

# Fit into Model
classifier = svm.SVC(C= 1, kernel= 'linear', random_state= 0)
classifier.fit(X_train, y_train)

# Predict the class using X_test
y_pred = classifier.predict(X_test)

# Print Report with Classification Results
print_classification_report(y_test, y_pred)

### Statistics:
The number of normal transactions(Class 0) are:  284315
The number of fraudulent transactions(Class 1) are:  492
Class 0 percentage =  99.82725143693798
Class 1 percentage =  0.1727485630620034
X_train.shape:  (213605, 30)
X_test.shape:  (71202, 30)
y_train.shape:  (213605,)
y_test.shape:  (71202,)

-----------------------------------------------------------------------------------
                                Support Vector Machine                             
-----------------------------------------------------------------------------------
Classification report:
             precision    recall  f1-score   support

          0       1.00      1.00      1.00     71078
          1       1.00      1.00      1.00       124

avg / total       1.00      1.00      1.00     71202


Binary F1 Score, Recall and Precision:
	F1 Score 1.000000
	Recall Score 1.000000
	Precision Score 1.000000

Micro F1 Score, Recall and Precision:
	F1 Score 1.000000
	Recall Score 1.000000
	Pre