## Credit Card Fraud Detection

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
from joblib import dump, load
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

#### Reading and Analysing the Data

In [2]:
# Read the raw transactions CSV into a pandas DataFrame.
data = pd.read_csv(filepath_or_buffer="creditcard.csv")

In [None]:
# Preview the first five rows of the dataset.
data.head(n=5)

In [None]:
# Preview the last five rows of the dataset.
data.tail(n=5)

In [None]:
# Descriptive statistics for the columns of the dataset.
data.describe()

In [None]:
# Overview of the dataset: column dtypes and non-null counts.
# Used here to spot columns that contain null / missing values.
data.info()

In [None]:
# Count the null values in every column (all zeros means nothing is missing).
data.isnull().sum()

In [None]:
# Bar chart of how many transactions fall into each class.
sns.countplot(data=data, x='Class', hue='Class')

In [None]:
# Number of transactions per class label.
data.Class.value_counts()

###### Clearly, the graph and the value counts indicate that the data is highly imbalanced: the number of normal transactions is much higher than the number of fraudulent ones.
###### Here, 0 ---> the transaction is normal (legal transaction)
###### 1 ---> the transaction is fraud


## Splitting the data as (Fraud or Normal)


In [None]:
# Partition the transactions by label: Class == 1 is fraud, Class == 0 is normal.
fraud = data.loc[data['Class'].eq(1)]
normal = data.loc[data['Class'].eq(0)]

In [None]:
# Show (rows, columns) of the fraud subset, then of the normal subset.
print(fraud.shape, normal.shape, sep="\n")

In [None]:
# Descriptive statistics of the transaction amount for fraudulent transactions.
fraud['Amount'].describe()

In [None]:
# Descriptive statistics of the transaction amount for normal transactions.
normal['Amount'].describe()

In [None]:
# Per-class mean of every column, to compare fraud against normal transactions.
data.groupby(by='Class').mean()

### Handling Imbalanced Data
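The credit card data is imbalanced: normal transactions far outnumber fraudulent ones. To balance the classes, a random sample of the normal transactions is combined with all of the fraudulent transactions.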

In [None]:
# Undersample the majority (normal) class to 500 rows.
# random_state is fixed so a fresh Restart & Run All draws the same rows and
# every downstream result (split, model, metrics) is reproducible.
normal_sample = normal.sample(n=500, random_state=42)

In [None]:
# Stack the sampled normal rows on top of all fraud rows to form the working dataset.
new_data = pd.concat(objs=[normal_sample, fraud], axis=0)

In [None]:
# Preview the first five rows of the resampled dataset.
new_data.head(n=5)

In [None]:
# Class distribution after resampling.
new_data.Class.value_counts()

In [None]:
# Separate the target label (y) from the predictor columns (x).
y = new_data['Class']
x = new_data.drop(columns=['Class'])

### Splitting data into training and testing data

In [None]:
# Hold out 20% of the rows for testing.
# stratify=y keeps the fraud/normal ratio the same in the training and test sets;
# random_state makes the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, stratify=y, random_state=42
)

In [None]:
# Show (rows, columns) of the training features, then of the test features.
print(x_train.shape, x_test.shape, sep="\n")

### Creating model using LogisticRegression

In [None]:
# Logistic-regression classifier.
# The features are fed in unscaled, so the solver may need more than the default
# 100 iterations to converge; allow up to 1000.
model = LogisticRegression(max_iter=1000)

In [None]:
# Train the model on the underlying NumPy arrays of the training split.
model.fit(X=x_train.values, y=y_train.values)

### Evaluating the Model

In [None]:
# Predict on the training set and score it.
# accuracy_score is documented as (y_true, y_pred): ground truth goes first.
train_prediction = model.predict(x_train.values)
training_accuracy = accuracy_score(y_train.values, train_prediction)

In [None]:
# Predict on the held-out test set and score it.
# accuracy_score is documented as (y_true, y_pred): ground truth goes first.
test_prediction = model.predict(x_test.values)
test_accuracy = accuracy_score(y_test.values, test_prediction)

In [None]:
# Report both accuracy figures, training first.
for caption, score in (
    ("Accuracy score of training data:  ", training_accuracy),
    ("Accuracy score of testing data:  ", test_accuracy),
):
    print(caption, score)

In [None]:
# For 0/1 class labels the mean squared error is the fraction of misclassified
# test rows (i.e. 1 - accuracy); it is kept for continuity with earlier runs.
print("Mean Squared error: ",mean_squared_error(y_test,test_prediction))

# Fraud detection depends on how many frauds are caught and how many normal
# transactions are wrongly flagged, so also report per-class precision, recall
# and F1 (class 0 = normal, class 1 = fraud).
print(classification_report(y_test, test_prediction))

### Saving Model

In [None]:
# Persist the fitted model to disk so it can be reloaded later with joblib's load().
# dump/load are imported in the notebook's import cell at the top.
dump(model, 'model_creditcardFraud.joblib')