# Task for Today  

***

## Bank Note Forgery Detection  
  
Given *data about images of bank notes*, let's try to predict if a given note is **forged** or not.  
  
We will use a logistic regression model to make our predictions.

# Getting Started

In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('darkgrid')

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression

from sklearn.metrics import confusion_matrix, classification_report

In [None]:
# Read the bank note CSV into a DataFrame; the path is relative to the notebook's working directory.
csv_path = '../input/banknote-authenticationcsv/BankNote_Authentication.csv'
data = pd.read_csv(csv_path)

In [None]:
# Show the loaded DataFrame as this cell's output.
data

In [None]:
# Print pandas' summary of the DataFrame (DataFrame.info()).
data.info()

# Exploratory Data Analysis

In [None]:
# Annotated heatmap of the pairwise correlations between all columns of `data`.
correlations = data.corr()

plt.figure(figsize=(12, 10))
sns.heatmap(
    correlations,
    annot=True,
    vmin=-1.0,
    cmap='mako',
)
plt.show()

In [None]:
# Pairwise scatter plots / per-column distributions for every column in `data`.
# sns.pairplot is a figure-level function: it builds its own figure (a PairGrid),
# so a preceding plt.figure(...) would only leave behind a blank, unused figure
# and its figsize would never be applied. Use pairplot's `height` argument if the
# panels need to be resized.
sns.pairplot(data)
plt.show()

In [None]:
# Separate the target column from the remaining (feature) columns.
# Copies are taken so later changes to X / y cannot touch `data`.
target_column = 'class'
X = data.drop(columns=target_column).copy()
y = data[target_column].copy()

In [None]:
# Plot the distribution (density histogram + KDE curve) of every feature column.
# - sns.distplot is deprecated; histplot(kde=True, stat='density', kde_kws=dict(cut=3))
#   is the axes-level replacement that draws the same kind of plot.
# - The grid is sized from the number of columns instead of a fixed 2x2 layout,
#   so the cell does not fail when X has more than four features.
n_grid_cols = 2
n_features = len(X.columns)
n_grid_rows = max(1, (n_features + n_grid_cols - 1) // n_grid_cols)  # ceiling division

fig, axes = plt.subplots(
    n_grid_rows,
    n_grid_cols,
    figsize=(10, 5 * n_grid_rows),  # 10x10 for the original two-row layout
    squeeze=False,                  # always get a 2-D array of axes
)
axes = axes.ravel()

for ax, column in zip(axes, X.columns):
    sns.histplot(X[column], kde=True, stat='density', kde_kws=dict(cut=3), ax=ax)

# Hide any leftover panel when the number of features is odd.
for unused_ax in axes[n_features:]:
    unused_ax.set_visible(False)

plt.show()

In [None]:
# Pie chart of how many samples fall in each class.
# value_counts() orders its result by frequency (most common first), so pairing
# it directly with a fixed label list only works if the first-labelled class
# happens to be the majority. Sorting by class value pins the wedge order to
# (lowest class value, highest class value), the same order in which the
# "GENUINE"/"FORGED" names are attached to the classes in the classification
# report later in this notebook.
class_counts = y.value_counts().sort_index()

plt.figure(figsize=(10, 10))
plt.pie(
    class_counts,
    labels=["GENUINE", "FORGED"],
    autopct='%.2f%%',
    colors=['cornflowerblue', 'lightcoral'],
)
plt.title("Class Distribution")
plt.show()

# Preprocessing

In [None]:
# Shuffle and split into 70% train / 30% test; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.7,
    shuffle=True,
    random_state=1,
)

In [None]:
# Fit the scaler on the training split only, then apply the same transform to
# both splits. transform() returns a plain array, so each result is wrapped back
# into a DataFrame that keeps the original row index and column names.
scaler = StandardScaler().fit(X_train)

X_train, X_test = (
    pd.DataFrame(scaler.transform(split), index=split.index, columns=X.columns)
    for split in (X_train, X_test)
)

In [None]:
# Show the scaled training features as this cell's output.
X_train

In [None]:
# Show the training labels as this cell's output.
y_train

# Training

In [None]:
# Logistic regression with scikit-learn's default settings, trained on the scaled training split.
# fit() is left as the final expression so the fitted estimator is shown as the cell output.
model = LogisticRegression()
model.fit(X=X_train, y=y_train)

# Results

In [None]:
# Report the model's score on the held-out test split as a percentage.
test_accuracy = model.score(X_test, y_test)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

In [None]:
# Evaluate on the test split: confusion matrix heatmap plus a per-class text report.
class_names = ["GENUINE", "FORGED"]
tick_positions = np.arange(2) + 0.5  # centre each tick label on its heatmap cell

y_pred = model.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
clr = classification_report(y_test, y_pred, target_names=class_names)

plt.figure(figsize=(6, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='g',
    vmin=0,
    cmap='Blues',
    cbar=False,
)
plt.xticks(tick_positions, class_names)
plt.yticks(tick_positions, class_names)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()

print("Classification Report:\n----------------------\n", clr)

# Data Every Day  

This notebook is featured on Data Every Day, a YouTube series where I train models on a new dataset each day.  

***

Check it out!  
https://youtu.be/UuyH60UIDPQ