In [None]:
# Importing the libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression 
from sklearn.metrics import accuracy_score , precision_score , confusion_matrix , classification_report
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import StandardScaler

In [2]:
# Read the transactions CSV from the working directory into a DataFrame.
credit_card_data = pd.read_csv("creditcard.csv")
     

In [None]:
# Preview the top of the table (five rows is the head() default).
print("THE FIRST FIVE ROWS-")
credit_card_data.head(n=5)

     

In [None]:
# Preview the bottom of the table (five rows is the tail() default).
print("THE LAST FIVE ROWS-")
credit_card_data.tail(n=5)

In [None]:

# Print the summary that DataFrame.info() produces for the loaded table.
print("DATASET INFORMATION")
credit_card_data.info()
     

In [None]:
# Per-column count of missing entries (isna is the same check as isnull).
credit_card_data.isna().sum()


In [None]:
# Bar chart of how many rows carry each value of the Class column.
print("COUNT-PLOT FOR LEGIT AND FRAUD TRANSACTIONS-")

sns.countplot(data=credit_card_data, x="Class")

In [None]:
# Number of rows for each value of Class (legit vs. fraudulent transactions).
credit_card_data["Class"].value_counts()

In [9]:
# Split the rows by label: Class == 0 goes to `legit`, Class == 1 to `fraud`.
legit = credit_card_data.loc[credit_card_data["Class"] == 0]
fraud = credit_card_data.loc[credit_card_data["Class"] == 1]

In [None]:
# Report (rows, columns) of each subset, one per line.
print("SHAPE OF LEGIT AND FRAUD-")
print(legit.shape, fraud.shape, sep="\n")

In [None]:
# Summary statistics of the Amount column over the legit subset.
print("DESCRIPTION OF AMOUNT ON BASIS OF LEGIT TRANSACTION-")
legit["Amount"].describe()

In [None]:
# Summary statistics of the Amount column over the fraud subset.
print("DESCRIPTION OF AMOUNT ON BASIS OF FRAUD TRANSACTION-")
fraud["Amount"].describe()
     

In [None]:

# Mean of every column, computed separately for each Class value, so the two
# kinds of transaction can be compared side by side.
credit_card_data.groupby("Class").mean()

In [14]:
# Under-sampling to handle the imbalanced dataset: draw as many rows from
# `legit` as there are rows in `fraud`, so both classes end up the same size.
# The count is taken from `fraud` instead of being hard-coded, and the draw is
# seeded so that re-running the notebook produces the same sample.
legit_sample = legit.sample(n=len(fraud), random_state=2)

In [15]:
# Stack the sampled legit rows on top of all fraud rows (row-wise concat).
new_dataset = pd.concat([legit_sample, fraud])

In [None]:
# Bar chart of row counts per Class value in the under-sampled data.
print("COUNT-PLOT FOR NEW DATASET-")
sns.countplot(data=new_dataset, x="Class")

In [None]:
# Top five rows of the under-sampled data.
print(" FIRST FIVE ROWS OF NEW DATASET-")
new_dataset.head(n=5)

In [None]:
# Bottom five rows of the under-sampled data.
print("LAST FIVE ROWS OF NEW DATASET-")
new_dataset.tail(n=5)

In [None]:
# Row count per Class value after under-sampling.
new_dataset["Class"].value_counts()

In [None]:
# Per-class column means of the under-sampled data.
new_dataset.groupby("Class").mean()

In [None]:
# Heatmap of the pairwise column correlations of the under-sampled data;
# the colour scale is capped at 0.8.
print("CORRELATION MATRIX-")
corrmat = new_dataset.corr()
fig = plt.figure(figsize=(12, 9))
sns.heatmap(corrmat, square=True, vmax=0.8)
plt.show()


In [None]:
# Separate the features (every column except Class) from the target column.
# `columns=` already names the axis, so no extra `axis` argument is passed.
X = new_dataset.drop(columns="Class")
Y = new_dataset["Class"]

In [None]:
# Show the feature frame after its label.
print("FEATURES-", X)

In [None]:

# Show the target series after its label.
print("LABELS-", Y)

In [None]:
# Splitting the new dataset: 20% of the rows are held out for testing, the
# split is stratified on the label, and random_state pins the shuffle.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, stratify=Y, random_state=2
)

# Standardize the features. The scaler is fitted on the training split only and
# the test split is transformed with those training statistics; refitting on
# the test split would scale the two splits differently and leak test
# information into preprocessing.
# NOTE(review): the later cells fit and evaluate the model on the unscaled
# X_train / X_test, so x_train / x_test are currently not consumed anywhere.
scaler = StandardScaler()
x_train = scaler.fit_transform(X_train)
x_test = scaler.transform(X_test)
     

In [None]:
# Shapes of the full feature frame and of its train / test parts, on one line.
print(*(frame.shape for frame in (X, X_train, X_test)))

In [None]:
# Fit a logistic-regression classifier on the training split, with the
# solver's iteration limit set to 10000.
model = LogisticRegression(max_iter=10_000)
model.fit(X_train, Y_train)
     

In [None]:
# Accuracy on the training data. scikit-learn metrics are declared as
# (y_true, y_pred), so the ground truth is passed first; accuracy is symmetric
# in its arguments, so the value is the same, but the call now follows the
# documented signature.
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)
     

In [None]:
# Report the accuracy measured on the training split.
print("Accuracy on Training data : ", training_data_accuracy)

In [None]:
# Accuracy on the held-out test data. scikit-learn metrics are declared as
# (y_true, y_pred), so the ground truth is passed first; accuracy is symmetric
# in its arguments, so the value is the same, but the call now follows the
# documented signature.
X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)

In [None]:

# Report the accuracy measured on the test split.
print("Accuracy score on Test Data : ", test_data_accuracy)

In [None]:
# Precision on the test split. precision_score is declared as (y_true, y_pred)
# and, unlike accuracy, is not symmetric: with the arguments reversed the
# number reported is the recall (TP / (TP + FN)), not the precision
# (TP / (TP + FP)). The ground truth therefore goes first.
prec = precision_score(Y_test, X_test_prediction)
print("The precision is {}".format(prec))

In [None]:
# Text report of the per-class metrics on the test split.
class_reports = classification_report(y_true=Y_test, y_pred=X_test_prediction)
print("CLASS REPORT IS ", class_reports)

In [None]:
# Draw the test-split confusion matrix as an annotated heatmap; the y axis is
# labelled with the true class and the x axis with the predicted class.
LABELS = ["Legit", "Fraud"]
conf_matrix = confusion_matrix(y_true=Y_test, y_pred=X_test_prediction)
plt.figure(figsize=(12, 12))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt="d",
    xticklabels=LABELS,
    yticklabels=LABELS,
)
plt.title("Confusion matrix")
plt.ylabel("True class")
plt.xlabel("Predicted class")
plt.show()


In [None]:
# Function to collect user input
def collect_transaction_data(columns=None):
    """Prompt on stdin for one numeric value per feature column.

    Args:
        columns: Iterable of feature names to ask for. When omitted, the
            columns of the notebook-level feature frame ``X`` are used.

    Returns:
        dict mapping each column name to the float entered for it, in the
        order the columns were given.

    Input that is not a number, or that parses to nan / inf, is rejected with
    a message and the same column is asked again instead of raising.
    """
    if columns is None:
        columns = X.columns

    transaction_data = {}
    for column in columns:
        while True:
            raw_value = input(f"Enter value for {column}: ")
            try:
                value = float(raw_value)
            except ValueError:
                print(f"'{raw_value}' is not a number, please try again.")
                continue
            # float() accepts "nan" and "inf"; those would only fail later,
            # when the model validates its input, so refuse them here.
            if not np.isfinite(value):
                print(f"'{raw_value}' is not a finite number, please try again.")
                continue
            transaction_data[column] = value
            break
    return transaction_data

# Function to predict fraud
def predict_fraud(transaction_data, model):
    """Classify a single transaction with a fitted model.

    Args:
        transaction_data: dict mapping feature name to value for one
            transaction.
        model: fitted estimator exposing ``predict``.

    Returns:
        "Fraudulent" when the first prediction equals 1, otherwise
        "Legitimate".
    """
    transaction_df = pd.DataFrame([transaction_data])

    # If the model recorded the feature names it was fitted with and the dict
    # holds exactly those names, put the columns in the fitted order so the
    # result does not depend on the dict's insertion order. Any other mismatch
    # is left untouched for the model's own input validation to report.
    fitted_names = getattr(model, "feature_names_in_", None)
    if fitted_names is not None:
        fitted_names = list(fitted_names)
        if (
            len(fitted_names) == len(transaction_df.columns)
            and set(fitted_names) == set(transaction_df.columns)
        ):
            transaction_df = transaction_df[fitted_names]

    prediction = model.predict(transaction_df)
    return "Fraudulent" if prediction[0] == 1 else "Legitimate"

# Example usage: prompt for one transaction, classify it with the fitted
# model, then echo the entered values and the verdict.
transaction_data = collect_transaction_data()
fraud_prediction = predict_fraud(transaction_data=transaction_data, model=model)
print("DETAILS OF TRANSACTION-", transaction_data)
print(f"This transaction is potentially {fraud_prediction}.")