#  Importing Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from xgboost import XGBClassifier
import seaborn as sns
import matplotlib.pyplot as plt

# Read the Data

In [None]:
# Load the transaction log CSV into the working DataFrame `df`.
df = pd.read_csv("/content/PS_20174392719_1491204439457_log.csv")
# Preview the first rows as a quick sanity check of the load.
df.head()

# Data Exploration

In [None]:
# DataFrame.info() writes its summary (columns, dtypes, non-null counts)
# straight to stdout and returns None, so it is called directly; wrapping it
# in print() would only append a stray "None" line to the output.
df.info()

# Data Preprocessing

## Data Cleaning

## Handling Missing Values

In [None]:
# Count how many rows carry each distinct value of the isFlaggedFraud column.
df['isFlaggedFraud'].value_counts()

In [None]:
# Remove the isFlaggedFraud column from df in place.
df.drop('isFlaggedFraud', axis=1, inplace=True)

## Handling Duplicate Values

In [None]:
# Display pandas' default descriptive statistics for df.
df.describe()

In [None]:
# Per-column count of missing values (isna is the same check as isnull).
print(df.isna().sum())

In [None]:
# Number of rows that exactly repeat an earlier row.
print(df.duplicated().sum())

### Handling Outliers

In [None]:
# Draw one box plot per numeric column to eyeball outliers.
# The figure is created inside the loop: plt.show() finishes the current
# figure, so a single figure opened before the loop would only give the
# requested size to the first plot.
for col in df.select_dtypes(include='number').columns:
    plt.figure(figsize=(10, 7))
    # Pass the values as x= explicitly; the meaning of the first positional
    # argument of sns.boxplot differs between seaborn releases.
    sns.boxplot(x=df[col])
    plt.show()

In [None]:
# Bar chart: number of transactions for each value of the 'type' column.
plt.figure(figsize=(10, 6))
sns.countplot(data=df, x='type').set_title('Transaction Types')
plt.show()

In [None]:
# Histogram (50 bins) of the 'amount' column with a KDE curve overlaid.
plt.figure(figsize=(10, 6))
sns.histplot(data=df, x='amount', bins=50, kde=True).set_title(
    'Transaction Amount Distribution'
)
plt.show()

In [None]:
# Correlation heatmap of the numeric columns.
# The columns that are label-encoded further down ('type', 'nameOrig',
# 'nameDest') are presumably still strings at this point, and DataFrame.corr()
# raises on non-numeric data in pandas >= 2.0, so restrict to numeric dtypes.
plt.figure(figsize=(12, 8))
sns.heatmap(
    df.select_dtypes(include='number').corr(),
    annot=True,
    cmap='coolwarm',
)
plt.title('Correlation Heatmap')
plt.show()

## Feature Engineering

### Feature Selection

### Feature Transformation

#### Encoding Categorical Variables

In [None]:
# Replace each listed column with integer codes; every column gets its own
# freshly fitted LabelEncoder.
for column in ('type', 'nameOrig', 'nameDest'):
    df[column] = LabelEncoder().fit_transform(df[column])

#### Creating New Features [Optional]

In [None]:
# Balance change (new minus old) on the destination and origin accounts.
df['balancediff'] = df['newbalanceDest'].sub(df['oldbalanceDest'])
df['Origdiff'] = df['newbalanceOrig'].sub(df['oldbalanceOrg'])

In [None]:
# The raw balance columns are now summarised by the two diff features,
# so drop all four of them in a single in-place call.
df.drop(
    columns=['newbalanceDest', 'oldbalanceDest', 'newbalanceOrig', 'oldbalanceOrg'],
    inplace=True,
)

In [None]:
# Correlation heatmap again, now over the encoded and engineered columns.
corr_matrix = df.corr()
plt.figure(figsize=(12, 8))
sns.heatmap(data=corr_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.show()

## Data Splitting

### Split into features (x) and target (y)

In [None]:
# Target is the isFraud column; the features are every other column.
y = df['isFraud']
x = df.drop('isFraud', axis=1)

### Split into training and testing

In [None]:
from sklearn.model_selection import train_test_split

# Keep 80% of the rows for training and hold out the rest for testing.
# stratify=y preserves the isFraud class ratio in both splits, and a fixed
# random_state makes the split (and every score computed from it)
# reproducible from run to run.
xtrain, xtest, ytrain, ytest = train_test_split(
    x, y, train_size=0.8, stratify=y, random_state=42
)

# Build the Model

## Model Selection

Train and evaluate different models with different hyperparameters

In [None]:
def eval_model(model, xtrain, ytrain, xtest, ytest):
    """Fit ``model`` on the training split and print its score on both splits.

    The estimator is fitted in place with ``model.fit(xtrain, ytrain)`` and
    then scored through ``model.score``, first on the training data and then
    on the test data. Both scores are printed; nothing is returned.
    """
    model.fit(xtrain, ytrain)

    train_score = model.score(xtrain, ytrain)
    print(f'training score: {train_score}')

    test_score = model.score(xtest, ytest)
    print(f'testing score: {test_score}')

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

# Standardise the features before the linear model: the columns (amounts,
# balance differences, label-encoded ids) are presumably on very different
# scales, which keeps the default solver from converging within its default
# iteration budget. The scaler lives inside the pipeline, so it is fitted on
# the training split only.
model = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000))
eval_model(model,xtrain,ytrain,xtest,ytest)

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Candidate: decision tree with scikit-learn's default settings.
model = DecisionTreeClassifier()
eval_model(model=model, xtrain=xtrain, ytrain=ytrain, xtest=xtest, ytest=ytest)

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline

# k-NN is distance based, so unscaled features let the largest-magnitude
# column dominate every neighbour lookup. Standardise inside a pipeline so the
# scaler is fitted on the training split only.
model = make_pipeline(StandardScaler(), KNeighborsClassifier())
eval_model(model,xtrain,ytrain,xtest,ytest)

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC

# The default RBF kernel works on distances between samples, so the features
# are standardised first (scaler fitted on the training split only).
# NOTE(review): SVC training time grows at least quadratically with the
# number of rows; if the training split is large, consider LinearSVC or
# fitting on a subsample.
model = make_pipeline(StandardScaler(), SVC())
eval_model(model,xtrain,ytrain,xtest,ytest)

In [None]:
from sklearn.naive_bayes import GaussianNB

# MultinomialNB only accepts non-negative (count-like) features and raises a
# ValueError otherwise. 'balancediff' and 'Origdiff' are differences of
# balances and can be negative, so use the Gaussian variant, which is meant
# for continuous, signed features.
model = GaussianNB()
eval_model(model,xtrain,ytrain,xtest,ytest)

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Candidate: random forest with scikit-learn's default settings.
model = RandomForestClassifier()
eval_model(model=model, xtrain=xtrain, ytrain=ytrain, xtest=xtest, ytest=ytest)

In [None]:
from sklearn.ensemble import AdaBoostClassifier

# Candidate: AdaBoost with scikit-learn's default settings.
model = AdaBoostClassifier()
eval_model(model=model, xtrain=xtrain, ytrain=ytrain, xtest=xtest, ytest=ytest)

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Candidate: gradient boosting with scikit-learn's default settings.
model = GradientBoostingClassifier()
eval_model(model=model, xtrain=xtrain, ytrain=ytrain, xtest=xtest, ytest=ytest)

## Train the Decision Tree Classifier Model

In [None]:
from sklearn.tree import DecisionTreeClassifier

# `model` still refers to whichever estimator the last comparison cell created
# (the gradient boosting one), so build the decision tree explicitly to match
# this section before fitting it on the training split.
model = DecisionTreeClassifier()
model.fit(xtrain,ytrain)

## Predictions

Generate predictions for both the training and the testing data

In [None]:
# Predict labels for the training split first, then for the test split.
trainpred, testpred = (model.predict(features) for features in (xtrain, xtest))

## Evaluate the Model

In [None]:
from sklearn.metrics import classification_report

In [None]:
# Per-class precision / recall / F1 on the training split.
print(classification_report(y_true=ytrain, y_pred=trainpred))

In [None]:
# Per-class precision / recall / F1 on the held-out test split.
print(classification_report(y_true=ytest, y_pred=testpred))