##Loading the Dataset

In [4]:
import pandas as pd

# Load the dataset
file_path = 'Fraud detection dataset.csv'
data = pd.read_csv(file_path)

# Display the first few rows of the dataset
print(data.head())

   step      type    amount     nameOrig  oldbalanceOrg  newbalanceOrig  \
0     1   PAYMENT   9839.64  C1231006815       170136.0       160296.36   
1     1   PAYMENT   1864.28  C1666544295        21249.0        19384.72   
2     1  TRANSFER    181.00  C1305486145          181.0            0.00   
3     1  CASH_OUT    181.00   C840083671          181.0            0.00   
4     1   PAYMENT  11668.14  C2048537720        41554.0        29885.86   

      nameDest  oldbalanceDest  newbalanceDest  isFraud  isFlaggedFraud  
0  M1979787155             0.0             0.0      0.0             0.0  
1  M2044282225             0.0             0.0      0.0             0.0  
2   C553264065             0.0             0.0      1.0             0.0  
3    C38997010         21182.0             0.0      1.0             0.0  
4  M1230701703             0.0             0.0      0.0             0.0  


##Handling Missing Values

In [10]:
# Handling missing values in features by filling with mean
data['oldbalanceOrg'].fillna(data['oldbalanceOrg'].mean(), inplace=True)
data['newbalanceOrig'].fillna(data['newbalanceOrig'].mean(), inplace=True)
data['oldbalanceDest'].fillna(data['oldbalanceDest'].mean(), inplace=True)
data['newbalanceDest'].fillna(data['newbalanceDest'].mean(), inplace=True)

# Handling missing values in the target variable by filling with the mode (most common value)
data['isFraud'].fillna(data['isFraud'].mode()[0], inplace=True)


##Preparing The Data

In [11]:
from sklearn.model_selection import train_test_split

# Features and target variable
features = ['step', 'type', 'amount', 'oldbalanceOrg', 'newbalanceOrig', 'oldbalanceDest', 'newbalanceDest']
X = data[features]
y = data['isFraud']

# Convert categorical 'type' to numeric
X = pd.get_dummies(X, columns=['type'])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


##Trainging The Logistics Regression Model

In [12]:
from sklearn.linear_model import LogisticRegression

# Create a logistic regression model
model = LogisticRegression(max_iter=1000)

# Train the model
model.fit(X_train, y_train)


In [13]:
from sklearn.metrics import accuracy_score, classification_report

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy}')
print(f'Classification Report:\n{report}')


Accuracy: 0.9993754029658285
Classification Report:
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00     49603
         1.0       0.43      0.21      0.28        29

    accuracy                           1.00     49632
   macro avg       0.71      0.60      0.64     49632
weighted avg       1.00      1.00      1.00     49632

