In [8]:
# Step 1: Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score

In [11]:
# Step 2: Load the data
data = pd.read_csv("onlinefraud.csv")

# Check if the data loaded correctly
print(data.head())

   step      type    amount     nameOrig  oldbalanceOrg  newbalanceOrig  \
0     1   PAYMENT   9839.64  C1231006815       170136.0       160296.36   
1     1   PAYMENT   1864.28  C1666544295        21249.0        19384.72   
2     1  TRANSFER    181.00  C1305486145          181.0            0.00   
3     1  CASH_OUT    181.00   C840083671          181.0            0.00   
4     1   PAYMENT  11668.14  C2048537720        41554.0        29885.86   

      nameDest  oldbalanceDest  newbalanceDest  isFraud  isFlaggedFraud  
0  M1979787155             0.0             0.0        0               0  
1  M2044282225             0.0             0.0        0               0  
2   C553264065             0.0             0.0        1               0  
3    C38997010         21182.0             0.0        1               0  
4  M1230701703             0.0             0.0        0               0  


In [12]:
# Step 4: Define the feature matrix 'X' and target vector 'y'
le = LabelEncoder()

# Convert 'type' column to numeric
data['type'] = le.fit_transform(data['type'])

# Verify that the 'type' column is numeric
print(data['type'].head())
X = data[["type", "amount", "oldbalanceOrg", "newbalanceOrig"]]
y = data["isFraud"]

0    3
1    3
2    4
3    1
4    3
Name: type, dtype: int64


In [13]:
# Step 5: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [14]:
# Step 6: Train the model
model = DecisionTreeClassifier()
model.fit(X_train, y_train)

In [15]:
# Step 7: Evaluate the model
y_pred = model.predict(X_test)
print("Model Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Model Accuracy: 0.9997139543144177
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00   1270904
           1       0.89      0.89      0.89      1620

    accuracy                           1.00   1272524
   macro avg       0.94      0.94      0.94   1272524
weighted avg       1.00      1.00      1.00   1272524



In [16]:
# Step 8: Function to predict fraud for new transactions
def predict_fraud(transaction_type, amount, oldbalanceOrg, newbalanceOrig):
    # Convert transaction_type to numeric using the same LabelEncoder
    transaction_type_encoded = le.transform([transaction_type])[0]
    # Create the feature DataFrame for the new transaction
    new_transaction = pd.DataFrame({
        "type": [transaction_type_encoded],
        "amount": [amount],
        "oldbalanceOrg": [oldbalanceOrg],
        "newbalanceOrig": [newbalanceOrig]
    })
    # Predict using the trained model
    prediction = model.predict(new_transaction)
    # Return the result
    return "Fraud" if prediction[0] == 1 else "Not Fraud"

# Example usage of the function
new_transaction_example = {
    "transaction_type": "PAYMENT",
    "amount": 1000.0,
    "oldbalanceOrg": 5000.0,
    "newbalanceOrig": 4000.0
}

result = predict_fraud(
    new_transaction_example["transaction_type"],
    new_transaction_example["amount"],
    new_transaction_example["oldbalanceOrg"],
    new_transaction_example["newbalanceOrig"]
)

print("The new transaction is:", result)

The new transaction is: Not Fraud
