In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

In [3]:
data = pd.read_csv('credit_card_transactions.csv')
data.head()

Unnamed: 0,TransactionID,TransactionAmount,TransactionDate,CardType,MerchantCategory,CardholderRegion,TimeOfDay,IsInternational,Fraudulent
0,T001,200.5,2024-08-01,Credit,Electronics,North America,Afternoon,No,No
1,T002,15.75,2024-08-01,Debit,Groceries,Europe,Morning,No,No
2,T003,5000.0,2024-08-02,Credit,Travel,Asia,Night,Yes,Yes
3,T004,45.0,2024-08-02,Debit,Clothing,North America,Evening,No,No
4,T005,1200.0,2024-08-03,Credit,Luxury,Europe,Night,Yes,Yes


In [4]:
# Preprocess the data
# Convert categorical columns to numeric using Label Encoding
label_encoder = LabelEncoder()
data['CardType'] = label_encoder.fit_transform(data['CardType'])  # Credit=0, Debit=1
data['MerchantCategory'] = label_encoder.fit_transform(data['MerchantCategory'])  # Encoding categories
data['CardholderRegion'] = label_encoder.fit_transform(data['CardholderRegion'])  # Encoding regions
data['TimeOfDay'] = label_encoder.fit_transform(data['TimeOfDay'])  # Morning=1, Afternoon=0, Evening=2, Night=3
data['IsInternational'] = label_encoder.fit_transform(data['IsInternational'])  # Yes=1, No=0
data['Fraudulent'] = label_encoder.fit_transform(data['Fraudulent'])  # Yes=1, No=0

In [8]:
# Drop unnecessary columns
data = data.drop(columns=['TransactionID', 'TransactionDate'])

In [9]:
data.head(3)

Unnamed: 0,TransactionAmount,CardType,MerchantCategory,CardholderRegion,TimeOfDay,IsInternational,Fraudulent
0,200.5,0,1,2,0,0,0
1,15.75,1,3,1,2,0,0
2,5000.0,0,6,0,3,1,1


In [10]:
# Split the data into training and testing sets
X = data.drop('Fraudulent', axis=1)
y = data['Fraudulent']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [11]:
# Train a Random Forest Classifier model
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

In [28]:
# Evaluate the model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred, labels=[0, 1])

In [25]:
print(f'Accuracy: {accuracy}')
print('Confusion Matrix:')
print(conf_matrix)

Accuracy: 1.0
Confusion Matrix:
[[6 0]
 [0 0]]


In [26]:
# Predict Fraudulent on new data
new_data = pd.DataFrame({
    'TransactionAmount': [750.00],
    'CardType': [0],  # Credit
    'MerchantCategory': [2],  # Luxury (Example encoding)
    'CardholderRegion': [1],  # Europe (Example encoding)
    'TimeOfDay': [3],  # Night
    'IsInternational': [1]  # Yes
})

In [27]:
fraud_prediction = model.predict(new_data)
print(f'Fraudulent Prediction: {"Yes" if fraud_prediction[0] == 1 else "No"}')

Fraudulent Prediction: No
