<a href="https://colab.research.google.com/github/lavanya9739/credit-card-fraud-detection/blob/main/Untitled13.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score
import joblib


In [3]:
# Load the dataset
file_path = "/content/output_file.csv"  # Replace with your file path
data = pd.read_csv(file_path)

# Drop unnecessary columns
data_cleaned = data.drop(columns=["Account Number", "Card Number", "Transaction Time", "Transaction Date",
                                  "Merchant Number", "Approval Code"])

# Encode categorical variables
label_encoder = LabelEncoder()
categorical_cols = ["Transaction Type", "Currency Code", "Transaction Country", "Transaction City", "Fraud Label"]
for col in categorical_cols:
    data_cleaned[col] = label_encoder.fit_transform(data_cleaned[col])

# Separate features and target
X = data_cleaned.drop(columns=["Fraud Label"])
y = data_cleaned["Fraud Label"]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [4]:
# Train the Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train_scaled, y_train)

# Make predictions
y_pred = rf_model.predict(X_test_scaled)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.4f}")


Model Accuracy: 0.9500


In [5]:
# Save the scaler
joblib.dump(scaler, "scaler.pkl")
print("Scaler saved as scaler.pkl")

# Save the model
joblib.dump(rf_model, "fraud_detection_model.pkl")
print("Model saved as fraud_detection_model.pkl")


Scaler saved as scaler.pkl
Model saved as fraud_detection_model.pkl


In [8]:
# Example new data with all required features
new_data = pd.DataFrame([{
    "Transaction Type": "Refund",
    "Currency Code": "INR",
    "Transaction Country": "IN",
    "Transaction City": "Hyderabad",
    "Credit Limit": 143194.285,  # Replace with an actual value
    "Merchant Category Code": 4044,  # Replace with an actual value
    "Open to Buy": 33994.67546,  # Replace with an actual value
    "Transaction Amount": 4487.46083  # Replace with an actual value
}])


In [9]:
# Encode categorical columns
for col in categorical_cols[:-1]:  # Skip "Fraud Label" as it's the target
    if col in new_data.columns:
        new_data[col] = label_encoder.fit_transform(new_data[col])

# Ensure column order matches training data
new_data = new_data[X.columns]  # Match the order of features during training

# Scale the data
new_data_scaled = scaler.transform(new_data)


In [10]:
# Predict using the loaded model
fraud_prediction = rf_model.predict(new_data_scaled)
print("Fraud Prediction:", "Fraud" if fraud_prediction[0] == 1 else "Not Fraud")


Fraud Prediction: Fraud
