In [None]:
pip install pandas scikit-learn imbalanced-learn




In [None]:
import pandas as pd

# Read the credit-card transactions file; the parser engine and the text
# encoding are spelled out explicitly rather than left to pandas' defaults.
df = pd.read_csv(
    "/content/creditcard.csv",
    engine="python",
    encoding="utf-8",
)

# Quick overview: (rows, columns) and how many rows carry each class label.
frame_shape = df.shape
class_counts = df["Class"].value_counts()
print("Dataset shape:", frame_shape)
print("Class distribution:\n", class_counts)


Dataset shape: (284807, 31)
Class distribution:
 Class
0    284315
1       492
Name: count, dtype: int64


In [None]:
# The label lives in the "Class" column; every other column is a model input.
y = df["Class"]
X = df.drop(columns=["Class"])

print("Features shape:", X.shape)
print("Target shape:", y.shape)


Features shape: (284807, 30)
Target shape: (284807,)


In [None]:
from imblearn.over_sampling import SMOTE

# Initialize SMOTE
sm = SMOTE(random_state=42)

# Resample the data
X_res, y_res = sm.fit_resample(X, y)

# Check new class distribution
print("After SMOTE:")
print("Resampled features shape:", X_res.shape)
print("Resampled target distribution:\n", y_res.value_counts())


After SMOTE:
Resampled features shape: (568630, 30)
Resampled target distribution:
 Class
0    284315
1    284315
Name: count, dtype: int64


In [None]:
# Sample 50,000 rows from the resampled data
X_sample = X_res.sample(n=50000, random_state=42)
y_sample = y_res.loc[X_sample.index]

# Check shapes
print("Sampled features shape:", X_sample.shape)
print("Sampled labels distribution:\n", y_sample.value_counts())


Sampled features shape: (50000, 30)
Sampled labels distribution:
 Class
0    25062
1    24938
Name: count, dtype: int64


In [None]:
from sklearn.model_selection import train_test_split

# Split into training and test sets (80/20)
X_train, X_test, y_train, y_test = train_test_split(
    X_sample, y_sample, test_size=0.2, random_state=42
)

print("Training set size:", X_train.shape)
print("Testing set size:", X_test.shape)


Training set size: (40000, 30)
Testing set size: (10000, 30)


In [None]:
from sklearn.ensemble import RandomForestClassifier

# Initialize and train the model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

print("✅ Model training completed.")


✅ Model training completed.


In [None]:
from sklearn.metrics import classification_report

# Score the held-out rows with the fitted model.
y_pred = model.predict(X_test)

# Per-class precision / recall / F1 table, printed under a heading
# followed by one blank line.
report = classification_report(y_true=y_test, y_pred=y_pred)
print("Model Evaluation Report:\n", report, sep="\n")



Model Evaluation Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      5003
           1       1.00      1.00      1.00      4997

    accuracy                           1.00     10000
   macro avg       1.00      1.00      1.00     10000
weighted avg       1.00      1.00      1.00     10000



In [None]:
def predict_fraud(transaction_data, model, feature_columns):
    """Classify a single transaction with an already-fitted model.

    Args:
        transaction_data: Mapping of feature name -> value for one transaction.
        model: Fitted estimator exposing ``predict``. If it also exposes
            ``feature_names_in_``, the input columns are put in that order
            before predicting.
        feature_columns: Feature names the caller expects the model to use.
            Names absent from ``transaction_data`` are filled with 0; keys not
            listed here are dropped.

    Returns:
        "Fraudulent transaction!" when the predicted label equals 1,
        otherwise "Legitimate transaction."
    """
    # One-row frame built from the transaction mapping.
    transaction_df = pd.DataFrame([transaction_data])

    # Ensure all expected columns are present; missing ones are filled with 0.
    transaction_df = transaction_df.reindex(columns=feature_columns, fill_value=0)

    # Estimators fitted on a DataFrame remember the column order they saw and
    # reject input whose columns are ordered differently, so align to that
    # order when the model records it.
    fitted_columns = getattr(model, "feature_names_in_", None)
    if fitted_columns is not None:
        transaction_df = transaction_df.reindex(
            columns=list(fitted_columns), fill_value=0
        )

    # Predict for the single row. The model must NOT be refitted here: fit()
    # returns the estimator itself, which never compares equal to 1.
    prediction = model.predict(transaction_df)[0]

    if prediction == 1:
        return "Fraudulent transaction!"
    else:
        return "Legitimate transaction."

# Feature names used in the model training, taken from the training frame so
# that both the names and their order match what the model was fitted on
# (a hand-typed list can drift out of order, which scikit-learn rejects at
# predict time).
feature_columns = list(X_train.columns)

# Sample transaction (example)
transaction = {
    'V1': -0.097, 'V2': 0.233, 'V3': 0.527, 'V4': -0.251, 'V5': 0.312,
    'V6': -0.013, 'V7': 0.032, 'V8': -0.119, 'V9': 0.118, 'V10': -0.034,
    'V11': 0.033, 'V12': -0.050, 'V13': 0.078, 'V14': -0.016, 'V15': -0.032,
    'V16': 0.053, 'V17': 0.040, 'V18': -0.025, 'V19': 0.014, 'V20': -0.022,
    'V21': -0.027, 'V22': 0.087, 'V23': 0.035, 'V24': 0.089, 'V25': -0.006,
    'V26': -0.087, 'V27': 0.067, 'V28': 0.028, 'Amount': 89.4, 'Time': 3600
}

# Get prediction
print(predict_fraud(transaction, model, feature_columns))


Legitimate transaction.
