In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier

In [2]:
# Read the two transaction CSVs from the current working directory.
train, test = map(pd.read_csv, ("fraudTrain.csv", "fraudTest.csv"))

In [3]:
# Columns removed from both frames before any features are built.
drop_cols = [
    'Unnamed: 0', 'trans_date_trans_time', 'cc_num',
    'first', 'last', 'street', 'city', 'state',
    'zip', 'dob', 'trans_num', 'merchant'
]

# `train` / `test` are rebound to their own trimmed versions, so this cell may
# be executed again on frames that no longer contain these columns.
# errors="ignore" keeps the cell idempotent instead of raising KeyError on a
# second run.
train = train.drop(columns=drop_cols, errors="ignore")
test = test.drop(columns=drop_cols, errors="ignore")


In [4]:
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
# Feature groups: the listed categorical columns are one-hot encoded; every
# other column of `train` except the `is_fraud` label is standardised.
categorical_cols = ['category', 'gender', 'job']
numerical_cols = [
    name
    for name in train.columns
    if name != 'is_fraud' and name not in categorical_cols
]

# handle_unknown='ignore' lets the encoder accept categories at predict time
# that were absent during fitting instead of raising.
preprocessor = ColumnTransformer([
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols),
    ('num', StandardScaler(), numerical_cols),
])

In [9]:
# End-to-end estimator: the column preprocessing feeds a random forest.
# class_weight='balanced' reweights classes inversely to their frequency;
# random_state is fixed so repeated fits on the same data are reproducible.
model = Pipeline([
    ('preprocess', preprocessor),
    (
        'classifier',
        RandomForestClassifier(
            class_weight='balanced',
            random_state=42,
            n_estimators=30,
        ),
    ),
])

In [10]:
# Fit on a reproducible random subset of the training frame (at most 200000
# rows). DataFrame.sample raises ValueError when asked for more rows than the
# frame holds, so the request is clamped to the available row count.
train_sample = train.sample(n=min(200000, len(train)), random_state=42)

# Split the sampled frame into the feature matrix and the `is_fraud` label.
X = train_sample.drop('is_fraud', axis=1)
y = train_sample['is_fraud']

model.fit(X, y)
print("Model trained successfully")


Model trained successfully


In [11]:
# A single hand-written transaction, keyed by the feature columns the
# pipeline was fitted on, used as a smoke test of the fitted model.
new_transaction = dict(
    category='shopping_net',
    amt=350.75,
    gender='M',
    lat=40.7128,
    long=-74.0060,
    city_pop=1000000,
    job='Engineer',
    unix_time=1325377000,
    merch_lat=40.7306,
    merch_long=-73.9352,
)

# One-row frame so the pipeline receives the same tabular shape it was fitted on.
input_df = pd.DataFrame([new_transaction])
predicted_labels = model.predict(input_df)
prediction = predicted_labels[0]

# Label 1 is reported as fraud; anything else is reported as legitimate.
print(
    "Fraudulent Transaction Detected"
    if prediction == 1
    else "Legitimate Transaction"
)


Legitimate Transaction
