In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Seed shared by every stochastic step so Restart & Run All reproduces the same models.
RANDOM_STATE = 42

# --- Load -------------------------------------------------------------------
df_train = pd.read_csv("fraud-detection/fraudTrain.csv")
df_test = pd.read_csv("fraud-detection/fraudTest.csv")

# Pool both files; the split is redone below. ignore_index=True gives the pooled
# frame a fresh unique index instead of two overlapping sets of row labels.
df = pd.concat([df_train, df_test], ignore_index=True)

# --- Select features --------------------------------------------------------
# Columns excluded from the feature matrix. errors='ignore' tolerates columns
# that are absent from the CSVs (e.g. no "Unnamed: 0" index column).
drop_cols = ["Unnamed: 0", "cc_num", "first", "last", "street", "city", "state", "zip", "dob", "trans_num", "merch_lat", "merch_long", "trans_date_trans_time"]
df = df.drop(columns=drop_cols, errors='ignore')

# --- Encode categoricals ----------------------------------------------------
# Each string column is replaced by integer codes. The fitted encoders are kept
# in label_encoders so predict_fraud() can encode user input the same way.
# NOTE(review): the encoders are fit on the pooled data, before the split.
label_encoders = {}
for col in ["category", "merchant", "gender", "job"]:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

df['unix_time'] = pd.to_numeric(df['unix_time'])

# --- Split ------------------------------------------------------------------
X = df.drop(columns=["is_fraud"])
y = df["is_fraud"]

# stratify=y keeps the class ratio of is_fraud the same in both splits
# (presumably fraud is the rare class, where an unstratified split can drift).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE, stratify=y
)

# --- Scale ------------------------------------------------------------------
# The scaler is fit on the training split only and then applied to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# --- Train ------------------------------------------------------------------
logistic_model = LogisticRegression()
logistic_model.fit(X_train, y_train)

# random_state pins the tree's internal tie-breaking so refits are identical.
decision_tree_model = DecisionTreeClassifier(random_state=RANDOM_STATE)
decision_tree_model.fit(X_train, y_train)

def _ask_category(col, encoder, encoded_column):
    """Prompt for a categorical feature and return its integer code.

    Args:
        col: Feature name, used in the prompt and in the warning message.
        encoder: Fitted encoder exposing ``classes_`` and ``transform``.
        encoded_column: The already-encoded values of this feature; used
            to pick the fallback code when the answer is not a known class.

    Returns:
        The encoder's code for the answer, or the most frequent code in
        ``encoded_column`` when the answer is not one of ``encoder.classes_``.
    """
    known = list(encoder.classes_)
    # Strip so a stray leading/trailing space does not turn a valid class invalid.
    answer = input(f"Enter {col} : ").strip()
    if answer in known:
        return encoder.transform([answer])[0]

    print(f"Invalid value for {col}. Using most frequent category.")
    # mode() yields the code that actually occurs most often. Taking
    # classes_[0] here would be the alphabetically first class instead,
    # which contradicts the message above.
    return encoded_column.mode().iloc[0]


def _ask_number(col):
    """Prompt for a numeric feature until a finite float is entered."""
    while True:
        answer = input(f"Enter {col}: ")
        try:
            number = float(answer)
        except ValueError:
            print(f"Invalid number for {col}. Please try again.")
            continue
        # float() accepts "nan" and "inf"; reject them here instead of passing
        # a non-finite value on to the scaler and models.
        if np.isfinite(number):
            return number
        print(f"Invalid number for {col}. Please try again.")


def predict_fraud():
    """Interactively collect one transaction and print both models' verdicts.

    Prompts for every column of the module-level feature frame ``X`` in
    column order. Columns that have an entry in ``label_encoders`` are read
    as category names and encoded; all other columns are read as floats.
    The row is scaled with the module-level ``scaler`` and passed to
    ``logistic_model`` and ``decision_tree_model``.

    An unknown category falls back to the most frequent encoded value of
    that column in ``X``. An unparsable or non-finite number is re-prompted.

    Returns:
        None. The two predictions are printed as FRAUD or LEGITIMATE.
    """
    print("\nEnter transaction details:")
    input_data = {}

    for col in X.columns:
        if col in label_encoders:
            input_data[col] = _ask_category(col, label_encoders[col], X[col])
        else:
            input_data[col] = _ask_number(col)

    # Keys were inserted in X.columns order, so the single-row frame has the
    # same column order as X.
    input_df = pd.DataFrame([input_data])
    input_scaled = scaler.transform(input_df)

    # Get predictions
    log_pred = logistic_model.predict(input_scaled)
    dt_pred = decision_tree_model.predict(input_scaled)

    print("\nLogistic Regression Prediction:", "FRAUD" if log_pred[0] == 1 else "LEGITIMATE")
    print("Decision Tree Prediction:", "FRAUD" if dt_pred[0] == 1 else "LEGITIMATE")

# Run one interactive prediction. This blocks on input() prompts, so a
# Restart & Run All will pause here until every field has been typed in.
predict_fraud()



Enter transaction details:
Enter merchant : fraud_Bauch-Blanda
Enter category : shopping_net
Enter amt: 75.5
Enter gender : M
Enter lat: 40.7128
Enter long: -74.0060
Enter city_pop: 1000000
Enter job : Accountant, chartered
Enter unix_time: 1375159200

Logistic Regression Prediction: LEGITIMATE
Decision Tree Prediction: LEGITIMATE
