In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load the dataset
data = pd.read_csv("fraud det.csv")

# Display the first few rows of the dataset
print(data.head())

# Drop rows with missing target values
data = data.dropna(subset=['is_fraud'])

# Separate features and target
X = data.drop(['is_fraud', 'cc_num', 'name', 'street', 'city', 'state', 'zip', 'trans_num'], axis=1)  # Drop non-numeric and non-relevant columns
y = data['is_fraud']



         cc_num       category    amt         name  \
0  2.703190e+15       misc_net   4.97     Jennifer   
1  3.741250e+14  personal_care   8.40  Christopher   
2  3.434730e+14   shopping_pos   7.39       Gloria   
3  3.518670e+15       misc_net   2.08      Dorothy   
4  5.812930e+11           home  76.69      William   

                       street               city state      zip      lat  \
0              561 Perry Cove     Moravian Falls    NC  28654.0  36.0788   
1   20937 Reed Lakes Apt. 271         Washington    DC  20012.0  38.9757   
2  234 Bridges Wells Apt. 389  Center Tuftonboro    NH   3816.0  43.6849   
3   537 Rice Square Suite 040            Milford    OH  45150.0  39.1657   
4      39227 Mcpherson Shoals             Lahoma    OK  73754.0  36.3850   

      long  city_pop                         trans_num  merch_lat  merch_long  \
0 -81.1781    3495.0  0b242abb623afc578575680df30655b9  36.011293  -82.048315   
1 -77.0282  601723.0  ac60c99d1246e85ec8645c4ff1aa8b32  

In [4]:
# Identify categorical columns
categorical_cols = ['category']  # Add more categorical columns if present

# Define preprocessing steps
preprocessor = ColumnTransformer(
    transformers=[
        ('num', Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='mean')),
            ('scaler', StandardScaler())
        ]), X.select_dtypes(include=['float64', 'int64']).columns),
        ('cat', Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='most_frequent')),
            ('onehot', OneHotEncoder(handle_unknown='ignore'))
        ]), categorical_cols)
    ])

# Create preprocessing and training pipeline
def create_pipeline(model):
    return Pipeline(steps=[('preprocessor', preprocessor),
                           ('classifier', model)])

# Initialize models
rand_forest = create_pipeline(RandomForestClassifier())

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the models
models = {'Random Forest': rand_forest}

for model_name, model in models.items():
    model.fit(X_train, y_train)

# Evaluate the models
for model_name, model in models.items():
    y_pred = model.predict(X_test)
    print(f'{model_name} Performance:')
    print(f'Accuracy: {accuracy_score(y_test, y_pred):.4f}')
    print(f'Precision: {precision_score(y_test, y_pred):.4f}')
    print(f'Recall: {recall_score(y_test, y_pred):.4f}')
    print(f'F1 Score: {f1_score(y_test, y_pred):.4f}')
    print('---')



Random Forest Performance:
Accuracy: 0.9976
Precision: 0.8130
Recall: 0.6702
F1 Score: 0.7348
---


In [5]:
# User input for evaluation
def evaluate_transaction(input_data):
    input_data_df = pd.DataFrame([input_data], columns=X.columns)
    results = {}
    for model_name, model in models.items():
        prediction = model.predict(input_data_df)[0]
        results[model_name] = 'Fraudulent' if prediction == 1 else 'Legitimate'
    return results

# Example usage for user input
def get_user_input():
    print("Enter the transaction details:")
    user_input = {}
    for col in X.columns:
        value = input(f"{col}: ")
        if col in categorical_cols:
            user_input[col] = value
        else:
            user_input[col] = float(value)
    return user_input

# Get user input and evaluate
user_input = get_user_input()
results = evaluate_transaction(user_input)
print("Prediction Results:")
for model_name, result in results.items():
    print(f"{model_name}: {result}")


Enter the transaction details:
category: misc_net
amt: 4.97
lat: 36.0788
long:  -82.048315
city_pop: 3495
merch_lat: 36.01129
merch_long: 36.01129
Prediction Results:
Random Forest: Legitimate
