In [None]:
#IMPORT LIBRARIES 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
#from sklearn.impute import SimpleImputer

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, cohen_kappa_score
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder

from sklearn.svm import LinearSVC
from sklearn.model_selection import RandomizedSearchCV
from sklearn.pipeline import Pipeline

from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler

In [None]:
#LOADING DATA
data = pd.read_csv('credit_card_transactions.csv')
# Drop every row that has a missing value in any column.
data = data.dropna()

# Separate the predictors from the target column.
x = data.drop(columns=['is_fraud'])
y = data['is_fraud']

# UNDERSAMPLING THE MAJORITY CLASS
# Keep every minority sample and at most MAJORITY_TO_MINORITY_RATIO majority
# samples per minority sample (10:1 ratio of majority to minority samples).
# For instance, if the minority class has 100 samples, the majority class is
# reduced to 1000 samples.
MAJORITY_TO_MINORITY_RATIO = 10

class_counts = y.value_counts()
majority_count = class_counts.max()  # Total samples in the majority class
minority_count = class_counts.min()  # Total samples in the minority class

# RandomUnderSampler raises a ValueError when a class is asked for more
# samples than it actually has, so cap the target at the available count.
desired_majority_count = min(majority_count, MAJORITY_TO_MINORITY_RATIO * minority_count)

# Map class label -> number of samples to keep.
# Assumes label 0 is the majority class and label 1 the minority - TODO confirm.
sampling_strategy = {0: desired_majority_count, 1: minority_count}

# Apply RandomUnderSampler with the custom strategy.
# NOTE(review): this runs before the train/test split further down, so the
# held-out set is drawn from the undersampled data rather than the raw data.
rus = RandomUnderSampler(sampling_strategy=sampling_strategy, random_state=42)
x, y = rus.fit_resample(x, y)

#FEATURE ENGINEERING AND PREPROCESSING
# Columns fed to the model, grouped by the transformation each group receives.
numeric_features = ['amt', 'city_pop']
categorical_features = ['merchant', 'category', 'gender', 'city', 'state', 'job']

# One-hot encode the categorical columns (categories not seen during fit are
# ignored at transform time) and standardise the numeric columns. Columns that
# appear in neither list are dropped by ColumnTransformer's default remainder.
column_transformers = [
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ('num', StandardScaler(), numeric_features),
]
preprocessor = ColumnTransformer(column_transformers)

# Preprocessing-only pipeline; SMOTE is deliberately not a step here.
pipeline = Pipeline([('preprocessor', preprocessor)])

#TRAIN AND TEST SPLIT
# Stratify on the target so both splits keep the same class proportions;
# an unstratified random split lets the share of the rare class drift
# between the training and test sets.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=42, stratify=y
)

# Preprocess the data: fit the encoder/scaler on the training split only,
# then reuse the fitted transformers on the test split.
x_train_preprocessed = pipeline.fit_transform(x_train)
x_test_preprocessed = pipeline.transform(x_test)

# Apply SMOTE separately after preprocessing, and only to the training
# split, so the test split contains no synthetic samples.
smote = SMOTE(random_state=42)
x_train_resampled, y_train_resampled = smote.fit_resample(x_train_preprocessed, y_train)

In [None]:
# Candidate hyperparameter values sampled by the randomized search
param_dist = dict(
    C=[0.001, 0.01, 0.1, 1, 10, 100],    # Inverse regularization strength
    max_iter=[1000, 2000, 5000, 10000],  # Maximum number of iterations
)

# Base estimator whose hyperparameters are tuned
svm = LinearSVC(random_state=42)

# Randomized search: 10 sampled candidates, 5-fold cross-validation,
# run in parallel on all available cores
random_search = RandomizedSearchCV(
    estimator=svm,
    param_distributions=param_dist,
    n_iter=10,
    cv=5,
    n_jobs=-1,
    random_state=42,
)

# Tune on the SMOTE-resampled training data
random_search.fit(x_train_resampled, y_train_resampled)

# Report the winning parameter combination
print("Best Parameters:", random_search.best_params_)

# Predict on the preprocessed test split with the best estimator found
best_svm = random_search.best_estimator_
y_pred = best_svm.predict(x_test_preprocessed)