In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [None]:
# Read the transaction table and the identity table from the working directory.
train_transaction, train_identity = (
    pd.read_csv(csv_path)
    for csv_path in ("train_transaction.csv", "train_identity.csv")
)

# Left-join identity onto transactions by 'TransactionID', so every
# transaction row is kept even when it has no identity record.
train = train_transaction.merge(train_identity, how="left", on="TransactionID")

print(f"Rows in merged training set: {train.shape[0]}")
print(f"Columns in merged training set: {train.shape[1]}")

In [None]:
# Drop columns whose values are missing in more than 90% of the rows.
missing_counts = train.isna().sum()
drop_cols = missing_counts[missing_counts > 0.9 * len(train)].index.tolist()
train = train.drop(columns=drop_cols)

# Label-encode every non-numeric column rather than a hand-picked few: the SVC
# models fitted later in this notebook cannot ingest raw strings, so any text
# column left un-encoded would make `fit` raise.
cat_cols = train.select_dtypes(exclude=["number", "bool"]).columns.tolist()
for c in cat_cols:
    # Missing categories become the literal string "-999" before encoding, so
    # they form their own class and text columns never hold a mix of strings
    # and integers.
    train[c] = LabelEncoder().fit_transform(
        train[c].astype(object).fillna("-999").astype(str)
    )

# Fill remaining NaNs (simplistic approach). Only numeric columns can still
# contain NaN here, so the integer sentinel is type-safe. Because encoded
# columns are now numeric, re-running this cell does not re-encode them.
train = train.fillna(-999)

In [None]:
# "isFraud" is the label; the features are every other column except the
# "TransactionID" row identifier (silently skipped if a column is absent).
y = train["isFraud"]
X = train.drop(columns=["isFraud", "TransactionID"], errors="ignore")

# Single 80/20 hold-out split with a fixed seed (no cross-validation for brevity).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
def run_genetic_algorithm(X_data, y_data):
    """
    Stub for genetic-algorithm feature selection.

    No search is performed yet: `y_data` is ignored and the positional
    indices of all columns of `X_data` (0 .. X_data.shape[1] - 1) are
    returned as a NumPy array.
    """
    # TODO: Implement population, crossover, mutation, selection, etc.
    # Until then, "select" every feature.
    n_features = X_data.shape[1]
    all_feature_idx = np.arange(n_features)
    return all_feature_idx

# Ask the GA stub which columns to keep, then take the same positional
# column subset from both the training and the test split.
best_features = run_genetic_algorithm(X_train, y_train)
X_train_ga, X_test_ga = (
    split.iloc[:, best_features] for split in (X_train, X_test)
)

In [None]:
# Baseline: RBF-kernel SVC fitted on the GA-selected training features and
# scored by accuracy on the hold-out split.
model = SVC(kernel="rbf").fit(X_train_ga, y_train)
y_pred = model.predict(X_test_ga)
print("Baseline model Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))

In [None]:
def run_pso_for_hyperparams(X_data, y_data, model_class):
    """
    Stub for particle-swarm hyperparameter search.

    No search is performed yet: all three arguments are ignored and a fixed
    example dict of keyword arguments is returned.
    """
    # TODO: Swarm initialization, velocity updates, fitness evaluation, etc.
    example_params = dict(C=1.0, kernel="rbf")  # example
    return example_params

# Build an SVC from the hyperparameters returned by the PSO stub, fit it on
# the GA-selected features and report hold-out accuracy.
best_params = run_pso_for_hyperparams(X_train_ga, y_train, SVC)
model_pso = SVC(**best_params).fit(X_train_ga, y_train)
y_pred_pso = model_pso.predict(X_test_ga)
print("PSO-tuned model Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred_pso))

In [None]:
def run_aco_classification(X_data, y_data):
    """
    Stub for ant-colony-optimisation classification.

    No ACO logic exists yet: both arguments are ignored and a fresh, unfitted
    RBF-kernel SVC (the same configuration as the baseline) is returned.
    """
    # TODO: ACO logic with ants' path optimization, pheromone updates, etc.
    # For demonstration, hand back the baseline estimator configuration.
    placeholder_model = SVC(kernel="rbf")
    return placeholder_model

# Obtain the estimator from the ACO stub, fit it on the GA-selected training
# features, and report accuracy on the hold-out split.
aco_model = run_aco_classification(X_train_ga, y_train)
aco_model.fit(X_train_ga, y_train)
y_pred_aco = aco_model.predict(X_test_ga)
print(
    "ACO-based model Accuracy:",
    accuracy_score(y_true=y_test, y_pred=y_pred_aco),
)