In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

## Load the extracted features from ResNet50 

In [None]:
# Load the CSV file of extracted features
df = pd.read_csv("features_512.csv")

# Preview only: report the dimensions and show the first rows instead of
# echoing the whole frame into the notebook output.
print(df.shape)
df.head()

In [None]:
# Collect the names of columns whose every entry equals 0, then drop them in one go
all_zero_cols = [name for name, values in df.items() if values.eq(0).all()]
df = df.drop(columns=all_zero_cols)

In [None]:
# Target vector: the 'label' column
y = df.loc[:, 'label']

# Feature matrix: every remaining column except the file identifier and the label
X = df.drop(['file_name', 'label'], axis=1)

In [None]:
# Preview the feature matrix: dimensions plus the first rows only
print(X.shape)
X.head()

## Train test split

In [None]:
# Hold out 20% of the rows for testing; stratifying on y keeps the class
# proportions the same in both partitions, and the fixed seed makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

## Standardization

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the per-feature scaling statistics from the training rows only
scaler = StandardScaler().fit(X_train)

# Apply that same training-derived scaling to both partitions
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

## Training SVM Classifier

In [None]:
from sklearn.svm import SVC

# Hyper-parameters of the RBF-kernel SVM, kept together for easy tuning
svm_params = {"kernel": "rbf", "C": 1.0, "gamma": "scale", "random_state": 42}
svm = SVC(**svm_params)

# Fit on the standardized training split; as the cell's last expression this
# also displays the fitted estimator
svm.fit(X_train, y_train)

## Testing SVM Classifier

In [None]:
from sklearn.metrics import classification_report, accuracy_score

# Score the held-out split with the fitted SVM
y_pred = svm.predict(X_test)
report = classification_report(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# Per-class precision / recall / F1 first ...
print("Classification Report:", report, sep="\n")

# ... then the overall accuracy as a percentage
print(f"Accuracy: {accuracy * 100:.2f}%")

### Cross validation

In [None]:
from sklearn.base import clone
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# The SVM above was trained and tested on standardized features, so the
# cross-validated estimate must standardize as well. Putting the scaler and an
# unfitted copy of the classifier in one pipeline makes cross_val_score re-fit
# the scaler on each training fold only, so no validation-fold statistics leak
# into the scaling.
cv_model = make_pipeline(StandardScaler(), clone(svm))

# Perform 5-fold cross-validation
cv_scores = cross_val_score(cv_model, X, y, cv=5)

print(f"Cross-Validation Accuracy: {cv_scores.mean() * 100:.2f}%")

# Feature Embedding with Naive Bayes

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardize every feature column of X.
# NOTE(review): the scaler is fitted on *all* rows of X here, before any
# train/test split is made, so rows that later end up in the test set
# contribute to the scaling statistics.
scaler = StandardScaler().fit(X)
X = scaler.transform(X)

## Split the training set into two mutually exclusive subsets, S1 and S2, for estimating the feature-embedding transfer function

In [None]:
from sklearn.neighbors import KernelDensity
from sklearn.preprocessing import StandardScaler

# Step 2: Hold out a test set, then split the remaining training rows into
# S1 (density estimation) and S2 (transformation / classifier training).
# Stratifying on the labels keeps the class balance in every subset and, with
# the same seed and test size, yields the same test rows as the earlier
# stratified split, so both experiments are scored on the same hold-out set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# X was standardized using every row, test rows included. Re-standardizing with
# statistics from the training rows only removes that leakage: standardization
# is an affine map, so the result equals scaling the raw features with
# training-only statistics.
split_scaler = StandardScaler().fit(X_train)
X_train = split_scaler.transform(X_train)
X_test = split_scaler.transform(X_test)

S1_X, S2_X, S1_y, S2_y = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42, stratify=y_train
)

In [None]:
# Inspect the S1 feature subset (the rows used for density estimation)
S1_X

In [None]:
# Inspect the labels that belong to the S1 subset
S1_y

In [None]:
# Convert both label subsets to plain NumPy arrays (fresh copies) so they can
# be used as positional boolean masks later on
S1_y, S2_y = (np.array(labels) for labels in (S1_y, S2_y))

In [None]:
# Step 2: Define a function to compute class densities
def estimate_densities(X, y, bandwidth=0.2):
    """
    Estimate density functions for each feature and class using Kernel Density Estimation (KDE).

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix. Anything `np.asarray` can turn into a 2-D float array
        is accepted (NumPy array, pandas DataFrame, nested lists).
    y : array-like of shape (n_samples,)
        Class label of every row of X (NumPy array, pandas Series, list).
    bandwidth : float, default=0.2
        Bandwidth passed to every `KernelDensity` estimator.

    Returns
    -------
    dict
        `densities[feature_index][class_label]` is the `KernelDensity` fitted
        on the values of that feature among the samples of that class.

    Raises
    ------
    ValueError
        If X is not 2-D or X and y disagree on the number of samples.
    """
    # Coerce to positional arrays: a DataFrame does not support X[mask, j] and a
    # Series carries an index that is irrelevant here.
    X = np.asarray(X, dtype=float)
    y = np.asarray(y).ravel()
    if X.ndim != 2:
        raise ValueError(f"X must be 2-D (n_samples, n_features); got shape {X.shape}")
    if y.shape[0] != X.shape[0]:
        raise ValueError(
            f"X and y have inconsistent lengths: {X.shape[0]} vs {y.shape[0]}"
        )

    classes = np.unique(y)
    # The row mask of each class does not depend on the feature, so build it once.
    class_masks = [(cls, y == cls) for cls in classes]

    densities = {}
    for feature in range(X.shape[1]):
        column = X[:, feature]
        densities[feature] = {}
        for cls, mask in class_masks:
            kde = KernelDensity(bandwidth=bandwidth)
            # KernelDensity expects a 2-D (n_samples, 1) input for a single feature
            kde.fit(column[mask].reshape(-1, 1))
            densities[feature][cls] = kde
    return densities

# Step 3: Transform the original feature to a linearly separable feature space
def transform_features(X, densities, priors, epsilon=1e-10, pos_label=1, neg_label=0):
    """
    Transform features using Naive Bayes Feature Embedding (NBFE).

    Every value x of feature j is replaced by the log posterior-odds
        log( priors[pos] * f_j(x | pos) / (priors[neg] * f_j(x | neg)) )
    where f_j(. | c) is the density estimated for feature j and class c.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Features to transform (NumPy array or pandas DataFrame).
    densities : dict
        `densities[feature_index][class_label]` must expose
        `score_samples(points)` returning log-densities for an
        (n_points, 1) array (output of estimate_densities).
    priors : dict
        Prior probability of each class, keyed by class label.
    epsilon : float, default=1e-10
        Lower bound applied to both densities so the ratio stays finite when
        a density underflows to zero.
    pos_label, neg_label : hashable, default 1 and 0
        Labels of the class in the numerator / denominator of the ratio.

    Returns
    -------
    numpy.ndarray of shape (n_samples, n_features), dtype float64
        The embedded features. Entries whose ratio is not strictly positive
        (e.g. a zero prior for `pos_label`) are left at the fallback value 0.

    Raises
    ------
    ValueError
        If X is not 2-D.
    """
    # Work on a float array: positional indexing then also works for DataFrames,
    # and the float output cannot silently truncate log-ratios for integer input.
    X = np.asarray(X, dtype=float)
    if X.ndim != 2:
        raise ValueError(f"X must be 2-D (n_samples, n_features); got shape {X.shape}")

    n_samples, n_features = X.shape
    X_transformed = np.zeros((n_samples, n_features), dtype=float)
    if n_samples == 0:
        # Nothing to score; also avoids handing an empty batch to the estimators.
        return X_transformed

    prior_pos = priors[pos_label]
    prior_neg = priors[neg_label]

    for feature in range(n_features):
        # Score the whole column in one call per class instead of one call per sample.
        column = X[:, [feature]]  # keep 2-D: (n_samples, 1)
        f_pos = np.exp(np.asarray(densities[feature][pos_label].score_samples(column), dtype=float))
        f_neg = np.exp(np.asarray(densities[feature][neg_label].score_samples(column), dtype=float))

        # Clamp to epsilon to avoid zero densities
        f_pos = np.maximum(f_pos, epsilon)
        f_neg = np.maximum(f_neg, epsilon)

        # Prior-weighted density ratio for every sample of this feature
        ratio = prior_pos * f_pos / (prior_neg * f_neg)

        # Safeguard against invalid logarithms: only strictly positive ratios
        # are log-transformed, everything else keeps the fallback value 0.
        valid = ratio > 0
        X_transformed[valid, feature] = np.log(ratio[valid])

    return X_transformed

In [None]:
# Step 5: Estimate densities and priors using S1
s1_labels = np.array(S1_y)

# Prior of a class = share of S1 samples carrying that label
s1_classes, s1_counts = np.unique(s1_labels, return_counts=True)
priors = dict(zip(s1_classes, s1_counts / s1_counts.sum()))

# One KDE per (feature, class) pair, fitted on S1 only
densities = estimate_densities(S1_X, s1_labels)

In [None]:
# Inspect the estimated class priors (class label -> fraction of S1 samples)
priors

In [None]:
# Summarize the fitted estimators instead of echoing the whole nested dict:
# its repr holds one KernelDensity entry per (feature, class) pair.
{
    "n_features": len(densities),
    "classes": list(next(iter(densities.values()), {})),
}

In [None]:
# Step 6: Embed the S2 features and the test features with the densities and
# priors estimated on S1
S2_X_transformed, X_test_transformed = (
    transform_features(split, densities, priors) for split in (S2_X, X_test)
)

In [None]:
# Fit a linear-kernel SVM on the embedded S2 features
svm = SVC(kernel="linear", random_state=42).fit(S2_X_transformed, S2_y)

# Score the embedded test set
y_pred = svm.predict(X_test_transformed)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

# Overall accuracy first (as a fraction), then the per-class breakdown
print(f"Accuracy on Test Set (Naive Bayes Feature Embedding + SVM): {accuracy:.2f}")
print("Classification Report:", report, sep="\n")
