In [3]:
import pandas as pd
import numpy as np

In [6]:
# You are passing a string (filename) to pd.DataFrame, which is incorrect.
# Use pd.read_csv to load CSV files as DataFrames.

logits1 = pd.read_csv("logits.csv")
coordinates1 = pd.read_csv("coordinates.csv")

logits2 = pd.read_csv("logitsN.csv")
coordinates2 = pd.read_csv("coordinatesN.csv")

logits3 = pd.read_csv("logitsRama.csv")
coordinates3 = pd.read_csv("coordinatesRama.csv")

# Concatenate all logits and coordinates into single DataFrames
logits = pd.concat([logits1, logits2, logits3], ignore_index=True)
coordinates = pd.concat([coordinates1, coordinates2, coordinates3], ignore_index=True)

In [10]:
# Drop the 'unique_id' column
logits_noid = logits.drop(columns=['unique_id'])
coordinates_noid = coordinates.drop(columns=['unique_id'])

# Convert to numpy arrays
logits_np = logits_noid.drop(columns=['label']).to_numpy()
coordinates_np = coordinates_noid.drop(columns=['label']).to_numpy()
labels = logits_noid['label'].to_numpy()  # Assuming labels are the same for both

# Train/val/test split using sklearn
from sklearn.model_selection import train_test_split

# First split into train+val and test
X_temp, X_test, y_temp, y_test = train_test_split(logits_np, labels, test_size=0.2, random_state=42, stratify=labels)
# Then split train+val into train and val
X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, test_size=0.25, random_state=42, stratify=y_temp)

print("Train shape:", X_train.shape)
print("Val shape:", X_val.shape)
print("Test shape:", X_test.shape)

Train shape: (929, 33)
Val shape: (310, 33)
Test shape: (310, 33)


In [11]:
# Repeat the same split for coordinates if needed
Xc_temp, Xc_test, yc_temp, yc_test = train_test_split(coordinates_np, labels, test_size=0.2, random_state=42, stratify=labels)
Xc_train, Xc_val, yc_train, yc_val = train_test_split(Xc_temp, yc_temp, test_size=0.25, random_state=42, stratify=yc_temp)

print("Coordinates Train shape:", Xc_train.shape)
print("Coordinates Val shape:", Xc_val.shape)
print("Coordinates Test shape:", Xc_test.shape)

Coordinates Train shape: (929, 99)
Coordinates Val shape: (310, 99)
Coordinates Test shape: (310, 99)


In [21]:
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Normalize (standardize) the features
scaler = StandardScaler()
X_train_norm = scaler.fit_transform(X_train)
X_val_norm = scaler.transform(X_val)
X_test_norm = scaler.transform(X_test)

# Train SVM with RBF kernel and C=100
svm = SVC(kernel='rbf', C=500, gamma="scale")
svm.fit(X_train_norm, y_train)

# Evaluate on validation and test sets
train_score = svm.score(X_train_norm, y_train)
val_score = svm.score(X_val_norm, y_val)
test_score = svm.score(X_test_norm, y_test)


print(train_score)

print(f"Validation accuracy: {val_score:.4f}")
print(f"Test accuracy: {test_score:.4f}")

0.9483315392895587
Validation accuracy: 0.8806
Test accuracy: 0.9516


In [None]:
from sklearn.model_selection import GridSearchCV
import numpy as np


# Define parameter grid for C and gamma (gamma from 0.01 to 0.1 in 0.001 increments)
param_grid = {
    'C': list(range(50, 3001, 50)),
    'gamma': np.arange(0.01, 0.101, 0.001)
}

svm = SVC(kernel='rbf')
grid = GridSearchCV(svm, param_grid, cv=3, scoring='accuracy', n_jobs=-1)
grid.fit(X_train_norm, y_train)

print("Best parameters:", grid.best_params_)
print("Best CV accuracy:", grid.best_score_)


# Evaluate on validation and test sets using the best estimator
val_score = grid.best_estimator_.score(X_val_norm, y_val)
test_score = grid.best_estimator_.score(X_test_norm, y_test)
print(f"Validation accuracy (best params): {val_score:.4f}")
print(f"Test accuracy (best params): {test_score:.4f}")

Best parameters: {'C': 1550, 'gamma': 0.04399999999999998}
Best CV accuracy: 0.9418763266868497
Validation accuracy (best params): 0.9226
Test accuracy (best params): 0.9516


In [None]:
import pickle
# Save the best SVM model from grid search using pickle
with open("svm_best_model.pkl", "wb") as f:
    pickle.dump(grid.best_estimator_, f)
print("Best SVM model saved to svm_best_model.pkl")

Best SVM model saved to svm_best_model.pkl
