In [6]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import RandomizedSearchCV

In [7]:
def preprocess_data(data_dir='processed_data'):
    """Load the feature CSVs and standardize them for model tuning.

    Reads ``train_rf.csv``, ``val_rf.csv`` and ``test_rf.csv`` from
    ``data_dir``. The train and validation rows are concatenated into one
    tuning set. The scaler is fitted on that combined set only and then
    applied, unchanged, to the test features.

    Parameters
    ----------
    data_dir : str or path-like, default 'processed_data'
        Directory that contains the three feature CSV files.

    Returns
    -------
    X_scaled
        Scaled features of the combined train + validation rows, as
        returned by ``StandardScaler.fit_transform``.
    y : pandas.Series
        The ``ClassId`` column of the combined rows (same row order as
        ``X_scaled``).
    X_test_scaled
        Test features transformed with the scaler fitted on the combined set.
    """
    # Columns that are identifiers/labels rather than model inputs.
    non_feature_cols = ['image_path', 'ClassId']

    train_df = pd.read_csv(f'{data_dir}/train_rf.csv')
    val_df = pd.read_csv(f'{data_dir}/val_rf.csv')
    test_df = pd.read_csv(f'{data_dir}/test_rf.csv')

    # Combine training and validation for tuning
    combined_df = pd.concat([train_df, val_df], ignore_index=True)
    X = combined_df.drop(columns=non_feature_cols)
    y = combined_df['ClassId']

    # Prepare test features. An unlabeled test file may have no 'ClassId'
    # column, so absent non-feature columns are tolerated for this frame only.
    X_test = test_df.drop(columns=non_feature_cols, errors='ignore')

    # Fit on the tuning data only; the test set is merely transformed.
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    X_test_scaled = scaler.transform(X_test)

    return X_scaled, y, X_test_scaled

In [8]:
def tune_best_model(X_scaled, y):
    """Run a randomized hyper-parameter search for an SVC and return the winner.

    Ten parameter combinations are sampled from the candidate values for
    ``C``, ``gamma`` and ``kernel``; each one is evaluated with 5-fold
    cross-validation. The best parameter set is printed.

    Parameters
    ----------
    X_scaled
        Feature matrix handed to the search's ``fit``.
    y
        Target labels handed to the search's ``fit``.

    Returns
    -------
    The ``best_estimator_`` of the fitted search.
    """
    # Candidate values the randomized search samples from.
    search_space = dict(
        C=[0.1, 1, 10, 100],
        gamma=['scale', 'auto', 0.01, 0.1],
        kernel=['rbf', 'poly', 'sigmoid'],
    )

    # Both the estimator and the sampler are seeded so reruns pick the
    # same candidates.
    base_svm = SVC(probability=True, random_state=42)
    search = RandomizedSearchCV(
        base_svm,
        param_distributions=search_space,
        n_iter=10,
        cv=5,
        verbose=1,
        random_state=42,
        n_jobs=-1,
    )
    search.fit(X_scaled, y)

    print(f"Best SVM parameters: {search.best_params_}")
    return search.best_estimator_

In [9]:
def predict_test_data(metadata_path='test/test_metadata.csv',
                      output_path='submission.csv'):
    """Tune the SVM, predict the test set and write the submission CSV.

    Parameters
    ----------
    metadata_path : str or path-like, default 'test/test_metadata.csv'
        CSV file with an ``id`` column; its rows are paired with the
        predictions by position.
    output_path : str or path-like, default 'submission.csv'
        Where the two-column (``id``, ``ClassId``) submission is written.

    Raises
    ------
    ValueError
        If the number of predictions differs from the number of metadata rows.
    """
    X_scaled, y, X_test_scaled = preprocess_data()
    tuned_best_model = tune_best_model(X_scaled, y)
    test_pred = tuned_best_model.predict(X_test_scaled)

    test_metadata_df = pd.read_csv(metadata_path)

    # Ids and predictions are matched purely by row position, so a length
    # mismatch means the two files do not describe the same samples.
    # NOTE(review): this also presumes both files list samples in the same
    # order -- confirm against how the feature CSV was generated.
    if len(test_pred) != len(test_metadata_df):
        raise ValueError(
            f"Number of predictions ({len(test_pred)}) does not match the "
            f"number of rows in {metadata_path!r} ({len(test_metadata_df)})"
        )

    # Create submission DataFrame
    submission = pd.DataFrame({
        'id': test_metadata_df['id'],
        'ClassId': test_pred
    })

    submission.to_csv(output_path, index=False)

In [10]:
# Run the end-to-end pipeline: preprocess, tune, predict, and write 'submission.csv'.
predict_test_data()


Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best SVM parameters: {'kernel': 'rbf', 'gamma': 'scale', 'C': 10}
