In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import joblib

# Load your dataset containing RGB values and labels
data = pd.read_csv("merged_dataset.csv")

# Filter out rows labeled with colors you want to exclude
colors_to_keep = ['white', 'blue', 'orange', 'yellow', 'red', 'green']
filtered_data = data[data['label'].str.lower().isin(colors_to_keep)]

# Extract RGB values and labels from the filtered dataset
rgb_values = filtered_data[['R', 'G', 'B']].values
labels = filtered_data['label'].str.lower().values

# Scale the RGB values
scaler = StandardScaler()
scaled_data = scaler.fit_transform(rgb_values)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(scaled_data, labels, test_size=0.2, random_state=42)

# Define parameter grid for GridSearchCV
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 'auto'],
    'kernel': ['linear', 'rbf', 'poly']
}

# Create a GridSearchCV object
grid_search = GridSearchCV(SVC(), param_grid, refit=True, verbose=3)
grid_search.fit(X_train, y_train)

# Get the best parameters
print("Best Parameters:", grid_search.best_params_)

# Train the SVM model with the best parameters
svm_model = grid_search.best_estimator_
svm_model.fit(X_train, y_train)

# Save the trained model and scaler
joblib.dump(svm_model, 'svm_model.pkl')
joblib.dump(scaler, 'scaler.pkl')

# Evaluate the model
y_pred = svm_model.predict(X_test)
print("Classification Report:")
print(classification_report(y_test, y_pred))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("Accuracy Score:", accuracy_score(y_test, y_pred))


Fitting 5 folds for each of 24 candidates, totalling 120 fits
[CV 1/5] END .C=0.1, gamma=scale, kernel=linear;, score=1.000 total time=   0.8s
[CV 2/5] END .C=0.1, gamma=scale, kernel=linear;, score=1.000 total time=   0.7s
[CV 3/5] END .C=0.1, gamma=scale, kernel=linear;, score=1.000 total time=   0.8s
[CV 4/5] END .C=0.1, gamma=scale, kernel=linear;, score=1.000 total time=   0.7s
[CV 5/5] END .C=0.1, gamma=scale, kernel=linear;, score=1.000 total time=   0.7s
[CV 1/5] END ....C=0.1, gamma=scale, kernel=rbf;, score=1.000 total time=   1.9s
[CV 2/5] END ....C=0.1, gamma=scale, kernel=rbf;, score=1.000 total time=   1.9s
[CV 3/5] END ....C=0.1, gamma=scale, kernel=rbf;, score=1.000 total time=   1.9s
[CV 4/5] END ....C=0.1, gamma=scale, kernel=rbf;, score=1.000 total time=   1.9s
[CV 5/5] END ....C=0.1, gamma=scale, kernel=rbf;, score=1.000 total time=   1.8s
[CV 1/5] END ...C=0.1, gamma=scale, kernel=poly;, score=1.000 total time=   1.3s
[CV 2/5] END ...C=0.1, gamma=scale, kernel=poly

In [4]:
import joblib
# After training the SVM model
joblib.dump(svm_model, 'svm_model.pkl')
joblib.dump(scaler, 'scaler.pkl')

['scaler.pkl']