In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage.color import lab2rgb
import sys
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.preprocessing import FunctionTransformer
from skimage.color import rgb2lab
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier

# Load CSV file
df = pd.read_csv("colour-data.csv")

# Extract R, G, B columns as features
X = df[['R', 'G', 'B']].values  

# Normalize RGB values to range [0,1]
X = X / 255.0  

# Extract color labels as target variable
y = df['Label'].values  # Change 'Label' to the correct column name from your CSV


print("First 5 rows of X:\n", X[:5])
print("First 5 labels (y):\n", y[:5])
print("Shape of X:", X.shape)
print("Shape of y:", y.shape)



# representative RGB colours for each label, for nice display
COLOUR_RGB = {
    'red': (255, 0, 0),
    'orange': (255, 114, 0),
    'yellow': (255, 255, 0),
    'green': (0, 230, 0),
    'blue': (0, 0, 255),
    'purple': (187, 0, 187),
    'brown': (117, 60, 0),
    'pink': (255, 187, 187),
    'black': (0, 0, 0),
    'grey': (150, 150, 150),
    'white': (255, 255, 255),
}
name_to_rgb = np.vectorize(COLOUR_RGB.get, otypes=[np.uint8, np.uint8, np.uint8])


def plot_predictions(model, lum=71, resolution=256):
    """
    Create a slice of LAB colour space with given luminance; predict with the model; plot the results.
    """
    wid = resolution
    hei = resolution
    n_ticks = 5

    # create a hei*wid grid of LAB colour values, with L=lum
    ag = np.linspace(-100, 100, wid)
    bg = np.linspace(-100, 100, hei)
    aa, bb = np.meshgrid(ag, bg)
    ll = lum * np.ones((hei, wid))
    lab_grid = np.stack([ll, aa, bb], axis=2)

    # convert to RGB for consistency with original input
    X_grid = lab2rgb(lab_grid)

    # predict and convert predictions to colours so we can see what's happening
    y_grid = model.predict(X_grid.reshape((wid*hei, 3)))
    pixels = np.stack(name_to_rgb(y_grid), axis=1) / 255
    pixels = pixels.reshape((hei, wid, 3))

    # plot input and predictions
    plt.figure(figsize=(10, 5))
    plt.suptitle('Predictions at L=%g' % (lum,))
    plt.subplot(1, 2, 1)
    plt.title('Inputs')
    plt.xticks(np.linspace(0, wid, n_ticks), np.linspace(-100, 100, n_ticks))
    plt.yticks(np.linspace(0, hei, n_ticks), np.linspace(-100, 100, n_ticks))
    plt.xlabel('A')
    plt.ylabel('B')
    plt.imshow(X_grid.reshape((hei, wid, 3)))

    plt.subplot(1, 2, 2)
    plt.title('Predicted Labels')
    plt.xticks(np.linspace(0, wid, n_ticks), np.linspace(-100, 100, n_ticks))
    plt.yticks(np.linspace(0, hei, n_ticks), np.linspace(-100, 100, n_ticks))
    plt.xlabel('A')
    plt.imshow(pixels)

First 5 rows of X:
 [[0.65882353 0.82745098 0.95294118]
 [0.14509804 0.1254902  0.15686275]
 [0.1372549  0.13333333 0.14901961]
 [0.30980392 0.29019608 0.62352941]
 [0.21568627 0.38823529 0.13333333]]
First 5 labels (y):
 ['blue' 'black' 'black' 'purple' 'green']
Shape of X: (3950, 3)
Shape of y: (3950,)


In [7]:
X_train, X_remainder, y_train, y_remainder = train_test_split(X, y, random_state=42, test_size=0.3)
X_valid, X_test, y_valid, y_test = train_test_split(X_remainder, y_remainder, random_state=42, test_size=0.5)

In [8]:
# Train Logistic Regression model on RGB
model_rgb = LogisticRegression(max_iter=1000)
model_rgb.fit(X_train, y_train)

# Print validation score
print("Validation Score (RGB Model):", model_rgb.score(X_valid, y_valid))

Validation Score (RGB Model): 0.6925675675675675


In [9]:
# Predict on validation set
y_pred = model_rgb.predict(X_valid)

# Print classification report
print(classification_report(y_valid, y_pred))

              precision    recall  f1-score   support

       black       0.70      0.87      0.77        61
        blue       0.86      0.74      0.79        99
       brown       0.64      0.24      0.35        29
       green       0.73      0.96      0.83       127
        grey       0.49      0.53      0.51        49
      orange       0.75      0.40      0.52        15
        pink       0.50      0.68      0.58        38
      purple       0.72      0.80      0.76        95
         red       0.50      0.30      0.38        50
       white       0.00      0.00      0.00        10
      yellow       1.00      0.32      0.48        19

    accuracy                           0.69       592
   macro avg       0.63      0.53      0.54       592
weighted avg       0.69      0.69      0.67       592



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [10]:
def makelab(X):
    X = X.reshape(1, -1, 3)  # Reshape for skimage
    lab = rgb2lab(X)         # Convert to LAB
    return lab.reshape(-1, 3)  # Reshape back


In [11]:
# Create a pipeline that first converts RGB → LAB, then applies Logistic Regression
model_lab = make_pipeline(
    FunctionTransformer(makelab),
    LogisticRegression(max_iter=1000)
)

# Train on training data
model_lab.fit(X_train, y_train)

# Print validation score for LAB model
print("Validation Score (LAB Model):", model_lab.score(X_valid, y_valid))

Validation Score (LAB Model): 0.722972972972973


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [12]:
y_pred_lab = model_lab.predict(X_valid)
print(classification_report(y_valid, y_pred_lab))

              precision    recall  f1-score   support

       black       0.81      0.84      0.82        61
        blue       0.81      0.75      0.78        99
       brown       0.69      0.31      0.43        29
       green       0.85      0.92      0.88       127
        grey       0.48      0.73      0.58        49
      orange       0.73      0.53      0.62        15
        pink       0.60      0.79      0.68        38
      purple       0.76      0.79      0.77        95
         red       0.42      0.32      0.36        50
       white       0.00      0.00      0.00        10
      yellow       0.86      0.63      0.73        19

    accuracy                           0.72       592
   macro avg       0.64      0.60      0.61       592
weighted avg       0.72      0.72      0.71       592



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [13]:
# Try different values of C (Regularization Parameter)
for c in [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000]:
    model = LogisticRegression(C=c, max_iter=1000)
    scores = cross_val_score(model, X_train, y_train, cv=5)
    print(f"C={c}, Mean Validation Score: {scores.mean()}")

C=0.0001, Mean Validation Score: 0.22423146473779382
C=0.001, Mean Validation Score: 0.22423146473779382
C=0.01, Mean Validation Score: 0.2835443037974683
C=0.1, Mean Validation Score: 0.532368896925859
C=1, Mean Validation Score: 0.6433996383363473
C=10, Mean Validation Score: 0.6871609403254973
C=100, Mean Validation Score: 0.6933092224231465
C=1000, Mean Validation Score: 0.6943942133815552


In [None]:
# Choose the best model type (RGB or LAB) and best C value
final_model = LogisticRegression(C=best_C, max_iter=1000)

# Train on both training + validation set
final_model.fit(np.vstack([X_train, X_valid]), np.hstack([y_train, y_valid]))

# Print test score
print("Final Test Score:", final_model.score(X_test, y_test))

In [None]:
y_test_pred = final_model.predict(X_test)
print(classification_report(y_test, y_test_pred))


In [None]:
# Compute confusion matrix
cm = confusion_matrix(y_test, y_test_pred)

# Plot heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues")
plt.xlabel("Predicted Label")
plt.ylabel("Actual Label")
plt.title("Confusion Matrix")
plt.show()

In [None]:
# Train KNN with k=5
knn_model = KNeighborsClassifier(n_neighbors=5)
knn_model.fit(X_train, y_train)

# Print validation score
print("Validation Score (KNN Model):", knn_model.score(X_valid, y_valid))