In [1]:
!pip install numpy pandas matplotlib seaborn opencv-python scikit-learn

Collecting numpy
  Downloading numpy-2.2.2-cp310-cp310-win_amd64.whl.metadata (60 kB)
Collecting pandas
  Downloading pandas-2.2.3-cp310-cp310-win_amd64.whl.metadata (19 kB)
Collecting matplotlib
  Downloading matplotlib-3.10.0-cp310-cp310-win_amd64.whl.metadata (11 kB)
Collecting seaborn
  Downloading seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Collecting opencv-python
  Downloading opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting scikit-learn
  Downloading scikit_learn-1.6.1-cp310-cp310-win_amd64.whl.metadata (15 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2025.1-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2025.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting contourpy>=1.0.1 (from matplotlib)
  Downloading contourpy-1.3.1-cp310-cp310-win_amd64.whl.metadata (5.4 kB)
Collecting cycler>=0.10 (from matplotlib)
  Downloading cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecti

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
import os
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from glob import glob

# Load images from a folder
def load_images_from_folder(folder, size=(64, 64)):
    images = []
    filenames = []
    for filename in glob(os.path.join(folder, "*.jpg")):
        img = cv2.imread(filename)
        if img is not None:
            img = cv2.resize(img, size)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            images.append(img)
            filenames.append(os.path.basename(filename))
    return images, filenames

# Convert image to RGBW features
def extract_rgbw_features(image):
    r, g, b = cv2.split(image)
    w = (r + g + b) / 3  # White component as average of RGB
    return np.array([r.mean(), g.mean(), b.mean(), w.mean()])

# Set the folder path where images are stored
image_folder = "E:/Coding/Advanced ML/train_data"
labels_file = "E:/Coding/Advanced ML/train.csv"

# Load labels from CSV
labels_df = pd.read_csv(labels_file)
labels_dict = dict(zip(labels_df['file_name'], labels_df['label']))


# Load images and extract features
images, filenames = load_images_from_folder(image_folder)
features = np.array([extract_rgbw_features(img) for img in images])

# Create DataFrame for RGBW values
df_rgbw = pd.DataFrame(features, columns=['Red', 'Green', 'Blue', 'White'])
df_rgbw['Image'] = filenames

# Map labels to images
df_rgbw['Label'] = df_rgbw['Image'].map(lambda x: labels_dict.get(f"train_data/{x}", np.nan))

# Display the RGBW table with Label next to White
print("RGBW Values of Images with Labels:")
print(df_rgbw[['Red', 'Green', 'Blue', 'White', 'Label', 'Image']])

# Standardize the features
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

# Apply PCA
pca = PCA(n_components=2)
pca_result = pca.fit_transform(features_scaled)

# Create a DataFrame for the PCA result
df_pca = pd.DataFrame(pca_result, columns=['PC1', 'PC2'])
df_pca['Image'] = filenames

# Display the PCA table
print("PCA Results:")
print(df_pca)

loadings = pd.DataFrame(pca.components_, columns=['Red', 'Green', 'Blue', 'White'], index=['PC1', 'PC2'])

print(loadings)

RGBW Values of Images with Labels:
              Red       Green        Blue      White  Label  \
0      181.152588  176.425781  176.861328  47.750732      0   
1      130.607666   99.067139   55.423584  35.720296      1   
2      146.630615   92.545410   36.175049  39.367025      0   
3      244.734131  204.105957  124.103271  53.876953      0   
4      131.071045  141.335693  152.371826  48.197021      0   
...           ...         ...         ...        ...    ...   
79945  216.273438  195.844971  168.042969  54.782959      1   
79946   79.455566   58.445068   30.175293  24.337809      1   
79947  111.601562  112.196533  109.803223  40.637939      0   
79948  140.229980  148.943115  157.530518  49.213704      1   
79949  141.340820  141.365234  143.736816  44.064290      0   

                                      Image  
0      000295da5dca4af09d5593174e15bb09.jpg  
1      0002f7db7beb4bf5879a0cdb7f17209d.jpg  
2      0003538364d44952924d83980771e5b7.jpg  
3      00040d088f054d379