In [29]:
import json
import cv2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer
from skimage.feature import hog, local_binary_pattern
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.decomposition import PCA
from sklearn.multiclass import OneVsRestClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import multilabel_confusion_matrix,classification_report,hamming_loss, f1_score


In [30]:
# Resize while preserving the aspect ratio; the leftover area is zero-padded (black).
def resize_with_padding(img, target_size):
    """Fit ``img`` into a square canvas while keeping its aspect ratio.

    The longer side is scaled to ``target_size`` pixels, the shorter side is
    scaled by the same factor, and the result is centered on a zero-filled
    (black) square canvas.

    Parameters
    ----------
    img : numpy.ndarray
        Image array of shape (H, W) or (H, W, C).
    target_size : int
        Side length, in pixels, of the square output.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(target_size, target_size)`` followed by
        the channel axis of the resized image, if it has one.

    Raises
    ------
    ValueError
        If ``img`` is ``None`` or empty, or if ``target_size`` is smaller
        than 1.
    """
    # cv2.imread signals a failed read by returning None; fail with a clear
    # message instead of an AttributeError on ``.shape``.
    if img is None or img.size == 0:
        raise ValueError("resize_with_padding received an empty image")
    if target_size < 1:
        raise ValueError(f"target_size must be >= 1, got {target_size}")

    h, w = img.shape[:2]
    scale = target_size / max(h, w)
    # Truncation can produce 0 for very elongated images, which cv2.resize
    # rejects; keep at least one pixel on each side.
    new_w = max(1, int(w * scale))
    new_h = max(1, int(h * scale))
    resized = cv2.resize(img, (new_w, new_h))

    # Build the canvas from the resized image's own trailing axes so that
    # grayscale (2-D) and 4-channel inputs work as well as 3-channel ones.
    result = np.zeros((target_size, target_size) + resized.shape[2:], dtype=np.uint8)
    x_offset = (target_size - new_w) // 2
    y_offset = (target_size - new_h) // 2
    result[y_offset:y_offset+new_h, x_offset:x_offset+new_w] = resized
    return result

In [31]:
def lbp_histogram(gray, P=8, R=1, method='uniform', n_bins=59):
    """Normalized histogram of local-binary-pattern codes of ``gray``.

    Parameters
    ----------
    gray : numpy.ndarray
        2-D grayscale image.
    P : int
        Number of circularly symmetric neighbor points, forwarded to
        ``local_binary_pattern``.
    R : float
        Radius of the neighborhood circle, forwarded to
        ``local_binary_pattern``.
    method : str
        LBP variant, forwarded to ``local_binary_pattern``.
    n_bins : int
        Number of unit-width histogram bins covering codes ``0 .. n_bins - 1``.
        The default of 59 keeps the descriptor length this notebook has
        always produced.
        NOTE(review): per the scikit-image docs, ``method='uniform'`` emits
        only ``P + 2`` distinct codes (10 for ``P=8``), so with the defaults
        the last 49 bins stay zero; 59 is the code count of
        ``'nri_uniform'`` with ``P=8``. Pass ``n_bins=P + 2`` for a compact
        descriptor -- confirm before changing the default, since it alters
        the feature dimension.

    Returns
    -------
    numpy.ndarray
        Array of length ``n_bins`` whose entries sum to ~1 (float32 under
        NumPy 2 promotion rules).
    """
    lbp = local_binary_pattern(gray, P=P, R=R, method=method)
    # np.histogram silently drops values outside ``range``, so n_bins must be
    # large enough for the chosen P/method combination.
    hist, _ = np.histogram(lbp.ravel(), bins=n_bins, range=(0, n_bins))
    hist = hist.astype('float32')
    # The epsilon guards against division by zero when no code falls in range.
    hist = hist / (hist.sum() + 1e-8)
    return hist

In [32]:
def color_hist(img, bins=16):
    """Concatenated per-channel intensity histogram, normalized to sum to ~1.

    Channels 0, 1 and 2 of ``img`` are each binned into ``bins`` buckets over
    the value range [0, 256); the three histograms are joined in channel
    order and divided by their total count.
    """
    per_channel = [
        cv2.calcHist([img], [channel], None, [bins], [0, 256])
        for channel in range(3)
    ]
    flat = np.concatenate(per_channel).ravel()
    # Epsilon avoids dividing by zero for an image with no counted pixels.
    return flat / (flat.sum() + 1e-8)

In [33]:
def extract_hog(gray):
    """Return the HOG descriptor of a grayscale image.

    Uses 9 orientation bins, 8x8-pixel cells, 2x2-cell blocks and L2-Hys
    block normalization.
    """
    hog_settings = dict(
        orientations=9,
        pixels_per_cell=(8, 8),
        cells_per_block=(2, 2),
        block_norm='L2-Hys',
    )
    return hog(gray, **hog_settings)

In [34]:
# Load the training split's annotation file. JSON is UTF-8 text, so the
# encoding is pinned rather than left to the platform's locale default.
with open('data/train/_annotations.coco.json', encoding='utf-8') as f:
    train = json.load(f)
print(train.keys())

dict_keys(['info', 'licenses', 'categories', 'images', 'annotations'])


In [35]:
# Pull the sections used below out of the parsed annotation file.
train_images, categories = train['images'], train['categories']
train_labels = pd.DataFrame(train['annotations'])
# Number of image records vs. number of annotation rows.
(len(train_images), len(train_labels))

(2571, 4044)

In [36]:
# Display the category definitions read from the training annotation file.
categories

[{'id': 0, 'name': 'Recycling-Waste', 'supercategory': 'none'},
 {'id': 1, 'name': 'cardboard', 'supercategory': 'Recycling-Waste'},
 {'id': 2, 'name': 'glass', 'supercategory': 'Recycling-Waste'},
 {'id': 3, 'name': 'metal', 'supercategory': 'Recycling-Waste'},
 {'id': 4, 'name': 'paper', 'supercategory': 'Recycling-Waste'},
 {'id': 5, 'name': 'plastic', 'supercategory': 'Recycling-Waste'}]

In [37]:
# Peek at the first image record to see which fields it carries.
train_images[:1]

[{'id': 0,
  'license': 1,
  'file_name': 'metal152_jpg.rf.06414376028f03cc3a360e50505abb0f.jpg',
  'height': 384,
  'width': 512,
  'date_captured': '2025-11-30T15:32:11+00:00',
  'extra': {'name': 'metal152.jpg'}}]

In [38]:
# Peek at the first annotation row to see which columns it carries.
train_labels[:1]

Unnamed: 0,id,image_id,category_id,bbox,area,segmentation,iscrowd
0,1,0,3,"[172, 152, 114, 93]",10602,[],0


In [39]:
# Number of annotation rows per image, sorted from fewest to most.
train_labels.groupby('image_id').size().sort_values()

image_id
50       1
51       1
52       1
53       1
54       1
        ..
573     11
532     11
1681    11
1693    11
2125    13
Length: 2571, dtype: int64

In [40]:
# Number of annotation rows per category id, sorted from fewest to most.
train_labels.groupby('category_id').size().sort_values()

category_id
1     630
4     683
2     726
3     916
5    1089
dtype: int64

In [41]:
def make_x_y(part, target_size):
    """Build feature matrices and multi-label targets for one dataset split.

    Reads ``data/{part}/_annotations.coco.json`` and, for every entry of its
    ``images`` list, loads the image, resizes it with padding to
    ``target_size`` and extracts HOG, LBP-histogram and color-histogram
    features.

    Parameters
    ----------
    part : str
        Sub-directory of ``data/`` that holds the images and the annotation
        file (the notebook uses 'train', 'valid' and 'test').
    target_size : int
        Side length forwarded to ``resize_with_padding``.

    Returns
    -------
    X_hog, X_lbp, X_color_hist : numpy.ndarray
        Feature matrices with one row per entry of the ``images`` list, in
        that order.
    y_data : pandas.DataFrame
        Binary indicator columns for category ids 1-5, indexed by image id,
        with rows in the same order as the feature matrices. Images that
        have no annotation get an all-zero row.

    Raises
    ------
    OSError
        If an image listed in the annotation file cannot be read.
    """
    # JSON is UTF-8 text; do not depend on the platform's default encoding.
    with open(f'data/{part}/_annotations.coco.json', encoding='utf-8') as f:
        data = json.load(f)

    images = data['images']
    labels = pd.DataFrame(data['annotations'])

    # Unique category ids present in each annotated image.
    labels_per_image = labels.groupby("image_id")["category_id"].apply(lambda x: sorted(set(x)))

    mlb = MultiLabelBinarizer(classes=[1,2,3,4,5])
    binary_labels = mlb.fit_transform(labels_per_image)

    y_data = pd.DataFrame(binary_labels, index=labels_per_image.index, columns=mlb.classes_)

    # groupby yields rows sorted by image id and only for annotated images,
    # whereas the features below follow the order of ``images``. Reindex so
    # that row i of y_data always describes row i of the feature matrices.
    image_ids = pd.Index([image['id'] for image in images], name='image_id')
    y_data = y_data.reindex(image_ids, fill_value=0)

    X_hog = []
    X_lbp = []
    X_color_hist = []

    for image in images:
        image_path = f'data/{part}/{image["file_name"]}'
        img = cv2.imread(image_path)
        # cv2.imread returns None instead of raising on a missing/corrupt file.
        if img is None:
            raise OSError(f'could not read image: {image_path}')
        img = resize_with_padding(img, target_size)
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # HOG is computed on the min-max scaled float image, LBP on the raw
        # uint8 grayscale image, and the color histogram on the BGR image.
        normal_gray = cv2.normalize(gray.astype("float32"), None, alpha=0, beta=1,
                            norm_type=cv2.NORM_MINMAX)
        X_hog.append(extract_hog(normal_gray))
        X_lbp.append(lbp_histogram(gray))
        X_color_hist.append(color_hist(img))

    X_hog = np.array(X_hog)
    X_lbp = np.array(X_lbp)
    X_color_hist = np.array(X_color_hist)

    return X_hog, X_lbp, X_color_hist, y_data

In [42]:
def svc_predict(X, y, X_test):
    """Tune a per-label SVC by grid search and predict labels for ``X_test``.

    One ``SVC`` per output column is wrapped in a ``MultiOutputClassifier``
    and tuned with 5-fold cross-validation, scored by micro-averaged F1.
    The prediction comes from the fitted grid search object.

    Parameters
    ----------
    X : array-like
        Training feature matrix.
    y : array-like
        Binary multi-label indicator targets, one column per label.
    X_test : array-like
        Feature matrix to predict on.

    Returns
    -------
    numpy.ndarray
        Predicted indicator matrix for ``X_test``.
    """
    base_svc = SVC()
    multi_clf = MultiOutputClassifier(base_svc)

    c_values = [0.1, 1, 10]
    gamma_values = ["scale", "auto", 0.01]
    # One grid per kernel family. ``degree`` only affects the poly kernel and
    # ``gamma`` is unused by the linear kernel, so crossing every parameter
    # with every kernel would refit identical models many times over. This
    # covers the same distinct models with 39 candidates instead of 72.
    param_grid = [
        {
            "estimator__kernel": ["rbf", "sigmoid"],
            "estimator__C": c_values,
            "estimator__gamma": gamma_values,
        },
        {
            "estimator__kernel": ["poly"],
            "estimator__C": c_values,
            "estimator__gamma": gamma_values,
            "estimator__degree": [2, 3],
        },
        {
            "estimator__kernel": ["linear"],
            "estimator__C": c_values,
        },
    ]

    grid = GridSearchCV(
        estimator=multi_clf,
        param_grid=param_grid,
        scoring="f1_micro",
        cv=5,
        n_jobs=-1
    )

    grid.fit(X, y)
    y_pred = grid.predict(X_test)
    return y_pred


In [43]:
# Sweep over (image size, PCA-on-HOG?) configurations. For each one the
# classifier is fit on train+valid and scored against the test split.
sizes = [(128, True), (64, True), (64, False), (32, False), (16, False)]
validation_target_size = []
for target_size, use_pca in sizes:
    X_hog_train, X_lbp_train, X_color_train, y_train = make_x_y('train', target_size)
    X_hog_valid, X_lbp_valid, X_color_valid, y_valid = make_x_y('valid', target_size)
    X_hog_test, X_lbp_test, X_color_test, y_test = make_x_y('test', target_size)

    # Stack the train and valid rows of every feature family.
    X_hog, X_lbp, X_color = (
        np.vstack(pair)
        for pair in (
            (X_hog_train, X_hog_valid),
            (X_lbp_train, X_lbp_valid),
            (X_color_train, X_color_valid),
        )
    )
    y = pd.concat([y_train, y_valid], axis=0).reset_index(drop=True)

    # Optionally compress the HOG block to 300 components; the PCA is fit on
    # the train+valid rows and only applied to the test rows.
    if use_pca:
        pca = PCA(n_components=300, random_state=7)
        X_hog_pca = pca.fit_transform(X_hog)
        X_hog_test_pca = pca.transform(X_hog_test)
        hog_fit, hog_eval = X_hog_pca, X_hog_test_pca
    else:
        hog_fit, hog_eval = X_hog, X_hog_test
    X = np.hstack([hog_fit, X_lbp, X_color])
    X_test = np.hstack([hog_eval, X_lbp_test, X_color_test])

    y_pred = svc_predict(X, y, X_test)
    micro_f1 = f1_score(y_test.values, y_pred, average='micro')
    test_hamming = hamming_loss(y_test.values, y_pred)
    validation_target_size.append((micro_f1, test_hamming))

# Row labels such as "128_PCA" / "64_noPCA", one per configuration.
index_labels = []
for size, with_pca in sizes:
    suffix = 'PCA' if with_pca else 'noPCA'
    index_labels.append(f"{size}_{suffix}")
validation_target_size = pd.DataFrame(
    validation_target_size,
    index=index_labels,
    columns=['f1','hamming_loss'],
)

validation_target_size

Unnamed: 0,f1,hamming_loss
128_PCA,0.669261,0.131783
64_PCA,0.631579,0.141085
64_noPCA,0.635659,0.145736
32_noPCA,0.575875,0.168992
16_noPCA,0.550218,0.15969


In [44]:
# X_hog_train.shape, y_train.shape, X_hog_valid.shape, y_valid.shape

In [45]:
# X_hog = np.vstack([X_hog_train, X_hog_valid])
# X_lbp = np.vstack([X_lbp_train, X_lbp_valid])
# X_color = np.vstack([X_color_train, X_color_valid])
# y = pd.concat([y_train, y_valid], axis=0).reset_index(drop=True)

In [46]:
# X_hog.shape, y.shape

In [47]:
# pca = PCA(n_components=300, random_state=7)   
# X_hog_pca = pca.fit_transform(X_hog)
# X_hog_test_pca = pca.transform(X_hog_test)


In [48]:
# X = np.hstack([X_hog_pca, X_lbp, X_color])
# X_test  = np.hstack([X_hog_test_pca, X_lbp_test, X_color_test])

In [49]:
# X.shape, X_test.shape , y.shape, y_test.shape

In [50]:
# base_svc = SVC()
# multi_clf = MultiOutputClassifier(base_svc)

# param_grid = {
#     "estimator__kernel": ["rbf", "poly", "sigmoid", "linear"],
#     "estimator__C": [0.1, 1, 10],
#     "estimator__gamma": ["scale", "auto", 0.01],
#     "estimator__degree": [2, 3],
# }

# grid = GridSearchCV(
#     estimator=multi_clf,
#     param_grid=param_grid,
#     scoring="f1_micro",
#     cv=5,
#     n_jobs=-1
# )

# grid.fit(X, y)  
# y_pred = grid.predict(X_test)


In [51]:
# hamming_loss(y_test, y_pred)

In [52]:
# grid.best_params_

In [53]:
# mcm = multilabel_confusion_matrix(y_test, y_pred)

# print("Multi-label Confusion Matrices:")
# print(mcm)


# print("Classification Report:")
# print(classification_report(y_test, y_pred))