In [19]:
import os
import numpy as np
from skimage.io import imread
from skimage.transform import resize

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

import matplotlib.pyplot as plt

Prepare data

In [3]:
# Unzip the dataset archive so the images can be read from disk.
import zipfile

zip_file_path = '/content/clf-data.zip'
extract_dir = '/content/Dataset'

# exist_ok=True creates the directory only when it is missing, without a
# separate existence check that could race with another process.
os.makedirs(extract_dir, exist_ok=True)

# Extract every archive member; the with-block closes the zip file afterwards.
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extract_dir)

print(f"Zip file '{zip_file_path}' extracted to '{extract_dir}'")

Zip file '/content/clf-data.zip' extracted to '/content/Dataset'


In [15]:
# Folder that holds one sub-folder of images per class.
input_dir = '/content/Dataset/clf-data'

# Class names; each name is also the sub-folder name under input_dir.
categories = [
    'empty',
    'not_empty',
]

In [16]:
# Load data: read every image, resize it to 15x15 and flatten it into a single
# feature vector; the label of an image is the index of its category.

data = []
labels = []

for category_idx, category in enumerate(categories):
    category_dir = os.path.join(input_dir, category)
    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order identical on every machine and every run.
    for file in sorted(os.listdir(category_dir)):
        img_path = os.path.join(category_dir, file)
        img = imread(img_path)
        img = resize(img, (15, 15))
        data.append(img.flatten())
        labels.append(category_idx)

# NOTE(review): assumes all images share the same channel count so that every
# flattened vector has the same length — confirm against the dataset.
data = np.asarray(data)
labels = np.asarray(labels)

Split the data

In [25]:
# Hold out 20% of the samples for testing. stratify=labels keeps the class
# proportions the same in both splits, and the fixed random_state makes the
# shuffle (and therefore the reported metrics) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    labels,
    test_size=0.2,
    shuffle=True,
    stratify=labels,
    random_state=42,
)

Training

In [26]:
# Model: a support-vector classifier whose hyper-parameters are picked by grid search.
clf = SVC()

# Every gamma/C pair below is evaluated (3 x 4 = 12 candidates).
parameters = [
    {
        'gamma': [0.1, 0.01, 0.001],
        'C': [1, 10, 100, 1000],
    }
]
grid_search = GridSearchCV(estimator=clf, param_grid=parameters)

In [27]:
# Train model: run the grid search using the training split only.
grid_search.fit(X=X_train, y=y_train)

Prediction

In [28]:
# Take the estimator selected by the grid search and label the held-out samples.
best_estimator = grid_search.best_estimator_
y_pred = best_estimator.predict(X=X_test)

Evaluate

In [30]:
# Fraction of held-out samples whose predicted label matches the true label.
accuracy_score(y_true=y_test, y_pred=y_pred)

1.0

In [32]:
# Per-class precision, recall and F1 on the held-out samples.
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       609
           1       1.00      1.00      1.00       609

    accuracy                           1.00      1218
   macro avg       1.00      1.00      1.00      1218
weighted avg       1.00      1.00      1.00      1218



Save model for download

In [35]:
import pickle

# Persist the tuned classifier to model.pkl. The with-block guarantees the file
# is flushed and closed even if pickling raises, instead of leaking the handle.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(best_estimator, model_file)