In [1]:
from skimage.feature import hog
import os
import cv2
import matplotlib.pyplot as plt
import numpy as np
from tqdm.auto import tqdm

def get_hog_features(img, orient=9, pix_per_cell=8, cell_per_block=2,
                        vis=False, feature_vec=True):
    # Call with two outputs if vis==True
    if vis == True:
        features, hog_image = hog(img, orientations=orient,
                                  pixels_per_cell=(pix_per_cell, pix_per_cell),
                                  cells_per_block=(cell_per_block, cell_per_block),
                                  transform_sqrt=True,
                                  visualize=vis, feature_vector=feature_vec)
        return features, hog_image
    # Otherwise call with one output
    else:
        features = hog(img, orientations=orient,
                       pixels_per_cell=(pix_per_cell, pix_per_cell),
                       cells_per_block=(cell_per_block, cell_per_block),
                       transform_sqrt=True,
                       visualize=vis, feature_vector=feature_vec)
        return features

def get_features(root_dir = '/kaggle/input/car-vs-bike-classification-dataset/Car-Bike-Dataset'):
    category_map = {'Bike': 0, 'Car': 1}
    hog_features = []
    labels = []

    for category, label in category_map.items():
        category_path = os.path.join(root_dir,category)
        for img_name in tqdm(os.listdir(category_path)):
            img_path = os.path.join(category_path,img_name)
            img = cv2.imread(img_path)
            img = cv2.resize(img, (224,224))
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            hog_feature = get_hog_features(img)
            hog_features.append(hog_feature)
            labels.append(label)
    return np.array(hog_features), np.array(labels)

In [2]:
import os

# Create Kaggle folder
os.makedirs("/root/.kaggle", exist_ok=True)

# Move the kaggle.json file to the correct location
!mv kaggle.json /root/.kaggle/

# Set permissions for the file
!chmod 600 /root/.kaggle/kaggle.json


mv: cannot stat 'kaggle.json': No such file or directory
chmod: cannot access '/root/.kaggle/kaggle.json': No such file or directory


In [3]:
!kaggle datasets download -d utkarshsaxenadn/car-vs-bike-classification-dataset

Dataset URL: https://www.kaggle.com/datasets/utkarshsaxenadn/car-vs-bike-classification-dataset
License(s): CC0-1.0
Downloading car-vs-bike-classification-dataset.zip to /content
 92% 95.0M/103M [00:01<00:00, 71.1MB/s]
100% 103M/103M [00:01<00:00, 66.2MB/s] 


In [4]:
!unzip car-vs-bike-classification-dataset.zip -d /content/data

Archive:  car-vs-bike-classification-dataset.zip
  inflating: /content/data/Car-Bike-Dataset/Bike/Bike (1).jpeg  
  inflating: /content/data/Car-Bike-Dataset/Bike/Bike (1).jpg  
  inflating: /content/data/Car-Bike-Dataset/Bike/Bike (1).png  
  inflating: /content/data/Car-Bike-Dataset/Bike/Bike (10).jpeg  
  inflating: /content/data/Car-Bike-Dataset/Bike/Bike (10).jpg  
  inflating: /content/data/Car-Bike-Dataset/Bike/Bike (10).png  
  inflating: /content/data/Car-Bike-Dataset/Bike/Bike (100).jpeg  
  inflating: /content/data/Car-Bike-Dataset/Bike/Bike (100).jpg  
  inflating: /content/data/Car-Bike-Dataset/Bike/Bike (1000).jpeg  
  inflating: /content/data/Car-Bike-Dataset/Bike/Bike (1001).jpeg  
  inflating: /content/data/Car-Bike-Dataset/Bike/Bike (1002).jpeg  
  inflating: /content/data/Car-Bike-Dataset/Bike/Bike (1003).jpeg  
  inflating: /content/data/Car-Bike-Dataset/Bike/Bike (1004).jpeg  
  inflating: /content/data/Car-Bike-Dataset/Bike/Bike (1005).jpeg  
  inflating: /content

In [5]:
features, labels = get_features(root_dir = '/content/data/Car-Bike-Dataset')

  0%|          | 0/2000 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

In [6]:
len(features[0])

26244

In [7]:
from sklearn.model_selection import train_test_split
train_data, test_data, train_targets, test_targets = train_test_split(features, labels, test_size=0.2, random_state=42)
len(train_data),len(test_data)

(3200, 800)

In [None]:
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import classification_report

svm = SVC()
param_distributions = {
    'C': np.logspace(-1, 2, 10),           # Exponentially spaced values for regularization
    'kernel': ['linear', 'rbf', 'poly'],   # Kernel types
    'gamma': ['scale', 'auto'],  # Kernel coefficient
    'degree': [2, 3, 4],                  # Degree for the 'poly' kernel
}

In [None]:
random_search = RandomizedSearchCV(
    estimator=svm,
    param_distributions=param_distributions,
    n_iter=20,                # Number of random combinations to try
    cv=5,                     # 5-fold cross-validation
    scoring='accuracy',       # Scoring metric
    random_state=42,          # For reproducibility
    verbose=1,                # Show progress
    n_jobs=-1                 # Use all CPU cores
)
random_search.fit(train_data, train_targets)
print("Best parameters:", random_search.best_params_)
print("Best cross-validation accuracy:", random_search.best_score_)

Fitting 5 folds for each of 20 candidates, totalling 100 fits


  _data = np.array(data, dtype=dtype, copy=copy,


Best parameters: {'kernel': 'rbf', 'gamma': 'scale', 'degree': 2, 'C': 46.41588833612777}
Best cross-validation accuracy: 0.93625


In [None]:
results = random_search.cv_results_
results.keys()

dict_keys(['mean_fit_time', 'std_fit_time', 'mean_score_time', 'std_score_time', 'param_kernel', 'param_gamma', 'param_degree', 'param_C', 'params', 'split0_test_score', 'split1_test_score', 'split2_test_score', 'split3_test_score', 'split4_test_score', 'mean_test_score', 'std_test_score', 'rank_test_score'])

In [None]:
for mean_score, params in zip(results["mean_test_score"], results["params"]):
  print(mean_score, params)

0.9140625 {'kernel': 'rbf', 'gamma': 'scale', 'degree': 2, 'C': 0.21544346900318834}
0.9118749999999999 {'kernel': 'linear', 'gamma': 'scale', 'degree': 3, 'C': 0.46415888336127786}
0.9118749999999999 {'kernel': 'linear', 'gamma': 'auto', 'degree': 3, 'C': 46.41588833612777}
0.9118749999999999 {'kernel': 'linear', 'gamma': 'scale', 'degree': 3, 'C': 2.1544346900318834}
0.93625 {'kernel': 'rbf', 'gamma': 'scale', 'degree': 2, 'C': 46.41588833612777}
0.9118749999999999 {'kernel': 'linear', 'gamma': 'auto', 'degree': 4, 'C': 0.1}
0.9118749999999999 {'kernel': 'linear', 'gamma': 'scale', 'degree': 3, 'C': 0.21544346900318834}
0.9340624999999999 {'kernel': 'poly', 'gamma': 'scale', 'degree': 4, 'C': 1.0}
0.5068750000000001 {'kernel': 'poly', 'gamma': 'auto', 'degree': 2, 'C': 10.0}
0.9246874999999999 {'kernel': 'rbf', 'gamma': 'auto', 'degree': 3, 'C': 10.0}
0.9118749999999999 {'kernel': 'linear', 'gamma': 'auto', 'degree': 2, 'C': 4.6415888336127775}
0.9118749999999999 {'kernel': 'linear',

In [8]:
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.svm import SVC
# Define the SVM model
svm = SVC()

# Define the parameter grid
param_grid = {
    'C': [46.41588833612777, 50, 60, 70, 100],
    'kernel': ['rbf', 'poly'],
    'gamma': ['scale'],
    'degree': [2, 3, 4],
}

In [9]:
grid_search = GridSearchCV(estimator=svm, param_grid=param_grid, cv=3, scoring='accuracy', verbose=1)

grid_search.fit(train_data, train_targets)

Fitting 3 folds for each of 30 candidates, totalling 90 fits


In [10]:
results = grid_search.cv_results_
results.keys()

dict_keys(['mean_fit_time', 'std_fit_time', 'mean_score_time', 'std_score_time', 'param_C', 'param_degree', 'param_gamma', 'param_kernel', 'params', 'split0_test_score', 'split1_test_score', 'split2_test_score', 'mean_test_score', 'std_test_score', 'rank_test_score'])

In [11]:
for mean_score, params in zip(results["mean_test_score"], results["params"]):
  print(mean_score, params)

0.9371866085469304 {'C': 46.41588833612777, 'degree': 2, 'gamma': 'scale', 'kernel': 'rbf'}
0.9324970562083964 {'C': 46.41588833612777, 'degree': 2, 'gamma': 'scale', 'kernel': 'poly'}
0.9371866085469304 {'C': 46.41588833612777, 'degree': 3, 'gamma': 'scale', 'kernel': 'rbf'}
0.9356237174944745 {'C': 46.41588833612777, 'degree': 3, 'gamma': 'scale', 'kernel': 'poly'}
0.9371866085469304 {'C': 46.41588833612777, 'degree': 4, 'gamma': 'scale', 'kernel': 'rbf'}
0.9340605333816296 {'C': 46.41588833612777, 'degree': 4, 'gamma': 'scale', 'kernel': 'poly'}
0.9371866085469304 {'C': 50, 'degree': 2, 'gamma': 'scale', 'kernel': 'rbf'}
0.9324970562083964 {'C': 50, 'degree': 2, 'gamma': 'scale', 'kernel': 'poly'}
0.9371866085469304 {'C': 50, 'degree': 3, 'gamma': 'scale', 'kernel': 'rbf'}
0.9356237174944745 {'C': 50, 'degree': 3, 'gamma': 'scale', 'kernel': 'poly'}
0.9371866085469304 {'C': 50, 'degree': 4, 'gamma': 'scale', 'kernel': 'rbf'}
0.9340605333816296 {'C': 50, 'degree': 4, 'gamma': 'scale'

In [12]:
best_model = grid_search.best_estimator_

In [13]:
y_pred = best_model.predict(test_data)

array([0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0,
       1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1,
       1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1,
       1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0,
       1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1,
       1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1,
       1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0,
       1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1,
       0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1,
       0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
       1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
       0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0,

In [15]:
from sklearn.metrics import classification_report
print(classification_report(test_targets, y_pred))

              precision    recall  f1-score   support

           0       0.95      0.93      0.94       422
           1       0.92      0.94      0.93       378

    accuracy                           0.94       800
   macro avg       0.93      0.94      0.93       800
weighted avg       0.94      0.94      0.94       800



In [16]:
import joblib
joblib.dump(best_model, 'best_model.pkl')

['best_model.pkl']