In [1]:
from xml.etree.ElementPath import xpath_tokenizer
import cv2 as cv
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from skimage.feature import hog
from skimage import exposure
from tqdm import tqdm
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearnex import patch_sklearn  # Intel Extension for Scikit-learn
from sklearn.svm import LinearSVC
import torch
import torch.nn as nn

ModuleNotFoundError: No module named 'cv2'

In [3]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.decomposition import PCA
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.svm import LinearSVC
from sklearn.preprocessing import StandardScaler

In [4]:
# Augmentation pipeline: random horizontal flip, random 32x32 crop after
# 2-pixel padding, then conversion to a tensor.
# NOTE(review): no cell visible in this notebook passes transform_aug to a
# dataset — confirm whether it is still needed.
transform_aug = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=2),
    transforms.ToTensor(),
])

In [5]:
# CIFAR-10 training split, created without a transform; the feature extractors
# below convert each item with np.array(img).
cifar10_train = datasets.CIFAR10(root='data', train=True , download=True)

Files already downloaded and verified


In [6]:
# CIFAR-10 test split, created without a transform (same root directory as the
# training split).
cifar10_test = datasets.CIFAR10(root='data', train=False, download=True)

Files already downloaded and verified


In [8]:
def get_multi_channel_features(img, orientations=9, pixels_per_cell=(4, 4), cells_per_block=(2, 2)):
    """Compute a HOG descriptor per colour channel and sum them element-wise.

    Parameters
    ----------
    img : array-like
        Image convertible with ``np.asarray``: either ``(H, W)`` (treated as a
        single channel) or ``(H, W, C)`` with any number of channels ``C >= 1``.
    orientations, pixels_per_cell, cells_per_block
        Forwarded unchanged to ``skimage.feature.hog``.

    Returns
    -------
    numpy.ndarray
        1-D feature vector with the length of a single-channel HOG descriptor
        (the per-channel descriptors are added, not concatenated).

    Raises
    ------
    ValueError
        If the image is not 2-D/3-D or has zero channels.
    """
    np_image = np.asarray(img)
    if np_image.ndim == 2:
        # A grayscale image is handled as one channel instead of being
        # reshaped as if it had three.
        np_image = np_image[:, :, np.newaxis]
    if np_image.ndim != 3 or np_image.shape[2] == 0:
        raise ValueError(
            f"expected an (H, W) or (H, W, C) image, got shape {np_image.shape}"
        )

    # One descriptor per channel, collected in a list so the result is built
    # once rather than re-allocated with hstack on every iteration.
    per_channel = [
        hog(
            np.ascontiguousarray(np_image[:, :, channel_idx]),
            orientations=orientations,
            pixels_per_cell=pixels_per_cell,
            cells_per_block=cells_per_block,
            visualize=False,
            feature_vector=True,
        )
        for channel_idx in range(np_image.shape[2])
    ]
    # Summing over axis 0 of the (C, n_features) stack matches the original
    # reshape-then-sum for three-channel input.
    return np.sum(per_channel, axis=0)

In [9]:
def get_grayscale_features(img, orientations = 9,pixels_per_cell = (8,8),cells_per_block= (2,2) ):
    """Return the HOG descriptor of the grayscale version of ``img``.

    The image is converted to a NumPy array, reduced to one channel with
    ``cv.cvtColor(..., cv.COLOR_RGB2GRAY)`` and passed to ``hog`` with the
    given ``orientations``, ``pixels_per_cell`` and ``cells_per_block``.
    """
    gray = cv.cvtColor(np.array(img), cv.COLOR_RGB2GRAY)
    hog_options = dict(
        orientations=orientations,
        pixels_per_cell=pixels_per_cell,
        cells_per_block=cells_per_block,
        visualize=False,
        feature_vector=True,  # flatten the descriptor into a 1-D vector
    )
    return hog(gray, **hog_options)

In [10]:
def get_data(dataset,get_features):
    """Build feature and label arrays for every item of ``dataset``.

    ``dataset`` must support ``len()`` and integer indexing that yields an
    ``(image, label)`` pair; ``get_features`` maps one image to its feature
    vector. Returns ``(features, labels)`` as two NumPy arrays in dataset
    order. Progress is reported through ``tqdm``.
    """
    extracted = []
    targets = []
    indices = range(len(dataset))
    for position in tqdm(indices):
        sample, target = dataset[position]
        extracted.append(get_features(sample))
        targets.append(target)
    return np.array(extracted), np.array(targets)

In [11]:
# Summed per-channel HOG features and labels for the training split.
X_train, Y_train = get_data(cifar10_train,get_multi_channel_features)

100%|██████████| 50000/50000 [01:56<00:00, 429.99it/s]


In [12]:
# Sanity check of the feature matrix: (n_samples, n_features).
X_train.shape

(50000, 1764)

In [13]:
# Same feature extractor applied to the test split.
X_test, Y_test = get_data(cifar10_test,get_multi_channel_features)

100%|██████████| 10000/10000 [00:23<00:00, 432.40it/s]


In [55]:
# patch_sklearn() replaces estimators inside the sklearn modules, but names
# that were imported before the call keep pointing at the stock classes.
# Re-import the estimators used below so they pick up the patched versions.
patch_sklearn()
from sklearn.svm import SVC
from sklearn.decomposition import PCA

Intel(R) Extension for Scikit-learn* enabled (https://github.com/uxlfoundation/scikit-learn-intelex)


In [62]:
# Grayscale HOG features get their own names so they do not overwrite the
# multi-channel X_train / X_test: get_grayscale_features uses larger cells and
# yields a shorter vector, while the PCA cells further down request up to 900
# components. Cells meant to use the grayscale features should reference
# these *_gray variables.
X_train_gray, Y_train_gray = get_data(cifar10_train, get_grayscale_features)
X_test_gray, Y_test_gray = get_data(cifar10_test, get_grayscale_features)

100%|██████████| 50000/50000 [00:45<00:00, 1099.51it/s]
100%|██████████| 10000/10000 [00:08<00:00, 1131.43it/s]


In [164]:
# RBF-kernel SVC with balanced class weights; the solver is capped at 1000
# iterations and prints libsvm progress (verbose=1).
svc_options = dict(
    kernel='rbf',
    gamma="scale",
    C=0.01,
    class_weight='balanced',
    tol=1e-4,
    max_iter=1000,
    verbose=1,
)
svm = SVC(**svc_options)
svm.fit(X_train, Y_train)

[LibSVM]



In [14]:
# Standardise -> PCA -> linear SVM. random_state is fixed on both stochastic
# steps (PCA may pick a randomized SVD solver; LinearSVC with dual=True uses a
# randomised coordinate order) so repeated grid searches give the same scores.
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', PCA(random_state=42)),
    ('svm', LinearSVC(
        class_weight='balanced',
        max_iter=500,
        tol=1e-2,
        dual=True,
        random_state=42,
    ))
])

In [15]:
# First, coarse search grid: number of PCA components and the LinearSVC
# regularisation strength C (L2 penalty only).
param_grid = dict(
    pca__n_components=[100, 300, 400, 700],
    svm__C=[0.01, 0.001, 0.005],
    svm__penalty=['l2'],
)

In [16]:
# 5-fold cross-validated grid search over the pipeline, scored by weighted F1.
grid_search = GridSearchCV(
    pipeline,
    param_grid,
    cv=5,
    scoring='f1_weighted',
    verbose=1
)

grid_search.fit(X_train, Y_train)

# Best hyper-parameters and their mean cross-validated score. The score is the
# weighted F1 requested via `scoring`, so the label says F1, not accuracy.
print("Лучшие параметры:", grid_search.best_params_)
print("Лучший взвешенный F1 (CV):", grid_search.best_score_)

Fitting 5 folds for each of 12 candidates, totalling 60 fits
Лучшие параметры: {'pca__n_components': 700, 'svm__C': 0.005, 'svm__penalty': 'l2'}
Лучшая точность: 0.5422660098778153


In [19]:
# Second, refined grid around the best point of the first search: more PCA
# components, the two smaller C values, L2 penalty only.
param_grid = dict(
    pca__n_components=[600, 800, 900],
    svm__C=[0.001, 0.005],
    svm__penalty=['l2'],
)

In [20]:
# Second 5-fold grid search, using the current param_grid and the same
# weighted-F1 scoring as the first one.
grid_search_2 = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring='f1_weighted',
    cv=5,
    verbose=1,
)

grid_search_2.fit(X_train, Y_train)

Fitting 5 folds for each of 6 candidates, totalling 30 fits


In [21]:
# best_score_ is the mean cross-validated weighted F1 (scoring='f1_weighted'),
# so it is labelled as F1 rather than accuracy.
print("Лучшие параметры:", grid_search_2.best_params_)
print("Лучший взвешенный F1 (CV):", grid_search_2.best_score_)

Лучшие параметры: {'pca__n_components': 900, 'svm__C': 0.005, 'svm__penalty': 'l2'}
Лучшая точность: 0.5441859568080554


Попробуем обучить модель с такими параметрами на всём train датасете. Так SVM, может быть, найдёт больше опорных векторов.

In [25]:
# Refit on the whole training set with the best grid-search parameters
# (900 PCA components, C=0.005). random_state is fixed on the two stochastic
# steps so the reported test metrics can be reproduced.
scaler = StandardScaler()
pca = PCA(n_components=900, random_state=42)
X_train_scaled = scaler.fit_transform(X_train)
X_train_pca = pca.fit_transform(X_train_scaled)
svm = LinearSVC(
    class_weight='balanced',
    verbose=1,
    C=0.005,
    max_iter=1000,
    tol=1e-3,
    dual=True,
    fit_intercept=True,
    random_state=42,
)
svm.fit(X_train_pca, Y_train)

[LibLinear]

In [27]:
# Apply the scaler and PCA fitted on the training data to the test features.
X_test_scaled = scaler.transform(X_test)
X_test_pca = pca.transform(X_test_scaled)

In [28]:
# Test-set predictions, overall accuracy and the 10x10 confusion matrix.
Y_pred = svm.predict(X_test_pca)
test_accuracy = accuracy_score(Y_test, Y_pred)
print("Accuracy:", test_accuracy)
cm = confusion_matrix(Y_test, Y_pred)
print(cm)

Accuracy: 0.5517
[[625  30  78  15  34  17  22  27 117  35]
 [ 30 717  14   7  17  11  24  15  70  95]
 [ 86  22 359  55 141 111 103  52  54  17]
 [ 56  42  66 261  92 173 148  76  36  50]
 [ 29  21  51  47 474  44 155 103  49  27]
 [ 17  12  84 110  71 419 106 117  32  32]
 [ 18  28  27  37  59  48 718  29  24  12]
 [ 31   8  48  38  68  74  35 633  17  48]
 [ 94 105  30  12  19  12  14  32 624  58]
 [ 39  89  14  17  22  25  11  45  51 687]]


In [29]:
# Per-class precision / recall / F1, with classes labelled by their index.
class_labels = list(map(str, range(10)))
print(classification_report(Y_test, Y_pred, target_names=class_labels))

              precision    recall  f1-score   support

           0       0.61      0.62      0.62      1000
           1       0.67      0.72      0.69      1000
           2       0.47      0.36      0.41      1000
           3       0.44      0.26      0.33      1000
           4       0.48      0.47      0.47      1000
           5       0.45      0.42      0.43      1000
           6       0.54      0.72      0.61      1000
           7       0.56      0.63      0.59      1000
           8       0.58      0.62      0.60      1000
           9       0.65      0.69      0.67      1000

    accuracy                           0.55     10000
   macro avg       0.54      0.55      0.54     10000
weighted avg       0.54      0.55      0.54     10000



В целом, accuracy в 55% заметно лучше случайного прогноза в 10%. Видим, что метрики проседают на классах 2–5 (bird, cat, deer, dog) по сравнению с остальными.

Попробуем простую FFNN

In [34]:
# Tensor conversion followed by per-channel normalisation of the RGB channels
# with mean 0.5 and std 0.5.
norm_mean = (0.5, 0.5, 0.5)
norm_std = (0.5, 0.5, 0.5)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])

# Same dataset root for both splits; only the `train` flag differs.
train_data = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_data = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Training batches are reshuffled every epoch; test order is kept fixed.
train_loader = DataLoader(train_data, shuffle=True, batch_size=128)
test_loader = DataLoader(test_data, shuffle=False, batch_size=128)

Files already downloaded and verified
Files already downloaded and verified


In [35]:
class MLP(nn.Module):
    """Fully connected classifier: Flatten -> (Linear -> ReLU)* -> Linear.

    Parameters
    ----------
    in_features : int
        Number of values per flattened sample (default ``32*32*3``).
    hidden_sizes : sequence of int
        Width of each hidden layer, in order (default ``(1024, 512, 256)``).
    num_classes : int
        Size of the output logits vector (default ``10``).

    With the defaults the module is identical to the original fixed
    architecture, including the ``layers.<index>`` names of its submodules.
    """

    def __init__(self, in_features=32*32*3, hidden_sizes=(1024, 512, 256), num_classes=10):
        super().__init__()
        modules = [nn.Flatten()]
        width = in_features
        # Layers are created in forward order so parameter initialisation
        # consumes the RNG in the same sequence as the hand-written version.
        for hidden in hidden_sizes:
            modules.append(nn.Linear(width, hidden))
            modules.append(nn.ReLU())
            width = hidden
        modules.append(nn.Linear(width, num_classes))
        self.layers = nn.Sequential(*modules)

    def forward(self, x):
        """Return the raw class logits for a batch ``x``."""
        return self.layers(x)

In [36]:
# Seed the global torch RNG before creating the model so the initial weights
# are the same on every run. A DataLoader without its own generator also
# draws its shuffle seed from this RNG.
torch.manual_seed(42)
model = MLP()

In [37]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Use whatever device the model currently lives on, so this cell still works
# when it is re-run after the model has been moved (e.g. to CUDA).
device = next(model.parameters()).device

train_losses = []
val_accuracies = []

for epoch in range(15):
    model.train()
    epoch_loss = 0
    for batch_x, batch_y in train_loader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        # Clear gradients first so leftovers from an interrupted run are
        # never applied.
        optimizer.zero_grad()
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

    # Per-epoch evaluation. NOTE: this uses test_loader, i.e. the test split
    # doubles as the validation set.
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            outputs = model(batch_x)
            predicted = outputs.argmax(dim=1)
            total += batch_y.size(0)
            correct += (predicted == batch_y).sum().item()

    accuracy = correct / total
    # Mean of the per-batch losses for this epoch.
    train_losses.append(epoch_loss / len(train_loader))
    val_accuracies.append(accuracy)
    print(f"Epoch {epoch+1}, Loss: {train_losses[-1]:.4f}, Accuracy: {accuracy:.4f}")

Epoch 1, Loss: 1.6669, Accuracy: 0.4666
Epoch 2, Loss: 1.4325, Accuracy: 0.4956
Epoch 3, Loss: 1.3089, Accuracy: 0.5166
Epoch 4, Loss: 1.2072, Accuracy: 0.5348
Epoch 5, Loss: 1.1131, Accuracy: 0.5426
Epoch 6, Loss: 1.0204, Accuracy: 0.5369
Epoch 7, Loss: 0.9347, Accuracy: 0.5421
Epoch 8, Loss: 0.8523, Accuracy: 0.5464
Epoch 9, Loss: 0.7636, Accuracy: 0.5522
Epoch 10, Loss: 0.6929, Accuracy: 0.5395
Epoch 11, Loss: 0.6189, Accuracy: 0.5458
Epoch 12, Loss: 0.5527, Accuracy: 0.5368
Epoch 13, Loss: 0.4956, Accuracy: 0.5431
Epoch 14, Loss: 0.4458, Accuracy: 0.5468
Epoch 15, Loss: 0.4115, Accuracy: 0.5397


In [96]:

# Final evaluation of the trained MLP on the test split.
test_loader = DataLoader(test_data, shuffle=False, batch_size=128)

# Prefer CUDA when it is available, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

y_true = []
y_pred = []

with torch.no_grad():
    for images, targets in test_loader:
        images = images.to(device)
        targets = targets.to(device)

        logits = model(images)
        batch_preds = logits.argmax(dim=1)

        # Move results back to the CPU before collecting them.
        y_true.extend(targets.cpu().numpy())
        y_pred.extend(batch_preds.cpu().numpy())

print(classification_report(y_true, y_pred, digits=2))


              precision    recall  f1-score   support

           0       0.58      0.61      0.60      1000
           1       0.63      0.66      0.64      1000
           2       0.43      0.45      0.44      1000
           3       0.35      0.38      0.36      1000
           4       0.47      0.46      0.46      1000
           5       0.48      0.40      0.43      1000
           6       0.55      0.68      0.61      1000
           7       0.63      0.59      0.61      1000
           8       0.67      0.67      0.67      1000
           9       0.66      0.50      0.56      1000

    accuracy                           0.54     10000
   macro avg       0.54      0.54      0.54     10000
weighted avg       0.54      0.54      0.54     10000



В целом, точность нейронной сети колеблется в районе 54–55 процентов, что аналогично результату HOG+SVM.