In [1]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split


X, y = fetch_openml('Fashion-MNIST', version=1, return_X_y=True)
# Розділення на навчальний та тестовий набори
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [2]:
class_names = [
    'T-shirt',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle boot'
]

class_names[int(y_train[0])]

'Ankle boot'

In [3]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
x_train = scaler.fit_transform(X_train)
x_test = scaler.transform(X_test)

In [4]:
import numpy as np


# Розділення навчального набору на два: з мітками та без міток
X_labeled, X_unlabeled, y_labeled, y_unlabeled = train_test_split(x_train, y_train, test_size=0.5, random_state=42)

X_labeled, X_unlabeled, y_labeled, y_unlabeled

(array([[-0.00796844, -0.02016575, -0.02805079, ..., -0.15864275,
         -0.09123214, -0.03342068],
        [-0.00796844, -0.02016575, -0.02805079, ..., -0.15864275,
         -0.09123214, -0.03342068],
        [-0.00796844, -0.02016575, -0.02805079, ..., -0.15864275,
         -0.09123214, -0.03342068],
        ...,
        [-0.00796844, -0.02016575, -0.02805079, ..., -0.15864275,
         -0.09123214, -0.03342068],
        [-0.00796844, -0.02016575, -0.02805079, ..., -0.15864275,
         -0.09123214, -0.03342068],
        [-0.00796844, -0.02016575, -0.02805079, ..., -0.15864275,
         -0.09123214, -0.03342068]]),
 array([[-0.00796844, -0.02016575, -0.02805079, ..., -0.15864275,
         -0.09123214, -0.03342068],
        [-0.00796844, -0.02016575, -0.02805079, ..., -0.15864275,
         -0.09123214, -0.03342068],
        [-0.00796844, -0.02016575, -0.02805079, ..., -0.15864275,
         -0.09123214, -0.03342068],
        ...,
        [-0.00796844, -0.02016575, -0.02805079, ..., -

In [5]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report


base_classifier = SVC(C=1.0, kernel='rbf', gamma='scale', probability=True)
base_classifier.fit(X_labeled, y_labeled)

# Оцінка точності базового класифікатора
y_pred_labeled = base_classifier.predict(x_test)

In [6]:
accu = accuracy_score(y_test, y_pred_labeled)
crep = classification_report(y_test, y_pred_labeled, target_names=class_names)

print(f"Accuracy: {accu}\nClassification report:\n{crep}")

Accuracy: 0.8812857142857143
Classification report:
              precision    recall  f1-score   support

     T-shirt       0.83      0.84      0.83      1394
     Trouser       1.00      0.97      0.98      1402
    Pullover       0.82      0.82      0.82      1407
       Dress       0.87      0.90      0.89      1449
        Coat       0.79      0.83      0.81      1357
      Sandal       0.96      0.95      0.96      1449
       Shirt       0.72      0.64      0.68      1407
     Sneaker       0.92      0.95      0.94      1359
         Bag       0.93      0.98      0.96      1342
  Ankle boot       0.96      0.94      0.95      1434

    accuracy                           0.88     14000
   macro avg       0.88      0.88      0.88     14000
weighted avg       0.88      0.88      0.88     14000



In [7]:
from sklearn.semi_supervised import SelfTrainingClassifier


base_classifier = SVC(C=1.0, kernel='rbf', gamma='scale', probability=True)
# Класифікатор самонавчання
self_training_classifier = SelfTrainingClassifier(base_classifier)
self_training_classifier.fit(X_labeled, y_labeled)

y_pred_self_training = self_training_classifier.predict(x_test)



In [8]:
accu = accuracy_score(y_test, y_pred_self_training)
crep = classification_report(y_test, y_pred_self_training, target_names=class_names)

print(f"Accuracy: {accu}\nClassification report:\n{crep}")

Accuracy: 0.8812857142857143
Classification report:
              precision    recall  f1-score   support

     T-shirt       0.83      0.84      0.83      1394
     Trouser       1.00      0.97      0.98      1402
    Pullover       0.82      0.82      0.82      1407
       Dress       0.87      0.90      0.89      1449
        Coat       0.79      0.83      0.81      1357
      Sandal       0.96      0.95      0.96      1449
       Shirt       0.72      0.64      0.68      1407
     Sneaker       0.92      0.95      0.94      1359
         Bag       0.93      0.98      0.96      1342
  Ankle boot       0.96      0.94      0.95      1434

    accuracy                           0.88     14000
   macro avg       0.88      0.88      0.88     14000
weighted avg       0.88      0.88      0.88     14000



In [9]:
from sklearn.semi_supervised import LabelPropagation


# Класифікатор поширення міток
label_propagation_classifier = LabelPropagation(kernel='rbf', gamma=0.1, n_jobs=-1)
label_propagation_classifier.fit(X_labeled, y_labeled)

y_pred_label_propagation = label_propagation_classifier.predict(x_test)

  probabilities /= normalizer


In [10]:
accu = accuracy_score(y_test, y_pred_label_propagation)
crep = classification_report(y_test, y_pred_label_propagation, target_names=class_names)

print(f"Accuracy: {accu}\nClassification report:\n{crep}")

Accuracy: 0.845
Classification report:
              precision    recall  f1-score   support

     T-shirt       0.76      0.83      0.79      1394
     Trouser       0.99      0.97      0.98      1402
    Pullover       0.76      0.75      0.75      1407
       Dress       0.90      0.85      0.87      1449
        Coat       0.74      0.75      0.75      1357
      Sandal       0.99      0.85      0.91      1449
       Shirt       0.58      0.62      0.60      1407
     Sneaker       0.87      0.96      0.91      1359
         Bag       0.98      0.93      0.95      1342
  Ankle boot       0.91      0.96      0.93      1434

    accuracy                           0.84     14000
   macro avg       0.85      0.85      0.85     14000
weighted avg       0.85      0.84      0.85     14000



In [5]:
from sklearn.semi_supervised import LabelSpreading


# Класифікатор розповсюдження міток
label_spreading_classifier = LabelSpreading(kernel='rbf', gamma=0.1, n_jobs=-1)
label_spreading_classifier.fit(x_train, y_train)

y_pred_label_spreading = label_spreading_classifier.predict(x_test)

MemoryError: Unable to allocate 23.4 GiB for an array with shape (56000, 56000) and data type float64

In [None]:
accu = accuracy_score(y_test, y_pred_label_spreading)
crep = classification_report(y_test, y_pred_label_spreading, target_names=class_names)

print(f"Accuracy: {accu}\nClassification report:\n{crep}")

In [None]:
base_classifier = SVC(C=1.0, kernel='rbf', gamma='scale', probability=True)
self_training_classifier = SelfTrainingClassifier(base_classifier)
label_propagation_classifier = LabelPropagation(kernel='rbf', gamma=0.1, n_jobs=-1)
label_spreading_classifier = LabelSpreading(kernel='rbf', gamma=0.1, n_jobs=-1)

models = [self_training_classifier, label_propagation_classifier, label_spreading_classifier]
percentages = [70, 60, 50, 40, 30]

for model in models:
    for perc in percentages:
        # make unlabeled_indices with random percentage of data.
        rui = np.random.choice(unlabeled_indices, size=num_unlabeled, replace=False)
        y_unlabeled[rui] = -1
        
        model.fit(X_unlabeled, y_unlabeled)

        pred = model.predict(x_test)
        accu = accuracy_score(y_test, pred)
        crep = classification_report(y_test, pred, target_names=class_names)
        
        print(f"Model: {model}, percentage of unlabeled data: {perc}, accuracy: {accu}\n"
              f"Classification report:\n{crep}")

In [None]:
# y_unlabeled = y_unlabeled.replace(to_replace=y_unlabeled.values, value=-1)