In [21]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split


X, y = fetch_openml('Fashion-MNIST', version=1, return_X_y=True)
# Розділення на навчальний та тестовий набори
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [22]:
class_names = [
    'T-shirt',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle boot'
]

class_names[int(y_train[0])]

'Ankle boot'

In [23]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
x_train = scaler.fit_transform(X_train)
x_test = scaler.transform(X_test)

In [24]:
import numpy as np


# Розділення навчального набору на два: з мітками та без міток
X_labeled, X_unlabeled, y_labeled, y_unlabeled = train_test_split(x_train, y_train, test_size=0.5, random_state=42)

X_labeled, X_unlabeled, y_labeled, y_unlabeled

(array([[-0.00796844, -0.02016575, -0.02805079, ..., -0.15864275,
         -0.09123214, -0.03342068],
        [-0.00796844, -0.02016575, -0.02805079, ..., -0.15864275,
         -0.09123214, -0.03342068],
        [-0.00796844, -0.02016575, -0.02805079, ..., -0.15864275,
         -0.09123214, -0.03342068],
        ...,
        [-0.00796844, -0.02016575, -0.02805079, ..., -0.15864275,
         -0.09123214, -0.03342068],
        [-0.00796844, -0.02016575, -0.02805079, ..., -0.15864275,
         -0.09123214, -0.03342068],
        [-0.00796844, -0.02016575, -0.02805079, ..., -0.15864275,
         -0.09123214, -0.03342068]]),
 array([[-0.00796844, -0.02016575, -0.02805079, ..., -0.15864275,
         -0.09123214, -0.03342068],
        [-0.00796844, -0.02016575, -0.02805079, ..., -0.15864275,
         -0.09123214, -0.03342068],
        [-0.00796844, -0.02016575, -0.02805079, ..., -0.15864275,
         -0.09123214, -0.03342068],
        ...,
        [-0.00796844, -0.02016575, -0.02805079, ..., -

In [None]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report


base_classifier = SVC(C=1.0, kernel='rbf', gamma='scale', probability=True)
base_classifier.fit(X_labeled, y_labeled)

# Оцінка точності класифікатора з мітками на тестовому наборі
y_pred_labeled = base_classifier.predict(x_test)

In [None]:
accu = accuracy_score(y_test, y_pred_labeled)
crep = classification_report(y_test, y_pred_labeled, target_names=class_names)

print(f"Accuracy{accu}\nClassification report:\n{crep}")

In [None]:
from sklearn.semi_supervised import SelfTrainingClassifier, LabelPropagation, LabelSpreading


self_training_classifier = SelfTrainingClassifier(base_classifier)
label_propagation_classifier = LabelPropagation(kernel='rbf', n_jobs=-1)
label_spreading_classifier = LabelSpreading(kernel='rbf', n_jobs=-1)

models = [self_training_classifier, label_propagation_classifier, label_spreading_classifier]
percentages = [70, 60, 50, 40, 30]

for model in models:
    for perc in percentages:
        num_unlabeled = int(len(y) * perc)
        unlabeled_indices = np.random.choice(len(y), num_unlabeled, replace=False)
        y_unlabeled[unlabeled_indices] = -1
        # y_unlabeled = y_unlabeled.replace(to_replace=y_unlabeled.values, value=-1)
        
        model.fit(X_unlabeled, y_unlabeled)

        pred = model.predict(x_test)
        accu = accuracy_score(y_test, pred)
        crep = classification_report(y_test, pred, target_names=class_names)
        
        print(f"Mode: {model}, percentage of unlabeled data: {perc}, accuracy: {accu}\
                \nPrediction: {accu}\nClassification report:\n{crep})