<a href="https://colab.research.google.com/github/maksblink/image/blob/main/photo_classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Importowanie modułu drive z biblioteki google.colab i zamontowanie Google Drive

In [None]:
# Colab-specific helper that exposes Google Drive inside the runtime's file system.
from google.colab import drive

# Mount Drive under /content/drive; the dataset and checkpoint paths used in the
# later cells are all rooted at this mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Importowanie potrzebnych bibliotek

In [None]:
from math import e
import re
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torchvision.models import resnet34
from sklearn.metrics import f1_score, precision_score, recall_score
import matplotlib.pyplot as plt
from PIL import Image
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import cv2
import os
from torch.optim.lr_scheduler import StepLR

Sekcja odpowiedzialna za zwiększenie ilości danych treningowych [wyłączona, ponieważ prowadziła do przeuczenia (overfittingu)]


In [None]:
# Root of the training images on Google Drive; every sub-folder below it is walked.
root_directory = r"/content/drive/MyDrive/colab/ML_data/train"

# Gain (alpha) and bias (beta) handed to cv2.convertScaleAbs, which computes
# |alpha * pixel + beta| saturated to 8 bits. "plus" yields a brighter,
# higher-contrast copy; "minus" yields a darker, lower-contrast copy.
alpha_plus = 1.5
beta_plus = 10
alpha_minus = 0.5
beta_minus = -35

# File extensions treated as images, and the suffix shared by every generated file.
image_extensions = ('.JPEG', '.JPG', '.PNG', '.jpeg', '.jpg', '.png')
augmented_suffix = '_contrast_brightness'

# (file-name tag, alpha, beta) for each variant written next to the original.
augmentation_variants = (
    ('plus', alpha_plus, beta_plus),
    ('minus', alpha_minus, beta_minus),
)

# To undo the augmentation, delete every file whose base name ends with
# `augmented_suffix` (e.g. with os.remove inside the same os.walk loop).

for dir_path, dir_names, file_names in os.walk(root_directory):
    for file_name in file_names:
        if not file_name.endswith(image_extensions):
            continue

        base_name, ext = os.path.splitext(file_name)

        # Skip files produced by an earlier run of this cell; otherwise every
        # re-run would augment the augmented copies again.
        if base_name.endswith(augmented_suffix):
            continue

        input_path = os.path.join(dir_path, file_name)
        image = cv2.imread(input_path)

        # cv2.imread signals an unreadable file by returning None instead of raising.
        if image is None:
            print(f'Image loading error: {input_path}')
            continue

        for tag, alpha, beta in augmentation_variants:
            # Fix: the "plus" variant previously passed alpha_plus as beta,
            # so beta_plus was never applied.
            contrast_brightness_image = cv2.convertScaleAbs(image, alpha=alpha, beta=beta)

            new_file_name = f'{base_name}_{tag}{augmented_suffix}{ext}'
            output_path = os.path.join(dir_path, new_file_name)

            # cv2.imwrite reports failure through its return value.
            if not cv2.imwrite(output_path, contrast_brightness_image):
                print(f'Image saving error: {output_path}')

Tworzenie transformacji obrazu

In [None]:
# Preprocessing applied to every image, in order:
#   1. resize to 224x224 pixels,
#   2. convert the PIL image to a float tensor,
#   3. normalise each channel with the given per-channel mean / std
#      (presumably the ImageNet statistics matching the pretrained backbone - confirm).
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]

transform = transforms.Compose(preprocessing_steps)

Sekcja odpowiedzialna za wyświetlanie każdego zdjęcia przed jego wczytaniem [wyłączona]


In [None]:
# Optional preview: show every training image after preprocessing, one figure per image.
# Fix: the dataset is read from the mounted Drive instead of a local Windows path
# (C:\Users\...), which cannot be resolved on the Colab runtime.
# NOTE(review): ML_data/train mirrors the folder name of the old local path - confirm
# this is the folder that should be previewed.
train_dataset = datasets.ImageFolder(r"/content/drive/MyDrive/colab/ML_data/train", transform=None)

# Normalisation statistics reshaped to (3, 1, 1) so they broadcast over (C, H, W) tensors.
preview_mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
preview_std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)

for img, label in train_dataset:
    img = transform(img)

    # Fix: undo Normalize before plotting. Normalised values fall outside [0, 1],
    # which imshow clips, producing distorted colours.
    img = (img * preview_std + preview_mean).clamp(0, 1)

    # imshow expects channels last: (C, H, W) -> (H, W, C).
    plt.imshow(img.permute(1, 2, 0).numpy())
    plt.title(f"Label: {label}")
    plt.show()

# From here on the dataset applies the preprocessing itself.
train_dataset.transform = transform

Tworzenie zbioru treningowego i walidacyjnego oraz załadowanie ich do DataLoader

In [None]:
# Settings shared by the training and validation loaders.
loader_settings = {"batch_size": 64, "num_workers": 4}

# Training split: one sub-folder per class, reshuffled every epoch.
train_dataset = datasets.ImageFolder(
    root=r"/content/drive/MyDrive/colab/ML_big_data/train",
    transform=transform,
)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_settings)

# Validation split: same layout and preprocessing, iterated in a fixed order.
val_dataset = datasets.ImageFolder(
    root=r"/content/drive/MyDrive/colab/ML_big_data/val",
    transform=transform,
)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_settings)

Definicja modelu ResNet-34 z warstwą Dropout

In [None]:
class MyResNet(nn.Module):
    """ResNet-34 whose final layer is replaced by Dropout followed by a Linear classifier.

    Args:
        num_classes: number of output units of the new linear layer.
        dropout_chance: drop probability of the Dropout layer placed before it.
    """

    def __init__(self, num_classes, dropout_chance=0.5):
        super().__init__()

        # Start from the pretrained ResNet-34.
        # NOTE(review): recent torchvision releases deprecate `pretrained=` in favour
        # of `weights=` - confirm against the installed version.
        backbone = resnet34(pretrained=True)

        # Width of the feature vector feeding the original fully connected layer.
        head_inputs = backbone.fc.in_features

        # Swap the original head for Dropout -> Linear(num_classes).
        backbone.fc = nn.Sequential(
            nn.Dropout(p=dropout_chance),
            nn.Linear(head_inputs, num_classes),
        )

        # The attribute name is part of the checkpoint format (state_dict keys
        # start with "resnet."), so it must stay as is.
        self.resnet = backbone

    def forward(self, x):
        """Run `x` through the network and return its raw output (no softmax applied)."""
        logits = self.resnet(x)
        return logits

Tworzenie modelu z warstwą Dropout


In [None]:
# Instantiate the classifier with 10 output classes and a 0.5 drop probability
# for the Dropout layer in front of the final linear layer.
model = MyResNet(num_classes=10, dropout_chance=0.5)

Definiowanie funkcji straty (CrossEntropyLoss) i optymalizatora (SGD)


In [None]:
# Loss: cross-entropy between the model outputs and the integer class labels.
criterion = nn.CrossEntropyLoss()

# Optimiser: SGD with momentum and weight decay over all model parameters.
sgd_settings = {
    "lr": 0.01,
    "momentum": 0.9,
    "weight_decay": 1e-4,
}
optimizer = optim.SGD(model.parameters(), **sgd_settings)

Tworzenie schedulera współczynnika uczenia (StepLR) [wyłączone]

In [None]:
# StepLR multiplies the optimizer's learning rate by gamma (0.9) once every
# step_size (4) calls to scheduler.step().
# NOTE: the scheduler.step() call in the training loop is commented out, so this
# object is created but currently has no effect on training.
scheduler = StepLR(optimizer, step_size=4, gamma=0.9)

Wczytanie wag modelu z pliku


In [None]:
# Checkpoint holding the model's state_dict (parameters and buffers only).
checkpoint_path = r"/content/drive/MyDrive/colab/the_best_model.pt"

# Fix: map_location="cpu" lets a checkpoint saved from a GPU load on a CPU-only
# runtime too; without it torch.load tries to restore tensors onto the device
# they were saved from. The model is moved to the chosen device in a later cell.
# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source.
model.load_state_dict(torch.load(checkpoint_path, map_location="cpu"))

Określanie liczby epok i urządzenia wykonawczego

In [None]:
# Number of full passes over the training set.
number_of_epochs = 22

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

# Move the model's parameters and buffers to the selected device.
model.to(device)

Tworzenie tablic do zapisu statystyk

In [None]:
# Per-epoch metric histories: the training loop appends one value per epoch to each
# list and the plotting cell reads them. Every name gets its own, independent list.
train_loss_history, val_loss_history = [], []
train_precision_history, val_precision_history = [], []
train_recall_history, val_recall_history = [], []
train_f1_history, val_f1_history = [], []

Pętla trenująca model


In [None]:
# Keyword arguments shared by the precision and recall computations.
macro_with_zero_division = {"average": "macro", "zero_division": 1}

for epoch in range(number_of_epochs):
    # ------------------------------ training pass ------------------------------
    model.train()  # training mode (Dropout active)

    running_train_loss = 0.0
    train_labels, train_preds = [], []
    total_batches = len(train_loader)

    for batch_number, (images, labels) in enumerate(train_loader, start=1):
        # Progress line: current epoch and batch position within it.
        print(f"Epoch: {epoch + 1} [{batch_number}/{total_batches}]")

        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()              # clear gradients left from the previous batch
        outputs = model(images)            # forward pass
        loss = criterion(outputs, labels)  # batch loss
        loss.backward()                    # back-propagation
        optimizer.step()                   # weight update

        # scheduler.step()  # disabled

        # Accumulate the batch loss plus the labels/predictions for the epoch metrics.
        running_train_loss += loss.item()
        train_labels.extend(labels.cpu().numpy())
        train_preds.extend(outputs.argmax(dim=1).cpu().numpy())

    # Training statistics for this epoch: precision, recall, mean batch loss, F1.
    train_precision = precision_score(train_labels, train_preds, **macro_with_zero_division)
    train_recall = recall_score(train_labels, train_preds, **macro_with_zero_division)
    train_loss = running_train_loss / len(train_loader)
    train_f1 = f1_score(train_labels, train_preds, average='macro')

    # ----------------------------- validation pass -----------------------------
    model.eval()  # evaluation mode (Dropout inactive)

    running_val_loss = 0.0
    val_labels, val_preds = [], []

    # Gradients are not needed for validation; no_grad disables their tracking.
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            running_val_loss += loss.item()
            val_labels.extend(labels.cpu().numpy())
            val_preds.extend(outputs.argmax(dim=1).cpu().numpy())

    # Validation statistics for this epoch: precision, recall, mean batch loss, F1.
    val_precision = precision_score(val_labels, val_preds, **macro_with_zero_division)
    val_recall = recall_score(val_labels, val_preds, **macro_with_zero_division)
    val_loss = running_val_loss / len(val_loader)
    val_f1 = f1_score(val_labels, val_preds, average='macro')

    # Record everything only after both passes finished, so all histories
    # always grow together.
    for history, value in (
        (train_precision_history, train_precision),
        (val_precision_history, val_precision),
        (train_recall_history, train_recall),
        (val_recall_history, val_recall),
        (train_loss_history, train_loss),
        (val_loss_history, val_loss),
        (train_f1_history, train_f1),
        (val_f1_history, val_f1),
    ):
        history.append(value)

    print(f"Epoch [{epoch + 1}/{number_of_epochs}] Train Loss: {train_loss:.6f} F1: {train_f1:.6f} | Val Loss: {val_loss:.6f} F1: {val_f1:.6f}")

Zapisanie modelu

In [None]:
# Save only the model's state_dict (not the whole module object).
# The relative file name resolves against the current working directory.
# NOTE(review): the loading cell reads /content/drive/MyDrive/colab/the_best_model.pt,
# which is a different location - confirm whether the checkpoint is meant to be
# copied there afterwards.
torch.save(model.state_dict(), 'the_best_model.pt')

Tworzenie i wyświetlanie wykresów prezentujących wyniki treningu

In [None]:
# One panel per metric on a 2x2 grid:
# (subplot position, metric name, training history, validation history).
metric_panels = (
    (1, 'Precision', train_precision_history, val_precision_history),
    (2, 'Recall', train_recall_history, val_recall_history),
    (3, 'F1', train_f1_history, val_f1_history),
    (4, 'Loss', train_loss_history, val_loss_history),
)

plt.figure(figsize=(12, 6))

for position, metric_name, train_history, val_history in metric_panels:
    plt.subplot(2, 2, position)

    # Fix: the epoch axis is derived from the number of recorded values, not from
    # number_of_epochs. The histories can be shorter (interrupted run) or longer
    # (training cell executed more than once), and a length mismatch makes
    # plt.plot raise.
    plt.plot(range(1, len(train_history) + 1), train_history, label=f'Train {metric_name}')
    plt.plot(range(1, len(val_history) + 1), val_history, label=f'Val {metric_name}')

    plt.xlabel('Epochs')
    plt.ylabel(metric_name)
    plt.legend()

plt.tight_layout()

# Render the figure with all four panels.
plt.show()

Funkcja do przewidywania klasy obrazu za pomocą modelu


In [None]:
def predict_class(model_to_prediction, image_path):
    """Classify a single image file and print the probability of every class.

    Relies on the notebook-level `transform` (preprocessing) and `device`.

    Args:
        model_to_prediction: model used for inference; it is switched to eval mode.
        image_path: path of the image file to classify.

    Returns:
        int: index of the class with the highest model output.
    """
    # Fix: close the file deterministically and force three RGB channels.
    # Grayscale / RGBA / palette images would otherwise yield a tensor whose
    # channel count does not match the 3-channel Normalize step and the model
    # input. (ImageFolder's default loader also converts to RGB, so this matches
    # how the training images were read.)
    with Image.open(image_path) as raw_image:
        image = transform(raw_image.convert("RGB")).unsqueeze(0)  # add batch dimension
    image = image.to(device)

    model_to_prediction.eval()
    with torch.no_grad():
        outputs_to_prediction = model_to_prediction(image)
        predicted_class = torch.argmax(outputs_to_prediction, dim=1).item()
        probabilities = F.softmax(outputs_to_prediction, dim=1)

    # Print the probability of every class index.
    print("===========================================================================\n")

    # Iterate over however many outputs the model has, not a fixed count of 10.
    for i, class_probability in enumerate(probabilities[0].tolist()):
        print("Probability for class index ", i, " : ", class_probability)

    print("\n===========================================================================\n")

    return predicted_class

Funkcja do wyświetlania przewidywanych klas obrazów


In [None]:
def view_prediction(list_of_paths):
    """Predict and print the class of every image in `list_of_paths`.

    When a path contains `test_<number>`, that number is taken as the true class
    index and printed next to the prediction. Uses the notebook-level `model`.

    Args:
        list_of_paths: iterable of image file paths (strings).
    """
    # Class index -> human-readable class name.
    class_map = {
        0: 'aquatic_plants',
        1: 'fish',
        2: 'flamingo',
        3: 'nematodes',
        4: 'shark',
        5: 'small_dragon',
        6: 'snake',
        7: 'spider',
        8: 'turtle',
        9: 'yellow_bird'
    }
    unknown_class_name = 'unknown'

    for path in list_of_paths:
        predicted_class_index = predict_class(model, path)

        # Fix: capture digits only. With `\w+` a name such as "test_cat.jpg"
        # matched and then crashed in int().
        match = re.search(r'test_(\d+)', path)

        if match:
            true_class_index = match.group(1)
            # .get avoids a KeyError for an index that has no entry in class_map.
            true_class_name = class_map.get(int(true_class_index), unknown_class_name)
            print(f'True class: {true_class_name}, True class index: {true_class_index}')

        # Fix: the prediction is printed even when the file name does not encode
        # the true class; previously it was silently dropped in that case.
        predicted_class_name = class_map.get(predicted_class_index, unknown_class_name)
        print(f'Predicted class: {predicted_class_name}, Predicted class index: {predicted_class_index}\n')

Wyświetlenie przewidywanych klas obrazów

In [None]:
# Visual separator printed three times before and after the prediction report.
separator = "==========================================================================="

# One test image per class index: test_0.JPEG ... test_9.JPEG.
test_image_paths = [
    f"/content/drive/MyDrive/colab/tests/test_{class_index}.JPEG"
    for class_index in range(10)
]

print(separator)
print(separator)
print(separator + "\n")

view_prediction(test_image_paths)

print(separator)
print(separator)
print(separator + "\n")