## Wybór optymalnego modelu - ocena sieci neuronowych
#### ostatecznie nie zdecydowaliśmy się na użycie żadnego z tych modeli jako finalnego ze względu na gorszą predykcyjność


In [38]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import f1_score, confusion_matrix
from tensorflow.keras.utils import to_categorical
import cv2
from collections import defaultdict
import matplotlib.pyplot as plt
import random
from PIL import Image
from tensorflow.keras import regularizers




In [39]:
# Wczytanie danych
train_data = pd.read_csv("train_data_with_features.csv")
test_data = pd.read_csv("test_data_with_features.csv")

In [40]:
# Przygotowanie cech i etykiet
y_train_val = train_data['class']
X_train_val = train_data.drop(['class', "path", "filename", "R", "G", "B", "brightness"], axis=1)
y_test = test_data['class']
X_test = test_data.drop(['class', "path", "filename", "R", "G", "B", "brightness"], axis=1)


scaler = StandardScaler()
X_train_val = scaler.fit_transform(X_train_val)
X_test = scaler.transform(X_test)

In [41]:
# Konwersja etykiet na wartości liczbowe
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y_train_val)



In [42]:
# Definicja 5-krotnej walidacji krzyżowej
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

accuracies = []
reports = []

In [43]:
# Iteracja po podziałach walidacji krzyżowej
for fold, (train_idx, val_idx) in enumerate(skf.split(X_train_val, y_train_val)):
    print(f"Fold {fold + 1}")
    X_train, X_val = X_train_val[train_idx], X_train_val[val_idx]
    y_train, y_val = y_train_val[train_idx], y_train_val[val_idx]
    
    # Tworzenie i trenowanie modelu
    clf = RandomForestClassifier(n_estimators=100, random_state=42,max_depth=7)
    clf.fit(X_train, y_train)
    
    # Predykcja
    y_pred = clf.predict(X_val)
    
    # Ewaluacja
    accuracy = accuracy_score(y_val, y_pred)
    accuracies.append(accuracy)
    report = classification_report(y_val, y_pred, target_names=label_encoder.classes_)
    reports.append(report)
    
    print(f"Dokładność (walidacja): {accuracy:.4f}")
    print("Raport klasyfikacji:\n", report)
    print("-" * 50)



Fold 1
Dokładność (walidacja): 0.8912
Raport klasyfikacji:
                       precision    recall  f1-score   support

      Bombali Ghevda       0.71      0.60      0.65        90
        Butka Ghevda       0.97      0.98      0.97        90
       Gabara Ghevda       1.00      0.98      0.99        89
             God Wal       0.71      0.82      0.76        90
          Hirva Lamb       0.94      0.91      0.93        89
            Kadu wal       0.83      0.97      0.89        89
            Kala Wal       0.94      0.90      0.92        90
     Lal lamb shenga       0.81      0.92      0.86        89
Lamb Shiracha Ghevda       0.90      0.84      0.87        90
      Shravan Ghevda       1.00      0.92      0.96        89
          Tambda wal       1.00      1.00      1.00        90
       Vatana Ghevda       0.92      0.86      0.89        90

            accuracy                           0.89      1075
           macro avg       0.89      0.89      0.89      1075
        

In [44]:
# Ostateczna ewaluacja na zbiorze testowym
final_model = RandomForestClassifier(n_estimators=100, random_state=42)
final_model.fit(X_train_val, y_train_val)
y_test_pred = final_model.predict(X_test)



In [45]:
test_accuracy = accuracy_score(y_test, y_test_pred)
test_report = classification_report(y_test, y_test_pred, target_names=label_encoder.classes_)



In [46]:
print(f"Średnia dokładność na walidacji: {np.mean(accuracies):.4f}")
print(f"Dokładność na zbiorze testowym: {test_accuracy:.4f}")
print("Raport klasyfikacji na zbiorze testowym:\n", test_report)

Średnia dokładność na walidacji: 0.9023
Dokładność na zbiorze testowym: 0.8111
Raport klasyfikacji na zbiorze testowym:
                       precision    recall  f1-score   support

      Bombali Ghevda       0.77      0.38      0.50        80
        Butka Ghevda       1.00      0.99      0.99        80
       Gabara Ghevda       0.99      0.84      0.91        80
             God Wal       0.40      0.65      0.50        80
          Hirva Lamb       0.99      1.00      0.99        80
            Kadu wal       0.67      0.89      0.76        80
            Kala Wal       0.96      1.00      0.98        80
     Lal lamb shenga       0.78      0.75      0.76        80
Lamb Shiracha Ghevda       0.78      0.78      0.78        80
      Shravan Ghevda       0.82      0.47      0.60        78
          Tambda wal       1.00      0.99      0.99        80
       Vatana Ghevda       0.87      1.00      0.93        80

            accuracy                           0.81       958
         

In [47]:
y_one_hot = to_categorical(y_encoded)


In [48]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_train_val)

In [49]:
mlp_accuracies = []
mlp_f1_scores = []
mlp_specificities = []
mlp_precisions = []
mlp_recalls = []

for train_idx, val_idx in skf.split(X_scaled, y_encoded):
    X_train, X_val = X_scaled[train_idx], X_scaled[val_idx]
    y_train, y_val = y_one_hot[train_idx], y_one_hot[val_idx]
    
    mlp_model = models.Sequential([
        layers.Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
        layers.Dense(128, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dense(32, activation='relu'),
        layers.Dense(32, activation='relu'),
    # Change the number of units in the final layer to match the number of classes
        layers.Dense(12, activation='softmax') ])
    mlp_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy', 'Precision', 'Recall'])

    history = mlp_model.fit(X_train, y_train, epochs=10, validation_data=(X_val, y_val), batch_size=32, verbose=0)
    
    val_loss, val_accuracy, val_precision, val_recall = mlp_model.evaluate(X_val, y_val, verbose=0)
    
    y_pred = mlp_model.predict(X_val)
    y_pred_class = np.argmax(y_pred, axis=1)
    y_val_class = np.argmax(y_val, axis=1)
    val_f1 = f1_score(y_val_class, y_pred_class, average='weighted')
    
    
    
    mlp_accuracies.append(val_accuracy)
    mlp_f1_scores.append(val_f1)
    mlp_precisions.append(val_precision)
    mlp_recalls.append(val_recall)


avg_accuracy = np.mean(mlp_accuracies)
avg_f1 = np.mean(mlp_f1_scores)
avg_precision = np.mean(mlp_precisions)
avg_recall = np.mean(mlp_recalls)


    




In [50]:
# Ukazanie średnich metryk - sugestia zespołu walidacyjnego by dodać inne niż accuracy
print(f"Średnia dokładność: {avg_accuracy:.4f}")
print(f"Średnia F1-score: {avg_f1:.4f}")
print(f"Średnia Precision: {avg_precision:.4f}")
print(f"Średnia Recall: {avg_recall:.4f}")

Średnia dokładność: 0.9307
Średnia F1-score: 0.9307
Średnia Precision: 0.9379
Średnia Recall: 0.9214


In [51]:
final_mlp_model = models.Sequential([
    tf.keras.Input(shape=(X_scaled.shape[1],)),
    layers.Dense(128, activation='relu'),
    layers.Dense(128, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(12, activation='softmax')
])
final_mlp_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Trenowanie na pełnym zbiorze
final_mlp_model.fit(X_scaled, y_one_hot, epochs=10, batch_size=32, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x1aff7bf95d0>

In [52]:
X_test_scaled = scaler.transform(X_test)
y_test_encoded = label_encoder.transform(y_test)
y_test_one_hot = to_categorical(y_test_encoded)

In [53]:
test_loss, test_accuracy = final_mlp_model.evaluate(X_test_scaled, y_test_one_hot)
y_test_pred_proba = final_mlp_model.predict(X_test_scaled)
y_test_pred = np.argmax(y_test_pred_proba, axis=1)

print(f"Dokładność na zbiorze testowym: {test_accuracy:.4f}")
print("Raport klasyfikacji:\n", classification_report(y_test_encoded, y_test_pred, target_names=label_encoder.classes_))

Dokładność na zbiorze testowym: 0.8132
Raport klasyfikacji:
                       precision    recall  f1-score   support

      Bombali Ghevda       0.43      0.04      0.07        80
        Butka Ghevda       1.00      1.00      1.00        80
       Gabara Ghevda       0.97      0.88      0.92        80
             God Wal       0.35      0.79      0.49        80
          Hirva Lamb       0.96      1.00      0.98        80
            Kadu wal       0.69      0.90      0.78        80
            Kala Wal       0.99      0.99      0.99        80
     Lal lamb shenga       0.84      0.94      0.89        80
Lamb Shiracha Ghevda       0.97      0.82      0.89        80
      Shravan Ghevda       0.95      0.45      0.61        78
          Tambda wal       1.00      0.99      0.99        80
       Vatana Ghevda       0.96      0.96      0.96        80

            accuracy                           0.81       958
           macro avg       0.84      0.81      0.80       958
       

In [54]:
data_path = "Dataset_split/train"
data_images = []
labels = []
image_size=(100, 70)


for class_folder in os.listdir(data_path):  
    class_path = os.path.join(data_path, class_folder)
    if os.path.isdir(class_path):  
        for file in os.listdir(class_path):  
            if file.lower().endswith(('.jpg', '.jpeg', '.png')):  
                img = cv2.imread(os.path.join(class_path, file))  # Wczytanie obrazu w kolorze
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Konwersja z BGR do RGB
                img = cv2.resize(img, image_size)
                data_images.append(img)
                labels.append(class_folder)




In [55]:
X_images = np.array(data_images) / 255.0  # Normalizacja

label_encoder_images = LabelEncoder()
y_images = label_encoder_images.fit_transform(labels)
y_images_one_hot = to_categorical(y_images)

In [31]:
# 5-krotna walidacja krzyżowa dla CNN
cnn_accuracies = []
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
X_images = np.array(X_images)



In [32]:
i=1
for train_idx, val_idx in skf.split(y_images, y_images):  # Zmiana indeksacji na y_images
    X_train, X_val = X_images[train_idx], X_images[val_idx]
    y_train, y_val = y_images_one_hot[train_idx], y_images_one_hot[val_idx]
    
    
    cnn_model = models.Sequential([
    layers.Conv2D(32, (3,3), activation='relu', input_shape=(70, 100, 3)),
    layers.MaxPooling2D((2,2)),
    layers.Conv2D(64, (3,3), activation='relu'),
    layers.MaxPooling2D((2,2)),
    layers.Conv2D(128, (3,3), activation='relu'),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(len(np.unique(y_images)), activation='softmax')
])
    cnn_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    cnn_model.fit(X_train, y_train, epochs=10, validation_data=(X_val, y_val), batch_size=32, verbose=0)
    
    val_accuracy = cnn_model.evaluate(X_val, y_val, verbose=0)[1]
    cnn_accuracies.append(val_accuracy)
    
    print(f"Dokładność w iteracji {i}: {val_accuracy:.4f}")
    i+=1


Dokładność w iteracji 1: 0.6921
Dokładność w iteracji 2: 0.8343
Dokładność w iteracji 3: 0.8520
Dokładność w iteracji 4: 0.8399
Dokładność w iteracji 5: 0.8147


In [33]:
print(f"Średnia dokładność CNN: {np.mean(cnn_accuracies):.4f}")

Średnia dokładność CNN: 0.8066


In [37]:
## sprawdzenie na zbiorze testowym

data_path = "Dataset_split/test"
data_images = []
labels = []
image_size=(100, 70)


for class_folder in os.listdir(data_path):  
    class_path = os.path.join(data_path, class_folder)
    if os.path.isdir(class_path):  
        for file in os.listdir(class_path):  
            if file.lower().endswith(('.jpg', '.jpeg', '.png')):  
                img = cv2.imread(os.path.join(class_path, file))  # Wczytanie obrazu w kolorze
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Konwersja z BGR do RGB
                img = cv2.resize(img, image_size)
                data_images.append(img)
                labels.append(class_folder)
X_images = np.array(data_images) / 255.0  # Normalizacja

label_encoder_images = LabelEncoder()
y_images = label_encoder_images.fit_transform(labels)
y_images_one_hot = to_categorical(y_images)


cnn_model.evaluate(X_images, y_images_one_hot, verbose=0)[1]

0.7779433727264404