In [101]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import f1_score, confusion_matrix
from tensorflow.keras.utils import to_categorical
import cv2
from collections import defaultdict
import matplotlib.pyplot as plt
import random
from PIL import Image
from tensorflow.keras import regularizers




In [110]:
# Wczytanie danych
train_data = pd.read_csv("train_data_with_features.csv")
test_data = pd.read_csv("test_data_with_features.csv")

In [111]:
# Przygotowanie cech i etykiet
y_train_val = train_data['class']
X_train_val = train_data.drop(['class', "path", "filename", "R", "G", "B", "brightness"], axis=1)
y_test = test_data['class']
X_test = test_data.drop(['class', "path", "filename", "R", "G", "B", "brightness"], axis=1)


scaler = StandardScaler()
X_train_val = scaler.fit_transform(X_train_val)
X_test = scaler.transform(X_test)

In [112]:
# Konwersja etykiet na wartości liczbowe
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y_train_val)



In [113]:
# Definicja 5-krotnej walidacji krzyżowej
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

accuracies = []
reports = []

In [114]:
# Iteracja po podziałach walidacji krzyżowej
for fold, (train_idx, val_idx) in enumerate(skf.split(X_train_val, y_train_val)):
    print(f"Fold {fold + 1}")
    X_train, X_val = X_train_val[train_idx], X_train_val[val_idx]
    y_train, y_val = y_train_val[train_idx], y_train_val[val_idx]
    
    # Tworzenie i trenowanie modelu
    clf = RandomForestClassifier(n_estimators=100, random_state=42,max_depth=7)
    clf.fit(X_train, y_train)
    
    # Predykcja
    y_pred = clf.predict(X_val)
    
    # Ewaluacja
    accuracy = accuracy_score(y_val, y_pred)
    accuracies.append(accuracy)
    report = classification_report(y_val, y_pred, target_names=label_encoder.classes_)
    reports.append(report)
    
    print(f"Dokładność (walidacja): {accuracy:.4f}")
    print("Raport klasyfikacji:\n", report)
    print("-" * 50)



Fold 1
Dokładność (walidacja): 0.8600
Raport klasyfikacji:
                       precision    recall  f1-score   support

      Bombali Ghevda       0.81      0.60      0.69        86
        Butka Ghevda       0.98      0.93      0.96        59
       Gabara Ghevda       0.95      0.92      0.93        59
             God Wal       0.66      0.87      0.75        86
          Hirva Lamb       0.89      0.95      0.92        60
            Kadu wal       0.81      0.94      0.87        85
            Kala Wal       0.93      0.90      0.91        59
     Lal lamb shenga       0.81      0.86      0.84        59
Lamb Shiracha Ghevda       0.89      0.85      0.87        60
      Shravan Ghevda       0.96      0.88      0.92        60
          Tambda wal       1.00      0.97      0.98        60
       Vatana Ghevda       0.86      0.72      0.78        60

            accuracy                           0.86       793
           macro avg       0.88      0.87      0.87       793
        

In [115]:
# Ostateczna ewaluacja na zbiorze testowym
final_model = RandomForestClassifier(n_estimators=100, random_state=42)
final_model.fit(X_train_val, y_train_val)
y_test_pred = final_model.predict(X_test)



In [116]:
test_accuracy = accuracy_score(y_test, y_test_pred)
test_report = classification_report(y_test, y_test_pred, target_names=label_encoder.classes_)



In [117]:
print(f"Średnia dokładność na walidacji: {np.mean(accuracies):.4f}")
print(f"Dokładność na zbiorze testowym: {test_accuracy:.4f}")
print("Raport klasyfikacji na zbiorze testowym:\n", test_report)

Średnia dokładność na walidacji: 0.8687
Dokładność na zbiorze testowym: 0.9713
Raport klasyfikacji na zbiorze testowym:
                       precision    recall  f1-score   support

      Bombali Ghevda       0.93      0.90      0.91        58
        Butka Ghevda       1.00      1.00      1.00        56
       Gabara Ghevda       1.00      0.97      0.98        58
             God Wal       0.90      0.93      0.92        60
          Hirva Lamb       1.00      1.00      1.00        56
            Kadu wal       0.93      0.97      0.95        58
            Kala Wal       1.00      0.98      0.99        58
     Lal lamb shenga       0.96      0.98      0.97        56
Lamb Shiracha Ghevda       0.98      0.98      0.98        60
      Shravan Ghevda       1.00      0.98      0.99        61
          Tambda wal       1.00      1.00      1.00        60
       Vatana Ghevda       0.95      0.96      0.96        56

            accuracy                           0.97       697
         

In [118]:
y_one_hot = to_categorical(y_encoded)


In [119]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_train_val)

In [123]:
##### mlp_accuracies = []
mlp_f1_scores = []
mlp_specificities = []
mlp_precisions = []
mlp_recalls = []

for train_idx, val_idx in skf.split(X_scaled, y_encoded):
    X_train, X_val = X_scaled[train_idx], X_scaled[val_idx]
    y_train, y_val = y_one_hot[train_idx], y_one_hot[val_idx]
    
    mlp_model = models.Sequential([
        layers.Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
        layers.Dense(128, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dense(32, activation='relu'),
        layers.Dense(32, activation='relu'),
    # Change the number of units in the final layer to match the number of classes
        layers.Dense(12, activation='softmax') ])
    mlp_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy', 'Precision', 'Recall'])

    # Train the model
    history = mlp_model.fit(X_train, y_train, epochs=10, validation_data=(X_val, y_val), batch_size=32, verbose=0)
    
    # Evaluate model on validation data
    val_loss, val_accuracy, val_precision, val_recall = mlp_model.evaluate(X_val, y_val, verbose=0)
    
    # Calculate F1-score
    y_pred = mlp_model.predict(X_val)
    y_pred_class = np.argmax(y_pred, axis=1)
    y_val_class = np.argmax(y_val, axis=1)
    val_f1 = f1_score(y_val_class, y_pred_class, average='weighted')
    
    
    
    # Store metrics for averaging
    mlp_accuracies.append(val_accuracy)
    mlp_f1_scores.append(val_f1)
    mlp_precisions.append(val_precision)
    mlp_recalls.append(val_recall)

# Calculate the average for each metric
avg_accuracy = np.mean(mlp_accuracies)
avg_f1 = np.mean(mlp_f1_scores)
avg_precision = np.mean(mlp_precisions)
avg_recall = np.mean(mlp_recalls)


    




In [122]:
# Print the average metrics
print(f"Średnia dokładność: {avg_accuracy:.4f}")
print(f"Średnia F1-score: {avg_f1:.4f}")
print(f"Średnia Precision: {avg_precision:.4f}")
print(f"Średnia Recall: {avg_recall:.4f}")

Średnia dokładność: 0.8748
Średnia F1-score: 0.8619
Średnia Precision: 0.8889
Średnia Recall: 0.8485


In [81]:
data_path = "Dataset_split/train"
data_images = []
labels = []
image_size=(100, 70)


for class_folder in os.listdir(data_path):  
    class_path = os.path.join(data_path, class_folder)
    if os.path.isdir(class_path):  
        for file in os.listdir(class_path):  
            if file.lower().endswith(('.jpg', '.jpeg', '.png')):  
                img = cv2.imread(os.path.join(class_path, file))  # Wczytanie obrazu w kolorze
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Konwersja z BGR do RGB
                img = cv2.resize(img, image_size)
                data_images.append(img)
                labels.append(class_folder)




In [82]:
X_images = np.array(data_images) / 255.0  # Normalizacja

label_encoder_images = LabelEncoder()
y_images = label_encoder_images.fit_transform(labels)
y_images_one_hot = to_categorical(y_images)

In [83]:
# 5-krotna walidacja krzyżowa dla CNN
cnn_accuracies = []
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
X_images = np.array(X_images)



In [84]:
i=1
for train_idx, val_idx in skf.split(y_images, y_images):  # Zmiana indeksacji na y_images
    X_train, X_val = X_images[train_idx], X_images[val_idx]
    y_train, y_val = y_images_one_hot[train_idx], y_images_one_hot[val_idx]
    
    
    cnn_model = models.Sequential([
    layers.Conv2D(32, (3,3), activation='relu', input_shape=(70, 100, 3)),
    layers.MaxPooling2D((2,2)),
    layers.Conv2D(64, (3,3), activation='relu'),
    layers.MaxPooling2D((2,2)),
    layers.Conv2D(128, (3,3), activation='relu'),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(len(np.unique(y_images)), activation='softmax')
])
    cnn_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    cnn_model.fit(X_train, y_train, epochs=10, validation_data=(X_val, y_val), batch_size=32, verbose=0)
    
    val_accuracy = cnn_model.evaluate(X_val, y_val, verbose=0)[1]
    cnn_accuracies.append(val_accuracy)
    
    print(f"Dokładność w iteracji {i}: {val_accuracy:.4f}")
    i+=1


Dokładność w iteracji 1: 0.8225
Dokładność w iteracji 2: 0.7980
Dokładność w iteracji 3: 0.7993
Dokładność w iteracji 4: 0.7961
Dokładność w iteracji 5: 0.7080


In [97]:
print(f"Średnia dokładność CNN: {np.mean(cnn_accuracies):.4f}")

Średnia dokładność CNN: 0.7848


In [98]:
## sprawdzenie na zbiorze testowym

data_path = "Dataset_split/test"
data_images = []
labels = []
image_size=(100, 70)


for class_folder in os.listdir(data_path):  
    class_path = os.path.join(data_path, class_folder)
    if os.path.isdir(class_path):  
        for file in os.listdir(class_path):  
            if file.lower().endswith(('.jpg', '.jpeg', '.png')):  
                img = cv2.imread(os.path.join(class_path, file))  # Wczytanie obrazu w kolorze
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Konwersja z BGR do RGB
                img = cv2.resize(img, image_size)
                data_images.append(img)
                labels.append(class_folder)
X_images = np.array(data_images) / 255.0  # Normalizacja

label_encoder_images = LabelEncoder()
y_images = label_encoder_images.fit_transform(labels)
y_images_one_hot = to_categorical(y_images)


cnn_model.evaluate(X_images, y_images_one_hot, verbose=0)[1]

0.7374461889266968