# Feature extraction

In [7]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.applications.efficientnet import preprocess_input
import joblib

# Main parameters
IMG_SIZE = 380  # side length (pixels) images are resized to before being fed to the CNN
input_dir = 'Nuove immagini'  # Input directory containing the models and the images
output_dir = 'Output_' + input_dir  # Output directory where the extracted features are saved
# Create the output folder if it does not exist.
# exist_ok=True avoids the check-then-create race of os.path.exists() + os.makedirs().
os.makedirs(output_dir, exist_ok=True)

# File and folder names
keras_model_name = 'modello_finale.keras'
new_images_folder = 'test_6'
new_images_excel = 'test_6.xlsx'

# Full paths of the files and folders
keras_model_path = os.path.join(input_dir, keras_model_name)
image_dir = os.path.join(input_dir, new_images_folder)
excel_path = os.path.join(input_dir, new_images_excel)

# Preprocess a single image for the CNN
def preprocess_image(image):
    """Resize an image to IMG_SIZE x IMG_SIZE and apply the EfficientNet input preprocessing.

    Args:
        image: Image tensor accepted by ``tf.image.resize``.

    Returns:
        The result of ``preprocess_input`` applied to the resized image.

    NOTE(review): the tf.keras docs describe ``efficientnet.preprocess_input``
    as a pass-through (scaling lives inside the model) - confirm for the
    installed TensorFlow version.
    """
    target_shape = (IMG_SIZE, IMG_SIZE)
    return preprocess_input(tf.image.resize(image, target_shape))

# Load and preprocess every image found in a folder
def load_and_preprocess_images(image_dir):
    """Read, decode and preprocess all PNG/JPEG images in ``image_dir``.

    Files are visited in sorted name order so the result is deterministic,
    and the extension check is case-insensitive so that files such as
    ``IMG_001.JPG`` are not silently skipped.

    Args:
        image_dir: Path of the folder to scan (not recursive).

    Returns:
        A tuple ``(images, image_paths)`` of two parallel lists: the
        preprocessed image tensors and the corresponding file names
        (base names only, without ``image_dir``).
    """
    valid_extensions = ('.png', '.jpg', '.jpeg')
    image_paths = []
    images = []
    for img_name in sorted(os.listdir(image_dir)):
        # Case-insensitive match: cameras often write upper-case extensions.
        if not img_name.lower().endswith(valid_extensions):
            continue
        img_path = os.path.join(image_dir, img_name)
        img = tf.io.read_file(img_path)
        # expand_animations=False makes decode_image always return a 3-D
        # tensor, even if a mislabelled animated file slips through the filter.
        img = tf.image.decode_image(img, channels=3, expand_animations=False)
        img = preprocess_image(img)
        images.append(img)
        # Only the file name is stored; it is the key used to join with the labels.
        image_paths.append(img_name)
    return images, image_paths

# Run the feature extractor over the preprocessed images
def extract_features(images, feature_extractor):
    """Compute one flat feature vector per image.

    Args:
        images: Iterable of objects exposing ``.numpy()`` (the preprocessed
            image tensors); they are converted into a single batch array.
        feature_extractor: Object exposing ``predict(batch)``; it is called
            once on the whole batch.

    Returns:
        ``np.ndarray`` whose i-th row is the flattened output of
        ``feature_extractor`` for the i-th image.
    """
    batch = np.array(list(tensor.numpy() for tensor in images))
    raw_outputs = feature_extractor.predict(batch)

    # Flatten each per-image output so every image yields a 1-D vector
    flat_rows = []
    for single_output in raw_outputs:
        flat_rows.append(single_output.flatten())
    return np.array(flat_rows)

# Load the saved CNN model
cnn_model = tf.keras.models.load_model(keras_model_path)

# Create a new model that returns the features from the desired layer ('dense_1') of the saved model
feature_extractor = Model(inputs=cnn_model.input, outputs=cnn_model.get_layer('dense_1').output)

# Load and preprocess the images
images, image_paths = load_and_preprocess_images(image_dir)

# Fail early with a clear message: with no images, predict() would otherwise
# be called on an empty array and fail with an unrelated-looking error.
if not images:
    raise ValueError(f'No .png/.jpg/.jpeg images found in {image_dir}')

# Extract the features from the new images
features = extract_features(images, feature_extractor)

# Save the extracted features and the image file names to a .npz file
output_features_file = os.path.join(output_dir, new_images_folder + '_extracted_features.npz')
np.savez_compressed(output_features_file, features=features, image_paths=image_paths)

print(f'Features extracted and saved to {output_features_file}')


[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m87s[0m 1s/step
Features extracted and saved to Output_Nuove immagini\test_6_extracted_features.npz


# Prediction

In [8]:
import os
import numpy as np
import pandas as pd
import joblib
from sklearn.metrics import mean_absolute_percentage_error

# Main parameters
IMG_SIZE = 380
input_dir = 'Nuove immagini'  # Input directory containing the models and the images
output_dir = 'Output_' + input_dir  # Output directory where the features and predictions are saved
# Create the output folder if it does not exist.
# exist_ok=True avoids the check-then-create race of os.path.exists() + os.makedirs().
os.makedirs(output_dir, exist_ok=True)

# File and folder names
random_forest_model_name = 'random_forest_model.pkl'
new_images_folder = 'test_6'
new_images_excel = 'test_6.xlsx'

# Full paths of the files and folders
random_forest_model_path = os.path.join(input_dir, random_forest_model_name)
excel_path = os.path.join(input_dir, new_images_excel)
extracted_features_file = os.path.join(output_dir, new_images_folder + '_extracted_features.npz')

# Load the extracted features and the image file names.
# The context manager closes the .npz archive once both arrays have been read.
with np.load(extracted_features_file) as data:
    features = data['features']
    image_paths = data['image_paths']

# Load the saved Random Forest model.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code - only load model files that come from a trusted source.
random_forest_model = joblib.load(random_forest_model_path)

# Load the true labels from the Excel file
labels_df = pd.read_excel(excel_path)

# Attach each image's feature vector to its row, matched on the file name
labels_df['Features'] = labels_df['Filename'].map(dict(zip(image_paths, features.tolist())))

# Fail early if some files listed in the Excel sheet have no extracted
# features; otherwise the NaN entries make np.stack below fail with an
# unhelpful shape error.
missing_features = labels_df['Features'].isna()
if missing_features.any():
    missing_files = labels_df.loc[missing_features, 'Filename'].tolist()
    raise ValueError(
        f'No extracted features for {len(missing_files)} file(s) listed in {excel_path} '
        f'(first entries: {missing_files[:10]})'
    )

# Group the features by "PLOT" and average them, giving one feature vector per plot
grouped_features = labels_df.groupby('PLOT')['Features'].apply(lambda x: np.mean(np.stack(x), axis=0))

# Predict with the Random Forest on the per-plot feature vectors.
# All plots are predicted in a single batched call instead of one call per plot.
plot_feature_matrix = np.stack(grouped_features.tolist())
predicted_labels_rf = pd.Series(
    random_forest_model.predict(plot_feature_matrix),
    index=grouped_features.index,
)

# Add the predicted labels to the grouped DataFrame.
# Note: .first() keeps the first row's remaining columns (including
# 'Features') for each (PLOT, RST, CROP) group.
grouped_df_rf = labels_df.groupby(['PLOT', 'RST', 'CROP'], as_index=False).first()
grouped_df_rf['Predicted Label'] = grouped_df_rf['PLOT'].map(predicted_labels_rf)

# Compute the MAPE for the new images ('RST' holds the true values)
true_labels_rf = grouped_df_rf['RST']
mape_rf = mean_absolute_percentage_error(true_labels_rf, grouped_df_rf['Predicted Label'])
print(f'MAPE using CNN + Random Forest: {mape_rf}')

# Append a summary row holding the MAPE
mape_row_rf = pd.DataFrame({'PLOT': ['MAPE'], 'RST': [''], 'CROP': [''], 'Predicted Label': [mape_rf]})
grouped_df_rf = pd.concat([grouped_df_rf, mape_row_rf], ignore_index=True)

# Save the results to an Excel file with two sheets
output_excel_rf = os.path.join(output_dir, new_images_folder + '_predictions_cnn_rf.xlsx')
with pd.ExcelWriter(output_excel_rf) as writer:
    labels_df.to_excel(writer, sheet_name='Predictions', index=False)
    grouped_df_rf.to_excel(writer, sheet_name='Grouped Predictions', index=False)

print(f'Results saved to {output_excel_rf}')


MAPE using CNN + Random Forest: 0.05288240818896522
Results saved to Output_Nuove immagini\test_6_predictions_cnn_rf.xlsx
