<a href="https://colab.research.google.com/github/lucevito/image/blob/main/random_forest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')
%cd drive/MyDrive

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive


In [2]:
import glob
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import joblib
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score
from sklearn.metrics import f1_score, roc_auc_score, roc_curve, auc
from imblearn.metrics import geometric_mean_score
import os
import pandas as pd
from openpyxl import Workbook, load_workbook
from openpyxl.utils.dataframe import dataframe_to_rows


def loaddataset(directory):
  images_files = glob.glob(directory + '/images' + '/*.npy')
  masks_files = glob.glob(directory + '/masks' + '/*.npy')
  x = np.array([np.load(file) for file in images_files])
  y = np.array([np.load(file) for file in masks_files])
  x = x.reshape(len(x) * len(x[0]) * len(x[0][0]), 10)
  y = y.reshape(len(y) * len(y[0]) * len(y[0][0]), 1)
  y = np.ravel(y)
  return x,y

def rflearn(X,Y,filename):
  rf_model = RandomForestClassifier(random_state=42)
  rf_model.fit(X, Y)
  joblib.dump(rf_model, filename)

def rftest(test,filename):
  rf_model = joblib.load(filename)
  predictions = rf_model.predict(test)
  return predictions


def print_metrics(y_true, y_pred):
    # Calcola la matrice di confusione
    cm = confusion_matrix(y_true, y_pred)
    tn, fp, fn, tp = cm.ravel()
    # Metriche per la classe negativa (classe 0)
    precision_negative = precision_score(y_true, y_pred, pos_label=0)
    recall_negative = recall_score(y_true, y_pred, pos_label=0)
    fscore_negative = f1_score(y_true, y_pred, pos_label=0)
    # Metriche per la classe positiva (classe 1)
    precision_positive = precision_score(y_true, y_pred, pos_label=1)
    recall_positive = recall_score(y_true, y_pred, pos_label=1)
    fscore_positive = f1_score(y_true, y_pred, pos_label=1)
    # Calcola l'accuratezza media e l'accuratezza complessiva
    average_accuracy = (accuracy_score(y_true, y_pred) + accuracy_score(y_true, y_pred, normalize=False)) / 2
    overall_accuracy = accuracy_score(y_true, y_pred)
    # Calcola la G-Mean (Geometric Mean Score)
    gmean = geometric_mean_score(y_true, y_pred)
    # Calcola l'AUC (Area Under the Curve) per la curva ROC
    roc_auc = roc_auc_score(y_true, y_pred)
    print("True Negative (TN):", tn)
    print("False Positive (FP):", fp)
    print("False Negative (FN):", fn)
    print("True Positive (TP):", tp)
    print("Precision (Negative Class):", precision_negative)
    print("Recall (Negative Class):", recall_negative)
    print("F-score (Negative Class):", fscore_negative)
    print("Precision (Positive Class):", precision_positive)
    print("Recall (Positive Class):", recall_positive)
    print("F-score (Positive Class):", fscore_positive)
    print("Average Accuracy:", average_accuracy)
    print("Overall Accuracy:", overall_accuracy)
    print("G-Mean:", gmean)
    print("AUC (Area Under the Curve):", roc_auc)
    print("\n")

def save_csv(model_name,y_true_train, y_pred_train,y_true_test,y_pred_test):

  file_name = 'risultati_modelli.xlsx'
  cm_train = confusion_matrix(y_true_train, y_pred_train)
  tn_train, fp_train, fn_train, tp_train = cm_train.ravel()
  precision_negative_train = precision_score(y_true_train, y_pred_train, pos_label=0)
  recall_negative_train = recall_score(y_true_train, y_pred_train, pos_label=0)
  fscore_negative_train = f1_score(y_true_train, y_pred_train, pos_label=0)
  precision_positive_train = precision_score(y_true_train, y_pred_train, pos_label=1)
  recall_positive_train = recall_score(y_true_train, y_pred_train, pos_label=1)
  fscore_positive_train = f1_score(y_true_train, y_pred_train, pos_label=1)
  average_accuracy_train = (accuracy_score(y_true_train, y_pred_train) + accuracy_score(y_true_train, y_pred_train, normalize=False)) / 2
  overall_accuracy_train = accuracy_score(y_true_train, y_pred_train)
  gmean_train = geometric_mean_score(y_true_train, y_pred_train)
  roc_auc_train = roc_auc_score(y_true_train, y_pred_train)

  cm_test = confusion_matrix(y_true_test, y_pred_test)
  tn_test, fp_test, fn_test, tp_test = cm_test.ravel()
  precision_negative_test = precision_score(y_true_test, y_pred_test, pos_label=0)
  recall_negative_test = recall_score(y_true_test, y_pred_test, pos_label=0)
  fscore_negative_test = f1_score(y_true_test, y_pred_test, pos_label=0)
  precision_positive_test = precision_score(y_true_test, y_pred_test, pos_label=1)
  recall_positive_test = recall_score(y_true_test, y_pred_test, pos_label=1)
  fscore_positive_test = f1_score(y_true_test, y_pred_test, pos_label=1)
  average_accuracy_test = (accuracy_score(y_true_test, y_pred_test) + accuracy_score(y_true_test, y_pred_test, normalize=False)) / 2
  overall_accuracy_test = accuracy_score(y_true_test, y_pred_test)
  gmean_test = geometric_mean_score(y_true_test, y_pred_test)
  roc_auc_test = roc_auc_score(y_true_test, y_pred_test)

  results = [
      {
          'Modello': model_name,
          'Dataset': 'Train Set',
          'Parametri della configurazione': 'rf_senza_bilanciamenti',
          'True Negative': tn_train,
          'False Negative': fn_train,
          'False Positive': fp_train,
          'True Positive': tp_train,
          'Precision Negative': precision_negative_train,
          'Recall Negative': recall_negative_train,
          'Fscore Negative': fscore_negative_train,
          'Precision Positive': precision_positive_train,
          'Recall Positive': recall_positive_train,
          'Fscore Positive': fscore_positive_train,
          'Average Accuracy': average_accuracy_train,
          'Overall Accuracy': overall_accuracy_train,
          'GMean': gmean_train,
          'AUC': roc_auc_train,
      },
      {
          'Modello': model_name,
          'Dataset': 'Test Set',
          'Parametri della configurazione': 'rf_senza_bilanciamenti',
          'True Negative': tn_test,
          'False Negative': fn_test,
          'False Positive': fp_test,
          'True Positive': tp_test,
          'Precision Negative': precision_negative_test,
          'Recall Negative': recall_negative_test,
          'Fscore Negative': fscore_negative_test,
          'Precision Positive': precision_positive_test,
          'Recall Positive': recall_positive_test,
          'Fscore Positive': fscore_positive_test,
          'Average Accuracy': average_accuracy_test,
          'Overall Accuracy': overall_accuracy_test,
          'GMean': gmean_test,
          'AUC': roc_auc_test,
      },
  ]

  # Controlla se il file del foglio di calcolo esiste
  if os.path.exists(file_name):
      # Carica i dati esistenti dal foglio di calcolo
      existing_df = pd.read_excel(file_name)
      # Unisci i nuovi dati con quelli esistenti
      df = pd.concat([existing_df, pd.DataFrame(results)])
  else:
      # Se il file non esiste, crea semplicemente il DataFrame con i nuovi dati
      df = pd.DataFrame(results)

  # Creazione del foglio di calcolo Excel
  wb = Workbook()
  ws = wb.active

  # Inserimento dei dati nel foglio di calcolo
  for r in dataframe_to_rows(df, index=False, header=True):
      ws.append(r)

  # Salvataggio del foglio di calcolo
  wb.save(file_name)




In [3]:
train_path = 'Immagini_satellitari/Train'
test_path = 'Immagini_satellitari/Test/'
model_name = "rf_model.h"

trainX,trainY = loaddataset(train_path)
testX,testY = loaddataset(test_path)

rflearn(trainX,trainY,model_name)


test_predictions = rftest(testX,model_name)
print("TEST : ")
print_metrics(testY,test_predictions)

train_predictions = rftest(trainX,model_name)
print("TRAIN : ")
print_metrics(trainY,train_predictions)

save_csv(model_name,trainY, train_predictions,testY,test_predictions)


TEST : 
True Negative (TN): 161888
False Positive (FP): 212
False Negative (FN): 2753
True Positive (TP): 11
Precision (Negative Class): 0.9832787701726787
Recall (Negative Class): 0.9986921653300432
F-score (Negative Class): 0.9909255342916867
Precision (Positive Class): 0.04932735426008968
Recall (Positive Class): 0.003979739507959479
F-score (Positive Class): 0.007365249414127887
Average Accuracy: 80949.9910077397
Overall Accuracy: 0.9820154794254659
G-Mean: 0.06304391062310118
AUC (Area Under the Curve): 0.5013359524190014


TRAIN : 
True Negative (TN): 646037
False Positive (FP): 128
False Negative (FN): 645
True Positive (TP): 22886
Precision (Negative Class): 0.9990026009692554
Recall (Negative Class): 0.9998019081813468
F-score (Negative Class): 0.9994020947567654
Precision (Positive Class): 0.9944381680716086
Recall (Positive Class): 0.9725893502188603
F-score (Positive Class): 0.9833924159415619
Average Accuracy: 334461.99942287244
Overall Accuracy: 0.9988457449350153
G-Mean: