In [None]:
import numpy as np
import matplotlib.pyplot as plt
from skimage import segmentation, future
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from src.dataset import Dataset
from src.modelling import DeepModel_Trainer
import torch.utils.data
import torch
import wandb
import seaborn as sns
import numpy as np
import pandas as pd
import os


In [None]:
# Location of the ZueriCrop HDF5 file on the local machine.
# NOTE(review): the experiment cells further down hard-code the C: variant of
# this path instead of using this variable - confirm which one is intended.
file_name_zueri = r'D:\Temp\AgroLuege\raw_data\ZueriCrop\ZueriCrop.hdf5'
# file_name_zueri = r'C:\Temp\AgroLuege\raw_data\ZueriCrop\ZueriCrop.hdf5'

# Single seed for the whole notebook; it is also written to every wandb run config.
random_state = 123
np.random.seed(random_state)
# The samples reach the random forest through torch data loaders, so torch's
# global RNG is seeded as well (presumably it drives any loader shuffling - verify
# against DeepModel_Trainer.create_loader).
torch.manual_seed(random_state)


# Ask wandb to keep its console output quiet (WANDB_SILENT environment switch).
os.environ["WANDB_SILENT"] = "true"

def setup_wandb_run(
    project_name: str,
    run_group: str,
    fold: int,
    model_architecture: str,
    batchsize: int,
    seed:int,
    entity:str="dlbs_crop",
):
    """
    Start a new wandb run (one run per fold / experiment).

    :param str project_name: wandb project the run is logged to.
    :param str run_group: wandb group the run is filed under.
    :param fold: fold identifier; the run is named "<fold>-Fold".
        NOTE(review): annotated as int, but the caller in this file passes 'all'.
    :param str model_architecture: stored in the run config as "model architecture".
    :param batchsize: stored in the run config as "batchsize".
        NOTE(review): annotated as int, but the caller in this file passes 'Full'.
    :param int seed: stored in the run config as "seed".
    :param str entity: wandb entity passed to wandb.init.
    :return: the object returned by wandb.init.
    """
    # Hyper-parameters that should be visible on the run's overview page.
    run_config = {
        "model architecture": model_architecture,
        "batchsize": batchsize,
        "seed":seed
    }
    thread_settings = wandb.Settings(start_method="thread")

    return wandb.init(
        settings=thread_settings,
        project=project_name,
        entity=entity,
        name=f"{fold}-Fold",
        group=run_group,
        config=run_config,
    )

def _stack_loader_batches(loader):
    """
    Drain a data loader and stack its batches along the batch dimension.

    Every batch must unpack into four items; only the first (model input) and
    the third (target) are kept, the second and fourth are ignored.

    :param loader: iterable yielding 4-tuples whose 1st and 3rd items are tensors
    :return: (inputs, targets), each concatenated along dim 0
    :raises: whatever torch.cat raises when the loader yields no batches
    """
    inputs = []
    targets = []
    for features, _, labels, _ in loader:
        inputs.append(features)
        targets.append(labels)

    # One tensor per role instead of a list of per-batch tensors.
    return torch.cat(inputs, dim=0), torch.cat(targets, dim=0)


def load_data_train(model_trainer):
    """
    Materialise the whole training split of a trainer as two tensors.

    :param model_trainer: object exposing a `train_loader` iterable
    :return: (input_train, target_train) concatenated over all batches
    """
    return _stack_loader_batches(model_trainer.train_loader)


def load_data_test(model_trainer):
    """
    Materialise the whole test split of a trainer as two tensors.

    :param model_trainer: object exposing a `test_loader` iterable
    :return: (input_test, target_test) concatenated over all batches
    """
    return _stack_loader_batches(model_trainer.test_loader)

def prepare_data_fold(input, target, patch_size=24, n_channels=4, patches_per_group=10):
    """
    Flatten a stack of patches into one large "image" for pixel-wise training.

    Samples are consumed in groups of `patches_per_group`; trailing samples that
    do not fill a complete group are dropped. The kept data is then reshaped
    (plain memory-order reshape, no transposition) to
    (patch_size * n_groups, patch_size * patches_per_group, n_channels) for the
    input and (patch_size * n_groups, patch_size * patches_per_group) for the target.

    NOTE(review): this assumes each sample holds patch_size*patch_size*n_channels
    input values and patch_size*patch_size target values, with channels as the
    LAST input axis - only then does every target pixel stay paired with its own
    channel vector after the reshape. TODO confirm against the dataset layout.

    :param input: input tensor, first axis = samples
    :param target: target tensor, first axis = samples (same sample order)
    :param int patch_size: edge length of one square patch (default 24)
    :param int n_channels: number of input channels per pixel (default 4)
    :param int patches_per_group: samples folded into one row block (default 10)
    :return: (reshaped_input, reshaped_target)
    """
    n_groups = len(input) // patches_per_group
    n_used = n_groups * patches_per_group  # drop the incomplete trailing group

    rows = patch_size * n_groups
    cols = patch_size * patches_per_group

    reshaped_tensor = input[0:n_used].reshape(rows, cols, n_channels)
    reshaped_target = target[0:n_used].reshape(rows, cols)

    return reshaped_tensor,reshaped_target

def fit_rf(clf:RandomForestClassifier,reshaped_tensor_train,reshaped_target_train):
    """
    Train `clf` on a labelled feature image via skimage's `future.fit_segmenter`.

    Note the argument order: fit_segmenter receives the label image first and
    the feature image second, i.e. the reverse of this function's signature.

    NOTE(review): per the scikit-image documentation, fit_segmenter treats label 0
    as "unlabeled" and leaves those pixels out of training - confirm this is the
    intended handling of the unknown class.

    :param clf: classifier to fit
    :param reshaped_tensor_train: feature image (features in the last axis)
    :param reshaped_target_train: label image matching the feature image
    :return: the value returned by fit_segmenter
    """
    return future.fit_segmenter(reshaped_target_train, reshaped_tensor_train, clf)

def predict_rf(model:RandomForestClassifier,reshaped_tensor):
    """
    Predict a label image for a feature image via skimage's `future.predict_segmenter`.

    :param model: fitted classifier
    :param reshaped_tensor: feature image (features in the last axis)
    :return: the value returned by predict_segmenter
    """
    return future.predict_segmenter(reshaped_tensor, model)

def evaluate_log(reshaped_target_train,y_pred,wandbrun,verbose:bool=False,
                 class_names_cm =["0_unknown","Field crops","Forest","Grassland","Orchards","Special crops"],is_test:bool=False):
    """
    Compute accuracy, per-class F1 and a confusion matrix and log them to wandb.

    Class ids are taken to be 0..len(class_names_cm)-1, with class_names_cm[i]
    naming class i. The ids are passed explicitly to the metric functions so
    that scores and confusion-matrix rows/columns stay aligned with the names
    even when a class is missing from both the targets and the predictions
    (such a class gets an F1 of 0).

    :param reshaped_target_train: ground-truth labels; must offer `.numpy()`
    :param y_pred: predicted labels; must offer `.ravel()`
    :param wandbrun: object with a `log(dict)` method (the wandb run)
    :param bool verbose: also print accuracy and the F1 dictionary
    :param class_names_cm: display name per class id (default list is only read, never mutated)
    :param bool is_test: log under "*_test" keys instead of "*_train" keys
    """
    split = "test" if is_test else "train"
    class_ids = list(range(len(class_names_cm)))

    # Flatten once; every metric works on 1-D label vectors.
    y_true = reshaped_target_train.numpy().ravel()
    y_hat = y_pred.ravel()

    accuracy = accuracy_score(y_true, y_hat)
    conf_matrix = confusion_matrix(y_true, y_hat, labels=class_ids)
    f1score = f1_score(y_true, y_hat, labels=class_ids, average=None, zero_division=0)

    f1_scores_dict = {
        "F1-Score_" + split + "_" + name: score
        for name, score in zip(class_names_cm, f1score)
    }
    if verbose:
        print("Accuracy:", accuracy)
        print("f1scores:", f1_scores_dict)

    # Scalars go out in a single call so the F1 values are not logged twice.
    metrics = {"accuracy_" + split: accuracy}
    metrics.update(f1_scores_dict)
    wandbrun.log(metrics)

    plt.figure(figsize=(12, 6))
    try:
        sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
                    cbar=False, xticklabels=class_names_cm,
                    yticklabels=class_names_cm)
        plt.xlabel('Predicted Labels')
        plt.ylabel('True Labels')
        wandbrun.log({"confusion_matrix_" + split: wandb.Image(plt)})
    finally:
        # Always release the figure, even if plotting or logging fails.
        plt.close()

def train_evaluate_rf(clf,trainer:DeepModel_Trainer, run_group="RandomForest-Baseline"):
    """
    Fit a random forest on the trainer's training split, evaluate it on the
    training and test splits, and log everything to a fresh wandb run.

    The wandb run is always finished, even when loading, fitting or evaluation
    raises, so a failed experiment does not leave a run open.

    :param clf: unfitted classifier handed to fit_rf
    :param trainer: provides create_loader() plus the train/test loaders
    :param str run_group: wandb group name for this experiment
    """
    # init the wandb run (seed taken from the notebook-level `random_state`)
    run = setup_wandb_run(project_name="dlbs_crop-rf",
                          run_group=run_group,
                          fold='all', model_architecture="RandomForest",
                          batchsize='Full',seed=random_state)
    try:
        trainer.create_loader()

        # --- training split: fit, then score on the data the model has seen
        input_train,target_train = load_data_train(trainer)
        reshaped_tensor_train,reshaped_target_train = prepare_data_fold(input_train,target_train)
        rfmodel = fit_rf(clf,reshaped_tensor_train,reshaped_target_train)
        y_pred_train = predict_rf(rfmodel,reshaped_tensor_train)
        evaluate_log(reshaped_target_train,y_pred_train,run,verbose=True)

        # --- test split: score only
        input_test,target_test= load_data_test(trainer)
        reshaped_tensor_test,reshaped_target_test = prepare_data_fold(input_test,target_test)
        y_pred_test = predict_rf(rfmodel,reshaped_tensor_test)
        evaluate_log(reshaped_target_test,y_pred_test,run,verbose=True,is_test=True)
    finally:
        wandb.finish()

In [None]:
# Experiment: 300 trees, class 0 weighted ~0.
# NOTE(review): path is hard-coded here although file_name_zueri exists in the config cell - confirm.
trainer = DeepModel_Trainer(r'C:\Temp\AgroLuege\raw_data\ZueriCrop\ZueriCrop.hdf5', 'labels.csv', None, 'cpu')
# random_state pins the forest to the seed that is also logged to wandb, so the
# result no longer depends on which cells consumed the global NumPy RNG before.
# NOTE(review): skimage's fit_segmenter is documented to skip label-0 pixels, so
# the weight given to class 0 probably has no effect - confirm.
clf = RandomForestClassifier(n_estimators=300,n_jobs=-1,random_state=random_state,class_weight={0: 1e-10, 1: 1, 2:1 ,3:1 ,4:1 ,5:1})
train_evaluate_rf(clf,trainer,"RandomForest-Baseline-300")

In [None]:
# Experiment: 200 trees, class 0 weighted ~0.
# NOTE(review): path is hard-coded here although file_name_zueri exists in the config cell - confirm.
trainer = DeepModel_Trainer(r'C:\Temp\AgroLuege\raw_data\ZueriCrop\ZueriCrop.hdf5', 'labels.csv', None, 'cpu')
# random_state pins the forest to the seed that is also logged to wandb, so the
# result no longer depends on which cells consumed the global NumPy RNG before.
# NOTE(review): skimage's fit_segmenter is documented to skip label-0 pixels, so
# the weight given to class 0 probably has no effect - confirm.
clf = RandomForestClassifier(n_estimators=200,n_jobs=-1,random_state=random_state,class_weight={0: 1e-10, 1: 1, 2:1 ,3:1 ,4:1 ,5:1})
train_evaluate_rf(clf,trainer,"RandomForest-Baseline-200")

In [None]:
# Experiment: 150 trees, no class weighting.
# NOTE(review): path is hard-coded here although file_name_zueri exists in the config cell - confirm.
trainer = DeepModel_Trainer(r'C:\Temp\AgroLuege\raw_data\ZueriCrop\ZueriCrop.hdf5', 'labels.csv', None, 'cpu')
# random_state pins the forest to the seed that is also logged to wandb, so the
# result no longer depends on which cells consumed the global NumPy RNG before.
clf = RandomForestClassifier(n_estimators=150,n_jobs=-1,random_state=random_state)
train_evaluate_rf(clf,trainer,"RandomForest-Baseline-150-No-Weight")

In [None]:
# Experiment: 150 trees, class 0 weighted ~0.
# NOTE(review): path is hard-coded here although file_name_zueri exists in the config cell - confirm.
trainer = DeepModel_Trainer(r'C:\Temp\AgroLuege\raw_data\ZueriCrop\ZueriCrop.hdf5', 'labels.csv', None, 'cpu')
# random_state pins the forest to the seed that is also logged to wandb, so the
# result no longer depends on which cells consumed the global NumPy RNG before.
# NOTE(review): skimage's fit_segmenter is documented to skip label-0 pixels, so
# the weight given to class 0 probably has no effect - confirm.
clf = RandomForestClassifier(n_estimators=150,n_jobs=-1,random_state=random_state,class_weight={0: 1e-10, 1: 1, 2:1 ,3:1 ,4:1 ,5:1})
train_evaluate_rf(clf,trainer,"RandomForest-Baseline-150")

In [None]:
# Experiment: 100 trees, class 0 weighted ~0.
# NOTE(review): path is hard-coded here although file_name_zueri exists in the config cell - confirm.
trainer = DeepModel_Trainer(r'C:\Temp\AgroLuege\raw_data\ZueriCrop\ZueriCrop.hdf5', 'labels.csv', None, 'cpu')
# random_state pins the forest to the seed that is also logged to wandb, so the
# result no longer depends on which cells consumed the global NumPy RNG before.
# NOTE(review): skimage's fit_segmenter is documented to skip label-0 pixels, so
# the weight given to class 0 probably has no effect - confirm.
clf = RandomForestClassifier(n_estimators=100,n_jobs=-1,random_state=random_state,class_weight={0: 1e-10, 1: 1, 2:1 ,3:1 ,4:1 ,5:1})
train_evaluate_rf(clf,trainer,"RandomForest-Baseline-100")

In [None]:
# Experiment: 50 trees, class 0 weighted ~0.
# NOTE(review): path is hard-coded here although file_name_zueri exists in the config cell - confirm.
trainer = DeepModel_Trainer(r'C:\Temp\AgroLuege\raw_data\ZueriCrop\ZueriCrop.hdf5', 'labels.csv', None, 'cpu')
# random_state pins the forest to the seed that is also logged to wandb, so the
# result no longer depends on which cells consumed the global NumPy RNG before.
# NOTE(review): skimage's fit_segmenter is documented to skip label-0 pixels, so
# the weight given to class 0 probably has no effect - confirm.
clf = RandomForestClassifier(n_estimators=50,n_jobs=-1,random_state=random_state,class_weight={0: 1e-10, 1: 1, 2:1 ,3:1 ,4:1 ,5:1})
train_evaluate_rf(clf,trainer,"RandomForest-Baseline-50")

In [None]:
# Experiment: 20 trees, class 0 weighted ~0.
# NOTE(review): path is hard-coded here although file_name_zueri exists in the config cell - confirm.
trainer = DeepModel_Trainer(r'C:\Temp\AgroLuege\raw_data\ZueriCrop\ZueriCrop.hdf5', 'labels.csv', None, 'cpu')
# random_state pins the forest to the seed that is also logged to wandb, so the
# result no longer depends on which cells consumed the global NumPy RNG before.
# NOTE(review): skimage's fit_segmenter is documented to skip label-0 pixels, so
# the weight given to class 0 probably has no effect - confirm.
clf = RandomForestClassifier(n_estimators=20,n_jobs=-1,random_state=random_state,class_weight={0: 1e-10, 1: 1, 2:1 ,3:1 ,4:1 ,5:1})
train_evaluate_rf(clf,trainer,"RandomForest-Baseline-20")

In [None]:
# Experiment: 10 trees, class 0 weighted ~0.
# NOTE(review): path is hard-coded here although file_name_zueri exists in the config cell - confirm.
trainer = DeepModel_Trainer(r'C:\Temp\AgroLuege\raw_data\ZueriCrop\ZueriCrop.hdf5', 'labels.csv', None, 'cpu')
# random_state pins the forest to the seed that is also logged to wandb, so the
# result no longer depends on which cells consumed the global NumPy RNG before.
# NOTE(review): skimage's fit_segmenter is documented to skip label-0 pixels, so
# the weight given to class 0 probably has no effect - confirm.
clf = RandomForestClassifier(n_estimators=10,n_jobs=-1,random_state=random_state,class_weight={0: 1e-10, 1: 1, 2:1 ,3:1 ,4:1 ,5:1})
train_evaluate_rf(clf,trainer,"RandomForest-Baseline-10")