In [42]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import glob
from pathlib import Path
import os
from datetime import datetime
import re
import math

In [22]:
def best_images(
    dirpath="/s/bach/b/class/cs535/cs535a/data/eval_results",
    savedirpath="/s/chopin/n/under/voleskyj/cs535_project/analysis/best_images",
    models=("CNN", "LSTM", "LSTM_bidirectional"),
    insteps=("1Day", "4Day", "8Day"),
    top_n=10,
):
    """Save side-by-side truth/prediction figures for the lowest-RMSE rows.

    For every (model, instep) pair, all CSV files in
    ``{dirpath}/{model}/{instep}/`` are concatenated and the ``top_n`` rows
    with the smallest ``Real_NDVI_RMSE`` are selected.  For each selected row
    the ``*_true_*.png`` and ``*_pred_*.png`` images named after the row's
    ``GlobalSampleID`` and ``Step`` are drawn next to each other and written to
    ``{savedirpath}/{model}_{instep}/{model}_{instep}_{rank}best.png``, where
    rank 1 is the lowest RMSE.

    Args:
        dirpath: Root directory holding one ``{model}/{instep}`` sub-directory
            per configuration.
        savedirpath: Root directory for the generated figures; the
            per-configuration sub-directories are created when missing.
        models: Model names used as directory names and file-name prefixes.
        insteps: Configuration labels used as directory names (e.g. ``"8Day"``);
            the lower-cased label is the image file-name suffix.
        top_n: Number of lowest-RMSE rows to export per configuration.

    Returns:
        None.  Configurations without any CSV file are reported and skipped.
        Errors raised while reading a PNG that is missing are not caught.
    """
    for model in models:
        for instep in insteps:
            result_dir = os.path.join(dirpath, model, instep)
            # Sorted so that the row order (and tie-breaking in nsmallest) does
            # not depend on the file-system listing order.
            csv_files = sorted(glob.glob(os.path.join(result_dir, "*.csv")))
            if not csv_files:
                # pd.concat raises on an empty list; skip this configuration.
                print(f"best_images: no CSV files in {result_dir}; skipping")
                continue

            results = pd.concat(
                [pd.read_csv(csv_file) for csv_file in csv_files],
                ignore_index=True,
            )
            best = results.nsmallest(top_n, "Real_NDVI_RMSE")

            out_dir = os.path.join(savedirpath, f"{model}_{instep}")
            os.makedirs(out_dir, exist_ok=True)  # savefig does not create directories

            for rank, (_, row) in enumerate(best.iterrows(), start=1):
                step = int(row["Step"])
                sample_num = int(row["GlobalSampleID"])
                rmse = row["Real_NDVI_RMSE"]

                stem = f"{model}_sample{sample_num:05d}_day{step}"
                suffix = f"{instep.lower()}.png"
                true_img = mpimg.imread(os.path.join(result_dir, f"{stem}_true_{suffix}"))
                pred_img = mpimg.imread(os.path.join(result_dir, f"{stem}_pred_{suffix}"))

                fig, (ax_true, ax_pred) = plt.subplots(1, 2, figsize=(8, 4))
                try:
                    fig.subplots_adjust(wspace=0.3, top=0.85, bottom=0.15)

                    ax_true.imshow(true_img)
                    ax_true.axis("off")
                    ax_true.set_title("Truth", fontsize=8)

                    ax_pred.imshow(pred_img)
                    ax_pred.axis("off")
                    ax_pred.set_title("Prediction", fontsize=8)

                    # The caption is anchored near the bottom edge (y=0.05).
                    fig.suptitle(
                        f"{model}-{instep} day{step} RMSE: {rmse:.4f}",
                        fontsize=10, y=0.05, va="bottom",
                    )

                    fig.savefig(os.path.join(out_dir, f"{model}_{instep}_{rank}best.png"))
                finally:
                    # Close this specific figure even if drawing or saving fails.
                    plt.close(fig)

# Write the lowest-RMSE truth/prediction comparison figures to disk.
best_images()

In [None]:
def make_histograms(
    model,
    instep,
    dirpath="/s/bach/b/class/cs535/cs535a/data/eval_results",
    savedirpath="histograms",
    num_steps=8,
):
    """Save histograms of ``Real_NDVI_RMSE`` for one model configuration.

    All CSV files in ``{dirpath}/{model}/{instep}/`` are concatenated.  One
    histogram is drawn over all rows (``*_total.png``) and one for each value
    of the ``Step`` column from 1 to ``num_steps`` (``*_step{n}.png``).  Every
    histogram carries a dashed red line at the mean RMSE of the plotted rows.
    Files are written to ``{savedirpath}/{model}_{instep}/``.

    Args:
        model: Model name; used as a directory name and in titles/file names.
        instep: Configuration label (e.g. ``"8Day"``); used the same way.
        dirpath: Root directory holding the evaluation CSV files.
        savedirpath: Root directory for the figures; a relative path is
            resolved against the current working directory.  The
            ``{model}_{instep}`` sub-directory is created when missing.
        num_steps: Highest ``Step`` value to plot individually.

    Returns:
        None.  When no CSV file is found nothing is written; steps without
        any row are skipped rather than saved as empty plots.
    """
    result_dir = os.path.join(dirpath, model, instep)
    csv_files = sorted(glob.glob(os.path.join(result_dir, "*.csv")))
    if not csv_files:
        # pd.concat raises on an empty list, so bail out early.
        print(f"make_histograms: no CSV files in {result_dir}; nothing to plot")
        return

    results = pd.concat(
        [pd.read_csv(csv_file) for csv_file in csv_files],
        ignore_index=True,
    )

    out_dir = os.path.join(savedirpath, f"{model}_{instep}")
    os.makedirs(out_dir, exist_ok=True)  # savefig does not create directories

    # Step 0 is the pseudo-step for "all rows together".
    for step in range(num_steps + 1):
        if step > 0:
            step_df = results[results["Step"] == step]
            title = f"Histogram of {model}-{instep} RMSE for Step {step}"
            save_file = os.path.join(out_dir, f"{model}_{instep}_step{step}.png")
        else:
            step_df = results
            title = f"Histogram of {model}-{instep} RMSE (Total)"
            save_file = os.path.join(out_dir, f"{model}_{instep}_total.png")

        if step_df.empty:
            # An empty subset would give a blank plot with a NaN mean line.
            print(f"make_histograms: no rows for step {step}; skipping")
            continue

        rmse = step_df["Real_NDVI_RMSE"]
        avg_value = rmse.mean()

        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            ax.hist(rmse, bins=20, edgecolor="black")
            ax.set_title(title)
            ax.set_xlabel("RMSE")
            ax.set_ylabel("Frequency")
            ax.grid(True, alpha=0.3)
            ax.axvline(avg_value, color="red", linestyle="--", linewidth=2,
                       label=f"Mean: {avg_value:.4f}")
            ax.legend()
            fig.savefig(save_file)
        finally:
            # Close this specific figure even if drawing or saving fails.
            plt.close(fig)

# Produce the RMSE histograms for the CNN model with the 8Day configuration.
make_histograms("CNN", "8Day")

In [30]:
def get_average_rmse(
    dirpath="/s/bach/b/class/cs535/cs535a/data/eval_results",
    outpath="/s/chopin/n/under/voleskyj/cs535_project/analysis/average_rmse.csv",
    models=("CNN", "LSTM", "LSTM_bidirectional"),
    insteps=("1Day", "4Day", "8Day"),
    num_steps=8,
):
    """Write a CSV table of mean ``Real_NDVI_RMSE`` per model, instep and step.

    For every (model, instep) pair, all CSV files in
    ``{dirpath}/{model}/{instep}/`` are read once and concatenated.  One output
    row is produced for step 0 (mean over all rows) and one for each ``Step``
    value from 1 to ``num_steps`` (mean over the rows with that ``Step``).  The
    output has the header ``model,instep,step,avg_rmse``.

    Args:
        dirpath: Root directory holding the evaluation CSV files.
        outpath: Destination CSV file; its parent directory is created when
            missing and an existing file is overwritten.
        models: Model names (sub-directory names under ``dirpath``).
        insteps: Configuration labels (sub-directory names under each model).
        num_steps: Highest ``Step`` value to report.

    Returns:
        None.  Configurations without any CSV file are reported and omitted
        from the table; a step without rows yields ``nan`` as its mean.
    """
    # step == 0 is the average RMSE across all steps.
    lines = ["model,instep,step,avg_rmse"]

    for model in models:
        for instep in insteps:
            result_dir = os.path.join(dirpath, model, instep)
            csv_files = sorted(glob.glob(os.path.join(result_dir, "*.csv")))
            if not csv_files:
                # pd.concat raises on an empty list; skip this configuration.
                print(f"get_average_rmse: no CSV files in {result_dir}; skipping")
                continue

            # Read the files once per configuration rather than once per step.
            results = pd.concat(
                [pd.read_csv(csv_file) for csv_file in csv_files],
                ignore_index=True,
            )
            rmse = results["Real_NDVI_RMSE"]

            for step in range(num_steps + 1):
                selected = rmse if step == 0 else rmse[results["Step"] == step]
                lines.append(f"{model},{instep},{step},{selected.mean()}")

    out_dir = os.path.dirname(outpath)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(outpath, "w") as f:
        f.write("\n".join(lines) + "\n")

# Write the per-model, per-step average RMSE table to average_rmse.csv.
get_average_rmse()

In [47]:
def _parse_training_log(log_lines):
    """Extract (timestamp, rmse) records from training-log lines, oldest first.

    A value following ``"MSE "`` is converted to RMSE with a square root, and
    the text following the first ``"at "`` on a line is parsed as a
    ``YYYY-MM-DD HH:MM:SS[.ffffff]`` timestamp.  The two kinds of match are
    collected independently and paired by order of appearance.

    Returns:
        DataFrame with columns ``timestamp`` and ``rmse``, sorted by timestamp
        and re-indexed 0..n-1.

    Raises:
        ValueError: if the number of MSE values and timestamps differ, or a
            timestamp does not match the expected format.
    """
    # Also accepts integers and scientific notation such as "4e-04"; a plain
    # \d+\.\d+ pattern would capture only "1.2" out of "1.2e-05".
    mse_pattern = re.compile(r'MSE (\d+(?:\.\d+)?(?:[eE][-+]?\d+)?)')
    time_pattern = re.compile(r'at (.+)$')

    rmse_values = []
    timestamps = []
    for line in log_lines:
        mse_match = mse_pattern.search(line)
        if mse_match:
            rmse_values.append(math.sqrt(float(mse_match.group(1))))

        time_match = time_pattern.search(line)
        if time_match:
            stamp = time_match.group(1).strip()
            try:
                timestamps.append(datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S.%f'))
            except ValueError:
                # NOTE(review): presumably the log was written with
                # str(datetime), which drops the fraction when it is zero.
                timestamps.append(datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S'))

    if len(rmse_values) != len(timestamps):
        raise ValueError(
            f"training log has {len(rmse_values)} MSE values but "
            f"{len(timestamps)} timestamps; cannot pair them"
        )

    history = pd.DataFrame({'timestamp': timestamps, 'rmse': rmse_values})
    # Reset the index: plotting a Series uses its index as x, so a permuted
    # index would leave every point at its pre-sort position.
    return history.sort_values('timestamp', kind='stable').reset_index(drop=True)


def graph_training_loss(
    dirpath="/s/bach/b/class/cs535/cs535a/data/new_models/",
    outpath="/s/chopin/n/under/voleskyj/cs535_project/analysis/training_loss.png",
    models=("CNN", "LSTM", "bidirectional_LSTM"),
    insteps=("1Day", "4Day", "8Day"),
):
    """Plot the training RMSE curve of every model configuration in one chart.

    For each (model, instep) pair the log
    ``{dirpath}/{instep.lower()}_{model}_model_log.txt`` is parsed and its
    RMSE values are plotted in timestamp order against their position
    (0, 1, 2, ...).  Each curve is labelled ``{model}-{instep}``.  The chart
    is saved to ``outpath``.

    Args:
        dirpath: Directory containing the ``*_model_log.txt`` files.
        outpath: Destination image file (overwritten when present).
        models: Model names as they appear in the log file names.
        insteps: Configuration labels; the lower-cased label is the file-name
            prefix.

    Returns:
        None.

    Raises:
        FileNotFoundError: if a log file is missing.
        ValueError: if a log cannot be parsed (see ``_parse_training_log``).
    """
    colors = plt.cm.tab10.colors
    markers = ['o', 'x', '+', '^']

    fig, ax = plt.subplots(figsize=(12, 8))
    try:
        curve_idx = 0
        for model in models:
            for instep in insteps:
                log_path = os.path.join(dirpath, f"{instep.lower()}_{model}_model_log.txt")
                with open(log_path, 'r') as f:
                    history = _parse_training_log(f.readlines())

                # A bare array puts the points at x = 0..n-1 in sorted order.
                ax.plot(history['rmse'].to_numpy(),
                        marker=markers[curve_idx % len(markers)],
                        linestyle='-',
                        markersize=4,
                        label=f"{model}-{instep}",
                        # Wrap around if there are more curves than colors.
                        color=colors[curve_idx % len(colors)])
                curve_idx += 1

        ax.set_title('Training RMSE Comparison Across Models', fontsize=14)
        ax.set_xlabel('Training Step', fontsize=12)
        ax.set_ylabel('RMSE', fontsize=12)
        ax.grid(True, alpha=0.3)
        ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
        fig.tight_layout()
        fig.savefig(outpath)
    finally:
        # Release the figure even when a log is missing or malformed.
        plt.close(fig)
    
# Render the training RMSE comparison chart and save it as training_loss.png.
graph_training_loss()