In [None]:
# Plot graphs for each individual behavior
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
import glob
import re
from matplotlib.backends.backend_pdf import PdfPages

# ---------------------------------------------------------------------------
# Per-file loss plots + per-epoch median losses.
#
# Reads every CSV matched by the glob below (each must provide the columns
# 'epoch', 'continuous_loss' and 'discrete_loss'), writes one continuous-loss
# plot per file into a multi-page PDF, and then computes the per-epoch median
# of both losses across all files.
#
# Names left in the notebook namespace for later cells: model_name,
# dataset_name, save_title, epochs, median_continuous_loss,
# median_discrete_loss (plus the raw per-epoch dictionaries).
# ---------------------------------------------------------------------------
model_name = 'Llama2'
dataset_name = 'AdvBench'
# Specify the path to your CSV files.
# sorted() makes the page order of the PDF reproducible; glob order is arbitrary.
csv_pattern = f"./{model_name}/{dataset_name}/output_EGD_*200_steps).csv"
file_paths = sorted(glob.glob(csv_pattern))
if not file_paths:
    # Fail with an actionable message instead of an IndexError on file_paths[0].
    raise FileNotFoundError(f"No CSV files match the pattern {csv_pattern!r}")

# Set file's name to save plots (derived from the optimizer tag in the first path)
save_title = "Undefined"
if "EGD" in file_paths[0]:
    save_title = "Exponentiated Gradient Descent"
elif "PGD" in file_paths[0]:
    save_title = "Projected Gradient Descent"

# Initialize dictionaries to store the data, keyed by epoch
epoch_data = {}
continuous_loss_data = {}
discrete_loss_data = {}

# Regular expression to find the substring between `)_` and `.csv`
pattern = r'\)_([^\.]+)\.csv'


def _median_or_nan(values):
    """Return the median of `values`, or NaN when the list is empty."""
    return np.median(values) if values else np.nan


pdf_title = f'./{model_name}/{dataset_name}/{save_title} (Individual Behaviors).pdf'
# Initialize the PdfPages object; every figure saved below becomes one page
with PdfPages(pdf_title) as pdf:
    # Loop through each file and extract the necessary data
    for file_path in file_paths:
        df = pd.read_csv(file_path)
        epochs = df['epoch']
        # NaN values are deliberately kept as NaN: replacing them with 0 would
        # count them as loss-0 samples and pull the per-epoch medians
        # towards zero. matplotlib simply leaves a gap where a value is NaN.
        continuous_loss = df['continuous_loss']
        discrete_loss = df['discrete_loss']

        # Derive the plot title from the file name.
        match = re.search(pattern, file_path)
        if match:
            # Text between `)_` and `.csv` is already a clean label.
            label = match.group(1)
        else:
            # Otherwise use the path without its ".csv" suffix, starting at the
            # first "(" when there is one.
            stem = file_path[:-4]
            paren_at = stem.find('(')
            label = stem[paren_at:] if paren_at != -1 else stem

        for epoch, closs, dloss in zip(epochs, continuous_loss, discrete_loss):
            epoch_data.setdefault(epoch, []).append(epoch)
            closs_bucket = continuous_loss_data.setdefault(epoch, [])
            dloss_bucket = discrete_loss_data.setdefault(epoch, [])
            # Only valid samples take part in the median.
            if not pd.isna(closs):
                closs_bucket.append(closs)
            if not pd.isna(dloss):
                dloss_bucket.append(dloss)

        # Plot the results
        fig, ax = plt.subplots(figsize=(6, 4))
        ax.plot(epochs, continuous_loss, label='Continuous Loss', color='blue')
        # ax.plot(epochs, discrete_loss, label='Discrete Loss', color='red')
        ax.set_xlabel('Epoch')
        ax.set_ylabel('Loss')
        ax.set_title(label)
        ax.legend()
        ax.grid(True)

        # Save the current plot to the PDF file
        pdf.savefig(fig)
        plt.show()
        plt.close(fig)  # Close the current figure to release memory

# Calculate the median losses for each epoch (NaN if an epoch has no valid sample)
epochs = sorted(epoch_data.keys())
median_continuous_loss = [_median_or_nan(continuous_loss_data[epoch]) for epoch in epochs]
median_discrete_loss = [_median_or_nan(discrete_loss_data[epoch]) for epoch in epochs]

# nanmax / nanmin so a single epoch without valid samples does not mask the range
print("Continuous Loss:", median_continuous_loss)
print("max:", np.nanmax(median_continuous_loss), end=", ")
print("min:", np.nanmin(median_continuous_loss))
print("Discrete Loss:", median_discrete_loss)
print("max:", np.nanmax(median_discrete_loss), end=", ")
print("min:", np.nanmin(median_discrete_loss))


In [None]:
# Destination of the median-loss figure
pdf_title = f'./{model_name}/{dataset_name}/{save_title} (Median).pdf'

# Draw the per-epoch median curve with the explicit figure/axes interface
fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(epochs, median_continuous_loss, label='Median Loss', color='blue')
# ax.plot(epochs, median_discrete_loss, label='Median Discrete Loss', color='red')
ax.set_xlabel('No. of Epoch')
ax.set_ylabel('Cross-Entropy Loss')
ax.set_title(label=f'Model: {model_name}, Dataset: {dataset_name}')
ax.legend()
ax.grid(True)

# Write the figure to disk as a tightly cropped PDF, then display it
fig.savefig(pdf_title, format="pdf", bbox_inches="tight")
plt.show()

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt

# ---------------------------------------------------------------------------
# Mean of the max_1 .. max_N columns per epoch, averaged over every CSV file
# in the results directory, drawn as one curve per column and saved as a PDF.
# Relies on model_name, dataset_name and save_title from an earlier cell.
# ---------------------------------------------------------------------------

# Set font properties for IEEE style (note: this changes global matplotlib state)
plt.rcParams.update({
    'font.family': 'serif',          # Use serif font
    'axes.labelsize': 12,           # Axis label font size
    'xtick.labelsize': 12,           # X-axis tick label font size
    'ytick.labelsize': 12,           # Y-axis tick label font size
    'legend.fontsize': 12,           # Legend font size
    'figure.titlesize': 12,         # Figure title font size (optional)
    'text.usetex': False            # Set to True if using LaTeX for text rendering
})
# Directory containing the CSV files
directory = f'./{model_name}/{dataset_name}/'

# Columns to read: 'epoch' plus max_1 .. max_NUM_MAX_COLUMNS
NUM_MAX_COLUMNS = 20
max_columns = [f'max_{i}' for i in range(1, NUM_MAX_COLUMNS + 1)]
columns_to_read = ['epoch'] + max_columns

# List to hold dataframes from each file
dataframes = []

# Read each file and extract necessary columns.
# NOTE(review): every *.csv in the directory is read, while the output file name
# uses the method-specific save_title; confirm the directory only holds CSVs of
# that method. A CSV lacking any of the requested columns makes read_csv raise.
for filename in sorted(os.listdir(directory)):  # sorted for a reproducible order
    if filename.endswith(".csv"):
        file_path = os.path.join(directory, filename)
        df = pd.read_csv(file_path, usecols=columns_to_read)
        dataframes.append(df)

if not dataframes:
    # pd.concat([]) would only report "No objects to concatenate".
    raise FileNotFoundError(f"No CSV files found in {directory!r}")

# Concatenate all dataframes
all_data = pd.concat(dataframes)

# Group by 'epoch' and calculate the mean for each max column across all files
mean_values = all_data.groupby('epoch').mean()

# Plot each max column as a separate curve
fig, ax = plt.subplots(figsize=(12, 8))
for column in max_columns:
    ax.plot(mean_values.index, mean_values[column], marker='.', label=column)

# os.path.join avoids the doubled "/" that plain concatenation produced,
# because `directory` already ends with a separator.
pdf_title = os.path.join(directory, f'{save_title} (Mean Max Values).pdf')

# Adding labels and title
ax.set_xlabel("Epoch")
ax.set_ylabel("Mean of max values")
ax.set_title("Mean of max_1 to max_20 Across Epochs")
# ax.legend(title="Max Columns")
ax.grid(True)
fig.savefig(pdf_title, format="pdf", bbox_inches="tight")
plt.show()