In [1]:
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import pandas as pd
import os

mpl.rcParams['figure.max_open_warning'] = 10


def get_dirs():
    dirs = os.listdir("in")
    page_dirs = []
    proc_dirs = []
    for dir in dirs:
        if dir.split("-")[1] == "pages":
            page_dirs.append(dir)
        elif dir.split("-")[1] == "processes":
            proc_dirs.append(dir)
    return page_dirs, proc_dirs


page_dirs, proc_dirs = get_dirs()

# read in the process data from files
proc_df = pd.concat(
    (
        pd.read_csv(
            f"{'in' if alg_dir == 'input' else 'out'}/{proc_dir}/{max_arrive_time_dir}/{max_execution_time_dir}/{'' if alg_dir == 'input' else alg_dir + '/'}process.csv")
        .assign(
            alg=alg_dir,
            processes=int(proc_dir.split('-')[0]),
            maxArriveTime=int(max_arrive_time_dir.split('-')[0]),
            maxExecutionTime=int(max_execution_time_dir.split('-')[0])
        )
        for proc_dir in proc_dirs
        for max_arrive_time_dir in os.listdir(f"in/{proc_dir}")
        for max_execution_time_dir in os.listdir(f"in/{proc_dir}/{max_arrive_time_dir}")
        for alg_dir in ['input'] + os.listdir(f"out/{proc_dir}/{max_arrive_time_dir}/{max_execution_time_dir}")
    ),
    ignore_index=True
)


# function to parse the list of ints from the string format "[%d %d...]"
def parse_list_column(column):
    return column.str.strip('[]').str.split(' ').apply(lambda x: list(map(int, x)) if x != [''] else [])


def get_times_used(df):
    series = pd.Series(df.loc[0, 'referencePattern']).value_counts(sort=False)
    return df['id'].apply(lambda x: series.get(x, default=0))


# read in the page data from files
page_df = pd.concat(
    (
        pd.read_csv(f"out/{page_dir}/{ref_dir}/{alg_dir}/page.csv")
        .assign(
            alg=alg_dir,
            pages=int(page_dir.split("-")[0])
        )
        .pipe(lambda df: df.assign(
            pageFaultAt=parse_list_column(df['pageFaultAt']),
            swappedOutAt=parse_list_column(df['swappedOutAt']),
            referencePattern=[pd.read_csv(f"in/{page_dir}/{ref_dir}/pageReferencePattern.csv",
                                          header=None).squeeze().to_list()] * len(df),
            pageFaults=df['pageFaultAt'].apply(len),
        )
              )
        .pipe(lambda df: df.assign(
            referencePatternLen=df['referencePattern'].apply(len),
            timesUsed=get_times_used(df))
              )
        for page_dir in page_dirs
        for ref_dir in os.listdir(f"in/{page_dir}")
        for alg_dir in os.listdir(f"out/{page_dir}/{ref_dir}")
    ),
    ignore_index=True
)



In [2]:
input_data = proc_df[proc_df['alg'] == 'input']

# create plots for all the different datasets
for (processes, maxArriveTime, maxExecutionTime), group in input_data.groupby(
        ['processes', 'maxArriveTime', 'maxExecutionTime']):
    fig = plt.figure(figsize=(24, 24))  # Increased figure height
    gs = fig.add_gridspec(3, 2, height_ratios=[1, 1.5, 1])  # Added 3rd row

    # arrive time histogram
    ax1 = fig.add_subplot(gs[0, 0])
    ax1.hist(group['arriveTime'], bins=np.arange(0, maxArriveTime + 2), color='blue', alpha=0.7, align='left')
    ax1.set_title(
        f'Arrive Time Histogram\nProcesses: {processes}, Max Arrive Time: {maxArriveTime}, Max Execution Time: {maxExecutionTime}')

    # execution time histogram
    ax2 = fig.add_subplot(gs[0, 1])
    ax2.hist(group['executionTime'], bins=np.arange(0, maxExecutionTime + 2), color='green', alpha=0.7, align='left')
    ax2.set_title(
        f'Execution Time Histogram\nProcesses: {processes}, Max Arrive Time: {maxArriveTime}, Max Execution Time: {maxExecutionTime}')

    # plot out the average wait time for each algorithm
    filtered_group = proc_df[(proc_df['alg'] != 'input') &
                             (proc_df['processes'] == processes) &
                             (proc_df['maxArriveTime'] == maxArriveTime) &
                             (proc_df['maxExecutionTime'] == maxExecutionTime)]
    avg_wait_time = filtered_group.groupby('alg')['waitTime'].mean().reset_index()

    ax3 = fig.add_subplot(gs[1, :])
    colors = mpl.colormaps['plasma'](np.linspace(0, 1, len(avg_wait_time)))
    ax3.bar(avg_wait_time['alg'], avg_wait_time['waitTime'], color=colors, alpha=0.7)
    ax3.set_title(
        f'Average Wait Time for Each Algorithm\nProcesses: {processes}, Max Arrive Time: {maxArriveTime}, Max Execution Time: {maxExecutionTime}')
    ax3.set_xticklabels(avg_wait_time['alg'], rotation=45)

    # plot out the wait time as a function of arrive time for LCFS and PreemptiveLCFS
    ax4 = fig.add_subplot(gs[2, 0])
    lcfs_group = filtered_group[filtered_group['alg'].isin(['LCFS', 'PreemptiveLCFS'])]
    for idx, alg in enumerate(['LCFS', 'PreemptiveLCFS']):
        subset = lcfs_group[lcfs_group['alg'] == alg]
        ax4.scatter(subset['arriveTime'], subset['waitTime'], color=colors[idx], label=alg, alpha=0.7)
    ax4.set_title('Wait Time vs Arrive Time (LCFS Variants)')
    ax4.set_xlabel('Arrive Time')
    ax4.set_ylabel('Wait Time')
    ax4.legend()

    # plot out the wait time as a function of arrive time for SJF and PreemptiveSJF
    ax5 = fig.add_subplot(gs[2, 1])
    sjf_group = filtered_group[filtered_group['alg'].isin(['SJF', 'PreemptiveSJF'])]
    for idx, alg in enumerate(['SJF', 'PreemptiveSJF']):
        subset = sjf_group[sjf_group['alg'] == alg]
        ax5.scatter(subset['executionTime'], subset['waitTime'], color=colors[idx + 2], label=alg, alpha=0.7)
    ax5.set_title('Wait Time vs Execution Time (SJF Variants)')
    ax5.set_xlabel('Execution Time')
    ax5.set_ylabel('Wait Time')
    ax5.legend()

    # save plot to file
    save_dir = (
        f"out/{processes}-processes/"
        f"{maxArriveTime}-max-arrive-time/"
        f"{maxExecutionTime}-max-execution-time"
    )
    plt.savefig(f"{save_dir}/plot.png", bbox_inches='tight')
    plt.close()


  ax3.set_xticklabels(avg_wait_time['alg'], rotation=45)
  ax3.set_xticklabels(avg_wait_time['alg'], rotation=45)
  ax3.set_xticklabels(avg_wait_time['alg'], rotation=45)
  ax3.set_xticklabels(avg_wait_time['alg'], rotation=45)
  ax3.set_xticklabels(avg_wait_time['alg'], rotation=45)
  ax3.set_xticklabels(avg_wait_time['alg'], rotation=45)
  ax3.set_xticklabels(avg_wait_time['alg'], rotation=45)
  ax3.set_xticklabels(avg_wait_time['alg'], rotation=45)
  ax3.set_xticklabels(avg_wait_time['alg'], rotation=45)
  ax3.set_xticklabels(avg_wait_time['alg'], rotation=45)
  ax3.set_xticklabels(avg_wait_time['alg'], rotation=45)
  ax3.set_xticklabels(avg_wait_time['alg'], rotation=45)
  ax3.set_xticklabels(avg_wait_time['alg'], rotation=45)
  ax3.set_xticklabels(avg_wait_time['alg'], rotation=45)
  ax3.set_xticklabels(avg_wait_time['alg'], rotation=45)
  ax3.set_xticklabels(avg_wait_time['alg'], rotation=45)
  ax3.set_xticklabels(avg_wait_time['alg'], rotation=45)
  ax3.set_xticklabels(avg_wait_

In [3]:
# make plots for all the unique datasets
for (pages, referencePatternLen), group in page_df.groupby(['pages', 'referencePatternLen']):
    fig = plt.figure(figsize=(24, 24))  # Increased height
    gs = fig.add_gridspec(2, 2, height_ratios=[1, 1])  # 2 rows

    # plot out how the reference pattern histogram
    ax1 = fig.add_subplot(gs[0, :])
    reference_pattern = group.iloc[0]['referencePattern']
    ax1.hist(
        reference_pattern,
        bins=np.arange(0, max(reference_pattern) + 2),
        color='blue', alpha=0.7, align='left'
    )
    ax1.set_title(f'Reference Pattern Histogram\nPages: {pages}, Length: {referencePatternLen}')
    ax1.set_xlabel('Page Number')
    ax1.set_ylabel('Frequency')

    # plot the average amount of page faults by algorithm
    ax2 = fig.add_subplot(gs[1, 0])
    page_faults = group.groupby('alg')['pageFaults']
    colors = plt.colormaps['plasma'](np.linspace(0, 1, len(page_faults)))

    ax2.bar(
        page_faults.mean().index,
        page_faults.mean().values,
        color=colors,
        alpha=0.7,
        yerr=page_faults.std()
    )
    ax2.set_title(f'Avg Page Faults by Algorithm\nPages: {pages}')
    ax2.set_xticklabels(page_faults.mean().index, rotation=45)
    ax2.set_ylabel('Average Page Faults')

    # plot out the relationship between page use frequency and the amount of page faults in relation to algorithms
    ax3 = fig.add_subplot(gs[1, 1])
    algorithms = page_faults.mean().index.tolist()

    for idx, alg in enumerate(algorithms):
        alg_data = group[group['alg'] == alg]
        ax3.scatter(
            alg_data['timesUsed'],
            alg_data['pageFaults'],
            color=colors[idx],
            label=alg,
            alpha=0.7
        )
    ax3.set_title(f'Page Faults vs Times Used\nPages: {pages}')
    ax3.set_xlabel('Times Used')
    ax3.set_ylabel('Page Faults')
    ax3.legend()

    # save plot to file
    save_dir = f"out/{pages}-pages/{referencePatternLen}-refs"
    plt.savefig(f"{save_dir}/plot.png", bbox_inches='tight')
    plt.close()

  ax2.set_xticklabels(page_faults.mean().index, rotation=45)
  ax2.set_xticklabels(page_faults.mean().index, rotation=45)
  ax2.set_xticklabels(page_faults.mean().index, rotation=45)
  ax2.set_xticklabels(page_faults.mean().index, rotation=45)
  ax2.set_xticklabels(page_faults.mean().index, rotation=45)
  ax2.set_xticklabels(page_faults.mean().index, rotation=45)
  ax2.set_xticklabels(page_faults.mean().index, rotation=45)
  ax2.set_xticklabels(page_faults.mean().index, rotation=45)
  ax2.set_xticklabels(page_faults.mean().index, rotation=45)
