In [1]:
import pandas as pd
import os
import getpass

# Login name that save_latex_table_content() compares against getpass.getuser();
# tables are only written to Dropbox when the two match (i.e. on my machine).
export_username = "ts"  # Only save tables to dropbox on my machine

import pandas as pd


def create_comparison_table(dataframes, estimator_names, metrics=('RMSE', 'MAE', 'rMAE'), decimal_places=3):
    """Summarise per-estimator result frames into one comparison table.

    Args:
        dataframes: Iterable of result DataFrames, one per estimator. Each must
            contain a column for every entry of ``metrics`` plus the columns
            ``fit_time``, ``predict_time`` and ``optimize_time``.
        estimator_names: Iterable of row labels, paired positionally with
            ``dataframes``.
        metrics: Names of the error-metric columns to summarise. Any iterable
            of column names is accepted (list, tuple, ...).
        decimal_places: Number of decimals the resulting table is rounded to.

    Returns:
        pandas.DataFrame with one row per estimator and two-level columns:
        ``(metric, 'min' | 'mean' | 'median' | 'max')`` for every metric,
        followed by ``('fit time', 'mean')``, ``('predict time', 'mean')`` and
        ``('optimize time', 'mean')``.

    Raises:
        KeyError: If a frame lacks one of the required columns.
    """
    # Materialise once: allows tuples/generators and avoids a shared mutable default.
    metrics = list(metrics)
    stat_names = ['min', 'mean', 'median', 'max']
    time_columns = ['fit_time', 'predict_time', 'optimize_time']

    # The column labels are built in exactly the order in which the row values
    # are computed below, so labels and values cannot drift apart.
    column_tuples = [(metric, stat) for metric in metrics for stat in stat_names]
    column_tuples += [(time_column.replace('_', ' '), 'mean') for time_column in time_columns]

    row_labels = []
    rows = []
    for df, name in zip(dataframes, estimator_names):
        row = []
        for metric in metrics:
            column = df[metric]
            # Calls column.min(), column.mean(), column.median(), column.max().
            row.extend(getattr(column, stat)() for stat in stat_names)
        # Only the mean is reported for the timing columns.
        row.extend(df[time_column].mean() for time_column in time_columns)
        row_labels.append(name)
        rows.append(row)

    comparison_table = pd.DataFrame(
        rows,
        index=row_labels,
        columns=pd.MultiIndex.from_tuples(column_tuples),
    )
    return comparison_table.round(decimal_places)


def create_latex_table_content(df, caption, label):
    """Render a comparison table as the source of a LaTeX ``table`` float.

    Args:
        df: DataFrame with two-level columns as produced by
            ``create_comparison_table``: metric columns first, followed by the
            three timing columns ``fit time``, ``predict time`` and
            ``optimize time``. The index values are used verbatim as row
            labels, so they may contain LaTeX markup.
        caption: Text placed inside ``\\caption{...}`` (inserted verbatim).
        label: Text placed inside ``\\label{...}`` (inserted verbatim).

    Returns:
        str: The LaTeX source, lines joined with ``\\n``. The emitted markup
        uses ``\\num``, ``\\cellcolor``, ``\\resizebox`` and the ``[H]`` float
        specifier, so the document must load packages that provide them.
    """
    time_columns = ('fit time', 'predict time', 'optimize time')
    # Kept in a variable so the f-string below never nests the quote character
    # it is delimited with (that is a SyntaxError before Python 3.12).
    time_group_title = 'Mean Execution Time (s)'

    table_content = [
        r'\begin{table}[H]',
        r'\centering',
        f'\\caption{{{caption}}}',
        f'\\label{{{label}}}',
        r'\resizebox{\textwidth}{!}{',
    ]

    # One leading column for the estimator name plus one per data column.
    num_cols = len(df.columns) + 1
    table_format = '|l|' + 'c|' * (num_cols - 1)
    table_content.append(f'\\begin{{tabular}}{{{table_format}}}')
    table_content.append(r'\hline')

    # Derive the metric groups and their sub-headers from the frame itself so
    # that the two header rows always agree with the data columns.
    metric_spans = {}
    sub_headers = ['']
    top_level = df.columns.get_level_values(0)
    second_level = df.columns.get_level_values(1)
    for metric, stat in zip(top_level, second_level):
        if metric in time_columns:
            continue
        metric_spans[metric] = metric_spans.get(metric, 0) + 1
        sub_headers.append(f'{stat}')
    sub_headers.extend([r'\texttt{fit()}', r'\texttt{predict()}', r'\texttt{optimize()}'])

    main_headers = ['Estimator']
    main_headers.extend(
        f'\\multicolumn{{{span}}}{{c|}}{{{metric}}}' for metric, span in metric_spans.items()
    )
    main_headers.append(f'\\multicolumn{{3}}{{c|}}{{{time_group_title}}}')
    table_content.append(' & '.join(main_headers) + r' \\')

    # Horizontal line between the metric names and the sub-column names.
    table_content.append(r'\cline{2-' + str(num_cols) + '}')
    table_content.append(' & '.join(sub_headers) + r' \\')
    table_content.append(r'\hline')

    # Column-wise minima; a column is only highlighted when its values are not
    # all identical (otherwise every cell would be marked as "best").
    min_values = df.min()
    highlight_min = {
        key: not df[key].eq(min_values[key]).all()
        for key in df.columns
    }

    for index, row in df.iterrows():
        row_content = [f"{index}"]
        for key, value in row.items():
            if pd.isna(value):
                cell_content = ''
            elif isinstance(value, (int, float)):
                if value == 0:
                    cell_content = '0'
                elif float(value).is_integer():
                    # No thousands separator here: siunitx treats a comma in
                    # the argument of \num as a decimal marker by default.
                    cell_content = f"\\num{{{int(value)}}}"
                else:
                    cell_content = f"\\num{{{value:.3f}}}"

                # Highlight the minimum value of the column.
                if value == min_values[key] and highlight_min[key]:
                    cell_content = f"\\cellcolor{{gray!25}}\\textbf{{{cell_content}}}"
            else:
                cell_content = f"{value}"
            row_content.append(cell_content)
        table_content.append(' & '.join(row_content) + r' \\')

        # Horizontal line after each estimator.
        table_content.append(r'\hline')

    table_content.append(r'\end{tabular}')
    table_content.append(r'}')
    table_content.append(r'\end{table}')

    return '\n'.join(table_content)



def save_latex_table_content(
    content,
    filename,
    output_dir="/Users/ts/Library/CloudStorage/Dropbox/Apps/Overleaf/Dissertation Oxford/Tables",
):
    """
    Save LaTeX table content to a file, but only if on the specified machine.

    The current login name (``getpass.getuser()``) is compared with the
    module-level ``export_username``; on any other account nothing is written
    and a message is printed instead.

    Args:
    content (str): LaTeX table content to save
    filename (str): Name of the file to save (without extension)
    output_dir (str): Directory the ``.tex`` file is written to. Defaults to
        the Overleaf "Tables" folder used so far; the directory is not created
        if it does not exist.

    Returns:
    None
    """
    if getpass.getuser() != export_username:
        print("Table content not saved (not on the specified machine)")
        return

    full_filename = os.path.join(output_dir, filename + ".tex")

    # Explicit encoding so the written file does not depend on the platform's
    # default locale encoding.
    with open(full_filename, 'w', encoding='utf-8') as file:
        file.write(content)

    print(f"Table content saved to {full_filename}")

## Comparison of 64 and 32 bit float precision for MC-NNM 56

In [2]:
# Load the stored MC-NNM 56 results for the 64-bit and the 32-bit run.
MCNNM_56_64 = pd.read_parquet('../results/MCNNM_56_results.parquet')
MCNNM_56_32 = pd.read_parquet('../results/MCNNM-56_32_results.parquet')
# Display both shapes (32-bit first, then 64-bit) to compare their sizes.
MCNNM_56_32.shape, MCNNM_56_64.shape

((2006, 11), (363, 11))

In [3]:
# Keep only the first 363 rows of the 32-bit results so that both frames have
# the same number of rows (the 64-bit training was stopped after 363 rows).
MCNNM_56_32_trunc = MCNNM_56_32.iloc[:363]
# Display both shapes to confirm they now match.
MCNNM_56_32_trunc.shape, MCNNM_56_64.shape

((363, 11), (363, 11))

In [4]:
# Summarise the truncated 32-bit run and the 64-bit run side by side, using
# plain-text row labels for on-screen inspection.
dataframes_32_64 = [MCNNM_56_32_trunc, MCNNM_56_64]
estimator_names_32_64 = ['32-bit', '64-bit']
table_32_64 = create_comparison_table(dataframes_32_64, estimator_names_32_64)
print(table_32_64)

          RMSE                             MAE                           rMAE  \
           min    mean  median      max    min    mean  median     max    min   
32-bit  14.245  33.065  30.275  105.003  8.242  18.739  17.650  45.308  0.447   
64-bit  13.322  33.033  30.578  105.003  7.547  18.721  17.585  46.387  0.435   

                            fit time predict time optimize time  
         mean median    max     mean         mean          mean  
32-bit  1.750  1.545  4.957      0.0        5.031         2.245  
64-bit  1.754  1.565  4.957      0.0        5.041        15.646  


The 64-bit precision model performs slightly better than the 32-bit model on most summary statistics, with lower minimum and mean RMSE and MAE values. The differences are small and not uniform, however: the median RMSE, the maximum MAE, and the mean and median rMAE are marginally higher for the 64-bit model. The mean prediction time is also slightly longer for the 64-bit model (5.041 s vs. 5.031 s), and the gap widens sharply during the optimization phase (15.646 s vs. 2.245 s). This is expected, as 64-bit floats require twice the memory of 32-bit floats and are slower to compute with.

In [6]:
# Rebuild the 32-bit vs. 64-bit comparison table, this time with LaTeX
# \hyperref row labels. Note that this rebinds table_32_64, which the earlier
# cell created with plain-text labels.
dataframes = [MCNNM_56_32_trunc, MCNNM_56_64]
estimator_names = [r'\hyperref[est:MC-NNM]{MC-NNM} (32)', r'\hyperref[est:MC-NNM]{MC-NNM} (64)']
table_32_64 = create_comparison_table(dataframes, estimator_names, decimal_places=3)

# Generate the LaTeX source of the table (caption and label are inserted verbatim)
latex_32_64 = create_latex_table_content(
    table_32_64,
    caption="Comparison of 32-bit and 64-bit MC-NNM Estimators On The First Year of Data",
    label="tab:32-64"
)

# Write the LaTeX source to mcnnm_32_64_comparison.tex (only on the export machine)
save_latex_table_content(latex_32_64, "mcnnm_32_64_comparison")


Table content saved to /Users/ts/Library/CloudStorage/Dropbox/Apps/Overleaf/Dissertation Oxford/Tables/mcnnm_32_64_comparison.tex


## Results for 56 day sliding window

In [7]:
# Load the stored results of every estimator for the 56-day sliding window.
# MCNNM_56 reads the same file as MCNNM_56_32 above (the 32-bit MC-NNM run).
MCNNM_56 = pd.read_parquet('../results/MCNNM-56_32_results.parquet')
MCNNM_TSR_56 = pd.read_parquet('../results/MCNNM-TSR-56_results.parquet')
Elasticnet_56 = pd.read_parquet('../results/ElasticNet-56_results.parquet')
LASSO_56 = pd.read_parquet('../results/LASSO-56_results.parquet')
LEAR_56 = pd.read_parquet('../results/LEAR-56_results.parquet')
LEAR_Panel_56 = pd.read_parquet('../results/LEAR-Panel-56_results.parquet')
# Display all shapes to check that they agree.
# NOTE: in the recorded output only the row counts match; the first three
# frames have 11 columns and the last three have 12.
MCNNM_56.shape, MCNNM_TSR_56.shape, Elasticnet_56.shape, LASSO_56.shape, LEAR_56.shape, LEAR_Panel_56.shape

((2006, 11), (2006, 11), (2006, 11), (2006, 12), (2006, 12), (2006, 12))

In [10]:
# Build the 56-day comparison table (rows labelled with LaTeX \hyperref links
# to each estimator), render it as LaTeX and write it to 56_day_comparison.tex.
dataframes_56 = [MCNNM_56, MCNNM_TSR_56, Elasticnet_56, LASSO_56, LEAR_56, LEAR_Panel_56]
estimator_names_56 = [r'\hyperref[est:MC-NNM]{MC-NNM}', r'\hyperref[est:MC-NNM-TSR]{MC-NNM-TSR}', r'\hyperref[est:EN]{ElasticNet}', r'\hyperref[est:LASSO]{LASSO}', r'\hyperref[est:LEAR]{LEAR}', r'\hyperref[est:LEAR-Panel]{LEAR-Panel}']
table_56 = create_comparison_table(dataframes_56, estimator_names_56)
latex_56 = create_latex_table_content(
    table_56,
    caption="Comparison of Estimators on 56 Day Sliding Window",
    label="tab:56"
)
save_latex_table_content(latex_56, "56_day_comparison")

Table content saved to /Users/ts/Library/CloudStorage/Dropbox/Apps/Overleaf/Dissertation Oxford/Tables/56_day_comparison.tex


## Results for 84 day sliding window

In [11]:
# Load the stored results of every estimator for the 84-day sliding window.
MCNNM_84 = pd.read_parquet('../results/MCNNM-84_32_results.parquet')
MCNNM_TSR_84 = pd.read_parquet('../results/MCNNM-TSR-84_results.parquet')
Elasticnet_84 = pd.read_parquet('../results/ElasticNet-84_results.parquet')
LASSO_84 = pd.read_parquet('../results/LASSO-84_results.parquet')
LEAR_84 = pd.read_parquet('../results/LEAR-84_results.parquet')
LEAR_Panel_84 = pd.read_parquet('../results/LEAR-Panel-84_results.parquet')
# Display all shapes to check that they agree.
# NOTE: in the recorded output only the row counts match; the first three
# frames have 11 columns and the last three have 12.
MCNNM_84.shape, MCNNM_TSR_84.shape, Elasticnet_84.shape, LASSO_84.shape, LEAR_84.shape, LEAR_Panel_84.shape

((2006, 11), (2006, 11), (2006, 11), (2006, 12), (2006, 12), (2006, 12))

In [12]:
# Build the 84-day comparison table (rows labelled with LaTeX \hyperref links
# to each estimator), render it as LaTeX and write it to 84_day_comparison.tex.
dataframes_84 = [MCNNM_84, MCNNM_TSR_84, Elasticnet_84, LASSO_84, LEAR_84, LEAR_Panel_84]
estimator_names_84 = [r'\hyperref[est:MC-NNM]{MC-NNM}', r'\hyperref[est:MC-NNM-TSR]{MC-NNM-TSR}', r'\hyperref[est:EN]{ElasticNet}', r'\hyperref[est:LASSO]{LASSO}', r'\hyperref[est:LEAR]{LEAR}', r'\hyperref[est:LEAR-Panel]{LEAR-Panel}']
table_84 = create_comparison_table(dataframes_84, estimator_names_84)
latex_84 = create_latex_table_content(
    table_84,
    caption="Comparison of Estimators on 84 Day Sliding Window",
    label="tab:84"
)
save_latex_table_content(latex_84, "84_day_comparison")

Table content saved to /Users/ts/Library/CloudStorage/Dropbox/Apps/Overleaf/Dissertation Oxford/Tables/84_day_comparison.tex


In [13]:
# Load the stored results of every estimator for the 112-day sliding window.
# NOTE: the recorded run of this cell failed with FileNotFoundError because
# '../results/MCNNM-TSR-112_results.parquet' did not exist, so none of the
# frames from that line onwards were loaded.
MCNNM_112 = pd.read_parquet('../results/MCNNM-112_32_results.parquet')
MCNNM_TSR_112 = pd.read_parquet('../results/MCNNM-TSR-112_results.parquet')
Elasticnet_112 = pd.read_parquet('../results/ElasticNet-112_results.parquet')
LASSO_112 = pd.read_parquet('../results/LASSO-112_results.parquet')
LEAR_112 = pd.read_parquet('../results/LEAR-112_results.parquet')
LEAR_Panel_112 = pd.read_parquet('../results/LEAR-Panel-112_results.parquet')
# Display all shapes to check that they agree.
MCNNM_112.shape, MCNNM_TSR_112.shape, Elasticnet_112.shape, LASSO_112.shape, LEAR_112.shape, LEAR_Panel_112.shape

FileNotFoundError: [Errno 2] No such file or directory: '../results/MCNNM-TSR-112_results.parquet'

In [None]:
# Build the 112-day comparison table (rows labelled with LaTeX \hyperref links
# to each estimator), render it as LaTeX and write it to 112_day_comparison.tex.
# Requires all six *_112 frames from the loading cell to be defined.
dataframes_112 = [MCNNM_112, MCNNM_TSR_112, Elasticnet_112, LASSO_112, LEAR_112, LEAR_Panel_112]
estimator_names_112 = [r'\hyperref[est:MC-NNM]{MC-NNM}', r'\hyperref[est:MC-NNM-TSR]{MC-NNM-TSR}', r'\hyperref[est:EN]{ElasticNet}', r'\hyperref[est:LASSO]{LASSO}', r'\hyperref[est:LEAR]{LEAR}', r'\hyperref[est:LEAR-Panel]{LEAR-Panel}']
table_112 = create_comparison_table(dataframes_112, estimator_names_112)
latex_112 = create_latex_table_content(
    table_112,
    caption="Comparison of Estimators on 112 Day Sliding Window",
    label="tab:112"
)
save_latex_table_content(latex_112, "112_day_comparison")