In [1]:
import os 
import re
import numpy as np
import pandas as pd


In [2]:
# Directory whose entries are scanned for accuracy reports.
path = 'output/HAR_outputdata/'

# Each pattern captures the brace-delimited span that follows its label.
# `.*?` is non-greedy, so the capture stops at the first closing brace.
train_pattern = r'Train Accuracy\s+(\{.*?\})'
test_pattern = r'Test Accuracy\s+(\{.*?\})'

# Accumulator: one record (dict) per parsed file.
result_list = list()

In [3]:
# Collect the train/test accuracy dicts reported in every file under `path`.
# Start from an empty list so re-running this cell cannot append duplicate rows.
result_list = []

for f in os.listdir(path):

    file_path = os.path.join(path, f)

    # Skip anything that is not a regular file; open() would fail on a directory.
    if not os.path.isfile(file_path):
        continue

    with open(file_path, 'r') as file:
        data = file.read()

    # re.DOTALL lets the captured span run across line breaks.
    train_match = re.search(train_pattern, data, re.DOTALL)
    test_match = re.search(test_pattern, data, re.DOTALL)

    # Both blocks are required. Without this guard a file lacking one of them
    # would either raise NameError (first file) or silently record the metrics
    # parsed from the previous file under the wrong filename.
    if train_match is None or test_match is None:
        print("Skipping {}: missing 'Train Accuracy' or 'Test Accuracy' block".format(f))
        continue

    # NOTE(review): eval() executes whatever text the regex captured. That is only
    # acceptable while these files are trusted; if the captured text is always a
    # plain dict literal, prefer ast.literal_eval -- confirm against the writer.
    train_accuracy = eval(train_match.group(1))
    test_accuracy = eval(test_match.group(1))

    result_list.append({
            'filename': f,
            'train_accuracy': train_accuracy,
            'test_accuracy': test_accuracy
        })

In [4]:
# Spot-check the parsed train metrics of the first collected record.
first_record = result_list[0]
first_record['train_accuracy']

{'MSE': 9.683530115110246e-09,
 'MAE': 3.476322677276525e-05,
 'RSquared': 0.578089543994176}

In [5]:
# Flatten the per-file metric dicts into scalar columns, for both splits:
#   <split>_RMSE = sqrt(MSE) * 1e4
#   <split>_MAE  = MAE * 1e5
#   <split>_R2   = RSquared (unscaled)
df = pd.DataFrame(result_list, columns=['filename', 'train_accuracy', 'test_accuracy'])

for split in ('train', 'test'):
    metrics = df[split + '_accuracy']
    df[split + '_RMSE'] = metrics.apply(lambda m: np.sqrt(m['MSE']) * 1e4)
    df[split + '_MAE'] = metrics.apply(lambda m: m['MAE'] * 1e5)
    df[split + '_R2'] = metrics.apply(lambda m: m['RSquared'])

# The raw dict columns are redundant once the scalar columns exist.
df = df.drop(columns=['train_accuracy', 'test_accuracy'])

In [6]:
# Order rows by filename. This is a plain string sort, so for example
# 'HAR_20_...' is placed before 'HAR_5_...'.
df = df.sort_values(by=['filename'])

In [7]:
# Display the metrics table (rich notebook rendering of the last expression).
df

Unnamed: 0,filename,train_RMSE,train_MAE,train_R2,test_RMSE,test_MAE,test_R2
3,HAR_1_False_False.txt,0.965522,3.217821,0.724364,0.313588,1.744372,0.633443
8,HAR_1_False_True.txt,1.013387,3.166767,0.696358,0.32171,1.614133,0.614211
2,HAR_1_True_False.txt,0.959656,3.141167,0.727703,0.308457,1.709955,0.645342
10,HAR_1_True_True.txt,1.017561,3.147095,0.693852,0.31873,1.598987,0.621324
4,HAR_20_False_False.txt,0.903008,3.670307,0.644721,0.298359,2.362422,0.372274
0,HAR_20_False_True.txt,0.984049,3.476323,0.57809,0.264966,1.622384,0.504921
5,HAR_20_True_False.txt,0.897455,3.655432,0.649077,0.296358,2.336517,0.380663
6,HAR_20_True_True.txt,0.983667,3.474004,0.578417,0.265284,1.624685,0.503734
9,HAR_5_False_False.txt,0.85999,3.084718,0.733057,0.283158,1.829297,0.612958
11,HAR_5_False_True.txt,0.921598,3.068768,0.693441,0.281429,1.576588,0.61767


In [8]:
# Render the metrics frame as LaTeX source, leaving out the index column.
# The caption and label are passed through to the generated \caption / \label.
latex_table = df.to_latex(
    index=False,
    caption='Table Caption',
    label='tab:table_label',
)

# Emit the raw LaTeX text so it can be copied into a document.
print(latex_table)

\begin{table}
\centering
\caption{Table Caption}
\label{tab:table_label}
\begin{tabular}{lrrrrrr}
\toprule
              filename &  train\_RMSE &  train\_MAE &  train\_R2 &  test\_RMSE &  test\_MAE &  test\_R2 \\
\midrule
 HAR\_1\_False\_False.txt &    0.965522 &   3.217821 &  0.724364 &   0.313588 &  1.744372 & 0.633443 \\
  HAR\_1\_False\_True.txt &    1.013387 &   3.166767 &  0.696358 &   0.321710 &  1.614133 & 0.614211 \\
  HAR\_1\_True\_False.txt &    0.959656 &   3.141167 &  0.727703 &   0.308457 &  1.709955 & 0.645342 \\
   HAR\_1\_True\_True.txt &    1.017561 &   3.147095 &  0.693852 &   0.318730 &  1.598987 & 0.621324 \\
HAR\_20\_False\_False.txt &    0.903008 &   3.670307 &  0.644721 &   0.298359 &  2.362422 & 0.372274 \\
 HAR\_20\_False\_True.txt &    0.984049 &   3.476323 &  0.578090 &   0.264966 &  1.622384 & 0.504921 \\
 HAR\_20\_True\_False.txt &    0.897455 &   3.655432 &  0.649077 &   0.296358 &  2.336517 & 0.380663 \\
  HAR\_20\_True\_True.txt &    0.983667 &   3.474

  latex_table = df.to_latex(index=False, caption='Table Caption', label='tab:table_label')
