In [2]:
import pandas as pd
import re

# Read the raw results log; each line is expected to hold one fit record.
# NOTE(review): the line format looks like scikit-learn grid-search verbose
# output — confirm against whatever produced svr_results.txt.
with open("svr_results.txt", "r") as f:
    linhas = f.readlines()

# One record: the four hyper-parameters, the fold score and the elapsed time.
# The time is followed by its unit, either "s" (seconds) or "min" (minutes).
padrao = r"C=(\S+), epsilon=(\S+), gamma=(\S+), kernel=(\S+);, score=(-?\d+\.\d+) total time=\s*([\d.]+)(s|min)"
padrao_compilado = re.compile(padrao)  # compiled once, reused for every line


def parse_linha(linha):
    """Extract one result record from a single log line.

    Parameters
    ----------
    linha : str
        One line of the results log.

    Returns
    -------
    tuple or None
        ``(C, epsilon, gamma, kernel, score, time)`` with ``C``, ``epsilon``,
        ``score`` and ``time`` as floats, ``gamma`` and ``kernel`` as the
        strings found in the log, and ``time`` always expressed in seconds.
        ``None`` when the line does not contain the expected pattern.
    """
    encontrado = padrao_compilado.search(linha)
    if encontrado is None:
        return None

    C, epsilon, gamma, kernel, score, tempo, unidade = encontrado.groups()
    segundos = float(tempo)
    if unidade == "min":
        segundos *= 60  # normalise minutes to seconds
    return (float(C), float(epsilon), gamma, kernel, float(score), segundos)


# Parsed records; lines that cannot be parsed are echoed instead of being
# dropped silently, so gaps in the log stay visible.
dados = []
for linha in linhas:
    registro = parse_linha(linha)
    if registro is None:
        print(f"Erro: {linha}")
    else:
        dados.append(registro)


# Build the results table. The explicit cast keeps the numeric columns as
# floats even when no line matched (an empty frame would otherwise be object).
df = pd.DataFrame(
    dados, columns=["C", "epsilon", "gamma", "kernel", "score", "time"]
).astype({"C": float, "epsilon": float, "score": float, "time": float})

print(df)

        C  epsilon  gamma kernel  score  time
0    10.0    0.100  scale    rbf -0.070   1.5
1    10.0    0.100  scale    rbf -0.058   4.0
2     0.1    0.500  scale   poly -0.358   0.0
3     0.1    0.500  scale   poly -0.355   0.0
4     0.1    0.500  scale   poly -0.366   0.0
..    ...      ...    ...    ...    ...   ...
186   0.1    1.000   auto   poly -0.363   0.0
187   0.1    0.001   auto    rbf -0.035  13.2
188   0.1    0.001   auto    rbf -0.037  30.4
189   0.1    0.001   auto    rbf -0.044  50.5
190   0.1    0.001   auto    rbf -0.033  78.0

[191 rows x 6 columns]


In [3]:
# Average score and time for every hyper-parameter combination,
# then rank the combinations with the highest mean score on top.
df_grouped = df.groupby(by=["C", "epsilon", "gamma", "kernel"]).mean()
df_grouped = df_grouped.sort_values("score", ascending=False)

df_grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,score,time
C,epsilon,gamma,kernel,Unnamed: 4_level_1,Unnamed: 5_level_1
10.0,0.001,auto,rbf,-0.035,74.6
0.1,0.001,auto,rbf,-0.03725,43.025
1.0,0.001,scale,rbf,-0.0376,200.74
10.0,0.01,auto,poly,-0.0418,20.54
1.0,0.001,scale,poly,-0.0426,2715.8
1.0,0.01,scale,poly,-0.0448,545.98
0.1,0.01,auto,sigmoid,-0.045,35.92
10.0,0.1,auto,rbf,-0.0522,5.12
10.0,0.1,scale,rbf,-0.06,12.98
0.1,0.1,scale,poly,-0.06,6.2


In [5]:
# Per hyper-parameter combination: mean score, number of scored records
# behind that mean, and mean time; ranked by mean score, best first.
df_grouped_2 = df.groupby(by=["C", "epsilon", "gamma", "kernel"]).agg(
    score_mean=pd.NamedAgg(column="score", aggfunc="mean"),
    score_count=pd.NamedAgg(column="score", aggfunc="count"),
    time_mean=pd.NamedAgg(column="time", aggfunc="mean"),
)
df_grouped_2 = df_grouped_2.sort_values("score_mean", ascending=False)

df_grouped_2

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,score_mean,score_count,time_mean
C,epsilon,gamma,kernel,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
10.0,0.001,auto,rbf,-0.035,5,74.6
0.1,0.001,auto,rbf,-0.03725,4,43.025
1.0,0.001,scale,rbf,-0.0376,5,200.74
10.0,0.01,auto,poly,-0.0418,5,20.54
1.0,0.001,scale,poly,-0.0426,5,2715.8
1.0,0.01,scale,poly,-0.0448,5,545.98
0.1,0.01,auto,sigmoid,-0.045,5,35.92
10.0,0.1,auto,rbf,-0.0522,5,5.12
10.0,0.1,scale,rbf,-0.06,5,12.98
0.1,0.1,scale,poly,-0.06,5,6.2
