In [25]:
import os
import pandas as pd
import duckdb

# Discover the projects to report on: every "<project>_all_files.csv" found in
# logs/ contributes one report row, keyed by the part of the name before the suffix.
log_dir = "logs/"
all_files_suffix = "_all_files.csv"

# os.listdir returns entries in arbitrary order; sorting keeps the row order of
# the report reproducible across machines and runs.
# The suffix is sliced off the end instead of using str.replace, which would also
# remove the same text if it appeared earlier in the file name.
projects = [
    filename[: -len(all_files_suffix)]
    for filename in sorted(os.listdir(log_dir))
    if filename.endswith(all_files_suffix)
]

# One-column frame that the following steps widen with metric columns.
df_report = pd.DataFrame(projects, columns=['project'])

In [26]:
# Disabled step: this query would left-join repositories.csv on the project name
# and keep only the project and GitHub columns. It is not executed.
#df_report = duckdb.query(f"""
#    select r.project, rr.GitHub from df_report r
#        left join repositories.csv rr on r.project = rr.Project
#""").to_df()

In [27]:
# Attach the R1, R5 and R10 columns from reliability_projects.csv to each project.
# A left join keeps every report row; projects without a match get NULLs.
reliability_join_sql = """
    select r.*, ra.R1, ra.R5, ra.R10 from df_report r
        left join reliability_projects.csv ra on r.project = ra.project
"""
df_report = duckdb.query(reliability_join_sql).to_df()

In [28]:
# Summary metrics: each "<project>_logfile-summary.csv" in analysis/ is read,
# pivoted so that every value of its "statistic" column becomes a column, and the
# four counters selected below are attached to the report.
log_dir = "analysis/"
suffix = "_logfile-summary.csv"
metric_columns = [
    "number-of-commits",
    "number-of-entities",
    "number-of-entities-changed",
    "number-of-authors",
]

summary_frames = []  # one pivoted frame per project; concatenated once below
for filename in os.listdir(log_dir):
    if not filename.endswith(suffix):
        continue
    task_id = filename[: -len(suffix)]  # strip only the trailing suffix
    try:
        df_summary_line = pd.read_csv(f"{log_dir}{filename}")
        df_summary_line = df_summary_line.set_index('statistic').T
    except Exception as exc:
        # Best effort: name the failing file and the reason, then move on.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
        print(f"Erro ao ler o arquivo {log_dir}{filename}: {exc}")
        continue
    df_summary_line['project'] = task_id
    summary_frames.append(df_summary_line)

if summary_frames:
    df_summary = pd.concat(summary_frames, ignore_index=True)
    df_report = duckdb.query("""
        select r.*, s."number-of-commits", s."number-of-entities", s."number-of-entities-changed", s."number-of-authors"
            from df_report r
            left join df_summary s on r.project = s.project
    """).to_df()
else:
    # Nothing was read, so there is no "project" column to join on; add the
    # metric columns as NaN, which is what an unmatched left join yields.
    df_summary = pd.DataFrame()
    print(f"No usable *{suffix} files in {log_dir}; metric columns left empty")
    df_report = df_report.assign(**dict.fromkeys(metric_columns, float("nan")))

In [29]:
# Age metrics: for every "<project>_logfile-age.csv" in analysis/, compute the
# maximum and the mean of "age-months" and attach both to the report.
log_dir = "analysis/"
suffix = "_logfile-age.csv"
metric_columns = ["max-age-months", "avg-age-months"]

summary_frames = []  # one single-row frame per project; concatenated once below
for filename in os.listdir(log_dir):
    if not filename.endswith(suffix):
        continue
    task_id = filename[: -len(suffix)]  # strip only the trailing suffix
    try:
        df_task = duckdb.query(f"""
            select max("age-months") as "max-age-months", avg("age-months") as "avg-age-months"
                from "{log_dir}{filename}" r
        """).to_df()
    except Exception as exc:
        # Best effort: name the failing file and the reason, then move on.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
        print(f"Erro ao ler o arquivo {log_dir}{filename}: {exc}")
        continue
    df_task['project'] = task_id
    summary_frames.append(df_task)

if summary_frames:
    df_summary = pd.concat(summary_frames, ignore_index=True)
    df_report = duckdb.query("""
        select r.*, t."max-age-months", t."avg-age-months"
            from df_report r
            left join df_summary t on r.project = t.project
    """).to_df()
else:
    # Nothing was aggregated, so there is no "project" column to join on; add the
    # metric columns as NaN, which is what an unmatched left join yields.
    df_summary = pd.DataFrame()
    print(f"No usable *{suffix} files in {log_dir}; metric columns left empty")
    df_report = df_report.assign(**dict.fromkeys(metric_columns, float("nan")))

In [30]:
# Coupling metric: for every "<project>_logfile-coupling.csv" in analysis/,
# average "degree" per entity, then average those per-entity means into a single
# project value, reported as "avg2-degree-coupling".
log_dir = "analysis/"
suffix = "_logfile-coupling.csv"
metric_columns = ["avg2-degree-coupling"]

summary_frames = []  # one single-row frame per project; concatenated once below
for filename in os.listdir(log_dir):
    if not filename.endswith(suffix):
        continue
    task_id = filename[: -len(suffix)]  # strip only the trailing suffix
    try:
        df_task = duckdb.query(f"""
            select avg("avg-degree") as "avg2-degree" from (
                select entity, avg(degree) as "avg-degree"
                    from "{log_dir}{filename}"
                    group by entity
            )
        """).to_df()
    except Exception as exc:
        # Best effort: name the failing file and the reason, then move on.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
        print(f"Erro ao ler o arquivo {log_dir}{filename}: {exc}")
        continue
    df_task['project'] = task_id
    summary_frames.append(df_task)

if summary_frames:
    df_summary = pd.concat(summary_frames, ignore_index=True)
    df_report = duckdb.query("""
        select r.*, t."avg2-degree" as "avg2-degree-coupling"
            from df_report r
            left join df_summary t on r.project = t.project
    """).to_df()
else:
    # Nothing was aggregated, so there is no "project" column to join on; add the
    # metric columns as NaN, which is what an unmatched left join yields.
    df_summary = pd.DataFrame()
    print(f"No usable *{suffix} files in {log_dir}; metric columns left empty")
    df_report = df_report.assign(**dict.fromkeys(metric_columns, float("nan")))

In [31]:
# Communication metric: for every "<project>_logfile-communication.csv" in
# analysis/, average "strength" per author, then average those per-author means
# into a single project value, reported as "avg2-strength-communication".
log_dir = "analysis/"
suffix = "_logfile-communication.csv"
metric_columns = ["avg2-strength-communication"]

summary_frames = []  # one single-row frame per project; concatenated once below
for filename in os.listdir(log_dir):
    if not filename.endswith(suffix):
        continue
    task_id = filename[: -len(suffix)]  # strip only the trailing suffix
    try:
        df_task = duckdb.query(f"""
            select avg("avg-strength") as "avg2-strength" from (
                select author, avg(strength) as "avg-strength"
                    from "{log_dir}{filename}"
                    group by author
            )
        """).to_df()
    except Exception as exc:
        # Best effort: name the failing file and the reason, then move on.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
        print(f"Erro ao ler o arquivo {log_dir}{filename}: {exc}")
        continue
    df_task['project'] = task_id
    summary_frames.append(df_task)

if summary_frames:
    df_summary = pd.concat(summary_frames, ignore_index=True)
    df_report = duckdb.query("""
        select r.*, t."avg2-strength" as "avg2-strength-communication"
            from df_report r
            left join df_summary t on r.project = t.project
    """).to_df()
else:
    # Nothing was aggregated, so there is no "project" column to join on; add the
    # metric columns as NaN, which is what an unmatched left join yields.
    df_summary = pd.DataFrame()
    print(f"No usable *{suffix} files in {log_dir}; metric columns left empty")
    df_report = df_report.assign(**dict.fromkeys(metric_columns, float("nan")))

In [32]:
# Effort metrics: for every "<project>_logfile-entity-effort.csv" in analysis/,
# average "author-revs" and "total-revs" per entity, then average those
# per-entity means into project values with an "-effort" suffix.
log_dir = "analysis/"
suffix = "_logfile-entity-effort.csv"
metric_columns = ["avg2-author-revs-effort", "avg2-total-revs-effort"]

summary_frames = []  # one single-row frame per project; concatenated once below
for filename in os.listdir(log_dir):
    if not filename.endswith(suffix):
        continue
    task_id = filename[: -len(suffix)]  # strip only the trailing suffix
    try:
        df_task = duckdb.query(f"""
            select avg("avg-author-revs") as "avg2-author-revs", avg("avg-total-revs") as "avg2-total-revs" from (
                select entity, avg("author-revs") as "avg-author-revs", avg("total-revs") as "avg-total-revs"
                    from "{log_dir}{filename}"
                    group by entity
            )
        """).to_df()
    except Exception as exc:
        # Best effort: name the failing file and the reason, then move on.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
        print(f"Erro ao ler o arquivo {log_dir}{filename}: {exc}")
        continue
    df_task['project'] = task_id
    summary_frames.append(df_task)

if summary_frames:
    df_summary = pd.concat(summary_frames, ignore_index=True)
    df_report = duckdb.query("""
        select r.*, t."avg2-author-revs" as "avg2-author-revs-effort", t."avg2-total-revs" as "avg2-total-revs-effort"
            from df_report r
            left join df_summary t on r.project = t.project
    """).to_df()
else:
    # Nothing was aggregated, so there is no "project" column to join on; add the
    # metric columns as NaN, which is what an unmatched left join yields.
    df_summary = pd.DataFrame()
    print(f"No usable *{suffix} files in {log_dir}; metric columns left empty")
    df_report = df_report.assign(**dict.fromkeys(metric_columns, float("nan")))

In [33]:
# Ownership metrics: for every "<project>_logfile-entity-ownership.csv" in
# analysis/, average "added" and "deleted" per entity, then average those
# per-entity means into project values prefixed "avg2-ownership-".
log_dir = "analysis/"
suffix = "_logfile-entity-ownership.csv"
metric_columns = ["avg2-ownership-added", "avg2-ownership-deleted"]

summary_frames = []  # one single-row frame per project; concatenated once below
for filename in os.listdir(log_dir):
    if not filename.endswith(suffix):
        continue
    task_id = filename[: -len(suffix)]  # strip only the trailing suffix
    try:
        df_task = duckdb.query(f"""
            select avg("avg-added") as "avg2-added", avg("avg-deleted") as "avg2-deleted" from (
                select entity, avg("added") as "avg-added", avg("deleted") as "avg-deleted"
                    from "{log_dir}{filename}"
                    group by entity
            )
        """).to_df()
    except Exception as exc:
        # Best effort: name the failing file and the reason, then move on.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
        print(f"Erro ao ler o arquivo {log_dir}{filename}: {exc}")
        continue
    df_task['project'] = task_id
    summary_frames.append(df_task)

if summary_frames:
    df_summary = pd.concat(summary_frames, ignore_index=True)
    df_report = duckdb.query("""
        select r.*, t."avg2-added" as "avg2-ownership-added", t."avg2-deleted" as "avg2-ownership-deleted"
            from df_report r
            left join df_summary t on r.project = t.project
    """).to_df()
else:
    # Nothing was aggregated, so there is no "project" column to join on; add the
    # metric columns as NaN, which is what an unmatched left join yields.
    df_summary = pd.DataFrame()
    print(f"No usable *{suffix} files in {log_dir}; metric columns left empty")
    df_report = df_report.assign(**dict.fromkeys(metric_columns, float("nan")))

In [34]:
# Author churn metrics: for every "<project>_logfile-author-churn.csv" in
# analysis/, take the mean of "added", "deleted" and "commits" over all rows of
# the file and attach the three values to the report.
log_dir = "analysis/"
suffix = "_logfile-author-churn.csv"
metric_columns = [
    "avg-author-churn-added",
    "avg-author-churn-deleted",
    "avg-author-churn-commits",
]

summary_frames = []  # one single-row frame per project; concatenated once below
for filename in os.listdir(log_dir):
    if not filename.endswith(suffix):
        continue
    task_id = filename[: -len(suffix)]  # strip only the trailing suffix
    try:
        df_task = duckdb.query(f"""
            select avg("added") as "avg-author-churn-added",
                    avg("deleted") as "avg-author-churn-deleted",
                    avg("commits") as "avg-author-churn-commits"
            from "{log_dir}{filename}" r
        """).to_df()
    except Exception as exc:
        # Best effort: name the failing file and the reason, then move on.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
        print(f"Erro ao ler o arquivo {log_dir}{filename}: {exc}")
        continue
    df_task['project'] = task_id
    summary_frames.append(df_task)

if summary_frames:
    df_summary = pd.concat(summary_frames, ignore_index=True)
    df_report = duckdb.query("""
        select r.*, t."avg-author-churn-added", t."avg-author-churn-deleted", t."avg-author-churn-commits"
            from df_report r
            left join df_summary t on r.project = t.project
    """).to_df()
else:
    # Nothing was aggregated, so there is no "project" column to join on; add the
    # metric columns as NaN, which is what an unmatched left join yields.
    df_summary = pd.DataFrame()
    print(f"No usable *{suffix} files in {log_dir}; metric columns left empty")
    df_report = df_report.assign(**dict.fromkeys(metric_columns, float("nan")))

In [35]:
# Entity churn metrics: for every "<project>_logfile-entity-churn.csv" in
# analysis/, take the mean of "added", "deleted" and "commits" over all rows of
# the file and attach the three values to the report.
log_dir = "analysis/"
suffix = "_logfile-entity-churn.csv"
metric_columns = [
    "avg-entity-churn-added",
    "avg-entity-churn-deleted",
    "avg-entity-churn-commits",
]

summary_frames = []  # one single-row frame per project; concatenated once below
for filename in os.listdir(log_dir):
    if not filename.endswith(suffix):
        continue
    task_id = filename[: -len(suffix)]  # strip only the trailing suffix
    try:
        df_task = duckdb.query(f"""
            select avg("added") as "avg-entity-churn-added",
                    avg("deleted") as "avg-entity-churn-deleted",
                    avg("commits") as "avg-entity-churn-commits"
            from "{log_dir}{filename}" r
        """).to_df()
    except Exception as exc:
        # Best effort: name the failing file and the reason, then move on.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
        print(f"Erro ao ler o arquivo {log_dir}{filename}: {exc}")
        continue
    df_task['project'] = task_id
    summary_frames.append(df_task)

if summary_frames:
    df_summary = pd.concat(summary_frames, ignore_index=True)
    df_report = duckdb.query("""
        select r.*, t."avg-entity-churn-added", t."avg-entity-churn-deleted", t."avg-entity-churn-commits"
            from df_report r
            left join df_summary t on r.project = t.project
    """).to_df()
else:
    # Nothing was aggregated, so there is no "project" column to join on; add the
    # metric columns as NaN, which is what an unmatched left join yields.
    df_summary = pd.DataFrame()
    print(f"No usable *{suffix} files in {log_dir}; metric columns left empty")
    df_report = df_report.assign(**dict.fromkeys(metric_columns, float("nan")))

In [36]:
# Fragmentation metrics: for every "<project>_logfile-fragmentation.csv" in
# analysis/, take the mean of "fractal-value" and "total-revs" over all rows of
# the file and attach both values to the report.
log_dir = "analysis/"
suffix = "_logfile-fragmentation.csv"
metric_columns = ["avg-fragmentation-fractal-value", "avg-fragmentation-total-revs"]

summary_frames = []  # one single-row frame per project; concatenated once below
for filename in os.listdir(log_dir):
    if not filename.endswith(suffix):
        continue
    task_id = filename[: -len(suffix)]  # strip only the trailing suffix
    try:
        df_task = duckdb.query(f"""
            select avg("fractal-value") as "avg-fragmentation-fractal-value",
                    avg("total-revs") as "avg-fragmentation-total-revs"
            from "{log_dir}{filename}" r
        """).to_df()
    except Exception as exc:
        # Best effort: name the failing file and the reason, then move on.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
        print(f"Erro ao ler o arquivo {log_dir}{filename}: {exc}")
        continue
    df_task['project'] = task_id
    summary_frames.append(df_task)

if summary_frames:
    df_summary = pd.concat(summary_frames, ignore_index=True)
    df_report = duckdb.query("""
        select r.*, t."avg-fragmentation-fractal-value", t."avg-fragmentation-total-revs"
            from df_report r
            left join df_summary t on r.project = t.project
    """).to_df()
else:
    # Nothing was aggregated, so there is no "project" column to join on; add the
    # metric columns as NaN, which is what an unmatched left join yields.
    df_summary = pd.DataFrame()
    print(f"No usable *{suffix} files in {log_dir}; metric columns left empty")
    df_report = df_report.assign(**dict.fromkeys(metric_columns, float("nan")))

In [37]:
# Main-developer metric: for every "<project>_logfile-main-dev.csv" in analysis/,
# take the mean of "ownership" over all rows of the file and attach it to the
# report as "avg-main-dev-ownership".
log_dir = "analysis/"
suffix = "_logfile-main-dev.csv"
metric_columns = ["avg-main-dev-ownership"]

summary_frames = []  # one single-row frame per project; concatenated once below
for filename in os.listdir(log_dir):
    # NOTE: a name such as "<project>_logfile-refactoring-main-dev.csv" does not
    # end with this suffix ("-main-dev" there is not preceded by "_logfile"),
    # so the two file families are not mixed up.
    if not filename.endswith(suffix):
        continue
    task_id = filename[: -len(suffix)]  # strip only the trailing suffix
    try:
        df_task = duckdb.query(f"""
            select avg("ownership") as "avg-main-dev-ownership"
            from "{log_dir}{filename}" r
        """).to_df()
    except Exception as exc:
        # Best effort: name the failing file and the reason, then move on.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
        print(f"Erro ao ler o arquivo {log_dir}{filename}: {exc}")
        continue
    df_task['project'] = task_id
    summary_frames.append(df_task)

if summary_frames:
    df_summary = pd.concat(summary_frames, ignore_index=True)
    df_report = duckdb.query("""
        select r.*, t."avg-main-dev-ownership"
            from df_report r
            left join df_summary t on r.project = t.project
    """).to_df()
else:
    # Nothing was aggregated, so there is no "project" column to join on; add the
    # metric columns as NaN, which is what an unmatched left join yields.
    df_summary = pd.DataFrame()
    print(f"No usable *{suffix} files in {log_dir}; metric columns left empty")
    df_report = df_report.assign(**dict.fromkeys(metric_columns, float("nan")))

In [38]:
# Refactoring main-developer metric: for every
# "<project>_logfile-refactoring-main-dev.csv" in analysis/, take the mean of
# "ownership" over all rows of the file and attach it to the report as
# "avg-refactoring-main-dev-ownership".
log_dir = "analysis/"
suffix = "_logfile-refactoring-main-dev.csv"
metric_columns = ["avg-refactoring-main-dev-ownership"]

summary_frames = []  # one single-row frame per project; concatenated once below
for filename in os.listdir(log_dir):
    if not filename.endswith(suffix):
        continue
    task_id = filename[: -len(suffix)]  # strip only the trailing suffix
    try:
        df_task = duckdb.query(f"""
            select avg("ownership") as "avg-refactoring-main-dev-ownership"
            from "{log_dir}{filename}" r
        """).to_df()
    except Exception as exc:
        # Best effort: name the failing file and the reason, then move on.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
        print(f"Erro ao ler o arquivo {log_dir}{filename}: {exc}")
        continue
    df_task['project'] = task_id
    summary_frames.append(df_task)

if summary_frames:
    df_summary = pd.concat(summary_frames, ignore_index=True)
    df_report = duckdb.query("""
        select r.*, t."avg-refactoring-main-dev-ownership"
            from df_report r
            left join df_summary t on r.project = t.project
    """).to_df()
else:
    # Nothing was aggregated, so there is no "project" column to join on; add the
    # metric columns as NaN, which is what an unmatched left join yields.
    df_summary = pd.DataFrame()
    print(f"No usable *{suffix} files in {log_dir}; metric columns left empty")
    df_report = df_report.assign(**dict.fromkeys(metric_columns, float("nan")))

In [39]:
# Revisions metric: for every "<project>_logfile-revisions.csv" in analysis/,
# take the mean of "n-revs" over all rows of the file and attach it to the
# report as "avg-revisions".
log_dir = "analysis/"
suffix = "_logfile-revisions.csv"
metric_columns = ["avg-revisions"]

summary_frames = []  # one single-row frame per project; concatenated once below
for filename in os.listdir(log_dir):
    if not filename.endswith(suffix):
        continue
    task_id = filename[: -len(suffix)]  # strip only the trailing suffix
    try:
        df_task = duckdb.query(f"""
            select avg("n-revs") as "avg-revisions"
            from "{log_dir}{filename}" r
        """).to_df()
    except Exception as exc:
        # Best effort: name the failing file and the reason, then move on.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
        print(f"Erro ao ler o arquivo {log_dir}{filename}: {exc}")
        continue
    df_task['project'] = task_id
    summary_frames.append(df_task)

if summary_frames:
    df_summary = pd.concat(summary_frames, ignore_index=True)
    df_report = duckdb.query("""
        select r.*, t."avg-revisions"
            from df_report r
            left join df_summary t on r.project = t.project
    """).to_df()
else:
    # Nothing was aggregated, so there is no "project" column to join on; add the
    # metric columns as NaN, which is what an unmatched left join yields.
    df_summary = pd.DataFrame()
    print(f"No usable *{suffix} files in {log_dir}; metric columns left empty")
    df_report = df_report.assign(**dict.fromkeys(metric_columns, float("nan")))

In [40]:
# SOC metric: for every "<project>_logfile-soc.csv" in analysis/, take the mean
# of "soc" over all rows of the file and attach it to the report as "avg-soc".
log_dir = "analysis/"
suffix = "_logfile-soc.csv"
metric_columns = ["avg-soc"]

summary_frames = []  # one single-row frame per project; concatenated once below
for filename in os.listdir(log_dir):
    if not filename.endswith(suffix):
        continue
    task_id = filename[: -len(suffix)]  # strip only the trailing suffix
    try:
        df_task = duckdb.query(f"""
            select avg("soc") as "avg-soc"
            from "{log_dir}{filename}" r
        """).to_df()
    except Exception as exc:
        # Best effort: name the failing file and the reason, then move on.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
        print(f"Erro ao ler o arquivo {log_dir}{filename}: {exc}")
        continue
    df_task['project'] = task_id
    summary_frames.append(df_task)

if summary_frames:
    df_summary = pd.concat(summary_frames, ignore_index=True)
    df_report = duckdb.query("""
        select r.*, t."avg-soc"
            from df_report r
            left join df_summary t on r.project = t.project
    """).to_df()
else:
    # Nothing was aggregated, so there is no "project" column to join on; add the
    # metric columns as NaN, which is what an unmatched left join yields.
    df_summary = pd.DataFrame()
    print(f"No usable *{suffix} files in {log_dir}; metric columns left empty")
    df_report = df_report.assign(**dict.fromkeys(metric_columns, float("nan")))

In [41]:
# Spot check of the entity-effort aggregation on a single file
# (analysis/spring-boot_logfile-entity-effort.csv): per-entity means of
# "author-revs" and "total-revs", averaged again across entities.
# The last expression is the resulting one-row frame, shown as the cell output.
spring_boot_effort_sql = """
                select avg("avg-author-revs") as "avg2-author-revs", avg("avg-total-revs") as "avg2-total-revs"  from (
                    select entity, avg("author-revs") as "avg-author-revs", avg("total-revs") as "avg-total-revs"
                        from "analysis/spring-boot_logfile-entity-effort.csv"
                        group by entity
                )
"""
duckdb.query(spring_boot_effort_sql).to_df()

Unnamed: 0,avg2-author-revs,avg2-total-revs
0,1.54057,4.682579


In [42]:
# Show the whole report: lift the row and column limits and cap cell text at
# 100 characters, only for the duration of this display call.
full_report_options = (
    'display.max_rows', None,
    'display.max_columns', None,
    'display.max_colwidth', 100,
)
with pd.option_context(*full_report_options):
    display(df_report)

Unnamed: 0,project,R1,R5,R10,number-of-commits,number-of-entities,number-of-entities-changed,number-of-authors,max-age-months,avg-age-months,avg2-degree-coupling,avg2-strength-communication,avg2-author-revs-effort,avg2-total-revs-effort,avg2-ownership-added,avg2-ownership-deleted,avg-author-churn-added,avg-author-churn-deleted,avg-author-churn-commits,avg-entity-churn-added,avg-entity-churn-deleted,avg-entity-churn-commits,avg-fragmentation-fractal-value,avg-fragmentation-total-revs,avg-main-dev-ownership,avg-refactoring-main-dev-ownership,avg-revisions,avg-soc
0,logging-log4j2,0.7131,0.1844,0.034,13848,17573,83688,261,177,60.694759,55.381898,6.610356,1.712696,4.762306,30.969037,14.005907,4774.022989,2809.747126,53.057471,70.905366,41.731292,4.762306,0.238578,4.762306,0.594201,0.48577,4.762306,3894.627838
1,validation,0.9916,0.9585,0.9187,441,396,1662,20,185,58.54798,63.676176,17.493837,1.695875,4.19697,18.309133,7.05463,938.55,389.05,22.05,47.401515,19.64899,4.19697,0.209091,4.19697,0.900025,0.83947,4.19697,182.025
2,java-classmate,0.9922,0.9614,0.9242,279,124,750,18,172,100.798387,64.436012,21.904139,2.269854,6.048387,52.636617,8.847043,868.166667,193.833333,15.5,126.024194,28.137097,6.048387,0.339839,6.048387,0.845887,0.696048,6.048387,46.652632
3,attoparser,0.9924,0.9627,0.9268,328,1333,2930,4,149,114.787697,62.38189,6.958333,1.789822,2.19805,72.387472,36.593148,28533.5,13866.25,82.0,85.621905,41.609152,2.19805,0.03906,2.19805,0.96988,0.465574,2.19805,310.911247
4,checker-framework,0.6125,0.0862,0.0074,17185,19996,88758,175,187,92.65128,59.864855,7.301288,1.789335,4.438788,50.596532,38.748727,13555.56,11020.902857,98.2,118.634877,96.45219,4.438788,0.261782,4.438788,0.668968,0.63715,4.438788,2437.355846
5,webjars-locator-lite,0.9898,0.9501,0.9027,348,63,631,28,143,34.444444,55.214286,29.31904,2.148541,10.015873,25.925327,22.04411,291.25,266.035714,12.428571,129.444444,118.238095,10.015873,0.447302,10.015873,0.637778,0.62381,10.015873,50.4
6,HikariCP,0.9131,0.6349,0.4031,2732,867,5840,159,136,111.016148,66.535256,40.972043,3.088386,6.735871,64.118838,50.701647,587.100629,450.169811,17.18239,107.668973,82.557093,6.735871,0.080429,6.735871,0.613599,0.587843,6.735871,79.568241
7,common-annotations-api,0.9944,0.9722,0.9451,67,185,323,17,83,35.145946,37.0,15.97314,1.245946,1.745946,60.212387,9.353829,907.411765,231.470588,3.941176,83.383784,21.27027,1.745946,0.070541,1.745946,0.776486,0.428432,1.745946,42.114458
8,error-prone,0.7834,0.2951,0.0871,6521,5220,31806,342,161,66.561494,58.9094,8.908217,1.826302,6.093103,85.309378,23.319619,2828.269006,1036.008772,19.067251,185.300383,67.876437,6.093103,0.350059,6.093103,0.747611,0.668404,6.093103,1077.935023
9,tomcat,0.3856,0.0085,0.0001,26849,8884,93653,173,227,79.092413,50.910677,5.96109,2.845424,10.54176,88.824933,57.813233,13791.774566,8998.115607,155.196532,268.570126,175.222197,10.54176,0.272302,10.54176,0.761687,0.676666,10.54176,902.030314


In [43]:
# Persist the assembled report as CSV and as a LaTeX table, both with pandas'
# default options (so the row index is written as well).
for write_report, target_path in (
    (df_report.to_csv, "report.csv"),
    (df_report.to_latex, "report.tex"),
):
    write_report(target_path)
# Markdown export is currently disabled:
#df_report.to_markdown("report.md")

In [44]:
import pandas as pd
import numpy as np

# Load the assembled per-project report.
df = pd.read_csv('report.csv')

# Linear model for R1 (QoS): an intercept plus one signed weight per report
# column. The terms are kept, and accumulated, in the same order as the written
# formula so the floating-point result is unchanged.
r1_intercept = 0.91918
r1_weights = [
    ('avg-revisions', -0.00701),
    ('avg-refactoring-main-dev-ownership', -0.04142),
    ('avg-main-dev-ownership', 0.12267),
    ('avg-fragmentation-fractal-value', 0.37781),
    ('avg-entity-churn-deleted', -0.000149),
    ('avg-entity-churn-added', -0.00125),
    ('avg-author-churn-commits', -0.00114),
    ('avg-author-churn-deleted', -0.000019),
    ('avg-author-churn-added', 0.0000123),
    ('avg2-ownership-deleted', 0.00072),
    ('avg2-ownership-added', 0.00103),
    ('avg2-author-revs-effort', 0.01392),
    ('avg2-strength-communication', -0.000284),
    ('avg2-degree-coupling', -0.00000319),
    ('avg-age-months', -0.000188),
    ('max-age-months', 0.000192),
    ('number-of-authors', -0.00000345),
    ('number-of-entities-changed', -0.00000221),
    ('number-of-entities', 0.00000203),
    ('number-of-commits', -0.00000253),
]

r1_estimate = r1_intercept
for column, weight in r1_weights:
    r1_estimate = r1_estimate + weight * df[column]
df['R1_predicted'] = r1_estimate

# Parameters of the QoS -> QoE transformation.
alpha = 0.11786467779171651
beta = -1.4824915844102664
gamma = 0.45366165030276323

# QoE is an exponential function of the predicted QoS (R1_predicted).
df['QoE_predicted'] = alpha * np.exp(beta * df['R1_predicted']) + gamma

# Write the report, now including both prediction columns, to a new CSV file.
output_path = 'report_with_qoe_predictions.csv'
df.to_csv(output_path, index=False)


In [45]:
import pandas as pd
import numpy as np

# Load the per-project data together with the predictions already computed.
df = pd.read_csv('report_with_qoe_predictions.csv')

# Reliability of the projects treated as a series system: the overall value is
# the product of the individual (predicted) reliabilities.
system_reliability = np.prod(df['R1_predicted'])
print(f"System Series Reliability: {system_reliability:.4f}")

# Alternative, currently disabled: use the R1 column instead of the prediction.
#system_reliability = np.prod(df['R1'])

# QoE of the whole system: apply the QoS -> QoE transform to the series
# reliability. alpha, beta and gamma must already be defined in the session
# (they are assigned where QoE_predicted is computed).
# The result gets its own name and label; previously it overwrote
# system_reliability and was printed as "System Series Reliability" too.
system_qoe = alpha * np.exp(beta * system_reliability) + gamma
print(f"System QoE: {system_qoe:.4f}")



System Series Reliability: 0.0017
System Series Reliability: 0.5712


In [46]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# R1 column versus the linear-model estimate, both taken from the current df.
y_true, y_pred = df['R1'], df['R1_predicted']

# Error metrics; RMSE is the square root of the MSE.
mae = mean_absolute_error(y_true, y_pred)
mse = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_true, y_pred)

# Print every metric with four decimal places.
metric_report = (
    ("MAE (Erro Médio Absoluto)", mae),
    ("MSE (Erro Quadrático Médio)", mse),
    ("RMSE (Raiz do Erro Quadrático Médio)", rmse),
    ("R² (Coeficiente de Determinação)", r2),
)
for metric_label, metric_value in metric_report:
    print(f"{metric_label}: {metric_value:.4f}")

MAE (Erro Médio Absoluto): 0.0351
MSE (Erro Quadrático Médio): 0.0023
RMSE (Raiz do Erro Quadrático Médio): 0.0477
R² (Coeficiente de Determinação): 0.9076
