In [1]:
import os
import pandas as pd
import duckdb

# Discover the projects to report on: every "<project>_all_files.csv" found in
# logs/ contributes one row to df_report.
log_dir = "logs/"
all_files_suffix = "_all_files.csv"

# os.listdir returns entries in arbitrary order, so sort to keep the report's
# row order reproducible between runs and machines.
# The suffix is sliced off the end instead of using str.replace, which would
# also remove the marker if it appeared inside the project name itself.
projects = [
    filename[:-len(all_files_suffix)]
    for filename in sorted(os.listdir(log_dir))
    if filename.endswith(all_files_suffix)
]

df_report = pd.DataFrame(projects, columns=['project'])

In [2]:
# Disabled step, kept for reference: it would replace df_report with the
# project name plus the GitHub column from repositories.csv (left join on
# the project name).
#df_report = duckdb.query(f"""
#    select r.project, rr.GitHub from df_report r
#        left join repositories.csv rr on r.project = rr.Project
#""").to_df()

In [3]:
# Attach the R1/R5/R10 columns from reliability_projects.csv to each project.
# The left join keeps projects that have no matching reliability row.
reliability_join_sql = """
    select r.*, ra.R1, ra.R5, ra.R10 from df_report r
        left join reliability_projects.csv ra on r.project = ra.project
"""
df_report = duckdb.query(reliability_join_sql).to_df()

In [4]:
# Collect the summary statistics of every "<project>_logfile-summary.csv" in
# analysis/ and attach four of them to df_report.
log_dir = "analysis/"
file_suffix = "_logfile-summary.csv"

summary_frames = []
for filename in os.listdir(log_dir):
    if not filename.endswith(file_suffix):
        continue
    # Slice the suffix off the end; str.replace would also strip the marker
    # if it occurred inside the project name.
    task_id = filename[:-len(file_suffix)]
    try:
        # Index by the 'statistic' column and transpose, so each statistic
        # becomes a column of the resulting row(s).
        df_summary_line = pd.read_csv(f"{log_dir}{filename}").set_index('statistic').T
    except Exception as exc:
        # Best effort: report the offending file and its cause, then carry on
        # with the remaining projects. Catching Exception (not a bare except)
        # lets KeyboardInterrupt/SystemExit propagate.
        print(f"Erro ao ler o arquivo {log_dir}{filename}: {exc!r}")
        continue
    df_summary_line['project'] = task_id
    summary_frames.append(df_summary_line)

# Concatenate once instead of growing a DataFrame inside the loop.
df_summary = pd.concat(summary_frames, ignore_index=True) if summary_frames else pd.DataFrame()

df_report = duckdb.query("""
    select r.*, s."number-of-commits", s."number-of-entities", s."number-of-entities-changed", s."number-of-authors" 
        from df_report r
        left join df_summary s on r.project = s.project
""").to_df()

In [5]:
# For every "<project>_logfile-age.csv" in analysis/, compute the maximum and
# the mean of "age-months", then attach both values to df_report.
log_dir = "analysis/"
file_suffix = "_logfile-age.csv"

task_frames = []
for filename in os.listdir(log_dir):
    if not filename.endswith(file_suffix):
        continue
    # Slice the suffix off the end; str.replace would also strip the marker
    # if it occurred inside the project name.
    task_id = filename[:-len(file_suffix)]
    try:
        df_task = duckdb.query(f"""
            select max("age-months") as "max-age-months", avg("age-months") as "avg-age-months"
                from "{log_dir}{filename}" r
        """).to_df()
    except Exception as exc:
        # Best effort: report the offending file and its cause, then carry on.
        # Catching Exception (not a bare except) lets KeyboardInterrupt propagate.
        print(f"Erro ao ler o arquivo {log_dir}{filename}: {exc!r}")
        continue
    df_task['project'] = task_id
    task_frames.append(df_task)

# Concatenate once instead of growing a DataFrame inside the loop.
df_summary = pd.concat(task_frames, ignore_index=True) if task_frames else pd.DataFrame()

df_report = duckdb.query("""
    select r.*, t."max-age-months", t."avg-age-months" 
        from df_report r
        left join df_summary t on r.project = t.project
""").to_df()

In [6]:
# For every "<project>_logfile-coupling.csv" in analysis/, average "degree"
# per entity and then average those per-entity means; the result is attached
# to df_report as "avg2-degree-coupling".
log_dir = "analysis/"
file_suffix = "_logfile-coupling.csv"

task_frames = []
for filename in os.listdir(log_dir):
    if not filename.endswith(file_suffix):
        continue
    # Slice the suffix off the end; str.replace would also strip the marker
    # if it occurred inside the project name.
    task_id = filename[:-len(file_suffix)]
    try:
        df_task = duckdb.query(f"""
            select avg("avg-degree") as "avg2-degree"  from (
                select entity, avg(degree) as "avg-degree"
                    from "{log_dir}{filename}"
                    group by entity
            )
        """).to_df()
    except Exception as exc:
        # Best effort: report the offending file and its cause, then carry on.
        # Catching Exception (not a bare except) lets KeyboardInterrupt propagate.
        print(f"Erro ao ler o arquivo {log_dir}{filename}: {exc!r}")
        continue
    df_task['project'] = task_id
    task_frames.append(df_task)

# Concatenate once instead of growing a DataFrame inside the loop.
df_summary = pd.concat(task_frames, ignore_index=True) if task_frames else pd.DataFrame()

df_report = duckdb.query("""
    select r.*, t."avg2-degree" as "avg2-degree-coupling"
        from df_report r
        left join df_summary t on r.project = t.project
""").to_df()

In [7]:
# For every "<project>_logfile-communication.csv" in analysis/, average
# "strength" per author and then average those per-author means; the result
# is attached to df_report as "avg2-strength-communication".
log_dir = "analysis/"
file_suffix = "_logfile-communication.csv"

task_frames = []
for filename in os.listdir(log_dir):
    if not filename.endswith(file_suffix):
        continue
    # Slice the suffix off the end; str.replace would also strip the marker
    # if it occurred inside the project name.
    task_id = filename[:-len(file_suffix)]
    try:
        df_task = duckdb.query(f"""
            select avg("avg-strength") as "avg2-strength"  from (
                select author, avg(strength) as "avg-strength"
                    from "{log_dir}{filename}"
                    group by author
            )
        """).to_df()
    except Exception as exc:
        # Best effort: report the offending file and its cause, then carry on.
        # Catching Exception (not a bare except) lets KeyboardInterrupt propagate.
        print(f"Erro ao ler o arquivo {log_dir}{filename}: {exc!r}")
        continue
    df_task['project'] = task_id
    task_frames.append(df_task)

# Concatenate once instead of growing a DataFrame inside the loop.
df_summary = pd.concat(task_frames, ignore_index=True) if task_frames else pd.DataFrame()

df_report = duckdb.query("""
    select r.*, t."avg2-strength" as "avg2-strength-communication"
        from df_report r
        left join df_summary t on r.project = t.project
""").to_df()

In [8]:
# For every "<project>_logfile-entity-effort.csv" in analysis/, average
# "author-revs" and "total-revs" per entity and then average those per-entity
# means; both results are attached to df_report with an "-effort" suffix.
log_dir = "analysis/"
file_suffix = "_logfile-entity-effort.csv"

task_frames = []
for filename in os.listdir(log_dir):
    if not filename.endswith(file_suffix):
        continue
    # Slice the suffix off the end; str.replace would also strip the marker
    # if it occurred inside the project name.
    task_id = filename[:-len(file_suffix)]
    try:
        df_task = duckdb.query(f"""
            select avg("avg-author-revs") as "avg2-author-revs", avg("avg-total-revs") as "avg2-total-revs"  from (
                select entity, avg("author-revs") as "avg-author-revs", avg("total-revs") as "avg-total-revs"
                    from "{log_dir}{filename}"
                    group by entity
            )
        """).to_df()
    except Exception as exc:
        # Best effort: report the offending file and its cause, then carry on.
        # Catching Exception (not a bare except) lets KeyboardInterrupt propagate.
        print(f"Erro ao ler o arquivo {log_dir}{filename}: {exc!r}")
        continue
    df_task['project'] = task_id
    task_frames.append(df_task)

# Concatenate once instead of growing a DataFrame inside the loop.
df_summary = pd.concat(task_frames, ignore_index=True) if task_frames else pd.DataFrame()

df_report = duckdb.query("""
    select r.*, t."avg2-author-revs" as "avg2-author-revs-effort", t."avg2-total-revs" as "avg2-total-revs-effort"
        from df_report r
        left join df_summary t on r.project = t.project
""").to_df()

In [9]:
# For every "<project>_logfile-entity-ownership.csv" in analysis/, average
# "added" and "deleted" per entity and then average those per-entity means;
# both results are attached to df_report as "avg2-ownership-*".
log_dir = "analysis/"
file_suffix = "_logfile-entity-ownership.csv"

task_frames = []
for filename in os.listdir(log_dir):
    if not filename.endswith(file_suffix):
        continue
    # Slice the suffix off the end; str.replace would also strip the marker
    # if it occurred inside the project name.
    task_id = filename[:-len(file_suffix)]
    try:
        df_task = duckdb.query(f"""
            select avg("avg-added") as "avg2-added", avg("avg-deleted") as "avg2-deleted"  from (
                select entity, avg("added") as "avg-added", avg("deleted") as "avg-deleted"
                    from "{log_dir}{filename}"
                    group by entity
            )
        """).to_df()
    except Exception as exc:
        # Best effort: report the offending file and its cause, then carry on.
        # Catching Exception (not a bare except) lets KeyboardInterrupt propagate.
        print(f"Erro ao ler o arquivo {log_dir}{filename}: {exc!r}")
        continue
    df_task['project'] = task_id
    task_frames.append(df_task)

# Concatenate once instead of growing a DataFrame inside the loop.
df_summary = pd.concat(task_frames, ignore_index=True) if task_frames else pd.DataFrame()

df_report = duckdb.query("""
    select r.*, t."avg2-added" as "avg2-ownership-added", t."avg2-deleted" as "avg2-ownership-deleted"
        from df_report r
        left join df_summary t on r.project = t.project
""").to_df()

In [10]:
# For every "<project>_logfile-author-churn.csv" in analysis/, compute the
# mean of "added", "deleted" and "commits" over all rows, then attach the
# three values to df_report.
log_dir = "analysis/"
file_suffix = "_logfile-author-churn.csv"

task_frames = []
for filename in os.listdir(log_dir):
    if not filename.endswith(file_suffix):
        continue
    # Slice the suffix off the end; str.replace would also strip the marker
    # if it occurred inside the project name.
    task_id = filename[:-len(file_suffix)]
    try:
        df_task = duckdb.query(f"""
            select avg("added") as "avg-author-churn-added", 
                    avg("deleted") as "avg-author-churn-deleted", 
                    avg("commits") as "avg-author-churn-commits"  
            from "{log_dir}{filename}" r
        """).to_df()
    except Exception as exc:
        # Best effort: report the offending file and its cause, then carry on.
        # Catching Exception (not a bare except) lets KeyboardInterrupt propagate.
        print(f"Erro ao ler o arquivo {log_dir}{filename}: {exc!r}")
        continue
    df_task['project'] = task_id
    task_frames.append(df_task)

# Concatenate once instead of growing a DataFrame inside the loop.
df_summary = pd.concat(task_frames, ignore_index=True) if task_frames else pd.DataFrame()

df_report = duckdb.query("""
    select r.*, t."avg-author-churn-added", t."avg-author-churn-deleted" , t."avg-author-churn-commits" 
        from df_report r
        left join df_summary t on r.project = t.project
""").to_df()

In [11]:
# For every "<project>_logfile-entity-churn.csv" in analysis/, compute the
# mean of "added", "deleted" and "commits" over all rows, then attach the
# three values to df_report.
log_dir = "analysis/"
file_suffix = "_logfile-entity-churn.csv"

task_frames = []
for filename in os.listdir(log_dir):
    if not filename.endswith(file_suffix):
        continue
    # Slice the suffix off the end; str.replace would also strip the marker
    # if it occurred inside the project name.
    task_id = filename[:-len(file_suffix)]
    try:
        df_task = duckdb.query(f"""
            select avg("added") as "avg-entity-churn-added", 
                    avg("deleted") as "avg-entity-churn-deleted", 
                    avg("commits") as "avg-entity-churn-commits"  
            from "{log_dir}{filename}" r
        """).to_df()
    except Exception as exc:
        # Best effort: report the offending file and its cause, then carry on.
        # Catching Exception (not a bare except) lets KeyboardInterrupt propagate.
        print(f"Erro ao ler o arquivo {log_dir}{filename}: {exc!r}")
        continue
    df_task['project'] = task_id
    task_frames.append(df_task)

# Concatenate once instead of growing a DataFrame inside the loop.
df_summary = pd.concat(task_frames, ignore_index=True) if task_frames else pd.DataFrame()

df_report = duckdb.query("""
    select r.*, t."avg-entity-churn-added", t."avg-entity-churn-deleted" , t."avg-entity-churn-commits" 
        from df_report r
        left join df_summary t on r.project = t.project
""").to_df()

In [12]:
# For every "<project>_logfile-fragmentation.csv" in analysis/, compute the
# mean of "fractal-value" and "total-revs" over all rows, then attach both
# values to df_report.
log_dir = "analysis/"
file_suffix = "_logfile-fragmentation.csv"

task_frames = []
for filename in os.listdir(log_dir):
    if not filename.endswith(file_suffix):
        continue
    # Slice the suffix off the end; str.replace would also strip the marker
    # if it occurred inside the project name.
    task_id = filename[:-len(file_suffix)]
    try:
        df_task = duckdb.query(f"""
            select avg("fractal-value") as "avg-fragmentation-fractal-value", 
                    avg("total-revs") as "avg-fragmentation-total-revs"
            from "{log_dir}{filename}" r
        """).to_df()
    except Exception as exc:
        # Best effort: report the offending file and its cause, then carry on.
        # Catching Exception (not a bare except) lets KeyboardInterrupt propagate.
        print(f"Erro ao ler o arquivo {log_dir}{filename}: {exc!r}")
        continue
    df_task['project'] = task_id
    task_frames.append(df_task)

# Concatenate once instead of growing a DataFrame inside the loop.
df_summary = pd.concat(task_frames, ignore_index=True) if task_frames else pd.DataFrame()

df_report = duckdb.query("""
    select r.*, t."avg-fragmentation-fractal-value", t."avg-fragmentation-total-revs" 
        from df_report r
        left join df_summary t on r.project = t.project
""").to_df()

In [13]:
# For every "<project>_logfile-main-dev.csv" in analysis/, compute the mean
# of "ownership" over all rows and attach it to df_report.
log_dir = "analysis/"
file_suffix = "_logfile-main-dev.csv"

task_frames = []
for filename in os.listdir(log_dir):
    if not filename.endswith(file_suffix):
        continue
    # Slice the suffix off the end; str.replace would also strip the marker
    # if it occurred inside the project name.
    task_id = filename[:-len(file_suffix)]
    try:
        df_task = duckdb.query(f"""
            select avg("ownership") as "avg-main-dev-ownership"
            from "{log_dir}{filename}" r
        """).to_df()
    except Exception as exc:
        # Best effort: report the offending file and its cause, then carry on.
        # Catching Exception (not a bare except) lets KeyboardInterrupt propagate.
        print(f"Erro ao ler o arquivo {log_dir}{filename}: {exc!r}")
        continue
    df_task['project'] = task_id
    task_frames.append(df_task)

# Concatenate once instead of growing a DataFrame inside the loop.
df_summary = pd.concat(task_frames, ignore_index=True) if task_frames else pd.DataFrame()

df_report = duckdb.query("""
    select r.*, t."avg-main-dev-ownership"
        from df_report r
        left join df_summary t on r.project = t.project
""").to_df()

In [14]:
# For every "<project>_logfile-refactoring-main-dev.csv" in analysis/,
# compute the mean of "ownership" over all rows and attach it to df_report.
log_dir = "analysis/"
file_suffix = "_logfile-refactoring-main-dev.csv"

task_frames = []
for filename in os.listdir(log_dir):
    if not filename.endswith(file_suffix):
        continue
    # Slice the suffix off the end; str.replace would also strip the marker
    # if it occurred inside the project name.
    task_id = filename[:-len(file_suffix)]
    try:
        df_task = duckdb.query(f"""
            select avg("ownership") as "avg-refactoring-main-dev-ownership"
            from "{log_dir}{filename}" r
        """).to_df()
    except Exception as exc:
        # Best effort: report the offending file and its cause, then carry on.
        # Catching Exception (not a bare except) lets KeyboardInterrupt propagate.
        print(f"Erro ao ler o arquivo {log_dir}{filename}: {exc!r}")
        continue
    df_task['project'] = task_id
    task_frames.append(df_task)

# Concatenate once instead of growing a DataFrame inside the loop.
df_summary = pd.concat(task_frames, ignore_index=True) if task_frames else pd.DataFrame()

df_report = duckdb.query("""
    select r.*, t."avg-refactoring-main-dev-ownership"
        from df_report r
        left join df_summary t on r.project = t.project
""").to_df()

In [15]:
# For every "<project>_logfile-revisions.csv" in analysis/, compute the mean
# of "n-revs" over all rows and attach it to df_report as "avg-revisions".
log_dir = "analysis/"
file_suffix = "_logfile-revisions.csv"

task_frames = []
for filename in os.listdir(log_dir):
    if not filename.endswith(file_suffix):
        continue
    # Slice the suffix off the end; str.replace would also strip the marker
    # if it occurred inside the project name.
    task_id = filename[:-len(file_suffix)]
    try:
        df_task = duckdb.query(f"""
            select avg("n-revs") as "avg-revisions"
            from "{log_dir}{filename}" r
        """).to_df()
    except Exception as exc:
        # Best effort: report the offending file and its cause, then carry on.
        # Catching Exception (not a bare except) lets KeyboardInterrupt propagate.
        print(f"Erro ao ler o arquivo {log_dir}{filename}: {exc!r}")
        continue
    df_task['project'] = task_id
    task_frames.append(df_task)

# Concatenate once instead of growing a DataFrame inside the loop.
df_summary = pd.concat(task_frames, ignore_index=True) if task_frames else pd.DataFrame()

df_report = duckdb.query("""
    select r.*, t."avg-revisions"
        from df_report r
        left join df_summary t on r.project = t.project
""").to_df()

In [16]:
# For every "<project>_logfile-soc.csv" in analysis/, compute the mean of
# "soc" over all rows and attach it to df_report as "avg-soc".
log_dir = "analysis/"
file_suffix = "_logfile-soc.csv"

task_frames = []
for filename in os.listdir(log_dir):
    if not filename.endswith(file_suffix):
        continue
    # Slice the suffix off the end; str.replace would also strip the marker
    # if it occurred inside the project name.
    task_id = filename[:-len(file_suffix)]
    try:
        df_task = duckdb.query(f"""
            select avg("soc") as "avg-soc"
            from "{log_dir}{filename}" r
        """).to_df()
    except Exception as exc:
        # Best effort: report the offending file and its cause, then carry on.
        # Catching Exception (not a bare except) lets KeyboardInterrupt propagate.
        print(f"Erro ao ler o arquivo {log_dir}{filename}: {exc!r}")
        continue
    df_task['project'] = task_id
    task_frames.append(df_task)

# Concatenate once instead of growing a DataFrame inside the loop.
df_summary = pd.concat(task_frames, ignore_index=True) if task_frames else pd.DataFrame()

df_report = duckdb.query("""
    select r.*, t."avg-soc"
        from df_report r
        left join df_summary t on r.project = t.project
""").to_df()

In [17]:
# Spot check on a single file: the same two-level entity-effort average,
# run directly against the spring-boot CSV. The frame is the cell's output.
spring_boot_effort_sql = """
                select avg("avg-author-revs") as "avg2-author-revs", avg("avg-total-revs") as "avg2-total-revs"  from (
                    select entity, avg("author-revs") as "avg-author-revs", avg("total-revs") as "avg-total-revs"
                        from "analysis/spring-boot_logfile-entity-effort.csv"
                        group by entity
                )
"""
duckdb.query(spring_boot_effort_sql).to_df()

Unnamed: 0,avg2-author-revs,avg2-total-revs
0,1.54057,4.682579


In [18]:
# Show the whole report (no row/column truncation, cells up to 100 chars),
# rounded to three decimals; the options apply only inside the with-block.
full_table_options = (
    'display.max_rows', None,
    'display.max_columns', None,
    'display.max_colwidth', 100,
)
with pd.option_context(*full_table_options):
    display(df_report.round(3))

Unnamed: 0,project,R1,R5,R10,number-of-commits,number-of-entities,number-of-entities-changed,number-of-authors,max-age-months,avg-age-months,avg2-degree-coupling,avg2-strength-communication,avg2-author-revs-effort,avg2-total-revs-effort,avg2-ownership-added,avg2-ownership-deleted,avg-author-churn-added,avg-author-churn-deleted,avg-author-churn-commits,avg-entity-churn-added,avg-entity-churn-deleted,avg-entity-churn-commits,avg-fragmentation-fractal-value,avg-fragmentation-total-revs,avg-main-dev-ownership,avg-refactoring-main-dev-ownership,avg-revisions,avg-soc
0,logging-log4j2,0.713,0.184,0.034,13848,17573,83688,261,177,60.695,55.382,6.61,1.713,4.762,30.969,14.006,4774.023,2809.747,53.057,70.905,41.731,4.762,0.239,4.762,0.594,0.486,4.762,3894.628
1,validation,0.992,0.958,0.919,441,396,1662,20,185,58.548,63.676,17.494,1.696,4.197,18.309,7.055,938.55,389.05,22.05,47.402,19.649,4.197,0.209,4.197,0.9,0.839,4.197,182.025
2,java-classmate,0.992,0.961,0.924,279,124,750,18,172,100.798,64.436,21.904,2.27,6.048,52.637,8.847,868.167,193.833,15.5,126.024,28.137,6.048,0.34,6.048,0.846,0.696,6.048,46.653
3,attoparser,0.992,0.963,0.927,328,1333,2930,4,149,114.788,62.382,6.958,1.79,2.198,72.387,36.593,28533.5,13866.25,82.0,85.622,41.609,2.198,0.039,2.198,0.97,0.466,2.198,310.911
4,checker-framework,0.612,0.086,0.007,17185,19996,88758,175,187,92.651,59.865,7.301,1.789,4.439,50.597,38.749,13555.56,11020.903,98.2,118.635,96.452,4.439,0.262,4.439,0.669,0.637,4.439,2437.356
5,webjars-locator-lite,0.99,0.95,0.903,348,63,631,28,143,34.444,55.214,29.319,2.149,10.016,25.925,22.044,291.25,266.036,12.429,129.444,118.238,10.016,0.447,10.016,0.638,0.624,10.016,50.4
6,HikariCP,0.913,0.635,0.403,2732,867,5840,159,136,111.016,66.535,40.972,3.088,6.736,64.119,50.702,587.101,450.17,17.182,107.669,82.557,6.736,0.08,6.736,0.614,0.588,6.736,79.568
7,common-annotations-api,0.994,0.972,0.945,67,185,323,17,83,35.146,37.0,15.973,1.246,1.746,60.212,9.354,907.412,231.471,3.941,83.384,21.27,1.746,0.071,1.746,0.776,0.428,1.746,42.114
8,error-prone,0.783,0.295,0.087,6521,5220,31806,342,161,66.561,58.909,8.908,1.826,6.093,85.309,23.32,2828.269,1036.009,19.067,185.3,67.876,6.093,0.35,6.093,0.748,0.668,6.093,1077.935
9,tomcat,0.386,0.008,0.0,26849,8884,93653,173,227,79.092,50.911,5.961,2.845,10.542,88.825,57.813,13791.775,8998.116,155.197,268.57,175.222,10.542,0.272,10.542,0.762,0.677,10.542,902.03


In [19]:
# Round once and write the same frame to both CSV and LaTeX.
report_rounded = df_report.round(3)
report_rounded.to_csv("report.csv")
report_rounded.to_latex("report.tex")
#df_report.to_markdown("report.md")

In [20]:
import pandas as pd
import numpy as np

# Load the report written by the export step.
df = pd.read_csv('report.csv')

# Linear model for R1 (QoS): an intercept plus one weighted report column per
# term. Terms are listed and accumulated in a fixed order so the
# floating-point sum is evaluated left to right, one term at a time.
r1_intercept = 0.91918
r1_weights = [
    ('avg-revisions', -0.00701),
    ('avg-refactoring-main-dev-ownership', -0.04142),
    ('avg-main-dev-ownership', 0.12267),
    ('avg-fragmentation-fractal-value', 0.37781),
    ('avg-entity-churn-deleted', -0.000149),
    ('avg-entity-churn-added', -0.00125),
    ('avg-author-churn-commits', -0.00114),
    ('avg-author-churn-deleted', -0.000019),
    ('avg-author-churn-added', 0.0000123),
    ('avg2-ownership-deleted', 0.00072),
    ('avg2-ownership-added', 0.00103),
    ('avg2-author-revs-effort', 0.01392),
    ('avg2-strength-communication', -0.000284),
    ('avg2-degree-coupling', -0.00000319),
    ('avg-age-months', -0.000188),
    ('max-age-months', 0.000192),
    ('number-of-authors', -0.00000345),
    ('number-of-entities-changed', -0.00000221),
    ('number-of-entities', 0.00000203),
    ('number-of-commits', -0.00000253),
]

r1_estimate = r1_intercept
for column_name, weight in r1_weights:
    r1_estimate = r1_estimate + weight * df[column_name]
df['R1_predicted'] = r1_estimate

# Parameters of the QoS -> QoE transformation.
alpha = 0.11786467779171651
beta = -1.4824915844102664
gamma = 0.45366165030276323

# Exponential transformation: QoE from the predicted QoS (R1_predicted).
df['QoE_predicted'] = alpha * np.exp(beta * df['R1_predicted']) + gamma

# Save the report with both prediction columns, rounded to three decimals.
output_path = 'report_with_qoe_predictions.csv'
df.round(3).to_csv(output_path, index=False)


In [21]:
import pandas as pd
import numpy as np

# Load the report with the predictions already computed.
df = pd.read_csv('report_with_qoe_predictions.csv')

# Reliability of a series system: the product of the individual
# (predicted) reliabilities.
system_reliability = np.prod(df['R1_predicted'])
print(f"System Series Reliability: {system_reliability:.4f}")

# Alternative kept for reference: the same product over the observed R1 column.
#system_reliability = np.prod(df['R1'])
#print(f"System Series Reliability: {system_reliability:.4f}")

# QoE of the whole system: apply the QoS -> QoE transformation (alpha, beta and
# gamma come from the cell that computes QoE_predicted) to the series
# reliability. It is stored under its own name and printed with its own label,
# so the reliability value is neither overwritten nor mislabelled.
system_qoe = alpha * np.exp(beta * system_reliability) + gamma
print(f"System QoE: {system_qoe:.4f}")



System Series Reliability: 0.0017
System Series Reliability: 0.5712


In [22]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Observed vs. predicted R1.
y_true, y_pred = df['R1'], df['R1_predicted']

# Error metrics for the prediction; RMSE is derived from MSE.
mae = mean_absolute_error(y_true, y_pred)
mse = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_true, y_pred)

# Report the four metrics, one per line.
print(
    f"MAE (Erro Médio Absoluto): {mae:.4f}",
    f"MSE (Erro Quadrático Médio): {mse:.4f}",
    f"RMSE (Raiz do Erro Quadrático Médio): {rmse:.4f}",
    f"R² (Coeficiente de Determinação): {r2:.4f}",
    sep="\n",
)

MAE (Erro Médio Absoluto): 0.0352
MSE (Erro Quadrático Médio): 0.0023
RMSE (Raiz do Erro Quadrático Médio): 0.0479
R² (Coeficiente de Determinação): 0.9069
