# Libs

In [None]:
from datetime import datetime
import os
import pandas as pd
import zipfile

# Utils 

## Consts

In [None]:
# Timestamp captured once, at the moment this cell is executed.
TODAY = datetime.now()
# NOTE(review): not referenced in the visible code — presumably the size of
# the repository sample; confirm before relying on it.
NUMBER_OF_REPOSITORIES=1000
# NOTE(review): the identifier is misspelled ("LANGUANGE"). It is left as-is
# because renaming it would break any code that references it.
PRIMARY_LANGUANGE = 'java'

# Portion of the current working directory that precedes the first
# "\lab-experimentacao-02" segment, with backslashes turned into forward
# slashes. The split separator contains a backslash, so a working directory
# that does not contain that exact text is kept whole.
ROOT_PATH = os.getcwd().split('\\lab-experimentacao-02')[0].replace('\\', '/')
# Base URL used to build clone URLs. It already ends with '/'.
BASE_URL_GITHUB = 'https://github.com/'
# NOTE(review): not referenced in the visible code — presumably the upstream
# repository of the CK jar that execute_ck runs; confirm.
CK_REPO = 'https://github.com/mauricioaniche/ck'

## Functions

In [None]:
def clone_repo(dir_path: str, nameWithOwner: str) -> bool:
    """Clone a GitHub repository inside ``dir_path``.

    The process working directory is changed to ``dir_path`` and is NOT
    restored afterwards. This is kept on purpose: other helpers in this file
    (``execute_ck`` builds its output path from ``os.getcwd()`` and
    ``delete_repo`` removes a directory by relative name) rely on the working
    directory left behind by this call.

    Args:
        dir_path: Directory in which ``git clone`` is executed.
        nameWithOwner: Repository path appended to ``BASE_URL_GITHUB``
            (the main loop passes values of the ``Repositorio`` column).

    Returns:
        True when ``git clone`` exits with status 0; False otherwise,
        including when the ``git`` executable cannot be started.

    Raises:
        OSError: If ``dir_path`` cannot be entered (raised by ``os.chdir``).
    """
    # Local import: the file's import cell does not provide subprocess.
    import subprocess

    # BASE_URL_GITHUB already ends with '/', so normalise both sides to avoid
    # producing "https://github.com//owner/name".
    repo_url = f"{BASE_URL_GITHUB.rstrip('/')}/{nameWithOwner.lstrip('/')}"

    os.chdir(dir_path)

    try:
        # Argument list (no shell) so the URL is never re-parsed by a shell.
        completed = subprocess.run(['git', 'clone', repo_url], check=False)
    except OSError:
        # git could not be launched; report failure like a non-zero exit.
        return False
    return completed.returncode == 0

    
def execute_ck(repo_name: str, path_destiny_name: str = None, use_jars: str = 'true', max_files_per_partition: int = 0, variables_and_fields_metrics: str = 'false') -> bool:
    """Run the CK jar against a cloned repository.

    The jar is expected at
    ``{ROOT_PATH}/ck/target/ck-0.7.1-SNAPSHOT-jar-with-dependencies.jar`` and
    the project at ``{ROOT_PATH}/{repo_name}``. Results are written under
    ``<cwd>/lab-experimentacao-02/scripts/output/{path_destiny_name}/``; the
    directory is created when missing.

    NOTE(review): the output location is derived from the current working
    directory, while the code that reads the results builds its path from
    ``ROOT_PATH``. They only match when the working directory equals
    ``ROOT_PATH`` (which ``clone_repo`` arranges) — confirm this coupling is
    intended.

    Args:
        repo_name: Directory name of the cloned project under ``ROOT_PATH``.
        path_destiny_name: Output sub-directory name; defaults to
            ``repo_name`` when None.
        use_jars: Forwarded to CK as its 2nd positional argument.
        max_files_per_partition: Forwarded to CK as its 3rd positional
            argument.
        variables_and_fields_metrics: Forwarded to CK as its 4th positional
            argument.

    Returns:
        True when the ``java`` process exits with status 0; False otherwise,
        including when ``java`` cannot be started.
    """
    # Local import: the file's import cell does not provide subprocess.
    import subprocess

    if path_destiny_name is None:
        path_destiny_name = repo_name

    ck_path = f'{ROOT_PATH}/ck/target/ck-0.7.1-SNAPSHOT-jar-with-dependencies.jar'
    project_path = f'{ROOT_PATH}/{repo_name}'

    # Normalised outside the f-string: reusing the f-string's own quote
    # character and a backslash inside a replacement field is only accepted
    # from Python 3.12 onwards.
    current_dir = os.getcwd().replace('\\', '/')
    # The trailing '/' is kept exactly as before — presumably CK appends its
    # file names directly to this prefix; confirm before removing it.
    destiny_path = f'{current_dir}/lab-experimentacao-02/scripts/output/{path_destiny_name}/'

    os.makedirs(destiny_path, exist_ok=True)

    # Argument list (no shell) so paths containing spaces stay single
    # arguments.
    command_to_run_ck = [
        'java',
        '-jar',
        ck_path,
        project_path,
        str(use_jars),
        str(max_files_per_partition),
        str(variables_and_fields_metrics),
        destiny_path,
    ]
    try:
        completed = subprocess.run(command_to_run_ck, check=False)
    except OSError:
        # java could not be launched; report failure like a non-zero exit.
        return False
    return completed.returncode == 0
  
    
def delete_repo(repo_name: str = None, *repo_url: str) -> bool:
    """Remove a cloned repository directory with ``rmdir /S /Q``.

    The directory name is used as given, so it is resolved relative to the
    current working directory. The command syntax is the one of the Windows
    command interpreter.

    Args:
        repo_name: Name of the directory to remove. When empty or None, the
            name is derived from the first value in ``repo_url``.
        *repo_url: Optional repository URL(s); only the first one is used,
            and only when ``repo_name`` is not given.

    Returns:
        True when the ``rmdir`` command exits with status 0, False otherwise.

    Raises:
        ValueError: If ``repo_name`` is missing and no usable name can be
            derived from ``repo_url``.
    """
    if not repo_name:
        if not repo_url:
            raise ValueError("repo_name must be provided or calculated from repo_url")

        # Last path segment of the URL, tolerating a trailing '/'.
        derived_name = repo_url[0].rstrip('/').split('/')[-1]
        # Strip only a trailing ".git"; a plain replace would also mangle
        # names such as "user.github.io".
        if derived_name.endswith('.git'):
            derived_name = derived_name[:-len('.git')]
        if not derived_name:
            raise ValueError("repo_name must be provided or calculated from repo_url")
        repo_name = derived_name

    return os.system(f'rmdir /S /Q {repo_name}') == 0

def join_csv(path_files: list):
    """Read several CSV files and concatenate them into one DataFrame.

    Files that do not exist are reported on stdout and skipped. The result is
    returned to the caller; nothing is written to disk.

    Args:
        path_files: Paths of the CSV files to read, in the order they should
            appear in the result.

    Returns:
        A ``pandas.DataFrame`` with the rows of every readable file and a
        fresh 0..n-1 index.

    Raises:
        Exception: If ``path_files`` is empty.
        ValueError: If none of the listed files could be read.
    """
    if not path_files:
        raise Exception("Lista de arquivos vazia.")

    dataframes = []
    for path in path_files:
        try:
            dataframes.append(pd.read_csv(path))
        except FileNotFoundError:
            print(f"Arquivo {path} não encontrado.")

    # pd.concat raises a generic ValueError on an empty list; fail with an
    # explicit message of the same type instead.
    if not dataframes:
        raise ValueError("Nenhum arquivo da lista pôde ser lido.")

    return pd.concat(dataframes, ignore_index=True)

def calculate_metrics(repo_name: str) -> tuple:
    """Summarise the CK class-level results of one repository.

    Reads ``{ROOT_PATH}/lab-experimentacao-02/scripts/output/{repo_name}/class.csv``
    and aggregates its ``cbo``, ``dit`` and ``lcom`` columns. Standard
    deviations are population values (``ddof=0``).

    Args:
        repo_name: Name of the output sub-directory holding ``class.csv``.

    Returns:
        A 7-tuple, in this order: mean CBO, CBO standard deviation, CBO
        standard deviation (again), maximum DIT, DIT standard deviation,
        mean LCOM, LCOM standard deviation.

        NOTE(review): the CBO standard deviation occupies two positions.
        This looks unintended, but the shape is preserved so existing
        callers that unpack seven values keep working — confirm and fix
        together with the callers.
    """
    class_csv = f'{ROOT_PATH}/lab-experimentacao-02/scripts/output/{repo_name}/class.csv'
    frame = pd.read_csv(class_csv)

    def population_std(column: str):
        # ddof=0 -> divide by N (population standard deviation).
        return frame[column].std(ddof=0)

    cbo_mean = frame['cbo'].mean()
    cbo_std = population_std('cbo')
    dit_max = frame['dit'].max()
    dit_std = population_std('dit')
    lcom_mean = frame['lcom'].mean()
    lcom_std = population_std('lcom')

    return (cbo_mean, cbo_std, cbo_std, dit_max, dit_std, lcom_mean, lcom_std)

def delete_csv(path_csv: str = None) -> None:
    """Delete a file by running the shell command ``del <path_csv>``.

    The exit status of the command is ignored.

    NOTE(review): ``del`` is a Windows command-interpreter built-in, and it
    may treat forward slashes in ``path_csv`` as option switches — confirm
    the paths passed here use backslashes.

    Args:
        path_csv: Path of the file to delete. When None or empty nothing is
            executed (previously the default ran ``del None``).
    """
    if not path_csv:
        return
    os.system(f'del {path_csv}')
  
def create_file(list_of_values: list, file_path: str) -> None:
    """Write each value of ``list_of_values`` on its own line of a text file.

    The file is (re)created at ``file_path`` and encoded as UTF-8 so the
    result does not depend on the platform's default encoding. Values are
    converted with ``str`` formatting, so non-string items are accepted.

    This is a best-effort helper: any failure is reported on stdout and the
    function still returns normally.

    Args:
        list_of_values: Values to write, one per line.
        file_path: Destination file path; an existing file is overwritten.
    """
    try:
        with open(file_path, 'w', encoding='utf-8') as arquivo:
            arquivo.writelines(f'{valor}\n' for valor in list_of_values)
        print(f"Arquivo '{file_path}' criado com sucesso.")
    except Exception as e:
        # Deliberately broad: callers use this at the very end of the run and
        # only expect a message, never an exception.
        print(f"Ocorreu um erro ao criar o arquivo: {e}")


# Script to Clone

In [None]:
# Load the repository list (semicolon-separated CSV). The path is relative,
# so it is resolved against the working directory at the time this cell runs.
# NOTE(review): clone_repo() changes the working directory later on, so
# re-running this cell after the cloning loop may resolve to a different
# location — confirm.
data = pd.read_csv('../dataset/dados_tratados.csv', sep=';')

In [None]:
# Clone every repository listed in `data`, run CK on it, aggregate the
# class-level metrics and remove the clone again. One value per repository is
# collected in each list below (None when the repository could not be
# measured), and the lists become new columns of `data` at the end.
destiny_path = ROOT_PATH
repo_teste = 'pabloaugustocm17/rinha-backend-2024-java'
means_cbo = []
standards_cbo = []
means_lcom = []
standards_lcom = []
dit_highs = []
locs = []

# java.lang.IllegalStateException: invalid
# First entry is the header row of the error report written afterwards.
repo_with_erros = ['nameWithOwner;error']

for name_with_owner, repo_name in zip(data['Repositorio'], data['Nome']):
    owner = name_with_owner.split('/')[0]
    path_destiny_name = f'{owner}_{repo_name}'

    # Defaults used when the repository fails or CK produced no rows.
    mean_cbo = None
    standard_deviation_cbo = None
    mean_lcom = None
    standard_deviation_lcom = None
    dit_high = None
    sum_locs = None

    try:
        clone_repo(dir_path=destiny_path, nameWithOwner=name_with_owner)
        execute_ck(repo_name=repo_name, path_destiny_name=path_destiny_name)
        path_ck_result_class = f'{ROOT_PATH}/lab-experimentacao-02/scripts/output/{path_destiny_name}/class.csv'
        data_ck = pd.read_csv(path_ck_result_class)
        if len(data_ck) != 0:
            # The right-hand side is evaluated completely before any name is
            # bound, so a failure on one column leaves every metric at None.
            (
                mean_cbo,
                standard_deviation_cbo,
                mean_lcom,
                standard_deviation_lcom,
                dit_high,
                sum_locs,
            ) = (
                data_ck['cbo'].mean(),
                data_ck['cbo'].std(ddof=0),  # population standard deviation
                data_ck['lcom'].mean(),
                data_ck['lcom'].std(ddof=0),  # population standard deviation
                data_ck['dit'].max(),
                data_ck['loc'].sum(),
            )
    except Exception as ex:
        # Best-effort batch run: record the failure and move on to the next
        # repository.
        print("Ocorreu uma exceção:", ex)
        print('Deu erro para: ', path_destiny_name)
        repo_with_erros.append(f'{path_destiny_name};{ex}')
    finally:
        # Always record one entry per repository so the lists stay aligned
        # with the rows of `data`, and always remove the clone.
        # No `continue` here: inside `finally` it would silently discard an
        # in-flight exception such as KeyboardInterrupt.
        means_cbo.append(mean_cbo)
        standards_cbo.append(standard_deviation_cbo)
        means_lcom.append(mean_lcom)
        standards_lcom.append(standard_deviation_lcom)
        dit_highs.append(dit_high)
        locs.append(sum_locs)
        delete_repo(repo_name=repo_name)


data['média CBO'] = means_cbo
data['desvio padão CBO'] = standards_cbo
data['média LCOM'] = means_lcom
data['desvio padão LCOM'] = standards_lcom
data['DIT máximo'] = dit_highs
data['LOC Total'] = locs

data.tail()

In [None]:
# repo_with_erros[0] is the header row ('nameWithOwner;error'), so it is not
# counted as an error.
print('TOTAL DE ERROS: ', len(repo_with_erros) - 1)
# Persist the enriched table and the error report next to the dataset.
data.to_csv(f'{ROOT_PATH}/lab-experimentacao-02/scripts/dataset/dados_tratados.csv', index=False, sep=';')
create_file(list_of_values=repo_with_erros, file_path= f'{ROOT_PATH}/lab-experimentacao-02/scripts/dataset/repo_with_erros.csv')