<a href="https://colab.research.google.com/github/mining-software-repositories/treinamento/blob/main/test_treemap.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [None]:
# Install and update plotly
!pip install plotly==5.5.0

In [None]:
!pip install lizard

In [2]:
# https://github.com/plotly/plotly.py
import pandas as pd
import lizard

In [3]:
import os
from pathlib import Path
import plotly.graph_objects as go

# Clone repository

In [6]:
![ -d promocity ] && echo "Remove diretório promocity antigo" && rm -rf promocity
!git clone https://github.com/armandossrecife/promocity.git
!if [ $? -eq 0 ]; then echo "Repositorio promocity clonado com sucesso!"; else echo "Falha ao clonar o repositorio promocity"; fi

Cloning into 'promocity'...
remote: Enumerating objects: 841, done.[K
remote: Counting objects: 100% (43/43), done.[K
remote: Compressing objects: 100% (29/29), done.[K
remote: Total 841 (delta 10), reused 21 (delta 2), pack-reused 798[K
[KReceiving objects: 100% (841/841), 4.00 MiB | 209.00 KiB/s, done.
[KResolving deltas: 100% (324/324), done.
Repositorio promocity clonado com sucesso!


In [None]:
#For google colab
!sudo apt install tree

In [4]:
# Cria um arquivo contendo a quantidade de LOC por arquivo
!find promocity -name *.java | xargs wc -l > locarquivosjava.txt
list_locs_of_files = !cat locarquivosjava.txt

# Cria uma lista com elementos que representam o LOC e o arquivo
# (Loc, arquivo)
list_locs_of_files_updated = []
for each in list_locs_of_files:
  elementos = each.split(' ') 
  item = elementos[-2], elementos[-1]
  list_locs_of_files_updated.append(item)

In [5]:
def search_loc_of_file(file_name, list):
  """
  Look up the value stored for a file in a list of (value, path) tuples.

  An entry whose path equals file_name, or whose last path component(s)
  equal file_name, wins. Only when no such entry exists does the function
  fall back to the first entry whose path merely contains file_name, which
  is the original lookup rule.

  @param: str file_name: file name, with or without leading directories
  @param: list list: iterable of (value, path) tuples; value must be int-convertible
  @return int value of the matching entry, or None when nothing matches
  """
  partial_match = None
  for each in list:
    path = each[1]
    # Exact file match: avoids 'User.java' picking up 'SuperUser.java'.
    if path == file_name or path.endswith('/' + file_name):
      return int(each[0])
    # Remember the first substring hit as a backward-compatible fallback.
    if partial_match is None and file_name in path:
      partial_match = each
  if partial_match is not None:
    return int(partial_match[0])
  return None

In [6]:
# Sanity check: show the line count recorded for UserController.java.
search_loc_of_file('UserController.java', list_locs_of_files_updated)

587

In [7]:
# Get the maximum cyclomatic complexity of an analysed file (class)
def get_max_cyclomatic_complexity(filename_with_path):
  """
  Get the max cyclomatic complexity of a file, taken over the cyclomatic
  complexity of each function that lizard reports for it.
  @param: str filename_with_path: filename with full path
  @return max cyclomatic complexity of filename, or 0 when lizard reports no functions
  """
  file_to_analyse = lizard.analyze_file(filename_with_path)
  # default=0 covers files in which no function was found.
  return max(
      (function.cyclomatic_complexity for function in file_to_analyse.function_list),
      default=0,
  )

In [8]:
# Drop the "total" summary row(s) that `wc -l` adds to its output.
# Filtering by name instead of deleting the last element keeps this cell
# idempotent (re-running it no longer removes a real file) and is also safe
# when `wc` printed no summary row or more than one.
list_locs_of_files_updated = [
    item for item in list_locs_of_files_updated if item[1] != 'total'
]

# List with the CC of each file: (cc, file)
list_cc_files = [
    (get_max_cyclomatic_complexity(filename_with_path), filename_with_path)
    for _, filename_with_path in list_locs_of_files_updated
]

# (cc, file with its full path), restricted to files under src/main/java
list_cc_src_java_files = [each for each in list_cc_files if 'src/main/java' in each[1]]

# Analyzer

In [9]:
# List every file and directory of the clone, one entry per output line.
# `tree -i` drops the indentation lines and `-f` prints each entry with its
# path prefix (relative to promocity, because of the `cd`).
list_of_files_and_directories = !cd promocity && tree -i -f

In [10]:
# Re-root the listing on the clone directory: replace the leading './' with
# 'promocity/'. Only the prefix is rewritten, so a './' that happens to occur
# later in a path (a directory name ending in '.') is left untouched.
list_of_files_and_directories_updated = [
    'promocity/' + each[2:] if each.startswith('./') else each
    for each in list_of_files_and_directories
]

In [11]:
# Select the Java source tree:
# keep only the files and directories located under src/main/java/
list_of_files_and_directories_src = [
    entry
    for entry in list_of_files_and_directories_updated
    if 'src/main/java/' in entry
]

In [12]:
# Fill the treemap labels and parents
# adapted from https://plotly.com/python/treemaps/
labels = []
parents = []
for entry in list_of_files_and_directories_src:
  # The label is the last path component (file or directory name).
  labels.append(entry.split('/')[-1])
  if os.path.isdir(entry):
    # Directory: the parent is the enclosing directory name; directories
    # sitting directly under 'java' get ' ' as their parent.
    parent_name = os.path.dirname(entry).split('/')[-1]
    parents.append(' ' if parent_name == 'java' else parent_name)
  else:
    # File: the parent is the name of the directory that contains it.
    parents.append(str(Path(entry).parent).split('/')[-1])

In [13]:
# Treemap tile sizes: the LoC of each Java file, 1 for every other entry.
values = []
for each in list_of_files_and_directories_src:
  nome_item = each.split('/')[-1]
  if nome_item.endswith('.java'):
    # Look the file up by its full path: a bare file name would return the
    # first entry whose path merely contains that name.
    values.append(search_loc_of_file(each, list_locs_of_files_updated))
  else:
    values.append(1)

In [14]:
# Maximum cyclomatic complexity of each Java file, 1 for every other entry.
values_cc = []
for each in list_of_files_and_directories_src:
  nome_item = each.split('/')[-1]
  if nome_item.endswith('.java'):
    # Look the file up by its full path: a bare file name would return the
    # first entry whose path merely contains that name.
    cc = search_loc_of_file(each, list_cc_src_java_files)
    values_cc.append(cc)
  else:
    values_cc.append(1)

values_series = pd.Series(values_cc)

In [15]:
# Treemap trace: tiles are sized by `values` and coloured by `values_series`,
# which is also exposed to the hover template as customdata.
treemap_trace = go.Treemap(
    labels=labels,
    parents=parents,
    values=values,
    hoverinfo="value+text",
    customdata=values_series,
    hovertext=values_series,
    hovertemplate='LoC: %{value}<br>CC: %{customdata}',
    marker={
        "colors": values_series,
        "colorscale": 'dense',
        "showscale": True,
        "colorbar": dict(title='CC'),
    },
)

fig = go.Figure(treemap_trace)
fig.update_layout(margin={"t": 50, "l": 25, "r": 25, "b": 25})
fig.show()