# Code Duplication Visualization

In [None]:
from benchmark.environment import IMPROVEMENTS, STATES
from os import chdir
from pathlib import Path
import pandas as pd


# Sonar connection settings (not referenced by the cells shown in this part
# of the notebook).
SONAR_PORT = 9999
SONAR_PASSWORD = "password"


# Only compute the defaults when the names are not already defined, so that
# re-running this cell (or pre-defining the names) does not move the working
# directory up another level.
if "PROJECT_DIR" not in globals():
    PROJECT_DIR = Path().resolve().parent
    chdir(PROJECT_DIR)


# Directory holding the CSV files read by the cells below.
if "OUT_DIR" not in globals():
    OUT_DIR = PROJECT_DIR.joinpath("code-duplication", "out")

The results of the code duplication analysis, previously stored in CSV files, can be exported as LaTeX tables for inclusion in the bachelor's thesis.

In [None]:
# Load the overall duplication figures and print them as a LaTeX table
# indexed by (Improvement, State).
df = pd.read_csv(OUT_DIR / "overall-duplication.csv")
# Column headers: hyphens become spaces, then title case.
df = df.rename(columns=lambda name: name.replace("-", " ").title())
# Apply the same prettifying to the values of the two key columns before
# turning them into the row index.
df = df.assign(
    Improvement=df["Improvement"].str.replace("-", " ").str.title(),
    State=df["State"].str.title(),
).set_index(["Improvement", "State"])
print(df.to_latex())

\begin{tabular}{llrrr}
\toprule
 &  & Lines & Duplicated & Duplicated Density \\
Improvement & State &  &  &  \\
\midrule
\multirow[t]{2}{*}{Environment Independence} & Before & 60417 & 3977 & 5.300000 \\
 & After & 59232 & 4300 & 5.900000 \\
\cline{1-5}
\multirow[t]{2}{*}{Service Merge} & Before & 71913 & 6135 & 7.000000 \\
 & After & 57515 & 2245 & 3.200000 \\
\cline{1-5}
\bottomrule
\end{tabular}



In [None]:
# Print one LaTeX table of per-service duplication figures for every
# (improvement, state) combination.
for improvement in IMPROVEMENTS:
    for state in STATES:
        df = (
            pd.read_csv(OUT_DIR / f"service-duplication_{improvement}_{state}.csv")
            # Column headers: hyphens become spaces, then title case.
            .rename(columns=lambda name: name.replace("-", " ").title())
            # Both values are fixed for a given file, so the columns are
            # left out of the printed table.
            .drop(columns=["Improvement", "State"])
        )
        # Make the service identifiers human readable: remove "dbrepo-",
        # turn hyphens into spaces, expand "db" and title-case the result.
        df["Service"] = df["Service"].str.replace("dbrepo-", "").str.replace("-", " ")
        df["Service"] = df["Service"].str.replace("db", "database").str.title()
        # Series.replace (not .str.replace) matches whole cell values, so
        # only an entry that is exactly "Ui" is renamed.
        df["Service"] = df["Service"].replace("Ui", "UI Service")
        print(df.to_latex(index=False))

\begin{tabular}{lrrr}
\toprule
Service & Lines & Duplicated & Duplicated Density \\
\midrule
Analyse Service & 550 & 0 & 0.000000 \\
Authentication Service & 30 & 0 & 0.000000 \\
Broker Service & 14 & 0 & 0.000000 \\
Container Service & 3710 & 269 & 5.600000 \\
Database Service & 5810 & 504 & 6.600000 \\
Identifier Service & 4517 & 389 & 6.700000 \\
Metadata Database & 11212 & 604 & 3.900000 \\
Metadata Service & 1513 & 178 & 9.600000 \\
Query Service & 9527 & 872 & 7.200000 \\
Semantics Service & 3279 & 299 & 7.300000 \\
Table Service & 4530 & 547 & 9.500000 \\
UI Service & 12343 & 141 & 1.100000 \\
User Service & 3382 & 174 & 4.000000 \\
\bottomrule
\end{tabular}

\begin{tabular}{lrrr}
\toprule
Service & Lines & Duplicated & Duplicated Density \\
\midrule
Analyse Service & 550 & 0 & 0.000000 \\
Authentication Service & 30 & 0 & 0.000000 \\
Broker Service & 14 & 0 & 0.000000 \\
Container Service & 2964 & 269 & 7.000000 \\
Database Service & 5728 & 689 & 9.200000 \\
Identifier Service 