In [None]:
# Mount Google Drive so the files under /content/drive/MyDrive are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from pathlib import Path
# Drive folder that is searched for the '*eval-preds*.json' prediction files.
prediction_directory = Path("/content/drive/MyDrive/runi_nlp/nlp-project/models-eden")

In [None]:
# Sort the matches: directory listing order depends on the filesystem, and later
# cells index into this list (e.g. pred_files[-1]) and emit rows in this order.
pred_files = sorted(prediction_directory.glob('*eval-preds*.json'))

In [None]:
# Show the file names (without extension) of the prediction files that were found.
[pred_file.stem for pred_file in pred_files]

['plbart-trans-75-1-15-ep-eval-preds-0',
 'plbart-trans-1-ep-eval-preds-0',
 'codet5-base-trans-80-1-1-ep-eval-preds-0',
 'codet5-base-trans-1-0-0-ep-eval-preds-0',
 'plbart-trans-80-1-1-ep-eval-preds-0',
 'codet5-base-half-train-data-trans-80-1-1-ep-eval-preds-0',
 'plbart-half-data-trans-1-0-0-ep-eval-preds-0 (1)',
 'plbart-half-data-trans-80-10-10-200-ep-eval-preds-0',
 'codet5-small-trans-functions-concat-80-10-10-ep-eval-preds-0',
 'codet5-1-eval-preds']

In [None]:
# %pip installs into the environment of the running kernel.
# codebleu is pinned to the release this notebook was originally run with.
%pip install -q codebleu==0.2.1
%pip install -q torchmetrics

Collecting codebleu
  Downloading codebleu-0.2.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tree-sitter<1.0.0,>=0.20.0 (from codebleu)
  Downloading tree_sitter-0.20.1.tar.gz (126 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.2/126.2 kB[0m [31m13.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: tree-sitter
  Building wheel for tree-sitter (pyproject.toml) ... [?25l[?25hdone
  Created wheel for tree-sitter: filename=tree_sitter-0.20.1-cp310-cp310-linux_x86_64.whl size=424714 sha256=af1b33894c51db16ad1a342bf846588602ee34dbe7f7dfae47b9ed45ade424e9
  Stored in directory: /root/.cache/pip/wheels/e6/d0/7a/a108b

In [None]:
def python_program_parser(python_densed: str) -> str:
    """Rebuild multi-line Python source from its dense, single-line token form.

    The dense form is a whitespace-separated token stream in which the marker
    tokens ``NEW_LINE``, ``INDENT`` and ``DEDENT`` stand for a line break and
    for entering / leaving an indentation level.

    Every output line is prefixed with the indentation level that is active
    when its first token is seen, so all lines of a block are indented, not
    only the line that directly follows an ``INDENT``/``DEDENT`` marker.

    Args:
        python_densed: The dense token stream.

    Returns:
        The reconstructed source, lines joined with ``"\\n"``. Tokens are
        separated by a single space unless either neighbour is a single
        punctuation character (so ``x = 1`` is rendered as ``x=1``). Lines
        without tokens are rendered as empty strings.
    """

    # Constants
    INDENT_TOKEN = "INDENT"
    DEDENT_TOKEN = "DEDENT"
    NEW_LINE_TOKEN = "NEW_LINE"
    INDENT_SIZE = 4
    PUNCTUATIONS = set("()[]{}:.,;+-*/%&|^<>=")

    # Parallel lists: the tokens of each line and the indent level of each line.
    lines = [[]]
    line_indents = [0]
    current_indent = 0

    for token in python_densed.split():
        if token == NEW_LINE_TOKEN:
            lines.append([])  # Start a new line
            line_indents.append(0)
        elif token == INDENT_TOKEN:
            current_indent += 1
        elif token == DEDENT_TOKEN:
            # Clamp at zero so an unbalanced DEDENT cannot produce a negative level.
            current_indent = max(current_indent - 1, 0)
        else:
            current_line = lines[-1]
            if not current_line:
                # First real token of the line fixes the line's indentation, which
                # lets several consecutive INDENT/DEDENT markers settle first.
                line_indents[-1] = current_indent
            elif current_line[-1] not in PUNCTUATIONS and token not in PUNCTUATIONS:
                current_line.append(' ')
            current_line.append(token)

    # Construct the final result
    return '\n'.join(
        ' ' * (indent * INDENT_SIZE) + ''.join(parts) if parts else ''
        for indent, parts in zip(line_indents, lines)
    )


In [None]:

from codebleu import calc_codebleu
from torchmetrics.text import BLEUScore, MatchErrorRate, CharErrorRate, TranslationEditRate, SacreBLEUScore
from torchmetrics.text.rouge import ROUGEScore
from torchmetrics import MetricCollection
import json
from tqdm import tqdm

def compute_metrics(preds):
    """Score a list of prediction records with text and code-similarity metrics.

    Args:
        preds: Sequence of dicts, each holding the keys ``'python_t'`` and
            ``'python_p'`` (presumably the target and the predicted Python
            code as strings; verify against the prediction files).

    Returns:
        A 3-tuple ``(metrics_a, metrics_b, codebleu)``:
        the computed BLEU / SacreBLEU / TER collection, the computed
        ROUGE / MatchErrorRate / CharErrorRate collection, and the result of
        ``calc_codebleu`` over all records.
    """
    references = [sample['python_t'] for sample in preds]
    hypotheses = [sample['python_p'] for sample in preds]

    # Metrics fed with a one-element batch of predictions and nested references.
    corpus_metrics = MetricCollection(BLEUScore(), SacreBLEUScore(), TranslationEditRate())
    # Metrics fed with the plain prediction / target string pair.
    string_metrics = MetricCollection(ROUGEScore(), MatchErrorRate(), CharErrorRate())

    for sample in tqdm(preds):
        hypothesis = sample['python_p']
        reference = sample['python_t']
        corpus_metrics.update([hypothesis], [[reference]])
        string_metrics.update(hypothesis, reference)

    corpus_scores = corpus_metrics.compute()
    string_scores = string_metrics.compute()
    codebleu_scores = calc_codebleu(
        references,
        hypotheses,
        lang="python",
        weights=(0.25, 0.25, 0.25, 0.25),
        tokenizer=None,
    )
    return corpus_scores, string_scores, codebleu_scores

# Score every prediction file and collect the results keyed by file stem.
model_metrics = {}
for pf in pred_files:
    # Keep the file open only while parsing: scoring one file takes minutes,
    # and there is no reason to hold the Drive handle for that long.
    with pf.open("r", encoding="utf-8") as f:
        eval_preds = json.load(f)
    print(f'Calculating metrics for: {pf.stem}')
    v_m_a, v_m_b, v_codebleu = compute_metrics(eval_preds)
    model_metrics[pf.stem] = {
        'v_m_a': v_m_a,
        'v_m_b': v_m_b,
        'v_codebleu': v_codebleu
    }

Calculating metrics for: plbart-trans-75-1-15-ep-eval-preds-0


100%|██████████| 3938/3938 [04:32<00:00, 14.46it/s] 


Calculating metrics for: plbart-trans-1-ep-eval-preds-0


100%|██████████| 3938/3938 [04:44<00:00, 13.85it/s] 


Calculating metrics for: codet5-base-trans-80-1-1-ep-eval-preds-0


100%|██████████| 3938/3938 [03:53<00:00, 16.89it/s]


Calculating metrics for: codet5-base-trans-1-0-0-ep-eval-preds-0


100%|██████████| 3938/3938 [04:23<00:00, 14.96it/s]


Calculating metrics for: plbart-trans-80-1-1-ep-eval-preds-0


100%|██████████| 3938/3938 [05:19<00:00, 12.31it/s]


Calculating metrics for: codet5-base-half-train-data-trans-80-1-1-ep-eval-preds-0


100%|██████████| 3938/3938 [05:13<00:00, 12.56it/s]


Calculating metrics for: plbart-half-data-trans-1-0-0-ep-eval-preds-0 (1)


100%|██████████| 3938/3938 [05:43<00:00, 11.47it/s]


Calculating metrics for: plbart-half-data-trans-80-10-10-200-ep-eval-preds-0


100%|██████████| 3938/3938 [05:10<00:00, 12.70it/s] 


Calculating metrics for: codet5-small-trans-functions-concat-80-10-10-ep-eval-preds-0


100%|██████████| 480/480 [1:09:20<00:00,  8.67s/it]


Calculating metrics for: codet5-1-eval-preds


100%|██████████| 3938/3938 [04:58<00:00, 13.18it/s]


In [None]:
# Re-score only the last prediction file and overwrite its entry in model_metrics.
pf = pred_files[-1]
# Keep the file open only while parsing; the metric computation does not need it.
with pf.open("r", encoding="utf-8") as f:
    eval_preds = json.load(f)
print(f'Calculating metrics for: {pf.stem}')
v_m_a, v_m_b, v_codebleu = compute_metrics(eval_preds)
model_metrics[pf.stem] = {
    'v_m_a': v_m_a,
    'v_m_b': v_m_b,
    'v_codebleu': v_codebleu
}

Calculating metrics for: codet5-1-eval-preds


  0%|          | 12/3938 [00:00<00:34, 115.02it/s]

In [None]:
import pandas as pd

# Flatten the per-model results into one record per model: the model name,
# then every metric of both collections converted to a plain Python number
# via .item(), then the CodeBLEU entries as they are.
pandas_results = [
    {
        'model': model_name,
        **{metric: score.item() for metric, score in scores['v_m_a'].items()},
        **{metric: score.item() for metric, score in scores['v_m_b'].items()},
        **{metric: score for metric, score in scores['v_codebleu'].items()},
    }
    for model_name, scores in model_metrics.items()
]

df = pd.DataFrame(pandas_results)

In [None]:
# Preview the first rows of the per-model metrics table.
df.head()

In [None]:
# Persist the metrics table to Drive.
# NOTE(review): to_csv is called without index=False, so the DataFrame index
# is written as an extra leading column — confirm that is intended.
csv_path = "/content/drive/MyDrive/runi_nlp/nlp-project/model_metrics_2.csv"
df.to_csv(csv_path)