# Эксперименты с Git

In [1]:
import sys
import json
import os
import re

from typing import Dict, Set, Any, List, Tuple
from tqdm import tqdm

In [2]:
# Directory holding the "<id>.json" report files, and the local checkout of
# the git repository that is inspected with `git -C repo_path ...`.
# The defaults are the original author's local paths; set EA_REPORTS_DIR /
# EA_REPO_PATH in the environment to run the notebook on another machine.
reports_dir = os.environ.get(
    "EA_REPORTS_DIR",
    "/Users/Denis.Sushentsev/Work/intellij_fixed_201007_raw/reports",
)
repo_path = os.environ.get(
    "EA_REPO_PATH",
    "/Users/Denis.Sushentsev/Work/intellij-community",
)

Немного посмотрим данные.

In [3]:
# Report ids are the integer stems of the "<id>.json" files in reports_dir.
# Directory entries that do not have that shape (for example ".DS_Store")
# are skipped instead of making int() raise ValueError.
report_ids = sorted(
    int(stem)
    for stem, ext in map(os.path.splitext, os.listdir(reports_dir))
    if ext == ".json" and stem.isdecimal()
)

def load_report(report_id: int) -> Dict[str, Any]:
    """Load one report from ``reports_dir``.

    Args:
        report_id: Numeric id of the report; the file that is read is
            ``<reports_dir>/<report_id>.json``.

    Returns:
        The parsed JSON content of the report file.

    Raises:
        FileNotFoundError: If no file exists for ``report_id``.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    report_path = os.path.join(reports_dir, f"{report_id}.json")
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(report_path, encoding="utf-8") as file:
        return json.load(file)

In [4]:
# Flag every report by whether its "commit" field is set, then count the flags.
commit_flags = [load_report(rid)["commit"] is not None for rid in tqdm(report_ids)]
has_commit = commit_flags.count(True)
print(f"{has_commit} of {len(report_ids)} reports have not None commit.")

100%|██████████| 11013/11013 [00:07<00:00, 1477.16it/s]

5873 of 11013 reports have not None commit.





In [7]:
# Tally stack frames over all reports: total, those with a file name,
# and those whose file name ends in ".java" or ".kt".
all_counts = 0
not_none_counts = 0
java_counts = 0
kt_counts = 0

for rid in tqdm(report_ids):
    report = load_report(rid)
    frames = report["frames"]
    all_counts += len(frames)

    for frame in frames:
        file_name = frame["file_name"]
        if file_name is None:
            # Frames without a file name only contribute to the total.
            continue
        not_none_counts += 1

        if file_name.endswith(".java"):
            java_counts += 1
            continue
        if file_name.endswith(".kt"):
            kt_counts += 1

print(f"Total frames: {all_counts}")
print(f"Frames with not none file names: {not_none_counts}")
print(f"Java frames: {java_counts}")
print(f"Kotlin frames: {kt_counts}")

100%|██████████| 11013/11013 [00:03<00:00, 3252.26it/s]

Total frames: 1252942
Frames with not none file names: 1235551
Java frames: 1193077
Kotlin frames: 41599





Нужно найти commits, которые исправляют ошибку:
```
git -C repo_path log --grep="(^|\s)EA-[\d]+" -P -- repo_path
```

После того, как получили все коммиты, нужно найти все измененные методы (файл ```get_all_changed_methods.py```).

In [39]:
# Sample `git log` output (three commits) used to try out the regexes below.
# The literal starts with a newline so that every "commit <hash>" header,
# including the first one, is preceded by "\n".
commits_log = """
commit 6c81477198f895109a65a85fc663dfac89205f4c
Author: Gregory.Shrago <gregory.shrago@jetbrains.com>
Date:   Mon Apr 11 01:47:41 2022 +0300

fix context rule results caching
Also fixes IDEA-291789 and EA-488030.

GitOrigin-RevId: 7e7d12798e65fef2a49d598acd96c5db09ef39d9

commit b8e8f83c3cebb3f84e9daac22acdc65085bea52a
Author: Alexey Kudravtsev <cdr@intellij.com>
Date:   Sat Apr 9 22:38:47 2022 +0200

EA-487955 (plugin) - AE: DaemonCodeAnalyzerImpl.putPreferredFileEditorFirst

GitOrigin-RevId: 7ed44767d01ad6c36ba16d47cd31e73bb14e7e6a

commit 5713c90def9bce30e7e4b5a7f249c2131f0dc022
Author: Gregory.Shrago <gregory.shrago@jetbrains.com>
Date:   Fri Apr 8 18:44:57 2022 +0300

EA-257958 - PCE: AbstractProgressIndicatorBase.throwIfCanceled

GitOrigin-RevId: 7e7b4e77b6c208d39b0da4f498650c1b57d09d91
"""

In [41]:
# Raw strings keep the regex escapes (\w, \d, \b) away from Python's own
# string-escape processing, which warns about unknown escapes such as "\w".

# The 40 word characters that directly follow "commit " at the start of a
# line (the lookbehind requires the preceding newline).
commit_pattern = re.compile(r"(?<=\ncommit )\w{40}")
# The digits of an "EA-<number>" issue id. The \b stops ids whose prefix
# merely ends in "EA" (e.g. "IDEA-291789") from being matched.
issue_pattern = re.compile(r"(?<=\bEA-)\d+")

# Show each match object, the matched hash and its offset in the log.
for commit in commit_pattern.finditer(commits_log):
    print(commit)
    print(commit.group(0))
    print(commit.start())

<re.Match object; span=(8, 48), match='6c81477198f895109a65a85fc663dfac89205f4c'>
6c81477198f895109a65a85fc663dfac89205f4c
8
<re.Match object; span=(281, 321), match='b8e8f83c3cebb3f84e9daac22acdc65085bea52a'>
b8e8f83c3cebb3f84e9daac22acdc65085bea52a
281
<re.Match object; span=(549, 589), match='5713c90def9bce30e7e4b5a7f249c2131f0dc022'>
5713c90def9bce30e7e4b5a7f249c2131f0dc022
549
