In [52]:
# https://pydriller.readthedocs.io/en/latest/tutorial.html

In [53]:
import pydriller
from pydriller import Repository
import json
import pandas
import os
from tqdm import tqdm as tq

In [54]:
bug_related_msg = ['bug','bugs','fix','fixes','patch','fault','corrected','tweaked','problem','problems','issue']

In [55]:
# exception_messages = []
def bugs_from_commits_in_repo(repo_name):
    commits_info = {}
    non_dvc_not_bug_count = 0
    dvc_not_bug_count = 0
    non_dvc_bug_count = 0
    dvc_bug_count = 0
    number_of_exceptions = 0
    total_commits = 0

    for commit in Repository(repo_name).traverse_commits():
        total_commits += 1
        if commit.in_main_branch == False:
            continue
        is_bug = False
        is_dvc = False
        for bug_msg in bug_related_msg:
            if bug_msg in commit.msg:
                is_bug = True
                break
        single_commmit = {}
        changed_files = []
        try:
            for file in commit.modified_files:
                if 'dvc' in str(file.old_path).lower() or 'dvc' in str(file.new_path).lower():
                    is_dvc = True
                    if is_bug == False:
                        break
                if is_bug == True:
                    file_obj = {}
                    file_obj['file_name'] = file.filename
                    file_obj['old_path'] = file.old_path
                    file_obj['new_path'] = file.new_path
                    file_obj['nloc'] = file.nloc
                    changed_files.append(file_obj)

            if is_bug == False:
                if is_dvc == False:
                    non_dvc_not_bug_count += 1
                else:
                    dvc_not_bug_count += 1
                continue
            if is_dvc == True:
                dvc_bug_count += 1
            else:
                non_dvc_bug_count += 1

            single_commmit['committer_date'] = str(commit.committer_date) 
            single_commmit['author_date'] = str(commit.author_date)
            single_commmit['commit_message'] = commit.msg
            single_commmit['in_main_branch'] = commit.in_main_branch
            single_commmit['committer'] = commit.committer.email
            single_commmit["modified_files"]=changed_files
            single_commmit['is_dvc_related'] = is_dvc
            commits_info[commit.hash] = single_commmit        
        except Exception as e:
            number_of_exceptions += 1
            # exception_messages.append(str(e))
            # print(str(e))
    commits_info["non_dvc_not_bug_count"] = non_dvc_not_bug_count
    commits_info["dvc_not_bug_count"] = dvc_not_bug_count
    commits_info["non_dvc_bug_count"] = non_dvc_bug_count
    commits_info["dvc_bug_count"] = dvc_bug_count
    print("Repo Name", repo_name)
    print(non_dvc_not_bug_count, dvc_not_bug_count, non_dvc_bug_count, dvc_bug_count, non_dvc_not_bug_count + dvc_not_bug_count + non_dvc_bug_count + dvc_bug_count, total_commits)
    print("Number of Exceptions", str(number_of_exceptions))
    print()
    return commits_info

In [56]:
def save_to_file(root, repo_name, file_name, json_obj):
    repo_name = repo_name.split("/")[1]
    folder_path = os.path.join(root,repo_name)
    file_path = os.path.join(folder_path,file_name)
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)
    with open(file_path, 'w') as f:
        json.dump(json_obj, f, indent=4)

In [57]:
repos = [
    'weiji14/deepicedrain',
    'ipno-llead/processing',
    'nasaharvest/crop-mask',
    'BlueBrain/Search',
    'rootski-io/rootski',
    'dna-seq/dna-seq',
    'GenericMappingTools/pygmt',
    'GenericMappingTools/gmt',
]

In [58]:
commits_infos = []
for i in tq(range(len(repos))):
    commits_infos.append(bugs_from_commits_in_repo("https://github.com/"+repos[i]))
    save_to_file("C:\\Users\\F_RABBI\\Desktop\\se4a1_class_group_project\\data",repos[i],"bugs_from_commits.json",commits_infos[i])

 12%|██████████▌                                                                         | 1/8 [01:32<10:49, 92.81s/it]

Repo Name https://github.com/weiji14/deepicedrain
457 1 65 1 524 536
Number of Exceptions 12



 25%|████████████████████▊                                                              | 2/8 [05:01<16:06, 161.08s/it]

Repo Name https://github.com/ipno-llead/processing
1312 89 350 2 1753 1753
Number of Exceptions 0



 38%|███████████████████████████████▏                                                   | 3/8 [09:49<18:15, 219.14s/it]

Repo Name https://github.com/nasaharvest/crop-mask
1312 107 88 2 1509 1509
Number of Exceptions 0



 50%|█████████████████████████████████████████▌                                         | 4/8 [10:44<10:16, 154.11s/it]

Repo Name https://github.com/BlueBrain/Search
357 28 50 1 436 436
Number of Exceptions 0



 62%|███████████████████████████████████████████████████▉                               | 5/8 [11:40<05:56, 118.68s/it]

Repo Name https://github.com/rootski-io/rootski
377 4 76 1 458 458
Number of Exceptions 0



 75%|███████████████████████████████████████████████████████████████                     | 6/8 [11:55<02:46, 83.49s/it]

Repo Name https://github.com/dna-seq/dna-seq
69 18 18 4 109 109
Number of Exceptions 0



 88%|████████████████████████████████████████████████████████████████████████▋          | 7/8 [16:49<02:32, 152.43s/it]

Repo Name https://github.com/GenericMappingTools/pygmt
1219 78 209 15 1521 1521
Number of Exceptions 0

Repo Name https://github.com/GenericMappingTools/gmt
22404 26 3277 13 25720 25720
Number of Exceptions 0



100%|█████████████████████████████████████████████████████████████████████████████████| 8/8 [1:58:26<00:00, 888.32s/it]


In [59]:
str(None)

'None'