# Mining Challenge Sample Notebook

This notebook gives a small example of working with our data.

## Imports and Database Connection

In [1]:
from mongoengine import connect, DoesNotExist
from pycoshark.mongomodels import Commit, FileAction, Project, VCSSystem, Hunk, Issue, IssueSystem, IssueComment
from pycoshark.utils import create_mongodb_uri_string


# Connection settings for the MongoDB instance that holds the data.
# Adapt these values to your own database credentials if necessary.
credentials = dict(
    db_user='',
    db_password='',
    db_hostname='localhost',
    db_port=27017,
    db_authentication_database='',
    db_ssl_enabled=False,
)

# Build the connection URI from the settings above
uri = create_mongodb_uri_string(**credentials)

# Register the connection under the default alias used by the pycoshark models
connect("smartshark_rel1", host=uri, alias='default')

MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True, read_preference=Primary())

## Working with commits

In [2]:
# We first need the project from the database
project = Project.objects(name='commons-dbcp').get()

# We now select the version control system of the project
vcs_system = VCSSystem.objects(project_id=project.id).get()

print('VCS System:', vcs_system.url)

# We can now fetch the commits and analyze them
num_commits = Commit.objects(vcs_system_id=vcs_system.id).count()

print('Number of commits:', num_commits)

count_bugfix = 0
count_linked_issue = 0
count_hunks = 0

# Only limits the fields we are reading to the required fields. This is important for the performance.
# The document id is still available on the loaded commits.
for commit in Commit.objects(vcs_system_id=vcs_system.id).only('labels', 'linked_issue_ids'):
    # A missing labels dict is treated like an empty one;
    # only an explicit true flag counts as a bug fix.
    labels = commit.labels or {}
    if labels.get('validated_bugfix') == True:
        count_bugfix += 1
    # Counts commits that link to at least one issue
    if commit.linked_issue_ids:
        count_linked_issue += 1

    # File actions group all changed hunks in a commit of the same file.
    # We only need their ids, and we count the hunks of all file actions of
    # the commit with a single query instead of one query per file action.
    file_action_ids = [fa.id for fa in FileAction.objects(commit_id=commit.id).only('id')]
    if file_action_ids:
        count_hunks += Hunk.objects(file_action_id__in=file_action_ids).count()

print('Number of bug fixing commits:', count_bugfix)
print('Number of commits that link to a Jira issue:', count_linked_issue)
print('Number of hunks for all commits:', count_hunks)

VCS System: https://github.com/apache/commons-dbcp.git
Number of commits: 2205
Number of bug fixing commits: 106
Number of commits that link to a Jira issue: 323
Number of hunks for all commits: 29290


## Working with issues

In [3]:
# We first need the project from the database
project = Project.objects(name='commons-dbcp').get()

# We now select the issue tracking system of the project
# Please note that some projects have multiple issue trackers
# In this case get() would fail and you would need to loop over them
issue_tracker = IssueSystem.objects(project_id=project.id).get()

print('Issue Tracker:', issue_tracker.url)

# we can now work with the issues
num_issues = Issue.objects(issue_system_id=issue_tracker.id).count()

print('Number of issues:', num_issues)

count_comments = 0
count_referenced_by_commits = 0
count_bugs_dev_label = 0
count_bugs_validated = 0

# As with the commits, we only load the fields we actually read.
# This avoids transferring the other fields of every issue.
for issue in Issue.objects(issue_system_id=issue_tracker.id).only('issue_type', 'issue_type_verified'):
    # Comments reference their issue via issue_id
    count_comments += IssueComment.objects(issue_id=issue.id).count()
    # Issue type as read from the issue tracker
    if issue.issue_type is not None and issue.issue_type.lower()=='bug':
        count_bugs_dev_label += 1
    # Verified issue type; it may be missing, hence the None check
    if issue.issue_type_verified is not None and issue.issue_type_verified.lower()=='bug':
        count_bugs_validated += 1
    # An issue is referenced if at least one commit lists it in linked_issue_ids
    if Commit.objects(linked_issue_ids=issue.id).count()>0:
        count_referenced_by_commits += 1

print('Number of comments in discussions:', count_comments)
print('Number of issues referenced by commits:', count_referenced_by_commits)
print('Number of issues labeled as bugs by developers:', count_bugs_dev_label)
print('Number of issues labeled validated as bug by researchers:', count_bugs_validated)

Issue Tracker: https://issues.apache.org/jira/rest/api/2/search?jql=project=DBCP
Number of issues: 553
Number of comments in discussions: 1696
Number of issues referenced by commits: 219
Number of issues labeled as bugs by developers: 367
Number of issues labeled validated as bug by researchers: 71
