# Jira HADOOP data analysis

In [104]:
import json
from itertools import chain
import pandas
import jira

In [5]:
# Load the project-level export and keep only the payload stored under the
# top-level 'hadoop' key.
# The encoding is given explicitly: without it, open() falls back to the
# locale's preferred encoding, which is not UTF-8 on every platform and can
# mis-decode (or fail on) non-ASCII text in the JSON file.
with open('hadoop_data.json', encoding='utf-8') as f:
    hadoop_data = json.load(f)['hadoop']

In [29]:
# Load the issue export and keep only the value stored under the top-level
# 'issues' key.
# The encoding is given explicitly: without it, open() falls back to the
# locale's preferred encoding, which is not UTF-8 on every platform and can
# mis-decode (or fail on) non-ASCII text in the JSON file.
with open('hadoop_issues.json', encoding='utf-8') as f:
    issues = json.load(f)['issues']

## Important attributes (Project HADOOP)

In [63]:
# Project-level fields of interest, shown together as one tuple.
# Only the first entry of 'components' and of 'issueTypes' is sampled.
(
    hadoop_data['id'],
    hadoop_data['key'],
    hadoop_data['description'],
    hadoop_data['lead']['key'],
    hadoop_data['components'][0]['id'],
    hadoop_data['issueTypes'][0]['name'],
    hadoop_data['url'],
    hadoop_data['assigneeType'],
    hadoop_data['name'],
    hadoop_data['roles'].keys(),
    hadoop_data['projectTypeKey'],
)

('12310240',
 'HADOOP',
 'Hadoop Common is the common library for Apache Hadoop.',
 'owen.omalley',
 '12334403',
 'Bug',
 'http://hadoop.apache.org/',
 'UNASSIGNED',
 'Hadoop Common',
 dict_keys(['Developers', 'Contributors', 'PMC', 'Committers', 'Administrators', 'ASF Members', 'Users', 'Contributors 1']),
 'software')

## Important attributes (Per Issue)

In [70]:
# Fields of interest for the first loaded issue, shown together as one tuple.
# Only the first issue link and the first component are sampled.
(
    # Identity and type
    issues[0]['id'],
    issues[0]['key'],
    issues[0]['fields']['issuetype']['name'],
    issues[0]['fields']['issuetype']['subtask'],
    issues[0]['fields']['timespent'],
    issues[0]['fields']['project']['id'],
    issues[0]['fields']['project']['key'],
    issues[0]['fields']['aggregatetimespent'],
    # Resolution, activity and triage
    issues[0]['fields']['resolution']['name'],
    issues[0]['fields']['resolutiondate'],
    issues[0]['fields']['lastViewed'],
    issues[0]['fields']['watches']['watchCount'],
    issues[0]['fields']['created'],
    issues[0]['fields']['priority']['name'],
    issues[0]['fields']['labels'],
    issues[0]['fields']['timeestimate'],
    issues[0]['fields']['aggregatetimeoriginalestimate'],
    # First issue link and the issue on its outward side
    issues[0]['fields']['issuelinks'][0]['type']['inward'],
    issues[0]['fields']['issuelinks'][0]['type']['outward'],
    issues[0]['fields']['issuelinks'][0]['outwardIssue']['key'],
    issues[0]['fields']['issuelinks'][0]['outwardIssue']['fields']['summary'],
    issues[0]['fields']['issuelinks'][0]['outwardIssue']['fields']['status']['statusCategory']['name'],
    issues[0]['fields']['issuelinks'][0]['outwardIssue']['fields']['priority']['name'],
    issues[0]['fields']['issuelinks'][0]['outwardIssue']['fields']['issuetype']['name'],
    issues[0]['fields']['issuelinks'][0]['outwardIssue']['fields']['issuetype']['subtask'],
    # People, state and free text
    issues[0]['fields']['assignee']['key'],
    issues[0]['fields']['updated'],
    issues[0]['fields']['status']['name'],
    issues[0]['fields']['components'][0]['name'],
    issues[0]['fields']['timeoriginalestimate'],
    issues[0]['fields']['description'],
    issues[0]['fields']['aggregatetimeestimate'],
    issues[0]['fields']['summary'],
    issues[0]['fields']['creator']['key'],
    issues[0]['fields']['subtasks'],
    issues[0]['fields']['reporter']['key'],
    # Progress counters, due date and votes
    issues[0]['fields']['aggregateprogress']['progress'],
    issues[0]['fields']['aggregateprogress']['total'],
    issues[0]['fields']['duedate'],
    issues[0]['fields']['progress']['progress'],
    issues[0]['fields']['progress']['total'],
    issues[0]['fields']['votes']['votes'],
)

('13203378',
 'HADOOP-15994',
 'Improvement',
 False,
 None,
 '12310240',
 'HADOOP',
 None,
 'Fixed',
 '2018-12-10T14:17:17.000+0000',
 None,
 1,
 '2018-12-10T09:12:49.000+0000',
 'Major',
 [],
 None,
 None,
 'is related to',
 'relates to',
 'HADOOP-9991',
 'Fix up Hadoop POMs, roll up JARs to latest versions',
 'To Do',
 'Major',
 'Improvement',
 False,
 'jack-lee',
 '2018-12-10T14:17:17.000+0000',
 'Resolved',
 'security',
 None,
 "Now Jackson 2.9.5 is used and it is vulnerable (CVE-2018-11307). Let's upgrade to 2.9.6 or upper.",
 None,
 'Upgrade Jackson2 to the latest version',
 'ajisakaa',
 [],
 'ajisakaa',
 0,
 0,
 None,
 0,
 0,
 0)

## Attributes' categorical values

### Issues' type values

In [79]:
# Distinct issue-type names across all loaded issues.
{entry['fields']['issuetype']['name'] for entry in issues}

{'Bug', 'Improvement', 'New Feature', 'Sub-task', 'Task', 'Test', 'Wish'}

### Issues' resolution values

In [92]:
# Distinct resolution names across all loaded issues; an issue whose
# 'resolution' field is falsy is reported under the 'NONE_VALUE' placeholder.
{
    entry['fields']['resolution']['name']
    if entry['fields']['resolution']
    else 'NONE_VALUE'
    for entry in issues
}

{'Cannot Reproduce',
 'Done',
 'Duplicate',
 'Fixed',
 'Implemented',
 'Invalid',
 'NONE_VALUE',
 'Not A Bug',
 'Not A Problem',
 'Resolved',
 "Won't Fix"}

### Issues' importance/priority values

In [94]:
# Distinct priority names across all loaded issues.
{entry['fields']['priority']['name'] for entry in issues}

{'Blocker', 'Critical', 'Major', 'Minor', 'Trivial'}

### Issues' label values

In [105]:
# Distinct labels, flattened over every issue's 'labels' list.
{
    label
    for entry in issues
    for label in entry['fields']['labels']
}

{'BB2015-05-TBR',
 'GraphiteSink,',
 'IPv6',
 'Java9',
 'KeyStore',
 'Security',
 'WASB',
 'Windows',
 'bash',
 'build',
 'common',
 'distcp',
 'docker',
 'docuentation',
 'documentation',
 'easy-fix',
 'easyfix',
 'features',
 'filesystem',
 'flaky-test',
 'fs',
 'hadoop-tools',
 'incompatibleChange',
 'javadoc',
 'jdk9',
 'kms',
 'log',
 'metrics',
 'newbie',
 'patch',
 'performance',
 'security',
 'shell-script',
 'statistics',
 'supportability',
 'tpm',
 'trunk',
 'wavefront',
 'windows'}

### Issues' dependency values

In [112]:
# Distinct inward link-type descriptions over every link of every issue.
{
    link['type']['inward']
    for entry in issues
    for link in entry['fields']['issuelinks']
}

{'Blocked',
 'Dependent',
 'Is contained by',
 'is a child of',
 'is blocked by',
 'is broken by',
 'is caused by',
 'is cloned by',
 'is depended upon by',
 'is duplicated by',
 'is part of',
 'is related to',
 'is required by',
 'is superceded by'}

In [113]:
# Distinct outward link-type descriptions over every link of every issue.
{
    link['type']['outward']
    for entry in issues
    for link in entry['fields']['issuelinks']
}

{'Blocked',
 'Dependent',
 'blocks',
 'breaks',
 'causes',
 'contains',
 'depends upon',
 'duplicates',
 'incorporates',
 'is a clone of',
 'is a parent of',
 'relates to',
 'requires',
 'supercedes'}

### Dependent Issues' status values

In [133]:
# Distinct status-category names of linked issues. For each link the
# 'outwardIssue' entry is used when present and truthy; otherwise the
# 'inwardIssue' entry is used.
{
    (link.get('outwardIssue') or link['inwardIssue'])['fields']['status']['statusCategory']['name']
    for entry in issues
    for link in entry['fields']['issuelinks']
}

{'Done', 'In Progress', 'To Do'}

### Dependent Issues' importance/priority values

In [137]:
# Distinct priority names of linked issues. For each link the
# 'outwardIssue' entry is used when present and truthy; otherwise the
# 'inwardIssue' entry is used.
{
    (link.get('outwardIssue') or link['inwardIssue'])['fields']['priority']['name']
    for entry in issues
    for link in entry['fields']['issuelinks']
}

{'Blocker', 'Critical', 'Major', 'Minor', 'Trivial'}

### Issues' status values

In [146]:
# Distinct status names across all loaded issues.
{entry['fields']['status']['name'] for entry in issues}

{'Closed', 'In Progress', 'Open', 'Patch Available', 'Reopened', 'Resolved'}

## Insights - Results