In [321]:
#importing dependencies
import pandas as pd

In [322]:
# Load the Eclipse bug CSV into a DataFrame and preview the first rows.
path = "Resources/EclipseBugs.csv"
bugs_df = pd.read_csv(filepath_or_buffer=path)
bugs_df.head(5)

Unnamed: 0,Bug\nID,Product,Component,Assignee,Status,Resolution,Summary,Changed,Assignee\nReal\nName,Classification,...,Number of\nComments,Opened,OS,Priority,Reporter,Reporter\nReal\nName,Severity,Target\nMilestone,Version,Votes
0,3638,JDT,UI,aeschli,VERIFIED,FIXED,Package Viewer: order resource folders before ...,1/17/2002 7:28,Martin Aeschlimann,Eclipse,...,3,10/10/2001 22:58,Windows NT,P1,aeschli,Martin Aeschlimann,major,---,2,0
1,3854,JDT,UI,aeschli,VERIFIED,FIXED,Wrong execution's classpath. (1GEY0W0),1/18/2002 4:02,Martin Aeschlimann,Eclipse,...,5,10/10/2001 23:01,Windows NT,P1,david_audel,David Audel,normal,---,2,0
2,4188,JDT,UI,aeschli,VERIFIED,FIXED,type hierachy - typo (1GJW2XJ),1/28/2002 3:12,Martin Aeschlimann,Eclipse,...,3,10/10/2001 23:07,Windows 2000,P1,erich_gamma,Erich Gamma,normal,---,2,0
3,5115,JDT,Debug,aeschli,VERIFIED,FIXED,Workspace source locator fails with mulitple p...,11/13/2001 10:11,Martin Aeschlimann,Eclipse,...,10,10/19/2001 13:41,Windows 2000,P1,darin.eclipse,Darin Wright,normal,---,2,0
4,5820,JDT,UI,aeschli,VERIFIED,FIXED,Close all editors brings up hierarchy of object,11/20/2001 16:22,Martin Aeschlimann,Eclipse,...,5,11/12/2001 18:18,Windows 2000,P1,jed.anderson,Jed Anderson,normal,---,2,0


In [323]:
# Non-null entries per column (a quick check for missing values).
bugs_df.notna().sum()

Bug\nID                 10000
Product                 10000
Component               10000
Assignee                10000
Status                  10000
Resolution              10000
Summary                 10000
Changed                 10000
Assignee\nReal\nName    10000
Classification          10000
Hardware                10000
Number of\nComments     10000
Opened                  10000
OS                      10000
Priority                10000
Reporter                10000
Reporter\nReal\nName     9967
Severity                10000
Target\nMilestone       10000
Version                 10000
Votes                   10000
dtype: int64

In [324]:
# Some CSV headers contain embedded line breaks; replace each break with a
# space so the columns are easier to reference.
multiline_headers = [
    "Bug\nID",
    "Assignee\nReal\nName",
    "Number of\nComments",
    "Reporter\nReal\nName",
    "Target\nMilestone",
]
header_fixes = {header: header.replace("\n", " ") for header in multiline_headers}
renamed_bugs_df = bugs_df.rename(columns=header_fixes)
renamed_bugs_df.columns

Index(['Bug ID', 'Product', 'Component', 'Assignee', 'Status', 'Resolution',
       'Summary', 'Changed', 'Assignee Real Name', 'Classification',
       'Hardware', 'Number of Comments', 'Opened', 'OS', 'Priority',
       'Reporter', 'Reporter Real Name', 'Severity', 'Target Milestone',
       'Version', 'Votes'],
      dtype='object')

In [325]:
# Show the dtype pandas inferred for each column of the renamed frame.
renamed_bugs_df.dtypes

Bug ID                 int64
Product               object
Component             object
Assignee              object
Status                object
Resolution            object
Summary               object
Changed               object
Assignee Real Name    object
Classification        object
Hardware              object
Number of Comments     int64
Opened                object
OS                    object
Priority              object
Reporter              object
Reporter Real Name    object
Severity              object
Target Milestone      object
Version               object
Votes                  int64
dtype: object

In [326]:
# Group by assignee and count how many bug records each assignee has per component.
assignee_group = renamed_bugs_df.groupby("Assignee")
# Name the count column explicitly: pandas >= 2.0 labels value_counts()
# results "count" instead of reusing the source column name, which would turn
# the later rename of the "Component" column into a silent no-op.
assignee_df = assignee_group["Component"].value_counts().to_frame(name="Component")
assignee_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Component
Assignee,Component,Unnamed: 2_level_1
Aaron_Ferguson,UI,10
Adam_Schlegel,UI,7
ChrisAustin,User Assistance,3
Claude_Knaus,UI,31
Claude_Knaus,Text,7


In [327]:
# Give the count column a descriptive label.
assignee_df = assignee_df.rename({"Component": "Component Bug Count"}, axis="columns")
assignee_df.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Component Bug Count
Assignee,Component,Unnamed: 2_level_1
Aaron_Ferguson,UI,10
Adam_Schlegel,UI,7
ChrisAustin,User Assistance,3
Claude_Knaus,UI,31
Claude_Knaus,Text,7


In [328]:
# Total number of bug records in the raw frame.
total_bugs = bugs_df.shape[0]
total_bugs

10000

In [329]:
# Number of bug records per assignee.
bugs_per_user = assignee_group["Assignee"].agg("count")
bugs_per_user.head(5)

Assignee
Aaron_Ferguson    10
Adam_Schlegel      7
ChrisAustin        3
Claude_Knaus      38
Curtis_Windatt     6
Name: Assignee, dtype: int64

In [330]:
# Each assignee's share of all bug records, expressed as a percentage.
bug_percentage_df = (bugs_per_user / total_bugs * 100).to_frame()
bug_percentage_df.head(5)

Unnamed: 0_level_0,Assignee
Assignee,Unnamed: 1_level_1
Aaron_Ferguson,0.1
Adam_Schlegel,0.07
ChrisAustin,0.03
Claude_Knaus,0.38
Curtis_Windatt,0.06


In [331]:
# Label the percentage column.
bug_percentage_df = bug_percentage_df.rename(
    {"Assignee": "% of Bugs Assigned"}, axis="columns"
)
bug_percentage_df.head(5)

Unnamed: 0_level_0,% of Bugs Assigned
Assignee,Unnamed: 1_level_1
Aaron_Ferguson,0.1
Adam_Schlegel,0.07
ChrisAustin,0.03
Claude_Knaus,0.38
Curtis_Windatt,0.06


In [332]:
# Preview the per-assignee component counts.
assignee_df.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Component Bug Count
Assignee,Component,Unnamed: 2_level_1
Aaron_Ferguson,UI,10
Adam_Schlegel,UI,7
ChrisAustin,User Assistance,3
Claude_Knaus,UI,31
Claude_Knaus,Text,7


In [333]:
# Move the index levels into regular columns.
# Guarded so that re-running the cell does not push the resulting default
# integer index into a spurious "index" column.
if isinstance(assignee_df.index, pd.MultiIndex):
    assignee_df = assignee_df.reset_index()
assignee_df.head()

Unnamed: 0,Assignee,Component,Component Bug Count
0,Aaron_Ferguson,UI,10
1,Adam_Schlegel,UI,7
2,ChrisAustin,User Assistance,3
3,Claude_Knaus,UI,31
4,Claude_Knaus,Text,7


In [334]:
# Attach each assignee's overall percentage to the per-component counts.
merged_assignee_df = assignee_df.merge(bug_percentage_df, on="Assignee")
merged_assignee_df.head(5)

Unnamed: 0,Assignee,Component,Component Bug Count,% of Bugs Assigned
0,Aaron_Ferguson,UI,10,0.1
1,Adam_Schlegel,UI,7,0.07
2,ChrisAustin,User Assistance,3,0.03
3,Claude_Knaus,UI,31,0.38
4,Claude_Knaus,Text,7,0.38


In [335]:
# Number of bug records at each severity level.
renamed_bugs_df.loc[:, "Severity"].value_counts()

normal         6517
major          1375
enhancement     913
critical        459
minor           327
trivial         300
blocker         109
Name: Severity, dtype: int64

In [336]:
# Count, for every assignee, how many bug records fall under each severity.
# The column is named explicitly because pandas >= 2.0 labels value_counts()
# results "count" instead of reusing the source column name, which would turn
# the later rename of the "Severity" column into a silent no-op.
assignee_severity_df = assignee_group["Severity"].value_counts().to_frame(name="Severity")
assignee_severity_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Severity
Assignee,Severity,Unnamed: 2_level_1
Aaron_Ferguson,normal,10
Adam_Schlegel,normal,5
Adam_Schlegel,major,2
ChrisAustin,normal,2
ChrisAustin,minor,1


In [337]:
# Label the count column.
assignee_severity_df = assignee_severity_df.rename(
    {"Severity": "Count of Bugs"}, axis="columns"
)
assignee_severity_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Count of Bugs
Assignee,Severity,Unnamed: 2_level_1
Aaron_Ferguson,normal,10
Adam_Schlegel,normal,5
Adam_Schlegel,major,2
ChrisAustin,normal,2
ChrisAustin,minor,1
...,...,...
veronika_irvine,normal,1
wassim.melhem,normal,2
wassim.melhem,blocker,1
wassim.melhem,major,1


In [338]:
# Keep only the rows whose severity is "critical".
is_critical = renamed_bugs_df["Severity"].eq("critical")
critical_bugs_df = renamed_bugs_df[is_critical]
critical_bugs_df.head(5)

Unnamed: 0,Bug ID,Product,Component,Assignee,Status,Resolution,Summary,Changed,Assignee Real Name,Classification,...,Number of Comments,Opened,OS,Priority,Reporter,Reporter Real Name,Severity,Target Milestone,Version,Votes
12,30991,JDT,UI,aeschli,VERIFIED,FIXED,NPE in New Java Project Wizard,2/24/2003 10:28,Martin Aeschlimann,Eclipse,...,5,2/5/2003 12:51,Windows XP,P1,csmclaren,Chris McLaren,critical,2.1 RC1,2.1,0
14,43518,JDT,UI,aeschli,VERIFIED,FIXED,Stepping through session bean is very slow in ...,10/17/2003 5:27,Martin Aeschlimann,Eclipse,...,14,9/23/2003 11:52,Windows 2000,P1,stevenyj,Steven Jin,critical,2.1.2,2.1.1,0
26,16283,Platform,Compare,akiezun,VERIFIED,FIXED,Add from Local History items are unsorted,6/3/2002 6:38,Adam Kiezun,Eclipse,...,8,5/17/2002 13:29,Windows 2000,P1,nick_edgar,Nick Edgar,critical,2.0 F2,2,0
28,18079,JDT,UI,akiezun,VERIFIED,FIXED,We must guard our Action against a Java editor...,6/3/2002 6:32,Adam Kiezun,Eclipse,...,5,5/28/2002 13:13,Windows 2000,P1,dirk_baeumer,Dirk Baeumer,critical,2.0 F2,2,0
30,18581,JDT,UI,akiezun,VERIFIED,FIXED,Single element view - markers from other eleme...,6/3/2002 8:08,Adam Kiezun,Eclipse,...,9,6/1/2002 8:01,Windows 2000,P1,erich_gamma,Erich Gamma,critical,2.0 F2,2,0


In [339]:
# Count the critical bug records per assignee.
grouped_critical = critical_bugs_df.groupby(by="Assignee")
critical_count = grouped_critical["Severity"].agg("count")
critical_count

Assignee
Claude_Knaus            3
Darin_Swanson          33
Kevin_McGuire           1
Michael_Rennie          5
Olivier_Thomann         5
                       ..
sxenos                  7
szymon.ptaszkiewicz     1
till_bay                3
valentam                2
veronika_irvine         1
Name: Severity, Length: 89, dtype: int64

In [340]:
# Total number of critical bug records.
total_critical_bugs = critical_bugs_df.shape[0]
total_critical_bugs

459

In [341]:
# Turn the per-assignee critical counts into a one-column DataFrame.
critical_df = critical_count.to_frame()
critical_df.head(5)

Unnamed: 0_level_0,Severity
Assignee,Unnamed: 1_level_1
Claude_Knaus,3
Darin_Swanson,33
Kevin_McGuire,1
Michael_Rennie,5
Olivier_Thomann,5


In [342]:
# Label the count column.
critical_df = critical_df.rename({"Severity": "Critical Bug Count"}, axis="columns")
critical_df.head(5)

Unnamed: 0_level_0,Critical Bug Count
Assignee,Unnamed: 1_level_1
Claude_Knaus,3
Darin_Swanson,33
Kevin_McGuire,1
Michael_Rennie,5
Olivier_Thomann,5


In [343]:
# Each assignee's share of all critical bug records, as a percentage.
critical_share = critical_df["Critical Bug Count"] / total_critical_bugs * 100
critical_df["% of Critical Cases Done"] = critical_share
critical_df.head(5)

Unnamed: 0_level_0,Critical Bug Count,% of Critical Cases Done
Assignee,Unnamed: 1_level_1,Unnamed: 2_level_1
Claude_Knaus,3,0.653595
Darin_Swanson,33,7.189542
Kevin_McGuire,1,0.217865
Michael_Rennie,5,1.089325
Olivier_Thomann,5,1.089325


In [344]:
# Render each assignee's share of critical bug records as a percentage string
# with two decimals.
# The values are recomputed from the counts rather than read back from this
# column, so the cell can be re-run: applying "{:.2f}%" to strings that are
# already formatted would raise ValueError.
critical_df["% of Critical Cases Done"] = (
    critical_df["Critical Bug Count"] / total_critical_bugs * 100
).map("{:.2f}%".format)
critical_df.head()

Unnamed: 0_level_0,Critical Bug Count,% of Critical Cases Done
Assignee,Unnamed: 1_level_1,Unnamed: 2_level_1
Claude_Knaus,3,0.65%
Darin_Swanson,33,7.19%
Kevin_McGuire,1,0.22%
Michael_Rennie,5,1.09%
Olivier_Thomann,5,1.09%


In [347]:
# Order assignees from fewest to most critical bug records.
critical_df.sort_values(by="Critical Bug Count", ascending=True)

Unnamed: 0_level_0,Critical Bug Count,% of Critical Cases Done
Assignee,Unnamed: 1_level_1,Unnamed: 2_level_1
emoffatt,1,0.22%
dean.t.roberts,1,0.22%
debbie_wilson,1,0.22%
djo,1,0.22%
eclipse.felipe,1,0.22%
...,...,...
eclipse,23,5.01%
Tod_Creasey,25,5.45%
Darin_Swanson,33,7.19%
daniel_megert,37,8.06%
