In [1]:
# Load the Eclipse bug-report CSV into a pandas DataFrame and preview it
import pandas as pd

# Relative path: resolved against the kernel's current working directory
csv_path = "../Resources/EclipseBugs.csv"
eclipse_df = pd.read_csv(filepath_or_buffer=csv_path)

# Show only the first three rows
eclipse_df.head(n=3)

Unnamed: 0,Bug ID,Product,Component,Assignee,Status,Resolution,Summary,Changed,Assignee Real Name,Classification,...,Number of Comments,Opened,OS,Priority,Reporter,Reporter Real Name,Severity,Target Milestone,Version,Votes
0,3638,JDT,UI,aeschli,VERIFIED,FIXED,Package Viewer: order resource folders before ...,1/17/2002 7:28,Martin Aeschlimann,Eclipse,...,3,10/10/2001 22:58,Windows NT,P1,aeschli,Martin Aeschlimann,major,---,2,0
1,3854,JDT,UI,aeschli,VERIFIED,FIXED,Wrong execution's classpath. (1GEY0W0),1/18/2002 4:02,Martin Aeschlimann,Eclipse,...,5,10/10/2001 23:01,Windows NT,P1,david_audel,David Audel,normal,---,2,0
2,4188,JDT,UI,aeschli,VERIFIED,FIXED,type hierachy - typo (1GJW2XJ),1/28/2002 3:12,Martin Aeschlimann,Eclipse,...,3,10/10/2001 23:07,Windows 2000,P1,erich_gamma,Erich Gamma,normal,---,2,0


In [2]:
# Mean of the comment-count column across every row of the DataFrame.
# The column label contains a literal newline between "of" and "Comments".
comment_column = "Number of\nComments"
average_comments = eclipse_df[comment_column].mean()
average_comments

8.75

In [3]:
# Group the bug records by the "Assignee" column
assignee_group = eclipse_df.groupby("Assignee")

# Count how many bugs each Assignee has per Component. value_counts() returns a
# Series indexed by (Assignee, Component). Name it "Component" explicitly:
# pandas 2.0+ names value_counts() results "count", which would turn the
# later rename of the "Component" column into a silent no-op.
assignee_work = (
    assignee_group["Component"]
    .value_counts()
    .rename("Component")
    .to_frame()
)
assignee_work.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Component
Assignee,Component,Unnamed: 2_level_1
Aaron_Ferguson,UI,10
Adam_Schlegel,UI,7
ChrisAustin,User Assistance,3
Claude_Knaus,UI,31
Claude_Knaus,Text,7


In [4]:
# Relabel the count column from "Component" to "Component Bug Count"
assignee_work = assignee_work.rename(
    {"Component": "Component Bug Count"},
    axis="columns",
)
assignee_work.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Component Bug Count
Assignee,Component,Unnamed: 2_level_1
Aaron_Ferguson,UI,10
Adam_Schlegel,UI,7
ChrisAustin,User Assistance,3


In [5]:
# Find each Assignee's share (in percent) of all bugs in the dataset.
# Every bug in an Assignee's group is counted; no Status/Resolution filter
# is applied here.
total_bugs = assignee_group.size().reset_index(name="Bug Count")

# Grand total across all assignees (the denominator for the percentages)
total_bugs_object = total_bugs["Bug Count"].sum()
print(total_bugs_object)

# Vectorized column arithmetic instead of a per-row Python loop
total_bugs["Percent of Total Bugs"] = (
    total_bugs["Bug Count"] / total_bugs_object * 100
)

total_bugs.head(3)

10000


Unnamed: 0,Assignee,Bug Count,Percent of Total Bugs
0,Aaron_Ferguson,10,0.1
1,Adam_Schlegel,7,0.07
2,ChrisAustin,3,0.03


In [6]:
# Attach each Assignee's overall "Bug Count" and "Percent of Total Bugs" to
# the per-component counts. reset_index() returns a new frame with the
# (Assignee, Component) index levels as ordinary columns, so the input frame
# is not mutated in place before the merge.
assignee_merged = assignee_work.reset_index().merge(total_bugs, on="Assignee")
display(assignee_merged.head(3))

# Keep only the reporting columns (this drops "Bug Count").
# NOTE: this rebinds assignee_work, so re-run the cells that build it before
# re-running this cell; otherwise the merge is applied to already-merged data.
assignee_work = assignee_merged[
    ["Assignee", "Percent of Total Bugs", "Component", "Component Bug Count"]
]
assignee_work.head()

Unnamed: 0,Assignee,Component,Component Bug Count,Bug Count,Percent of Total Bugs
0,Aaron_Ferguson,UI,10,10,0.1
1,Adam_Schlegel,UI,7,7,0.07
2,ChrisAustin,User Assistance,3,3,0.03


Unnamed: 0,Assignee,Percent of Total Bugs,Component,Component Bug Count
0,Aaron_Ferguson,0.1,UI,10
1,Adam_Schlegel,0.07,UI,7
2,ChrisAustin,0.03,User Assistance,3
3,Claude_Knaus,0.38,UI,31
4,Claude_Knaus,0.38,Text,7
