In [1]:
import pandas as pd
# Location of the Eclipse bug export, relative to the notebook's working directory
csv_path = "Resources/EclipseBugs.csv"

# Load the raw CSV into a DataFrame; column labels are kept exactly as they appear in the file
eclipse_df = pd.read_csv(filepath_or_buffer=csv_path)

In [2]:
# List the column labels of the loaded frame.
# NOTE: in the recorded output several labels contain embedded newline
# characters (e.g. 'Bug\nID', 'Number of\nComments'), so they must be
# spelled with "\n" when selecting those columns.
eclipse_df.columns

Index(['Bug\nID', 'Product', 'Component', 'Assignee', 'Status', 'Resolution',
       'Summary', 'Changed', 'Assignee\nReal\nName', 'Classification',
       'Hardware', 'Number of\nComments', 'Opened', 'OS', 'Priority',
       'Reporter', 'Reporter\nReal\nName', 'Severity', 'Target\nMilestone',
       'Version', 'Votes'],
      dtype='object')

In [3]:
# Mean of the per-bug comment counts across every row of the dataset.
# The column label contains a literal newline, hence the "\n" in the key.
average_comments = eclipse_df.loc[:, "Number of\nComments"].mean()
average_comments

8.75

In [4]:
# Group the bug rows by "Assignee"; this GroupBy object is reused by later cells
assignee_group = eclipse_df.groupby("Assignee")

# Count how many bugs each Assignee has per Component. The result is indexed
# by (Assignee, Component) with a single count column.
# The count column is named "Component" explicitly instead of relying on the
# name value_counts() gives its result: pandas 2.0+ names that result "count"
# rather than reusing the source column name, which would turn the later
# rename of "Component" into a silent no-op and leave the column mislabeled.
assignee_work_df = assignee_group["Component"].value_counts().to_frame(name="Component")
assignee_work_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Component
Assignee,Component,Unnamed: 2_level_1
Aaron_Ferguson,UI,10
Adam_Schlegel,UI,7
ChrisAustin,User Assistance,3
Claude_Knaus,UI,31
Claude_Knaus,Text,7


In [5]:
# Display the grouped count table as a whole; for a frame this long pandas
# elides the middle rows, so only the first and last rows appear in the output.
assignee_work_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Component
Assignee,Component,Unnamed: 2_level_1
Aaron_Ferguson,UI,10
Adam_Schlegel,UI,7
ChrisAustin,User Assistance,3
Claude_Knaus,UI,31
Claude_Knaus,Text,7
...,...,...
valentam,Compare,6
veronika_irvine,SWT,2
wassim.melhem,UI,3
wassim.melhem,Update (deprecated - use RT>Equinox>p2),1


In [6]:
# Relabel the count column from "Component" to the more descriptive
# "Component Bug Count" (the "Component" index level is not affected)
assignee_work_df = assignee_work_df.rename(
    {"Component": "Component Bug Count"}, axis="columns")
assignee_work_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Component Bug Count
Assignee,Component,Unnamed: 2_level_1
Aaron_Ferguson,UI,10
Adam_Schlegel,UI,7
ChrisAustin,User Assistance,3
Claude_Knaus,UI,31
Claude_Knaus,Text,7


In [7]:
# Percentage of all bugs in the dataset that are assigned to each Assignee.
# Denominator: every row of the original frame.
total_bugs = eclipse_df.shape[0]

# Numerator: number of rows in each Assignee group (Series named "Assignee",
# indexed by Assignee)
bugs_per_user = assignee_group["Assignee"].count()

# Scale to a percentage and wrap in a one-column DataFrame; the column keeps
# the Series name "Assignee" until it is renamed in a later cell
user_bug_percent_df = bugs_per_user.div(total_bugs).mul(100).to_frame()
user_bug_percent_df.head()

Unnamed: 0_level_0,Assignee
Assignee,Unnamed: 1_level_1
Aaron_Ferguson,0.1
Adam_Schlegel,0.07
ChrisAustin,0.03
Claude_Knaus,0.38
Curtis_Windatt,0.06


In [8]:
# Two steps in one chain:
#   1. relabel the value column "Assignee" as "Percent of Total Bugs Assigned"
#   2. move the "Assignee" index into an ordinary column so it can be merged on
user_bug_percent_df = (
    user_bug_percent_df
    .rename(columns={"Assignee": "Percent of Total Bugs Assigned"})
    .reset_index()
)
user_bug_percent_df.head()

Unnamed: 0,Assignee,Percent of Total Bugs Assigned
0,Aaron_Ferguson,0.1
1,Adam_Schlegel,0.07
2,ChrisAustin,0.03
3,Claude_Knaus,0.38
4,Curtis_Windatt,0.06


In [9]:
# Reset the index of "assignee_work_df" (not the "assignee_group" GroupBy object)
# so that the "Assignee" and "Component" index levels become ordinary columns
# alongside "Component Bug Count".
assignee_work_df = assignee_work_df.reset_index()
# Display the flattened frame (pandas elides the middle rows of long output)
assignee_work_df

Unnamed: 0,Assignee,Component,Component Bug Count
0,Aaron_Ferguson,UI,10
1,Adam_Schlegel,UI,7
2,ChrisAustin,User Assistance,3
3,Claude_Knaus,UI,31
4,Claude_Knaus,Text,7
...,...,...,...
345,valentam,Compare,6
346,veronika_irvine,SWT,2
347,wassim.melhem,UI,3
348,wassim.melhem,Update (deprecated - use RT>Equinox>p2),1


In [10]:
# Attach each Assignee's overall percentage to every one of their
# per-component rows. Inner join on "Assignee" (the merge default), so the
# percentage value repeats for Assignees with more than one Component.
assignee_work_df = pd.merge(
    assignee_work_df,
    user_bug_percent_df,
    how="inner",
    on="Assignee",
)
assignee_work_df.head()

Unnamed: 0,Assignee,Component,Component Bug Count,Percent of Total Bugs Assigned
0,Aaron_Ferguson,UI,10,0.1
1,Adam_Schlegel,UI,7,0.07
2,ChrisAustin,User Assistance,3,0.03
3,Claude_Knaus,UI,31,0.38
4,Claude_Knaus,Text,7,0.38
