In [None]:
import pandas as pd
import networkx as nx
import pickle
import numpy as np
import sklearn.cluster
from itertools import permutations
import matplotlib.pyplot as plt

## Issue Topic

In [None]:
# Load the pickled object stored in 'topdistdf' and wrap it in a DataFrame
# (a later cell reads its 'topdist' column).
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only open a 'topdistdf' file that comes from a trusted source.
with open('topdistdf', 'rb') as file:
    issue_topic = pd.DataFrame(pickle.load(file))

In [None]:
issue_topic.head()

In [None]:
# Indices of the three highest-valued entries of each row's 'topdist', best first.
# argsort() sorts ascending, so taking its first three entries (the previous code)
# selected the three *lowest* values. Negating the values ranks the largest first;
# kind='stable' keeps tied entries in their original index order.
issue_topic['top3topic'] = issue_topic.apply(
    lambda x: np.argsort(-np.asarray(x['topdist'], dtype=float), kind='stable')[:3],
    axis=1,
)

In [None]:
# Spread the per-row top-3 index arrays over one column per rank.
# The 2-D array is built once and its first three columns are unpacked.
(issue_topic['1st'],
 issue_topic['2nd'],
 issue_topic['3rd']) = np.array(issue_topic['top3topic'].tolist())[:, :3].T

In [None]:
issue_topic['1st'].value_counts()

In [None]:
issue_topic['2nd'].value_counts()

In [None]:
issue_topic['3rd'].value_counts()

## Issue dependency

In [None]:
# Read the semicolon-separated issue links and discard the 'test' column.
issue_dependency = pd.read_csv('issuelink.csv', sep=';').drop(columns='test')

In [None]:
issue_dependency['relation'].unique()

In [None]:
issue_dependency.head()

In [None]:
# Directed multigraph built from the issue-link table.
# add_weighted_edges_from reads every row as (u, v, w) and stores w under the
# 'weight' edge attribute, so the third column ends up as the edge "weight".
# NOTE(review): presumably that third column is 'relation' - confirm the column
# order of issuelink.csv after 'test' has been dropped.
graph_issue_dependency = nx.MultiDiGraph()
graph_issue_dependency.add_weighted_edges_from(issue_dependency.to_numpy())

In [None]:
with open('issue_dependency_graph.pickle', 'wb') as handle:
    pickle.dump(graph_issue_dependency, handle, protocol=pickle.HIGHEST_PROTOCOL)

## Issue dependency (Grouped)

In [None]:
issue_dependency_group = issue_dependency.copy()

In [None]:
# Collapse every link type into a single 'depends' label, then remove the
# rows that became identical as a result.
issue_dependency_group = (
    issue_dependency_group
    .assign(relation='depends')
    .drop_duplicates()
)

In [None]:
issue_dependency_group.head()

In [None]:
graph_issue_dependency_group = nx.MultiDiGraph()
graph_issue_dependency_group.add_weighted_edges_from(issue_dependency_group.to_numpy())

In [None]:
with open('issue_dependency_group_graph.pickle', 'wb') as handle:
    pickle.dump(graph_issue_dependency_group, handle, protocol=pickle.HIGHEST_PROTOCOL)

## User collaboration

In [None]:
user_collab = pd.read_csv('team.csv')
user_collab['edge'] = 'collab'

In [None]:
assignee = pd.read_csv('assignee.csv', sep=';')

In [None]:
assignee.head()

In [None]:
# Join the assignee table onto the team table by issue key (DataFrame.join
# defaults to a left join, so every team row is kept).
# NOTE(review): the seven column names are then assigned by position. This assumes
# team.csv holds exactly issuekey + four role columns in this order and that
# assignee.csv contributes a single extra column - TODO confirm against the CSV headers.
user_collab = user_collab.set_index('issuekey').join(assignee.set_index('issuekey')).reset_index()
user_collab.columns = ['issuekey', 'dev', 'integrator', 'peer', 'tester', 'edge', 'assignee']

In [None]:
user_collab.head()

In [None]:
perm = list(permutations(user_collab.drop(['issuekey', 'edge'], axis=1).columns, 2))

In [None]:
# For every ordered pair of role columns, gather [user_a, user_b, edge] rows
# in which none of the three values is missing.
triples_collab = [
    row
    for pair in perm
    for row in user_collab[list(pair) + ['edge']].dropna().values.tolist()
]

In [None]:
# Drop self-pairs and de-duplicate; rows become tuples so they are hashable.
triples_collab = {
    tuple(triple)
    for triple in triples_collab
    if triple[0] != triple[1]
}

In [None]:
graph_user_collab = nx.MultiDiGraph()

In [None]:
graph_user_collab.add_weighted_edges_from(triples_collab)

In [None]:
with open('user_collab_graph.pickle', 'wb') as handle:
    pickle.dump(graph_user_collab, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
df_collab = pd.DataFrame(triples_collab)

In [None]:
np.unique(df_collab[0].tolist() + df_collab[1].tolist()).shape

## User in issue
* Uses the same CSV data as the User collaboration section.

In [None]:
def _role_triples(frame, role):
    """Return the de-duplicated rows of ``frame`` with ``role`` appended as last value.

    Rows containing a missing value are dropped first, then a constant ``'role'``
    column is added, exact duplicate rows are removed, and the result is returned
    as a list of row lists.
    """
    return frame.dropna().assign(role=role).drop_duplicates().values.tolist()


# One triple list per role. These were five copy-pasted stanzas that differed
# only in the column name and role label.
issue_dev = _role_triples(user_collab[['issuekey', 'dev']], 'dev')
issue_integrator = _role_triples(user_collab[['issuekey', 'integrator']], 'integrator')
issue_peer = _role_triples(user_collab[['issuekey', 'peer']], 'peer')
issue_tester = _role_triples(user_collab[['issuekey', 'tester']], 'tester')

# The assignee table is used as loaded, with all of its columns.
# NOTE(review): presumably it holds just the issue key and the assignee - confirm.
issue_assignee = _role_triples(assignee, 'assignee')

In [None]:
role_all = issue_dev+issue_integrator+issue_peer+issue_tester+issue_assignee

In [None]:
graph_issue_work = nx.MultiDiGraph()

In [None]:
graph_issue_work.add_weighted_edges_from(role_all)

In [None]:
with open('user_work_graph.pickle', 'wb') as handle:
    pickle.dump(graph_issue_work, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
pd.DataFrame(role_all)

## User Interaction

In [None]:
def k_means(data, n_clusters, log=False, n_grid=10000, n_init=5, random_state=0):
    """Derive 1-D cut points that split ``data`` into ``n_clusters`` K-Means groups.

    The values are clustered in one dimension. The fitted model is then evaluated
    on an evenly spaced grid between the smallest and largest (transformed) value,
    and the lowest grid point assigned to each cluster is taken as that cluster's
    lower bound.

    Parameters
    ----------
    data : object exposing ``to_numpy()`` (e.g. a pandas Series)
        Numeric values to cluster.
    n_clusters : int
        Number of clusters to fit.
    log : bool, default False
        If True, cluster on ``log(1 + x)`` and map the bounds back with
        ``exp(t) - 1`` before returning them.
    n_grid : int, default 10000
        Number of grid points used to locate the cluster boundaries.
    n_init : int, default 5
        Forwarded to ``sklearn.cluster.KMeans``.
    random_state : int, default 0
        Forwarded to ``sklearn.cluster.KMeans`` so results are repeatable.

    Returns
    -------
    list of float
        Ascending lower bounds of every cluster except the lowest one, expressed
        on the original (untransformed) scale.
    """
    values = data.to_numpy().reshape(-1, 1)
    if log:
        # log1p is the numerically accurate form of log(x + 1).
        values = np.log1p(values)

    model = sklearn.cluster.KMeans(n_clusters=n_clusters, n_init=n_init, random_state=random_state)
    model.fit(values)

    # Probe the fitted model on a regular grid to find where the label changes.
    grid = np.linspace(values.min(), values.max(), n_grid)
    labels = model.predict(grid.reshape(-1, 1))

    # Smallest grid value that falls in each cluster.
    lower_bounds = pd.Series(grid).groupby(labels).min().to_numpy()
    if log:
        # expm1 is the accurate inverse of log1p.
        lower_bounds = np.expm1(lower_bounds)

    # The smallest bound is just the data minimum, so it is not a threshold.
    return np.sort(lower_bounds)[1:].tolist()

In [None]:
user_interaction = pd.read_csv('global_pair_score.csv')

In [None]:
user_interaction.head()

In [None]:
# Split the pair table into a positive and a negative view that share the
# same column layout: tagger, taggee, score.
user_interaction_pos = (
    user_interaction[['tagger', 'taggee', 'positivescore']]
    .rename(columns={'positivescore': 'score'})
)
user_interaction_neg = (
    user_interaction[['tagger', 'taggee', 'negativescore']]
    .rename(columns={'negativescore': 'score'})
)

In [None]:
thresholds_pos = k_means(user_interaction['positivescore'], 3, log=True)
thresholds_neg = k_means(user_interaction['negativescore'], 3, log=True)

In [None]:
# Label each positive score by the two cut points:
# below the first -> low, between them -> medium, at or above the second -> high.
user_interaction_pos.loc[
    user_interaction_pos['score'].lt(thresholds_pos[0]), 'relation'
] = 'low_positive_interaction_score'
user_interaction_pos.loc[
    user_interaction_pos['score'].ge(thresholds_pos[0])
    & user_interaction_pos['score'].lt(thresholds_pos[1]),
    'relation'
] = 'medium_positive_interaction_score'
user_interaction_pos.loc[
    user_interaction_pos['score'].ge(thresholds_pos[1]), 'relation'
] = 'high_positive_interaction_score'

In [None]:
user_interaction_pos.head()

In [None]:
# Label each negative score by the two cut points:
# below the first -> low, between them -> medium, at or above the second -> high.
user_interaction_neg.loc[
    user_interaction_neg['score'].lt(thresholds_neg[0]), 'relation'
] = 'low_negative_interaction_score'
user_interaction_neg.loc[
    user_interaction_neg['score'].ge(thresholds_neg[0])
    & user_interaction_neg['score'].lt(thresholds_neg[1]),
    'relation'
] = 'medium_negative_interaction_score'
user_interaction_neg.loc[
    user_interaction_neg['score'].ge(thresholds_neg[1]), 'relation'
] = 'high_negative_interaction_score'

In [None]:
user_interaction_neg.head()

In [None]:
# Keep every column except the raw score and turn the rows into plain lists.
triples_interaction_pos = user_interaction_pos.drop(columns='score').values.tolist()
triples_interaction_neg = user_interaction_neg.drop(columns='score').values.tolist()

In [None]:
graph_user_interaction = nx.MultiDiGraph()

In [None]:
graph_user_interaction.add_weighted_edges_from(triples_interaction_pos+triples_interaction_neg)

## User Interaction (grouped)

In [None]:
user_interaction_group = user_interaction[['tagger', 'taggee']].copy()

In [None]:
user_interaction_group['relation'] = 'interacts'

In [None]:
user_interaction_group.head()

In [None]:
graph_user_interaction_group = nx.MultiDiGraph()
graph_user_interaction_group.add_weighted_edges_from(user_interaction_group.to_numpy())

## User expertise & Issue skill required

In [None]:
with open('PtoI', 'rb') as file:
    p_to_i = pickle.load(file)

In [None]:
with open('ItoC', 'rb') as file:
    i_to_c = pickle.load(file)

In [None]:
# For every issue listed under a user in p_to_i, each skill listed for that issue
# in i_to_c yields a (user, skill, 'expert') triple. Issues that have no entry in
# i_to_c are skipped, and the set removes repeated triples.
# (A leftover debug print for one hard-coded user name was removed here.)
triples_user_skill = set()
for user in p_to_i:
    for issue in p_to_i[user]:
        if issue not in i_to_c:
            continue
        triples_user_skill.update((user, skill, 'expert') for skill in i_to_c[issue])

In [None]:
# One (issue, skill, 'required') triple per skill listed for each issue in i_to_c.
triples_issue_skill = {
    (issue, skill, 'required')
    for issue in i_to_c
    for skill in i_to_c[issue]
}

In [None]:
graph_user_expertise = nx.MultiDiGraph()
graph_user_expertise.add_weighted_edges_from(triples_user_skill)

In [None]:
pd.DataFrame(triples_user_skill)

In [None]:
graph_issue_skill = nx.MultiDiGraph()
graph_issue_skill.add_weighted_edges_from(triples_issue_skill)

In [None]:
pd.DataFrame(triples_issue_skill)

In [None]:
skill_graph = nx.compose(graph_user_expertise, graph_issue_skill)

In [None]:
with open('skill_graph.pickle', 'wb') as handle:
    pickle.dump(skill_graph, handle, protocol=pickle.HIGHEST_PROTOCOL)

## Issue priority

In [None]:
issue_information = pd.read_csv('issueinformation.csv', sep=';')

In [None]:
issue_information.head()

In [None]:
issue_information['priority'].unique()

In [None]:
issue_information['priority'] = issue_information['priority'].fillna('None')
issue_information['relation'] = 'prioritize'

In [None]:
# Attach the relation label here rather than reading the shared
# issue_information['relation'] column: later sections overwrite that column
# ('contains issue', 'type'), so re-running this cell after them would
# otherwise emit the wrong label.
triples_priority = (
    issue_information[['issuekey', 'priority']]
    .assign(relation='prioritize')
    .values.tolist()
)

In [None]:
graph_priority = nx.MultiDiGraph()
graph_priority.add_weighted_edges_from(triples_priority)

## Issue - Project

In [None]:
issue_information['project'] = issue_information['issuekey'].apply(lambda x: x.split('-')[0])

In [None]:
issue_information['relation'] = 'contains issue'

In [None]:
issue_information.head()

In [None]:
# Attach the relation label here rather than reading the shared
# issue_information['relation'] column, which other sections overwrite with
# their own labels; this keeps the cell correct regardless of execution order.
triples_project_issue = (
    issue_information[['project', 'issuekey']]
    .assign(relation='contains issue')
    .values.tolist()
)

In [None]:
graph_project_issue = nx.MultiDiGraph()
graph_project_issue.add_weighted_edges_from(triples_project_issue)

In [None]:
with open('issue_project_graph.pickle', 'wb') as handle:
    pickle.dump(graph_project_issue, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
pd.DataFrame(triples_project_issue).shape[0]

## Issue - Type

In [None]:
issue_information['relation'] = 'type'

In [None]:
# Attach the relation label here rather than reading the shared
# issue_information['relation'] column, which other sections overwrite with
# their own labels; this keeps the cell correct regardless of execution order.
# NOTE(review): unlike 'priority', missing 'type' values are not filled before
# becoming graph nodes - confirm whether that is intended.
triples_issue_type = (
    issue_information[['issuekey', 'type']]
    .assign(relation='type')
    .values.tolist()
)

In [None]:
graph_issue_type = nx.MultiDiGraph()
graph_issue_type.add_weighted_edges_from(triples_issue_type)

In [None]:
with open('issue_type_graph.pickle', 'wb') as handle:
    pickle.dump(graph_issue_type, handle, protocol=pickle.HIGHEST_PROTOCOL)

## Month-Year Issue

In [None]:
# Issue key, its creation month formatted as MM-YYYY, and a constant relation label.
month_year_issue = issue_information[['issuekey']].assign(
    createdate=pd.to_datetime(issue_information['createdate']).dt.strftime("%m-%Y"),
    relation='created_on',
)

In [None]:
month_year_issue.head()

In [None]:
triples_month_year_issue = month_year_issue.values.tolist()

In [None]:
graph_month_year_issue = nx.MultiDiGraph()
graph_month_year_issue.add_weighted_edges_from(triples_month_year_issue)

In [None]:
pd.DataFrame(triples_month_year_issue).shape[0]

## Month-Year

In [None]:
# Distinct creation months in chronological order, formatted as MM-YYYY.
# Sorting happens on the real timestamps, so the first occurrence of each
# formatted month is already in time order and drop_duplicates() keeps that order.
# This replaces a string -> datetime -> string round-trip that re-parsed 'MM-YYYY'
# text without an explicit format. Missing dates are dropped so that no NaN
# "month" ends up in the chain.
month_year = (
    pd.to_datetime(issue_information['createdate'])
    .dropna()
    .sort_values()
    .dt.strftime("%m-%Y")
    .drop_duplicates()
    .to_numpy()
)

In [None]:
# Link each entry of month_year to the entry that follows it.
# NOTE(review): month_year only contains months that occur in the data, so a
# calendar month without issues is skipped over by its 'next_month' edge.
triples_month_year = [
    (current, following, 'next_month')
    for current, following in zip(month_year[:-1], month_year[1:])
]

In [None]:
graph_month_year = nx.MultiDiGraph()
graph_month_year.add_weighted_edges_from(triples_month_year)

In [None]:
pd.DataFrame(triples_month_year).shape

## Timezone

In [None]:
timezone = pd.read_csv('./user_timezone.csv')

In [None]:
timezone.head()

In [None]:
# Every column except 'continent', plus a constant 'lives in' label, de-duplicated.
triples_user_city = (
    timezone
    .drop(columns='continent')
    .assign(relation='lives in')
    .drop_duplicates()
    .to_numpy()
)

In [None]:
# Drop the user column, de-duplicate what remains, then keep (city, continent)
# and append a constant 'locates in' label.
triples_city_continent = (
    timezone
    .drop(columns='username')
    .drop_duplicates()
    .loc[:, ['city', 'continent']]
    .assign(relation='locates in')
    .to_numpy()
)

In [None]:
graph_timezone = nx.MultiDiGraph()
graph_timezone.add_weighted_edges_from(triples_user_city)
graph_timezone.add_weighted_edges_from(triples_city_continent)

## Pair Score

In [None]:
pair_score = pd.read_csv('pair_score.csv')

In [None]:
pair_score.head()

In [None]:
# (tagger, taggee, 'interacts') rows; the score columns are not used.
triples_pair_score = (
    pair_score[['tagger', 'taggee']]
    .assign(relation='interacts')
    .to_numpy()
)

In [None]:
graph_pair_score = nx.MultiDiGraph()
graph_pair_score.add_weighted_edges_from(triples_pair_score)

## Graph union

In [None]:
# user work + skill
graph01 = graph_issue_skill
for layer in (graph_user_expertise, graph_issue_work):
    # nx.compose returns a new graph, so the source graphs are never modified.
    graph01 = nx.compose(graph01, layer)

In [None]:
# user work + skill + issue_dependencies
graph02 = graph_issue_skill
for layer in (graph_user_expertise, graph_issue_work, graph_issue_dependency):
    # nx.compose returns a new graph, so the source graphs are never modified.
    graph02 = nx.compose(graph02, layer)

In [None]:
# user work + skill + issue_dependencies + user collaboration
graph03 = graph_issue_skill
for layer in (
    graph_user_expertise,
    graph_issue_work,
    graph_issue_dependency,
    graph_user_collab,
):
    # nx.compose returns a new graph, so the source graphs are never modified.
    graph03 = nx.compose(graph03, layer)

In [None]:
# user work + skill + issue_dependencies + user collaboration + project-issue
graph04 = graph_issue_skill
for layer in (
    graph_user_expertise,
    graph_issue_work,
    graph_issue_dependency,
    graph_user_collab,
    graph_project_issue,
):
    # nx.compose returns a new graph, so the source graphs are never modified.
    graph04 = nx.compose(graph04, layer)

In [None]:
# user work + skill + issue_dependencies + user collaboration + project-issue + date
graph05 = graph_issue_skill
for layer in (
    graph_user_expertise,
    graph_issue_work,
    graph_issue_dependency,
    graph_user_collab,
    graph_project_issue,
    graph_month_year_issue,
    graph_month_year,
):
    # nx.compose returns a new graph, so the source graphs are never modified.
    graph05 = nx.compose(graph05, layer)

In [None]:
# user work + skill + issue_dependencies + user collaboration + project-issue + date + user_interaction
graph06 = nx.compose(graph_issue_skill, graph_user_expertise)
graph06 = nx.compose(graph06, graph_issue_work)
graph06 = nx.compose(graph06, graph_issue_dependency)
graph06 = nx.compose(graph06, graph_user_collab)
graph06 = nx.compose(graph06, graph_project_issue)
graph06 = nx.compose(graph06, graph_month_year_issue)
graph06 = nx.compose(graph06, graph_month_year)
# nx.compose identifies MultiDiGraph edges by (u, v, key). graph_user_collab and
# graph_user_interaction both hold user -> user edges whose auto-generated keys
# start at 0, so composing them would overwrite the 'collab' edge of every pair
# that also has an interaction edge. Append the interaction edges under fresh
# keys instead. graph06 is already a new graph returned by nx.compose, so
# mutating it does not touch any source graph.
graph06.add_nodes_from(graph_user_interaction.nodes(data=True))
graph06.add_edges_from(graph_user_interaction.edges(data=True))

In [None]:
# user work + skill + issue_dependencies (grouped)
graph07 = graph_issue_skill
for layer in (graph_user_expertise, graph_issue_work, graph_issue_dependency_group):
    # nx.compose returns a new graph, so the source graphs are never modified.
    graph07 = nx.compose(graph07, layer)

In [None]:
# user work + skill + user_interaction (grouped)
graph08 = graph_issue_skill
for layer in (graph_user_expertise, graph_issue_work, graph_user_interaction_group):
    # nx.compose returns a new graph, so the source graphs are never modified.
    graph08 = nx.compose(graph08, layer)

In [None]:
# user work + skill + user collaboration + project-issue + date
graph09 = graph_issue_skill
for layer in (
    graph_user_expertise,
    graph_issue_work,
    graph_user_collab,
    graph_project_issue,
    graph_month_year_issue,
    graph_month_year,
):
    # nx.compose returns a new graph, so the source graphs are never modified.
    graph09 = nx.compose(graph09, layer)

In [None]:
# user work + skill + issue_dependencies + user collaboration + project-issue + date + user_interaction (grouped)
graph10 = nx.compose(graph_issue_skill, graph_user_expertise)
graph10 = nx.compose(graph10, graph_issue_work)
graph10 = nx.compose(graph10, graph_issue_dependency)
graph10 = nx.compose(graph10, graph_user_collab)
graph10 = nx.compose(graph10, graph_project_issue)
graph10 = nx.compose(graph10, graph_month_year_issue)
graph10 = nx.compose(graph10, graph_month_year)
# nx.compose identifies MultiDiGraph edges by (u, v, key). graph_user_collab and
# graph_user_interaction_group both hold user -> user edges whose auto-generated
# keys start at 0, so composing them would overwrite the 'collab' edge of every
# pair that also has an 'interacts' edge. Append the interaction edges under
# fresh keys instead. graph10 is already a new graph returned by nx.compose, so
# mutating it does not touch any source graph.
graph10.add_nodes_from(graph_user_interaction_group.nodes(data=True))
graph10.add_edges_from(graph_user_interaction_group.edges(data=True))

In [None]:
# user work + skill + issue_dependencies + user collaboration + project-issue + date + timezone
graph11 = graph_issue_skill
for layer in (
    graph_user_expertise,
    graph_issue_work,
    graph_issue_dependency,
    graph_user_collab,
    graph_project_issue,
    graph_month_year_issue,
    graph_month_year,
    graph_timezone,
):
    # nx.compose returns a new graph, so the source graphs are never modified.
    graph11 = nx.compose(graph11, layer)

In [None]:
# user work + skill + issue_dependencies + user collaboration + project-issue + date + user_interaction (grouped + no trust propagation)
graph12 = nx.compose(graph_issue_skill, graph_user_expertise)
graph12 = nx.compose(graph12, graph_issue_work)
graph12 = nx.compose(graph12, graph_issue_dependency)
graph12 = nx.compose(graph12, graph_user_collab)
graph12 = nx.compose(graph12, graph_project_issue)
graph12 = nx.compose(graph12, graph_month_year_issue)
graph12 = nx.compose(graph12, graph_month_year)
# nx.compose identifies MultiDiGraph edges by (u, v, key). graph_user_collab and
# graph_pair_score both hold user -> user edges whose auto-generated keys start
# at 0, so composing them would overwrite the 'collab' edge of every pair that
# also has an 'interacts' edge. Append the pair-score edges under fresh keys
# instead. graph12 is already a new graph returned by nx.compose, so mutating it
# does not touch any source graph.
graph12.add_nodes_from(graph_pair_score.nodes(data=True))
graph12.add_edges_from(graph_pair_score.edges(data=True))

In [None]:
# user work + skill + user_interaction (grouped + no trust propagation)
graph13 = graph_issue_skill
for layer in (graph_user_expertise, graph_issue_work, graph_pair_score):
    # nx.compose returns a new graph, so the source graphs are never modified.
    graph13 = nx.compose(graph13, layer)

In [None]:
# user work + skill + issue_dependencies + user collaboration + project-issue + date + issue type
graph14 = graph_issue_skill
for layer in (
    graph_user_expertise,
    graph_issue_work,
    graph_issue_dependency,
    graph_user_collab,
    graph_project_issue,
    graph_month_year_issue,
    graph_month_year,
    graph_issue_type,
):
    # nx.compose returns a new graph, so the source graphs are never modified.
    graph14 = nx.compose(graph14, layer)

## Write to file

In [None]:
# Serialise graph14 to 'graph14.pickle' using the newest pickle protocol.
# Of the composed graph01..graph14 variants, this is the only one written in this section.
with open('graph14.pickle', 'wb') as handle:
    pickle.dump(graph14, handle, protocol=pickle.HIGHEST_PROTOCOL)