In [None]:
import pandas as pd
import pickle
import networkx as nx
import os.path

In [None]:
# Pickle cache file holding the TDoc data of the meeting to analyse.
# Keep exactly one cache_file assignment active; (un)comment the lines below to switch meeting.
# SA2-141E
# cache_file = 'TDocsByAgenda_TSGS2_141e_Electronic_745f0abc6d641cb65afd5d8aef4476a7.pickle'
# SA2-137E
cache_file = 'TDocsByAgenda_TSGS2_137e_Electronic_f758bcb52ead1c245c3797d880ae986c.pickle'

In [None]:
# Prefix shared by all generated output files: the cache path without its extension.
# os.path.splitext only strips the final extension, so a dot elsewhere in the path
# (e.g. './cache/x.pickle') no longer truncates the prefix.
output_file_prefix = os.path.splitext(cache_file)[0]

# NOTE: pickle.load can execute arbitrary code while loading. Only open cache files
# that come from a trusted source.
with open(cache_file, 'rb') as f:
    cache = pickle.load(f)

# Entries read from the cached dictionary
tdocs               = cache['tdocs']
contributor_columns = cache['contributor_columns']
others_cosigners    = cache['others_cosigners']

In [None]:
# Number of sources per TDoc: the summary is a comma-separated list, so the
# number of entries is the number of commas plus one
tdocs['Source count'] = tdocs['Source (summary)'].apply(lambda summary: summary.count(',') + 1)

In [None]:
# Contributor names: the contributor column labels with the 'Contributed by ' text removed,
# followed by an extra 'Others' entry
contributors = [column.replace('Contributed by ','') for column in contributor_columns] + ['Others']

# Display settings: no column limit, at most 50 rows, 7 rows when the output is truncated
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 50)
pd.set_option('display.min_rows', 7)
display(tdocs)

In [None]:
# Make the diagram simpler by plotting only companies: the entries below
# (working groups, TSGs and external organizations) are filtered out
contributors_to_ignore_in_diagram = (
    ['RAN WG{0}'.format(wg) for wg in range(1, 4)]
    + ['SA WG{0}'.format(wg) for wg in range(1, 7)]
    + ['TSG SA', 'TSG CT', 'TSG RAN']
    + ['CT WG{0}'.format(wg) for wg in range(1, 5)]
    + ['IETF', 'IEEE', 'BBF', 'GSMA']
)

In [None]:
# Co-signing statistics are computed only over TDocs that have more than one source
tdocs_with_multiple_sources = tdocs[tdocs['Source count']>1]

# One boolean row mask per contributor, computed once and reused for both the node sizes
# and the pair counts. The explicit '== True' comparison selects only rows whose flag is
# exactly True, so any other value (including a missing one) is excluded.
contributed_masks = {
    contributor: tdocs_with_multiple_sources['Contributed by {0}'.format(contributor)] == True
    for contributor in contributors
}

G = nx.Graph()
contributors_in_meeting = []
nodes = {}
edge_sizes = {}
for contributor in contributors:
    # Each contributor is a node. Its size is the number of multi-source TDocs (with a
    # non-null 'AI' value) flagged for this contributor. Cast to a built-in int, as is done
    # for the edge weights below, so node and edge attributes have the same plain type
    # when the graph is exported.
    contributor_size = int(tdocs_with_multiple_sources.loc[contributed_masks[contributor], 'AI'].count())
    if contributor_size > 0:
        contributors_in_meeting.append(contributor)
        nodes[contributor] = contributor_size
        G.add_node(contributor, size=contributor_size)

print('{0} contributors counted in this meeting'.format(len(contributors_in_meeting)))
# NOTE(review): the entities filtered out here were already added to G above, so they stay
# in the graph as nodes without edges. Confirm whether that is intended for the exported graph.
contributors_in_meeting = [e for e in contributors_in_meeting if e not in contributors_to_ignore_in_diagram]
print('{0} contributors counted in this meeting after filtering non-company entities'.format(len(contributors_in_meeting)))

# Generate co-signing matrix: a square N (rows) x M (columns) matrix of zeros,
# one row and one column per remaining contributor
N = len(contributors_in_meeting)
M = N
co_sign_numbers = [[0] * M for _ in range(N)]

print("Relation matrix after initializing {0}x{1}: ".format(N,M))

total_contributors = N
for idx_1, contributor_1 in enumerate(contributors_in_meeting):
    # Visit every unordered pair once: only pair contributor_1 with the contributors that
    # come after it. idx_2 is the position of contributor_2 in the full list.
    for idx_2, contributor_2 in enumerate(contributors_in_meeting[idx_1+1:], start=idx_1+1):
        co_signed = contributed_masks[contributor_1] & contributed_masks[contributor_2]
        tdoc_count = int(tdocs_with_multiple_sources.loc[co_signed, 'AI'].count())
        if tdoc_count > 0:
            edge_sizes['{0}-{1}'.format(contributor_1,contributor_2)] = tdoc_count
            G.add_edge(contributor_1, contributor_2, weight=tdoc_count)
            # The matrix is symmetric: fill both (row, column) and (column, row)
            co_sign_numbers[idx_1][idx_2] = tdoc_count
            co_sign_numbers[idx_2][idx_1] = tdoc_count

print("Contributors in meeting ({0})".format(len(contributors_in_meeting)))
print(contributors_in_meeting)
# display(pd.DataFrame(co_sign_numbers))

In [None]:
# Will not work if Chord is called before, so let's put Chord at the end
import plotly.express as px
# import plotly.io as pio
# pio.renderers.default='notebook'

# Histogram of the number of sources per TDoc
fig = px.histogram(tdocs, x="Source count")
fig.show()

# fig.write_html( output_file_prefix + "source count.html")
# fig.write_image(output_file_prefix + "source count.png")

In [None]:
# TDocs ordered by their number of sources, highest first
tdocs_sorted_by_source_count = tdocs.sort_values(by='Source count', ascending=False)
# Show the first 21 rows of that ordering, restricted to the columns listed here
tdocs_to_display = tdocs_sorted_by_source_count.head(21)[['AI', 'Type', 'Doc For', 'Title', 'Source', 'Source count']]
display(tdocs_to_display)

In [None]:
# Call Chord after all plotly calls. If not, it breaks. Maybe some Jupyter Lab/Notebook incompatibility
from chord import Chord

# Rendering options for the co-signing chord diagram
chord_options = dict(
    names=contributors_in_meeting,
    wrap_labels=False,
    margin=50,
    font_size_large="10px",
    noun="contributions",
    padding=0.03,
)
chord_diagram = Chord(co_sign_numbers, **chord_options)
chord_diagram.show()
# Also save the diagram as an HTML file named after the cache file
chord_diagram.to_html(filename=output_file_prefix + '_Co-signing.html')

In [None]:
# Generate graph file to be used with Cytoscape
[cache_folder, picke_file] = os.path.split(cache_file)
# output_file_prefix is derived from the whole cache_file path, so it may already contain
# the folder part. Join only its last path component to avoid repeating the folder
# (e.g. 'data/data/x.graphml').
graphml_name = '{0}.graphml'.format(os.path.basename(output_file_prefix))
graphml_path = os.path.join(cache_folder, graphml_name)
nx.write_graphml(G, graphml_path)
display(graphml_path)

In [None]:
# Distinct values found in the 'Result' and 'Type' columns
display(tdocs['Result'].unique(), tdocs['Type'].unique())

In [None]:
# We will show for the following document types:
#  - P-CR
#  - CR
#  - LS In
#  - LS OUT
# For each of the categories, a document can end in either Merged/Revised, Withdrawn, Unhandled, Approved/Agreed
# On the left we have the initial documents, in the middle the merged/revised ones (some may be repeated, it is just a sum of what was revised/merged from the left part), and on the right the final documents

In [None]:
# Sanity check: I found out that the regexes for CATT and AT&T are clashing
# tdocs[(tdocs['Contributed by AT&T']==True) & (tdocs['Contributed by CATT']==True)]
# check_1 = 'XXX'
# check_2 = 'XXX'
# tdocs[(tdocs['Contributed by {0}'.format(check_1)]==True) & (tdocs['Contributed by {0}'.format(check_2)]==True)]