## Preparation

In [100]:
%pip install gremlinpython
%pip install bokeh


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [101]:
# General imports
import numpy as np
import matplotlib
from statistics import mean
import itertools

# Graph related imports
import nest_asyncio
from gremlin_python.driver import client, serializer
import networkx as nx

# Bokeh imports
from bokeh.plotting import figure, from_networkx
from bokeh.models import Range1d, Circle, MultiLine, ColumnDataSource, LabelSet, NodesAndLinkedEdges
from bokeh.io import save
from bokeh.transform import linear_cmap
from bokeh.palettes import Spectral8

### Establish gremlin connection

In [102]:
# Necessary to avoid "RuntimeError: Cannot run the event loop while another loop is running"
nest_asyncio.apply()

In [103]:
gremlin_client = client.Client('wss://leomathesis-cosmos-gremlin.gremlin.cosmos.azure.com:443/', 'g',
                               username="/dbs/mathesisleo-database/colls/year-graph-2010",
                               password="<<password>>",
                               message_serializer=serializer.GraphSONSerializersV2d0()
                              )

## Helper Functions

In [104]:
def get_selected_property_names(prefix, e, start_year=None, end_year=None, total=False):
    assert(bool(start_year or end_year) ^ total)
    selected_property_names = []
    for prop in e['properties'].keys():
        if prop.startswith(prefix):
            if total:
                if prop.endswith('_total'):
                    selected_property_names.append(prop)
            else:
                suffix = prop.split("_")[1]
                if suffix.isnumeric():
                    year = int(suffix)
                    if (start_year is None or start_year <= year) and (end_year is None or end_year >= year):
                        selected_property_names.append(prop)
    return selected_property_names


In [105]:
def safe_add_to_list(target_list, result):
    results = result
    for element in results:
        for list_entry in target_list:
            if list_entry['id'] == element['id']:
                break
        else:
            target_list.append(element)
    return target_list

In [106]:
def is_year_relevant(start_year, end_year, valid_from, valid_to):
    for year in range(start_year, end_year + 1):
        if (valid_from is None or year >= valid_from) and (valid_to is None or year <= valid_to):
            return True
    return False

In [107]:
def transform_string_date(string_date):
    if len(string_date) > 4 and string_date[:4].isnumeric():
        return int(string_date[:4])
    else:
        return None

In [108]:
def get_weight_averages(e_list, start_year=None, end_year=None, total=False):
    assert(bool(start_year or end_year) ^ total)
    weight_list_dict = {'discussed_together': [],
                        'member_discusses': [],
                        'discusses': []}
    for e in e_list:
        if e['label'] in weight_list_dict.keys():
            weight = 0
            for weight_prop in get_selected_property_names('weight', e, start_year, end_year, total):
                weight += e['properties'][weight_prop]
            weight_list_dict[e['label']].append(weight)
    for key, value in weight_list_dict.items():
        if len(value) > 0:
            weight_list_dict[key] = mean(value)
    return weight_list_dict

In [109]:
def s_hash(x):
    return hash(x) % 10**10

## Function to collect data from gremlin queries

In [110]:
def query_graph(topic_list, 
                neighbor_search_types=[], 
                weight_treshold={}, 
                start_year=None, 
                end_year=None, 
                total=False, 
                include_polit_parties=False, 
                all_edges=False,
                edges_among_selection=False):
    assert(bool(start_year or end_year) ^ total)
    assert(len(topic_list) >= 1)
    if include_polit_parties:
        assert('discusses' in neighbor_search_types)
    v_list = []
    e_list = []
    for topic in topic_list:
        callback = gremlin_client.submit_async(f"g.V('{topic}')")
        v_list = safe_add_to_list(v_list, callback.result().all().result())
        for neighbor_search_type in neighbor_search_types:
            if total:
                callback = gremlin_client.submit_async(f"g.V('{topic}').bothE().hasLabel('{neighbor_search_type}').has('weight_total', gt({weight_treshold[neighbor_search_type]})).otherV()")
                v_list = safe_add_to_list(v_list, callback.result().all().result())
            else:
                callback = gremlin_client.submit_async(f"g.V('{topic}').bothE().hasLabel('{neighbor_search_type}').has('weight_total', gt({weight_treshold[neighbor_search_type]}))")
                results = callback.result().all().result()
                for e in results:
                    weight = 0
                    for prop in get_selected_property_names('weight', e, start_year, end_year):
                        weight += e['properties'][prop]
                    if weight >= weight_treshold[neighbor_search_type]:
                        if e['inV'] == topic:
                            new_v = e['outV']
                        else:
                            new_v = e['inV']
                        callback = gremlin_client.submit_async(f"g.V('{new_v}')")
                        v_list = safe_add_to_list(v_list, callback.result().all().result())                   
            callback = gremlin_client.submit_async(f"g.V('{topic}').bothE().hasLabel('{neighbor_search_type}').has('weight_total', gt({weight_treshold[neighbor_search_type]}))")
            if total:
                e_list = safe_add_to_list(e_list, callback.result().all().result())
            else:
                results = callback.result().all().result()
                for e in results:
                    weight = 0
                    for prop in get_selected_property_names('weight', e, start_year, end_year):
                        weight += e['properties'][prop]
                    if weight >= weight_treshold[neighbor_search_type]:
                        e_list = safe_add_to_list(e_list, [e])
    if include_polit_parties:
        for v in v_list:
            if v['properties']['type'][0]['value'] == 'politician':
                callback = gremlin_client.submit_async(f"g.V('{v['id']}').outE().hasLabel('is_member_of')")
                results = callback.result().all().result()
                for e in results:
                    if total or is_year_relevant(start_year, end_year, 
                                                 transform_string_date(e['properties']['valid_from']), transform_string_date(e['properties']['valid_to'])):
                        e_list = safe_add_to_list(e_list, [e])
                        callback = gremlin_client.submit_async(f"g.V('{e['inV']}')")
                        v_list = safe_add_to_list(v_list, callback.result().all().result())  
    if all_edges:
        all_v_ids = [v['id'] for v in v_list]
        for neighbor_search_type in neighbor_search_types:
            callback = gremlin_client.submit_async(f"g.E().hasLabel('{neighbor_search_type}').has('weight_total', gt({weight_treshold[neighbor_search_type]}))")
            all_e = callback.result().all().result()
            for e in all_e:
                if not total:
                    weight = 0
                    for prop in get_selected_property_names('weight', e, start_year, end_year):
                        weight += e['properties'][prop]
                    if weight < weight_treshold[neighbor_search_type]:
                        continue
                if e['inV'] in all_v_ids and e['outV'] in all_v_ids:
                    e_list = safe_add_to_list(e_list, [e])
    if edges_among_selection and len(topic_list) > 1:
        for selected_topic_comb in itertools.combinations(topic_list, 2):
            callback = gremlin_client.submit_async(f"g.V('{selected_topic_comb[0]}').bothE().where(otherV().hasId('{selected_topic_comb[1]}'))")
            result_e = callback.result().all().result()
            for e in result_e:
                weight = 0
                for prop in get_selected_property_names('weight', e, start_year, end_year):
                    weight += e['properties'][prop]
                if total or weight >= weight_treshold['discussed_together']:
                    e_list = safe_add_to_list(e_list, [e])
    return v_list, e_list

## Function to create a networkx graph from the gremlin data

In [111]:
def create_networkx(v_list, e_list, start_year=None, end_year=None, total=False):
    assert(bool(start_year or end_year) ^ total)
    G = nx.Graph()
    for v in v_list:
        G.add_node(s_hash(v['id']), label=v['label'], type=v['properties']['type'][0]['value'], type_color=type_colors[v['properties']['type'][0]['value']])   
    weight_averages = get_weight_averages(e_list, start_year, end_year, total)
    for e in e_list:
        weight = 3
        sentiment = 0
        if e['label'] in ('discussed_together', 'member_discusses', 'discusses'):
            weight = 0
            for weight_prop in get_selected_property_names('weight', e, start_year, end_year, total):
                weight += e['properties'][weight_prop]
            weight = (weight / weight_averages[e['label']]) * 5
        if e['label'] in ('member_discusses', 'discusses'):
            sentiments = []
            for sent_prop in get_selected_property_names('sentiment', e, start_year, end_year, total):
                sentiments.append(e['properties'][sent_prop])
            sentiment = mean(sentiments)
        G.add_edge(s_hash(e['outV']), s_hash(e['inV']), sentiment=sentiment, weight=weight)  
    return G

## Create the bokeh visualization from the networkx graph

In [112]:
type_colors = {'politician': Spectral8[0],
               'party': Spectral8[1],
               'topic': Spectral8[2]}

colors = ["red", "grey", "green"]
cmap1 = matplotlib.colors.LinearSegmentedColormap.from_list("mycmap", colors)
palette = [matplotlib.colors.rgb2hex(c) for c in cmap1(np.linspace(0.1, 0.9, 192))]

In [140]:
def save_bokeh(G, file_name="bokeh_2010.html"):
    hover_tooltips = []
    plot = figure(tooltips=hover_tooltips,
                tools="pan,wheel_zoom,save,reset", active_scroll='wheel_zoom',
                x_range=Range1d(-10.1, 10.1), y_range=Range1d(-10.1, 10.1), width=1500, height=1000)
    plot.grid.visible = False
    plot.axis.visible = False
    network_graph = from_networkx(G, nx.spring_layout, scale=10, center=(0, 0))
    network_graph.node_renderer.glyph = Circle(size=15, fill_color='type_color')
    network_graph.edge_renderer.glyph = MultiLine(line_alpha=0.5, line_width='weight', line_color=linear_cmap('sentiment', palette, -0.4, 0.4))

    network_graph.node_renderer.hover_glyph = Circle(size=15, fill_color='type_color', line_width=2)
    network_graph.node_renderer.selection_glyph = Circle(size=15, fill_color='type_color', line_width=2)
    network_graph.edge_renderer.selection_glyph = MultiLine(line_alpha=1, line_width='weight', line_color=linear_cmap('sentiment', palette, -0.4, 0.4))
    network_graph.edge_renderer.hover_glyph = MultiLine(line_alpha=1, line_width='weight', line_color=linear_cmap('sentiment', palette, -0.4, 0.4))

    plot.renderers.append(network_graph)

    network_graph.selection_policy = NodesAndLinkedEdges()
    network_graph.inspection_policy = NodesAndLinkedEdges()

    #Add Labels
    x, y = zip(*network_graph.layout_provider.graph_layout.values())
    node_labels = list(nx.get_node_attributes(G, 'label').values())
    source = ColumnDataSource({'x': x, 'y': y, 'name': [node_labels[i] for i in range(len(x))]})
    labels = LabelSet(x='x', y='y', text='name', source=source, background_fill_color='white', text_font_size='15px', background_fill_alpha=.7)
    plot.renderers.append(labels)

    save(plot, filename=file_name)

# Execute it!

Most Frequent Topics since 2010 and the Relations among them

In [114]:
v_list, e_list = query_graph(topic_list=['topic_0', 'topic_1', 'topic_2', 'topic_3', 'topic_4', 'topic_5', 'topic_6', 'topic_7', 'topic_8', 'topic_9'],
                             #neighbor_search_types=['discussed_together', 'discusses', 'member_discusses'],
                             #weight_treshold={'discussed_together': 1, 'discusses': 100, 'member_discusses': 100},
                             neighbor_search_types=[],
                             weight_treshold={'discussed_together': 17.5},                             
                             start_year=2010, 
                             end_year=2023, 
                             include_polit_parties=False,
                             total=False,
                             all_edges=True,
                             edges_among_selection=True)
G = create_networkx(v_list=v_list, e_list=e_list, total=True)
save_bokeh(G, "bokeh.html")

  save(plot, filename=file_name)
  save(plot, filename=file_name)


ZKB and connected topics

In [115]:
v_list, e_list = query_graph(topic_list=['topic_6'],
                             #neighbor_search_types=['discussed_together', 'discusses', 'member_discusses'],
                             #weight_treshold={'discussed_together': 1, 'discusses': 100, 'member_discusses': 100},
                             neighbor_search_types=['discussed_together'],
                             weight_treshold={'discussed_together': 6.5},                             
                             start_year=2010, 
                             end_year=2023, 
                             include_polit_parties=False,
                             total=False,
                             all_edges=False,
                             edges_among_selection=True)
G = create_networkx(v_list=v_list, e_list=e_list, total=True)
save_bokeh(G, "bokeh.html")

  save(plot, filename=file_name)
  save(plot, filename=file_name)


ZKb connected to politicians

In [116]:
v_list, e_list = query_graph(topic_list=['topic_6'],
                             #neighbor_search_types=['discussed_together', 'discusses', 'member_discusses'],
                             #weight_treshold={'discussed_together': 1, 'discusses': 100, 'member_discusses': 100},
                             neighbor_search_types=['discusses'],
                             weight_treshold={'discusses': 25},                             
                             start_year=2022, 
                             end_year=2022, 
                             include_polit_parties=True,
                             total=False,
                             all_edges=False,
                             edges_among_selection=True)
G = create_networkx(v_list=v_list, e_list=e_list, total=True)
save_bokeh(G, "bokeh.html")

  save(plot, filename=file_name)
  save(plot, filename=file_name)


General topics and politicians

In [117]:
v_list, e_list = query_graph(topic_list=['topic_0', 'topic_1', 'topic_2', 'topic_3', 'topic_4'],
                             #neighbor_search_types=['discussed_together', 'discusses', 'member_discusses'],
                             #weight_treshold={'discussed_together': 1, 'discusses': 100, 'member_discusses': 100},
                             neighbor_search_types=['discusses'],
                             weight_treshold={'discusses': 150, 'discussed_together': 1},                             
                             start_year=2022, 
                             end_year=2022, 
                             include_polit_parties=False,
                             total=False,
                             all_edges=True,
                             edges_among_selection=True)
G = create_networkx(v_list=v_list, e_list=e_list, total=True)
save_bokeh(G, "bokeh.html")

  save(plot, filename=file_name)
  save(plot, filename=file_name)


ZKB conected to parties

In [118]:
v_list, e_list = query_graph(topic_list=['topic_6'],
                             #neighbor_search_types=['discussed_together', 'discusses', 'member_discusses'],
                             #weight_treshold={'discussed_together': 1, 'discusses': 100, 'member_discusses': 100},
                             neighbor_search_types=['member_discusses'],
                             weight_treshold={'member_discusses': 100},                             
                             start_year=2010, 
                             end_year=2023, 
                             include_polit_parties=False,
                             total=False,
                             all_edges=False,
                             edges_among_selection=True)
G = create_networkx(v_list=v_list, e_list=e_list, total=True)
save_bokeh(G, "bokeh.html")

  save(plot, filename=file_name)
  save(plot, filename=file_name)


Show abilities

In [155]:
v_list, e_list = query_graph(topic_list=['topic_6', 'topic_8', 'topic_12'],
                             #neighbor_search_types=['discussed_together', 'discusses', 'member_discusses'],
                             #weight_treshold={'discussed_together': 1, 'discusses': 100, 'member_discusses': 100},
                             neighbor_search_types=['discusses'],
                             weight_treshold={'discussed_together': 3, 'discusses': 150},                             
                             start_year=2020, 
                             end_year=2023, 
                             include_polit_parties=False,
                             total=False,
                             all_edges=True,
                             edges_among_selection=True)
G = create_networkx(v_list=v_list, e_list=e_list, total=True)
save_bokeh(G, "bokeh.html")

  save(plot, filename=file_name)
  save(plot, filename=file_name)
