# 690V - MC3

#### By: Ajay Shaan Shanmugam and Siddharth Chandrasekaran

## Run instructions:

Due to bokeh's limitations we had to install an extra package called python louvian to construct the social network graph

So you will need this package installed to run this notebook.

Please use 'pip install python-louvain' 


In [1]:
__authors__ = "Ajay Shaan Shanmugam;Siddharth Chandrasekaran"
__license__ = "GPL"
__version__ = "1.0.1"
__emails__ = "ashanmugam@umass.edu;schandraseka@umass.edu"

import pandas as pd

from random import randint

import networkx as nx

from collections import namedtuple

from math import sqrt

import bokeh

from bokeh.models import HoverTool

from bokeh.plotting import show, figure

from bokeh.colors import RGB

import random

import community.community_louvain as community

from bokeh.models import ColumnDataSource,HoverTool,ColorBar,LabelSet

data = pd.read_csv('CellPhoneCallRecords.csv')



weights = data['Duration(seconds)'].tolist()


callers = data['From'].tolist()
receivers = data['To'].tolist()


edgelist = pd.DataFrame({'callers':callers,'receivers':receivers,'weights':weights})
#Normalizing for displaying
edgelist['weights'] = edgelist['weights']/100

all_nodes = list(set(callers))

for rec in list(set(receivers)):
    if rec in all_nodes:
        continue
    else:
        all_nodes.append(rec)
        


def generate_coordinates(m, n):
    seen = set()

    x, y = randint(m, n), randint(m, n)

    while True:
        seen.add((x, y))
        yield [x, y]
        x, y = randint(m, n), randint(m, n)
        while (x, y) in seen:
            x, y = randint(m, n), randint(m, n)

g = generate_coordinates(1,400)
xs = []
ys = []

for i in range(len(all_nodes)):
    xy = next(g)
    x = xy[0]
    y = xy[1]
    xs.append(x)
    ys.append(y)
nodelist = pd.DataFrame({'nodes':all_nodes,'x':xs,'y':ys})


def create_bokeh_graph(graph, clus_algo, title = None):

    

    def gen_edge_coordinates(graph, layout):

        xs = []

        ys = []

        val = namedtuple("edges", "xs ys")

        for edge in graph.edges():

            from_node = layout[edge[0]]

            to_node = layout[edge[1]]

            xs.append([from_node[0],to_node[0]])

            ys.append([from_node[1], to_node[1]])

        return val(xs=xs, ys=ys)



    def gen_node_coordinates(layout):

        names, coords = zip(*layout.items())

        xs, ys = zip(*coords)

        val = namedtuple("nodes", "names xs ys")

        return val(names=names, xs=xs, ys=ys)



    plot_layout = nx.spring_layout(graph)

    edges = graph.edges()
    
    weights = [graph[u][v]['weight'] for u,v in edges]

    nx.draw(graph, plot_layout, edges=edges, width=weights)

    _nodes = gen_node_coordinates(plot_layout)

    _edges = gen_edge_coordinates(graph, plot_layout)


    hover = HoverTool(tooltips=[('name', '@name'), 

                                ('node_id', '$index'),

                                ('degree', '@degree'),

                                ('cluster_no', '@community_nr')], names=["show_hover"])



    plot = figure(width=1000, height=1000, 

                 tools=[hover, 'box_zoom', 'resize', 'reset', 'wheel_zoom', 'pan', 'lasso_select'],

                logo = None)
    
    if title == None:
    
        plot.title.text ="Algorithm : " + clus_algo
    else:
        plot.title.text = title

    plot.toolbar.logo = None

    plot.axis.visible = False                            

    plot.xgrid.grid_line_color = None

    plot.ygrid.grid_line_color = None

    

    #Draw Edges

    source_edges = bokeh.models.ColumnDataSource(dict(xs=_edges.xs, ys=_edges.ys))

    plot.multi_line('xs', 'ys', line_color='navy', source=source_edges, alpha=0.17, line_width=weights)

    

    #best partition based on the algorithm selected

    degrees = list(nx.degree(graph).values())
    
    if clus_algo == "triangle":
        
        clustering = list(nx.triangles(graph).values())
    
    elif clus_algo == "square":
        
        clustering = list(nx.square_clustering(graph).values())
    
    else:
        
        clustering = list(nx.clustering(graph).values())
        
    best_partition = community.best_partition(graph)

    nodes, communities = zip(*best_partition.items())

    betw = list(nx.betweenness_centrality(graph).values())

    

    #Color mapper

    colormapper = {x : RGB(random.randrange(0,256),random.randrange(0,256),random.randrange(0,256)) 

                     for x in list(set(communities))}

    cluster_color_list, community_nr = zip(*[(colormapper[best_partition[node]], best_partition[node]) for node in nodes])



    

    graph_nodes = graph.number_of_nodes()

    

    colors = ['firebrick' for node in range(graph_nodes)]

    

    #Drawing circles for nodes

    source_nodes = bokeh.models.ColumnDataSource(dict(xs=_nodes.xs, ys=_nodes.ys, name=_nodes.names, 

                                                      single_color = colors,

                                                      color_by_cluster = cluster_color_list, 

                                                      degree=degrees, 

                                                      clustering=clustering, community_nr=community_nr,

                                                      betweenness = betw))

    

    r_circles = plot.circle('xs', 'ys', fill_color='single_color', line_color='single_color', 

                          source = source_nodes, alpha=0.7, size=9, name="show_hover")
    
    #plot.circle(0.538678941825779,0.586073848441823,fill_color='#000000',size=12)
    
 

    colorcallback = bokeh.models.callbacks.CustomJS(args=dict(source=source_nodes, circles=r_circles), code="""

        var value = cb_obj.get('value');

        circles.glyph.line_color.field = value;

        circles.glyph.fill_color.field = value;

        source.trigger('change')

    """)  

    

    button = bokeh.models.widgets.Select(title="Color", value="single_color", 

                                         options=["single_color", "color_by_cluster"], 

                                         callback=colorcallback)
    
    
    
    return [plot,button]
    




g = nx.Graph()

for i, elrow in edgelist.iterrows():
    g.add_edge(elrow['callers'], elrow['receivers'], weight=elrow['weights'])
    
for i, nlrow in nodelist.iterrows():
    g.node[nlrow['nodes']] = nlrow[1:].to_dict()
    
data = pd.read_csv('CellPhoneCallRecords_New.csv')
data = data[(data['From']==200) | (data['To']==200)]

weights = (data['Duration(seconds)'] * data['Frequency']).tolist()


callers = data['From'].tolist()
receivers = data['To'].tolist()


edgelist1 = pd.DataFrame({'callers':callers,'receivers':receivers,'weights':weights})
#Normalizing for displaying
edgelist1['weights'] = edgelist1['weights']/100

all_nodes = list(set(callers))

for rec in list(set(receivers)):
    if rec in all_nodes:
        continue
    else:
        all_nodes.append(rec)
        


def generate_coordinates(m, n):
    seen = set()

    x, y = randint(m, n), randint(m, n)

    while True:
        seen.add((x, y))
        yield [x, y]
        x, y = randint(m, n), randint(m, n)
        while (x, y) in seen:
            x, y = randint(m, n), randint(m, n)

g3 = generate_coordinates(1,400)
xs = []
ys = []

for i in range(len(all_nodes)):
    xy = next(g3)
    x = xy[0]
    y = xy[1]
    xs.append(x)
    ys.append(y)
nodelist1 = pd.DataFrame({'nodes':all_nodes,'x':xs,'y':ys})

g1 = nx.Graph()

for i, elrow in edgelist1.iterrows():
    g1.add_edge(elrow['callers'], elrow['receivers'], weight=elrow['weights'])
    
for i, nlrow in nodelist1.iterrows():
    g1.node[nlrow['nodes']] = nlrow[1:].to_dict()

layout_plot = []
layout_plot.append(create_bokeh_graph(g, "triangle"))
layout_plot.append(create_bokeh_graph(g, "square"))
layout_plot.append(create_bokeh_graph(g, "clustering"))
layout_plot1 = []
layout_plot1.append(create_bokeh_graph(g1, "clustering", "Visualization of Ferdinando's connections"))
    #Create grid and save

data = pd.read_csv('CellPhoneCallRecords.csv')
data = data[(data['From']==137)| (data['To']==137)| (data['From']==2)| (data['To']==2)| (data['From']==3)| (data['To']==3)]

weights = (data['Duration(seconds)'] ).tolist()


callers = data['From'].tolist()
receivers = data['To'].tolist()


edgelist1 = pd.DataFrame({'callers':callers,'receivers':receivers,'weights':weights})
#Normalizing for displaying
edgelist1['weights'] = edgelist1['weights']/100

all_nodes = list(set(callers))

for rec in list(set(receivers)):
    if rec in all_nodes:
        continue
    else:
        all_nodes.append(rec)
        


def generate_coordinates(m, n):
    seen = set()

    x, y = randint(m, n), randint(m, n)

    while True:
        seen.add((x, y))
        yield [x, y]
        x, y = randint(m, n), randint(m, n)
        while (x, y) in seen:
            x, y = randint(m, n), randint(m, n)

g3 = generate_coordinates(1,400)
xs = []
ys = []

for i in range(len(all_nodes)):
    xy = next(g3)
    x = xy[0]
    y = xy[1]
    xs.append(x)
    ys.append(y)
nodelist1 = pd.DataFrame({'nodes':all_nodes,'x':xs,'y':ys})

g1 = nx.Graph()

for i, elrow in edgelist1.iterrows():
    g1.add_edge(elrow['callers'], elrow['receivers'], weight=elrow['weights'])
    
for i, nlrow in nodelist1.iterrows():
    g1.node[nlrow['nodes']] = nlrow[1:].to_dict()

layout_plot1.append(create_bokeh_graph(g1, "clustering", "Visualization of David Vidro, Juan Vidro and Jorge Vidro's connections"))

layout_plot = bokeh.layouts.gridplot(layout_plot+layout_plot1)
show(layout_plot)

    Future behavior will be consistent with the long-time default:
    plot commands add elements without first clearing the
    Axes and/or Figure.
  b = plt.ishold()
    Future behavior will be consistent with the long-time default:
    plot commands add elements without first clearing the
    Axes and/or Figure.
  plt.hold(b)
  warn(message)
Supplying a user-defined data source AND iterable values to glyph methods is deprecated.

See https://github.com/bokeh/bokeh/issues/2056 for more information.

  warn(message)
  warn(message)
Supplying a user-defined data source AND iterable values to glyph methods is deprecated.

See https://github.com/bokeh/bokeh/issues/2056 for more information.

  warn(message)
  warn(message)
Supplying a user-defined data source AND iterable values to glyph methods is deprecated.

See https://github.com/bokeh/bokeh/issues/2056 for more information.

  warn(message)
  warn(message)
Supplying a user-defined data source AND iterable values to glyph methods is 

## Inferences:

#### The fourth plot delineates Ferdinando's connections based on the call log dataset. Here, the weight of an edge is determined by both the frequency of calls made between the nodes connecting the edge and the duration of each call made. This gives the best representation of the magnitude of talktime between our nodes of interest.

#### Given that identifier 200 is Ferdinando and based on the provided hint that Ferdinando talks to his brother Estaban most frequently, we identify node 5 (The node connected to node 200 by the thickest edge) to be Estaban.

#### Another clue that's been provided to us is that close relatives and associates that Ferdinando would be calling include David Vidro, Juan Vidro and Jorge Vidro, in addition to his brother Estaban. Therefore, out of the other 5 nodes, the nodes connected to Ferdinando by the next 3 thickest edges would be David Vidro, Juan Vidro and Jorge Vidro.

#### The final hint that we've been given is that David organizes most of the Paraiso activities. This gives us a strong reason to believe that he must have the most number of connections among the three. Therefore, we plot a new graph (5th graph) outlining all connections of David, Juan and Jorge. From this, we infer that node 2 has the highest degree (the most number of connections) and therefore we identify him as the mastermind behind the Paraiso activities, David!

#### As for distinguishing between Juan and Jorge, we don't have enough information in the problem statement or in the dataset to make a judgement on their identities.

#### Another interesting inference from the fifth graph is that nodes 0, 1, 34, 27 are mutual connections of David and Juan/Jorge. Since they're confirmed to be David's associates, we can infer that Juan/Jorge, David, Estaban, and the aforementioned nodes are definitely a part of the Paraiso activities.
