# Data Viz Project

## Importing data


In [1]:
# Basic data manipulation and visualisation libraries
import seaborn as sns
import pandas as pd
import plotly.express as px
import chart_studio.plotly as py
import plotly.graph_objs as go
from plotly.offline import iplot
%pylab

#network libraries
import networkx as nx

#dash libraries
from jupyter_dash import JupyterDash
from dash import Dash, html, dcc, Input, Output

Using matplotlib backend: <object object at 0x000001E00E477CB0>
Populating the interactive namespace from numpy and matplotlib


Network data

In [3]:
matrix = np.genfromtxt(r".\budapest_edge_list.csv", dtype=int, delimiter=';')

#creating graph
G = nx.Graph()

#creo nodes
nodes=[]
for i in range(0,84):
    nodes.append(i)

G.add_nodes_from(nodes)

#creating links
G.add_edges_from(matrix)

# Removing self-loops
G.remove_edges_from(list(nx.selfloop_edges(G)))

print(G.number_of_nodes())
print(G.number_of_edges())
print(nx.density(G))
degree_list=list(G.degree())
deg=array(degree_list)

84
265
0.07601835915088927


Calculate nodes clustering coefficients

In [6]:
clustering=[]
for i in range (0,G.number_of_nodes()):
    c=nx.clustering(G,i)
    clustering.append(c)
df_clustering=pd.DataFrame(clustering, columns= ['clustering'])
df_clustering.head()

Unnamed: 0,clustering
0,0.0
1,0.733333
2,0.0
3,0.0
4,1.0


DataFrame of Degree

In [7]:
df_deg=pd.DataFrame(deg)
df_deg_sorted=df_deg.sort_values(by=0)
df_deg_sorted.columns=['number','degree']
df_deg_sorted.reset_index(drop=True,inplace=True)
df_deg_sorted.head()

Unnamed: 0,number,degree
0,0,0
1,1,6
2,2,0
3,3,0
4,4,4


Position Dataframe

Import nodes positions

In [8]:
df_pos= pd.read_csv(r".\DKT_positions_new.txt", delimiter=',', dtype=float, header=None, names=['x','y','z'])
df_pos.head()

Unnamed: 0,x,y,z
0,-0.154947,-18.325225,18.71443
1,23.352113,35.802817,-17.323944
2,41.238095,46.857143,-13.047619
3,7.384615,65.846154,-12.153846
4,5.421053,43.157895,-14.578947


Centrality dataframe

In [9]:
closeness =list(nx.closeness_centrality(G).values())
df_closeness=pd.DataFrame(closeness)
df_closeness.columns=['closeness']
df_closeness.head()

Unnamed: 0,closeness
0,0.0
1,0.319675
2,0.0
3,0.0
4,0.315214


Betweenness dataframe

In [11]:
betweenness = list(nx.betweenness_centrality(G, weight='distance', normalized=True).values())
df_betweenness=pd.DataFrame(betweenness)
df_betweenness.columns=['betweenness']
df_betweenness.head()

Unnamed: 0,betweenness
0,0.0
1,0.003366
2,0.0
3,0.0
4,0.0


Community dataframe

In [13]:
import community.community_louvain as community_louvain
parts = community_louvain.best_partition(G)

#Creiamo un dataframe di tutti i nodi ordinati per comunità 
parts_sorted= sorted(parts.items())
type(parts_sorted)
df_partition = pd.DataFrame(parts_sorted, columns=['node name','#partition'])
df_partition.head()

Unnamed: 0,node name,#partition
0,0,0
1,1,4
2,2,2
3,3,3
4,4,4


In [14]:
df_data = pd.concat([df_deg_sorted, df_pos,df_closeness,df_betweenness,df_partition['#partition'],df_clustering], axis=1)
df_data.head()

Unnamed: 0,number,degree,x,y,z,closeness,betweenness,#partition,clustering
0,0,0,-0.154947,-18.325225,18.71443,0.0,0.0,0,0.0
1,1,6,23.352113,35.802817,-17.323944,0.319675,0.003366,4,0.733333
2,2,0,41.238095,46.857143,-13.047619,0.0,0.0,2,0.0
3,3,0,7.384615,65.846154,-12.153846,0.0,0.0,3,0.0
4,4,4,5.421053,43.157895,-14.578947,0.315214,0.0,4,1.0


DataFrame Edges

Import node links

In [16]:
df_edges= pd.read_csv(r".\edges_and_names_and_etc.csv", delimiter=';')
df_edges.head()

Unnamed: 0,parent,child,name_parent,name_child,consensus,weight
0,1,37,ctx_rh_lateralorbitofrontal,Right_Putamen,220,5.000.000
1,1,37,ctx_rh_lateralorbitofrontal,Right_Putamen,386,12.000.000
2,1,5,ctx_rh_lateralorbitofrontal,ctx_rh_parstriangularis,210,2.000.000
3,1,7,ctx_rh_lateralorbitofrontal,ctx_rh_rostralmiddlefrontal,209,2.000.000
4,1,36,ctx_rh_lateralorbitofrontal,Right_Caudate,210,3.000.000


Create nodes names (that we have) uniting names from from parents and childs nodes

In [17]:
df_edge_names=df_edges[['parent','name_parent']].drop_duplicates()
df_edge_names.columns=['node','name']
df_edge_names.head()

Unnamed: 0,node,name
0,1,ctx_rh_lateralorbitofrontal
35,4,ctx_rh_medialorbitofrontal
46,5,ctx_rh_parstriangularis
55,6,ctx_rh_parsopercularis
67,7,ctx_rh_rostralmiddlefrontal


In [18]:
df_edge_names2=df_edges[['child','name_child']].drop_duplicates()
df_edge_names2.columns=['node','name']
df_edge_names2.head()

Unnamed: 0,node,name
0,37,Right_Putamen
2,5,ctx_rh_parstriangularis
3,7,ctx_rh_rostralmiddlefrontal
4,36,Right_Caudate
6,1,ctx_rh_lateralorbitofrontal


In [19]:
df_edge_names_tot=pd.concat([df_edge_names,df_edge_names2])
df_edge_names_tot=df_edge_names_tot.drop_duplicates()
df_edge_names_tot.sort_values(by=['node'],inplace=True)
df_edge_names_tot.reset_index(drop=True,inplace=True)

df_edge_names_tot.set_index(df_edge_names_tot['node'], inplace=True)
df_edge_names_tot=df_edge_names_tot.reindex(nodes)
df_edge_names_tot.head()


Unnamed: 0_level_0,node,name
node,Unnamed: 1_level_1,Unnamed: 2_level_1
0,,
1,1.0,ctx_rh_lateralorbitofrontal
2,,
3,,
4,4.0,ctx_rh_medialorbitofrontal


Total dataframe of the nodes

In [20]:
df_data = pd.concat([df_deg_sorted, df_pos,df_closeness,df_betweenness,df_edge_names_tot['name'],df_partition['#partition'],df_clustering], axis=1)
df_data.head()

Unnamed: 0,number,degree,x,y,z,closeness,betweenness,name,#partition,clustering
0,0,0,-0.154947,-18.325225,18.71443,0.0,0.0,,0,0.0
1,1,6,23.352113,35.802817,-17.323944,0.319675,0.003366,ctx_rh_lateralorbitofrontal,4,0.733333
2,2,0,41.238095,46.857143,-13.047619,0.0,0.0,,2,0.0
3,3,0,7.384615,65.846154,-12.153846,0.0,0.0,,3,0.0
4,4,4,5.421053,43.157895,-14.578947,0.315214,0.0,ctx_rh_medialorbitofrontal,4,1.0


Calculate edge betweenness

In [22]:
edge_betweenness = nx.edge_betweenness_centrality(G) #Dictionary of edges with betweenness centrality as the value.
edge_betweenness.keys()
edge_max_bet = max(edge_betweenness, key=edge_betweenness.get)
max_bet = max(edge_betweenness.values())
max_bet

0.07864293949750026

In [27]:
df_edge_bet = (pd.DataFrame.from_dict(edge_betweenness, orient='index')
                  .rename(columns=lambda x: x+1)
                  .rename_axis('node')
                  .add_prefix('edge betweenness')
                  .reset_index())
                  
df_edge_bet[['node1', 'node2']] = pd.DataFrame(df_edge_bet['node'].tolist(), index=df_edge_bet.index)
df_edge_bet.sort_values(by=['node1'], inplace=True)
df_edge_bet.head()

Unnamed: 0,node,edge betweenness1,node1,node2
0,"(1, 37)",0.004289,1,37
1,"(1, 5)",0.003472,1,5
2,"(1, 7)",0.000359,1,7
3,"(1, 36)",0.01841,1,36
4,"(1, 8)",0.001074,1,8


Let's create a datagrame with all edges data and a column with link positions

In [28]:
df_edges

Unnamed: 0,parent,child,name_parent,name_child,consensus,weight
0,1,37,ctx_rh_lateralorbitofrontal,Right_Putamen,220,5.000.000
1,1,37,ctx_rh_lateralorbitofrontal,Right_Putamen,386,12.000.000
2,1,5,ctx_rh_lateralorbitofrontal,ctx_rh_parstriangularis,210,2.000.000
3,1,7,ctx_rh_lateralorbitofrontal,ctx_rh_rostralmiddlefrontal,209,2.000.000
4,1,36,ctx_rh_lateralorbitofrontal,Right_Caudate,210,3.000.000
...,...,...,...,...,...,...
995,81,81,Left_Hippocampus,Left_Hippocampus,418,16.000.000
996,81,82,Left_Hippocampus,Left_Amygdala,367,4.000.000
997,81,83,Left_Hippocampus,Brain_Stem,258,2.000.000
998,82,82,Left_Amygdala,Left_Amygdala,260,2.000.000


eliminating self loops and duplicates

In [29]:
df_edges_refined=df_edges.drop_duplicates(subset=['parent','child'])
cancel=[]
for index, row in df_edges_refined.iterrows():

    if row['parent']==row['child']:
        df_edges_refined.drop(index=index, inplace=True, axis=0)

df_edges_refined.sort_values(by=['child'],inplace=True)
df_edges_refined.reset_index(drop=True,inplace=True)
df_edges_refined.head()



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,parent,child,name_parent,name_child,consensus,weight
0,1,4,ctx_rh_lateralorbitofrontal,ctx_rh_medialorbitofrontal,274,2.000.000
1,1,5,ctx_rh_lateralorbitofrontal,ctx_rh_parstriangularis,210,2.000.000
2,1,7,ctx_rh_lateralorbitofrontal,ctx_rh_rostralmiddlefrontal,209,2.000.000
3,4,7,ctx_rh_medialorbitofrontal,ctx_rh_rostralmiddlefrontal,230,2.000.000
4,5,7,ctx_rh_parstriangularis,ctx_rh_rostralmiddlefrontal,335,6.000.000


In [30]:
np_edges=df_edges_refined[["parent", "child"]].to_numpy()

Let's create a list of tuples for each coordinate, they'll show links positions

In [32]:
x_edge=[]
y_edge=[]
z_edge=[]


for j in range(0,len(np_edges)):
    x_edge+=[df_pos['x'][np_edges[j][0]],df_pos['x'][np_edges[j][1]], None]
    y_edge+=[df_pos['y'][np_edges[j][0]],df_pos['y'][np_edges[j][1]], None]
    z_edge+=[df_pos['z'][np_edges[j][0]],df_pos['z'][np_edges[j][1]], None]
#len(x_edge)

Let's create a matrix in which each row has the 3 positions of each link

In [33]:
X_pos_matrix=[]
ind=[]
y=0
while y <= (len(x_edge)-3):

    for q in range(0,3):
        ind.append(x_edge[y])
        y=y+1
    X_pos_matrix.append(ind)
    ind=[]

In [34]:
x_pos_df=pd.DataFrame(X_pos_matrix)
x_pos_df.columns=['x1','x2','x3']
x_pos_df.head()

Unnamed: 0,x1,x2,x3
0,23.352113,5.421053,
1,23.352113,47.873684,
2,23.352113,31.959596,
3,5.421053,31.959596,
4,47.873684,31.959596,


In [35]:
Y_pos_matrix=[]
ind=[]
y=0
while y <= (len(y_edge)-3):

    for q in range(0,3):
        ind.append(y_edge[y])
        y=y+1
    Y_pos_matrix.append(ind)
    ind=[]

In [36]:
y_pos_df=pd.DataFrame(Y_pos_matrix)
y_pos_df.columns=['y1','y2','y3']
y_pos_df.head()

Unnamed: 0,y1,y2,y3
0,35.802817,43.157895,
1,35.802817,35.494737,
2,35.802817,49.96633,
3,43.157895,49.96633,
4,35.494737,49.96633,


In [37]:
Z_pos_matrix=[]
ind=[]
y=0
while y <= (len(z_edge)-3):

    for q in range(0,3):
        ind.append(z_edge[y])
        y=y+1
    Z_pos_matrix.append(ind)
    ind=[]

In [38]:
z_pos_df=pd.DataFrame(Z_pos_matrix)
z_pos_df.columns=['z1','z2','z3']
z_pos_df.head()

Unnamed: 0,z1,z2,z3
0,-17.323944,-14.578947,
1,-17.323944,2.505263,
2,-17.323944,16.464646,
3,-14.578947,16.464646,
4,2.505263,16.464646,


In [39]:
df_data_edges= pd.concat([df_edges_refined,x_pos_df,y_pos_df,z_pos_df,df_edge_bet['edge betweenness1']], axis=1)
df_data_edges.head()

Unnamed: 0,parent,child,name_parent,name_child,consensus,weight,x1,x2,x3,y1,y2,y3,z1,z2,z3,edge betweenness1
0,1,4,ctx_rh_lateralorbitofrontal,ctx_rh_medialorbitofrontal,274,2.000.000,23.352113,5.421053,,35.802817,43.157895,,-17.323944,-14.578947,,0.004289
1,1,5,ctx_rh_lateralorbitofrontal,ctx_rh_parstriangularis,210,2.000.000,23.352113,47.873684,,35.802817,35.494737,,-17.323944,2.505263,,0.003472
2,1,7,ctx_rh_lateralorbitofrontal,ctx_rh_rostralmiddlefrontal,209,2.000.000,23.352113,31.959596,,35.802817,49.96633,,-17.323944,16.464646,,0.000359
3,4,7,ctx_rh_medialorbitofrontal,ctx_rh_rostralmiddlefrontal,230,2.000.000,5.421053,31.959596,,43.157895,49.96633,,-14.578947,16.464646,,0.01841
4,5,7,ctx_rh_parstriangularis,ctx_rh_rostralmiddlefrontal,335,6.000.000,47.873684,31.959596,,35.494737,49.96633,,2.505263,16.464646,,0.001074


Actual plot of the brain graph, using some style and plotting the graph as a 3d scatterplot

In [40]:

external_stylesheets = ['./dark-theme.css']
#necessary to run the thing
app = JupyterDash(__name__)

#creating the figure data
x_edge=df_data_edges[['x1','x2','x3']].to_numpy().flatten().tolist()
y_edge=df_data_edges[['y1','y2','y3']].to_numpy().flatten().tolist()
z_edge=df_data_edges[['z1','z2','z3']].to_numpy().flatten().tolist()

#setting the color of the edges
color_edges=list(df_data_edges['edge betweenness1'].to_numpy())
color_edges_list=[]
for i in range(len(color_edges)):
    color_edges_list.append(color_edges[i])
    color_edges_list.append(color_edges[i])
    color_edges_list.append(color_edges[i])
color_edges_list

#edges

trace1=go.Scatter3d(x=x_edge, y=y_edge, z=z_edge, mode='lines',showlegend=False, line=dict(width=1),hoverinfo='none')

#nodes
trace2=go.Scatter3d(x=df_data['x'],
                    y=df_data['y'], 
                    z=df_data['z'],
                    customdata=
                        "name: " + df_data['name']+ 
                        "<br>degree: " + df_data['degree'].apply(str)+
                        "<br>clustering: " + df_data['clustering'].apply(str)+
                        "<br>closeness: "+ df_data['closeness'].apply(str)+
                        "<br>betweenness: "+ df_data['betweenness'].apply(str)+
                        "<br>partition: "+ df_data['#partition'].apply(str),
                    hovertemplate ='<br>%{customdata}</br><extra></extra>',
                    mode='markers+text',
                    showlegend=False, 
                    marker=dict(color=df_data['degree'],
                        autocolorscale= False,
                        symbol='circle', 
                        size=6, 
                        colorscale='inferno',
                        cauto=False,cmin=0,cmax=20,
                        showscale=True))

data=[trace1, trace2]
fig=go.FigureWidget(data=data)

fig.update_layout(uirevision="Don't change",height=700, template= 'plotly_dark')

#creating the webpage
app.layout = html.Div(children=[
    #insert H1 element, children keyword not necessary
    html.H1(children='Brain Graph'),
    html.P(children='''In this graph is depicted the brain network: every node is the center of a brain area, and every link represents the anatomical connection between two areas.'''),
    html.P('Hovering the nodes reveal the information regarding that specific brain area.'),
    html.P('The radio buttons allow to select which properties to show in the node colors, while the slider allows selecting a specific range of elements depending on the button selected.'),


    #insert dcc component, they are more complex than html components, like the plotly graph
    dcc.Graph(
        id='graph',
        figure=fig,
        config=dict(displaylogo=False,
        modeBarButtonsToAdd=['eraseshape','hoverClosest3d']
        )
    ),
    dcc.RangeSlider(0, df_data['degree'].max(), df_data['degree'].max()/10., value=[0, df_data['degree'].max()], id='slider'
    ),
    dcc.RadioItems(id='radio',
                 options=['Degree','Clustering', 'Closeness','Betweenness','Community','Link Betweenness'],
                 value= 'Degree'
    )
],style={})

@app.callback(
    Output('graph', 'figure'),
    Output('slider','max'),
    Output('slider','step'),
    Input('radio', 'value'),
    Input('slider','value'))
def update_figure(button_value,slider_value):
    #important! coping variables, not changing them directly

    #create dataframe that contains  only nodes with degree inside slider values

    df_filtered=df_data
    df_filtered_edges=df_data_edges
    max_slider=1
    step_slider=1
    value_slider=[0, df_data['degree'].max()]
    fig.update_layout(uirevision="Don't change")
    fig.data[0].update(line_color='grey',line_width=1,line_showscale=False)

    if button_value=='Clustering':
        df_filtered=df_data[df_data['clustering'].between(slider_value[0], slider_value[1])]
        max_slider=round(df_data['clustering'].max(),3)
        step_slider=round(df_data['clustering'].max()/10. , 3)
        df_filtered=df_data[df_data['clustering'].between(slider_value[0], slider_value[1])]
        
        fig.data[1].update(marker_cmin=0,marker_cmax=max_slider,
                         marker_colorscale='earth',marker_cauto=False, 
                         marker_color=df_filtered['clustering'],
                         marker_showscale=True
                         )

    elif button_value=='Closeness':
        df_filtered=df_data[df_data['closeness'].between(slider_value[0], slider_value[1])]
        max_slider=round(df_data['closeness'].max(),3)
        step_slider=round(df_data['closeness'].max()/10. , 3)
        df_filtered=df_data[df_data['closeness'].between(slider_value[0], slider_value[1])]
        
        fig.data[1].update(marker_cmin=0,marker_cmax=max_slider,
                         marker_colorscale='viridis',marker_cauto=False, 
                         marker_color=df_filtered['closeness'],
                         marker_showscale=True
                         )

    elif button_value=='Betweenness':
        max_slider=round(df_data['betweenness'].max(),4)
        step_slider=round(df_data['betweenness'].max()/10.,4)
        df_filtered=df_data[df_data['betweenness'].between(slider_value[0], slider_value[1])]
        fig.data[1].update(marker_cmin=0,marker_cmax=max_slider, marker_colorscale='earth',marker_cauto=False, marker_color=df_filtered['betweenness'],marker_showscale=True)
    
    elif button_value=='Community':
        
        max_slider=round(df_data['#partition'].max(),4)
        step_slider=round(df_data['#partition'].max()/10.,4)
        df_filtered=df_data[df_data['#partition'].between(slider_value[0], slider_value[1])]
        fig.data[1].update(marker_colorscale='jet',marker_cauto=True, marker_color=df_data['#partition'],marker_showscale=True)
   
    elif button_value=='Link Betweenness':
        #set slider values
        max_slider=round(df_data_edges['edge betweenness1'].max(),4)
        step_slider=round(df_data_edges['edge betweenness1'].max()/10.,4)
        df_filtered_edges=df_data_edges[df_data_edges['edge betweenness1'].between(slider_value[0], slider_value[1])]
        #set new filtered data
        x_edge_ft=df_filtered_edges[['x1','x2','x3']].to_numpy().flatten().tolist()
        y_edge_ft=df_filtered_edges[['y1','y2','y3']].to_numpy().flatten().tolist()
        z_edge_ft=df_filtered_edges[['z1','z2','z3']].to_numpy().flatten().tolist()

        #setting the color of the edges
        color_edges_ft=list(df_filtered_edges['edge betweenness1'].to_numpy())
        color_edges_list_ft=[]
        for i in range(len(color_edges_ft)):
            color_edges_list_ft.append(color_edges_ft[i])
            color_edges_list_ft.append(color_edges_ft[i])
            color_edges_list_ft.append(color_edges_ft[i])

        fig.data[1].update(marker_color='grey', marker_size=5,marker_showscale=False)
        fig.data[0].update(x=x_edge_ft, y=y_edge_ft, z=z_edge_ft,
                            line_cmin=0,
                            line_cmax=max_slider,
                            line_color=color_edges_list_ft, line_colorscale='jet',line_width=3,line_showscale=True)

    else: 
        max_slider=df_data['degree'].max()
        step_slider=df_data['degree'].max()/10.
        df_filtered=df_data[df_data['degree'].between(slider_value[0], slider_value[1])]

        fig.data[1].update(marker_cmin=0,marker_cmax=max_slider, marker_colorscale='solar',marker_cauto=False, marker_color=df_filtered['degree'],marker_showscale=True)


    #the _ autcomatically access the dictionary in "marker"
    fig.data[1].update(x=df_filtered['x'], y=df_filtered['y'], z=df_filtered['z'],
                        customdata= (
                        "name: " + df_filtered['name']+ 
                        "<br>degree: " + df_filtered['degree'].apply(str)+
                        "<br>clustering: " + df_filtered['clustering'].apply(str)+
                        "<br>closeness: "+df_filtered['closeness'].apply(str)+
                        "<br>betweenness: "+ df_filtered['betweenness'].apply(str)+
                        "<br>partition: "+ df_filtered['#partition'].apply(str)
                        ))

    fig.update_layout(clickmode='event+select')
    return fig,max_slider,step_slider

#sort of like calling the main method
if __name__ == '__main__':
    #inline makes the graph appear inside the notebook
    #app.run_server(mode="inline", debug=True)
    app.run_server(port=8080)

Dash app running on http://127.0.0.1:8080/
