In [None]:
# Import the required python modules.
import numpy as np
import networkx as nx
import networkx.algorithms.bipartite as bipartite
%matplotlib inline

In [None]:
# In this analysis we load the data file (davis.dat) from disk and parse it to create the bipartite graph.
# We then split the graph into its two projections, one for the Women and one for the Meetings.
# Finally, we generate metrics on the separate graphs.

In [None]:
# Read the data file from the disk. Update the path as appropriate to your machine.
dataFile = '/Users/burton/000-Semester_06_CUNY/620_Web_Analytics/Week_06/davis.dat'
# The following boolean flags record which section markers (ROW LABELS, COLUMN LABELS, DATA) have been seen so far.
rowsFound = False
columnsFound = False
dataFound = False
# The following lists will store the data for the ROWS, COLUMNS and DATA, which will be used to create our graph.
rowLabels = []
columnLables = []
data = []
# Parse the file as a small state machine: everything before "ROW LABELS:" is header and is ignored,
# then come the row labels, then the column labels, then the data rows.
# The `with` block guarantees the file is closed even if parsing raises.
with open(dataFile) as f:
    for line in f:
        entry = line.strip()
        if not entry:
            # Blank lines (e.g. a trailing newline at the end of the file) carry no label or data row.
            continue
        if not rowsFound:
            # Still in the header: wait for the start of the row-label section.
            rowsFound = "ROW LABELS:" in line
        elif not columnsFound:
            if "COLUMN LABELS:" in line:
                columnsFound = True
            elif "ROW LABELS:" not in line:
                rowLabels.append(entry)
        elif not dataFound:
            if "DATA:" in line:
                dataFound = True
            elif "COLUMN LABELS:" not in line:
                columnLables.append(entry)
        elif "DATA:" not in line:
            data.append(entry)

In [None]:
# In the next few sections we will print out the contents of the ROWS, COLUMNS and DATA sections from the lists that
# we created by reading the data file.

In [None]:
# Display the row labels (the Women) parsed from the ROW LABELS section of the data file.
rowLabels

In [None]:
# Display the column labels (the Meetings) parsed from the COLUMN LABELS section of the data file.
columnLables

In [None]:
# Display the raw DATA rows that relate the Rows (Women) to the Columns (Meetings).
# Each entry is one stripped line of the DATA section, still in string form.
data

In [None]:
# We will now create the main bipartite graph from the lists that we created above by reading the file.

In [None]:
# Create the main bipartite graph: the Women (rows) form partition 0 and the Meetings (columns) form partition 1.
main_graph = nx.Graph()
main_graph.add_nodes_from(rowLabels, bipartite=0)
main_graph.add_nodes_from(columnLables, bipartite=1)
# Every woman needs her own attendance row; fail early with a clear message instead of an opaque IndexError.
if len(data) < len(rowLabels):
    raise ValueError('Expected %d data rows (one per row label) but found only %d'
                     % (len(rowLabels), len(data)))
# The i-th data row is the attendance vector of the i-th woman, one 0/1 flag per meeting.
for woman, attendance_row in zip(rowLabels, data):
    # split() without an argument tolerates tabs and runs of spaces; split(' ') would produce empty
    # tokens in that case and shift every flag onto the wrong meeting.
    for meeting, attended in zip(columnLables, attendance_row.split()):
        if attended == "1":
            main_graph.add_edge(woman, meeting)

In [None]:
# Draw the main bipartite graph.
# The spring layout starts from random positions taken from NumPy's global random state, so we seed it
# first; this makes the figure identical on every run (note that it resets the global NumPy seed).
np.random.seed(42)
nx.draw_spring(main_graph, with_labels=True, node_size=600)

In [None]:
# We will now separate the Women nodes and the Meeting nodes of the main bipartite graph.
# The split uses the 'bipartite' node attribute assigned when the nodes were added (0 = Women, 1 = Meetings).
# bipartite.sets() is avoided here: it derives the two sets from a 2-colouring, so which set comes back first
# is not tied to our labels, and it cannot give an unambiguous answer for a disconnected graph.
women_nodes = {node for node, attrs in main_graph.nodes(data=True) if attrs['bipartite'] == 0}
meeting_nodes = set(main_graph) - women_nodes

In [None]:
# Display the separated Women nodes.
# A set has no stable order between kernel sessions, so sort the names to get a reproducible listing.
sorted(women_nodes)

In [None]:
# We now create the Women graph by projecting the main bipartite graph onto the separated Women nodes.
women_graph = bipartite.projected_graph(main_graph, women_nodes)

In [None]:
# We will now draw the Women graph.
# The spring layout starts from random positions taken from NumPy's global random state, so we seed it
# first; this makes the figure identical on every run (note that it resets the global NumPy seed).
np.random.seed(42)
nx.draw_spring(women_graph, with_labels=True, node_size=600)

In [None]:
# Following are the degree centrality values for the Women, computed on the Women projection graph.
nx.degree_centrality(women_graph)

In [None]:
# Following are the betweenness centrality values for the Women, computed on the Women projection graph.
nx.betweenness_centrality(women_graph)

In [None]:
# Following are the closeness centrality values for the Women, computed on the Women projection graph.
nx.closeness_centrality(women_graph)

In [None]:
# We now count the number of shared contacts ("friends") for each woman, i.e. her degree in the Women graph.
# The members are visited in sorted order so the table is identical on every run (set order is not stable).
print("No. of Friends, Member")
for woman in sorted(women_nodes):
    print('%d %s' % (women_graph.degree(woman), woman))

In [None]:
# We now count the number of "friend meetings" for each woman: her weighted degree in the weighted Women
# projection (weight='weight'), i.e. the edge weights summed over all of her friends.
# NOTE: this is not the number of meetings she attended herself.
# The members are visited in sorted order so the table is identical on every run (set order is not stable).
print("No. of Friend meetings, Member")
weighted_women_graph = bipartite.weighted_projected_graph(main_graph, women_nodes)
for woman in sorted(women_nodes):
    print('%d %s' % (weighted_women_graph.degree(woman, weight='weight'), woman))

In [None]:
# From the above output we can observe that the more friends a member has, the greater her number of friend
# meetings (meetings shared with those friends). For example, THERESA has 17 friends and one of the highest
# numbers (57) of friend meetings, while OLIVIA has 12 friends and only 14 friend meetings.
# From the above analysis we can see that EVELYN, THERESA, VERNE, HELEN, SYLVIA and RUTH have the highest number of
# shared contacts, and they also have the higher measures of centrality shown in the earlier calculations
# above. They are also among the members with the highest numbers of friend meetings. This is what we would expect
# in a social setting.
# This is our conclusion.

In [None]:
# Below are similar metrics for the Meeting nodes. They are provided for descriptive purposes and for completeness.

In [None]:
# Display the separated Meeting nodes.
# A set has no stable order between kernel sessions, so sort the labels to get a reproducible listing.
sorted(meeting_nodes)

In [None]:
# We now create the Meetings graph by projecting the main bipartite graph onto the separated Meeting nodes.
meeting_graph = bipartite.projected_graph(main_graph, meeting_nodes)

In [None]:
# We will now draw the Meetings graph.
# The spring layout starts from random positions taken from NumPy's global random state, so we seed it
# first; this makes the figure identical on every run (note that it resets the global NumPy seed).
np.random.seed(42)
nx.draw_spring(meeting_graph, with_labels=True, node_size=600)

In [None]:
# Following are the degree centrality values for the Meetings, computed on the Meetings projection graph.
nx.degree_centrality(meeting_graph)

In [None]:
# Following are the betweenness centrality values for the Meetings, computed on the Meetings projection graph.
nx.betweenness_centrality(meeting_graph)

In [None]:
# Following are the closeness centrality values for the Meetings, computed on the Meetings projection graph.
nx.closeness_centrality(meeting_graph)