---

_You are currently looking at **version 1.1** of this notebook. To download notebooks and datafiles, as well as get help on Jupyter notebooks in the Coursera platform, visit the [Jupyter Notebook FAQ](https://www.coursera.org/learn/python-social-network-analysis/resources/yPcBs) course resource._

---

# Assignment 1 - Creating and Manipulating Graphs

Eight employees at a small company were asked to choose 3 movies that they would most enjoy watching for the upcoming company movie night. These choices are stored in the file `Employee_Movie_Choices.txt`.

A second file, `Employee_Relationships.txt`, has data on the relationships between different coworkers. 

The relationship score has value of `-100` (Enemies) to `+100` (Best Friends). A value of zero means the two employees haven't interacted or are indifferent.

Both files are tab delimited.

In [42]:
import networkx as nx
import pandas as pd
import numpy as np
from networkx.algorithms import bipartite


# This is the set of employees
employees = set(['Pablo',
                 'Lee',
                 'Georgia',
                 'Vincent',
                 'Andy',
                 'Frida',
                 'Joan',
                 'Claude'])

# This is the set of movies
movies = set(['The Shawshank Redemption',
              'Forrest Gump',
              'The Matrix',
              'Anaconda',
              'The Social Network',
              'The Godfather',
              'Monty Python and the Holy Grail',
              'Snakes on a Plane',
              'Kung Fu Panda',
              'The Dark Knight',
              'Mean Girls'])


# you can use the following function to plot graphs
# make sure to comment it out before submitting to the autograder
def plot_graph(G, weight_name=None):
    '''
    G: a networkx G
    weight_name: name of the attribute for plotting edge weights (if G is weighted)
    '''
    %matplotlib notebook
    import matplotlib.pyplot as plt
    
    plt.figure()
    pos = nx.spring_layout(G)
    edges = G.edges()
    weights = None
    
    if weight_name:
        weights = [int(G[u][v][weight_name]) for u,v in edges]
        labels = nx.get_edge_attributes(G,weight_name)
        nx.draw_networkx_edge_labels(G,pos,edge_labels=labels)
        nx.draw_networkx(G, pos, edges=edges, width=weights);
    else:
        nx.draw_networkx(G, pos, edges=edges);

In [43]:
import pandas as pd

m = pd.read_csv('Employee_Movie_Choices.txt', sep = '\t')
m.head()

Unnamed: 0,#Employee,Movie
0,Andy,Anaconda
1,Andy,Mean Girls
2,Andy,The Matrix
3,Claude,Anaconda
4,Claude,Monty Python and the Holy Grail


In [44]:
G = nx.from_pandas_edgelist(m, '#Employee', 'Movie')
nx.draw_networkx(G)

In [45]:
G.edges()

EdgeView([('Andy', 'Anaconda'), ('Andy', 'Mean Girls'), ('Andy', 'The Matrix'), ('Anaconda', 'Claude'), ('Anaconda', 'Georgia'), ('Mean Girls', 'Joan'), ('Mean Girls', 'Lee'), ('The Matrix', 'Frida'), ('The Matrix', 'Pablo'), ('Claude', 'Monty Python and the Holy Grail'), ('Claude', 'Snakes on a Plane'), ('Monty Python and the Holy Grail', 'Georgia'), ('Snakes on a Plane', 'Georgia'), ('Frida', 'The Shawshank Redemption'), ('Frida', 'The Social Network'), ('The Shawshank Redemption', 'Pablo'), ('The Shawshank Redemption', 'Vincent'), ('The Social Network', 'Vincent'), ('Joan', 'Forrest Gump'), ('Joan', 'Kung Fu Panda'), ('Forrest Gump', 'Lee'), ('Kung Fu Panda', 'Lee'), ('Pablo', 'The Dark Knight'), ('Vincent', 'The Godfather')])

In [46]:
for m in employees:
    nx.set_node_attributes(G, {m:{type: 'employee'}})
    
for n in movies:
    nx.set_node_attributes(G, {n:{type: 'movie'}})
    
nx.get_node_attributes(G, type)

{'Andy': 'employee',
 'Anaconda': 'movie',
 'Mean Girls': 'movie',
 'The Matrix': 'movie',
 'Claude': 'employee',
 'Monty Python and the Holy Grail': 'movie',
 'Snakes on a Plane': 'movie',
 'Frida': 'employee',
 'The Shawshank Redemption': 'movie',
 'The Social Network': 'movie',
 'Georgia': 'employee',
 'Joan': 'employee',
 'Forrest Gump': 'movie',
 'Kung Fu Panda': 'movie',
 'Lee': 'employee',
 'Pablo': 'employee',
 'The Dark Knight': 'movie',
 'Vincent': 'employee',
 'The Godfather': 'movie'}

In [47]:
B = bipartite.weighted_projected_graph(G, employees)
plot_graph(B)

<IPython.core.display.Javascript object>

In [48]:
nx.draw_networkx(B)
nx.draw_networkx_edge_labels(B, edge_labels= 'weight')

TypeError: draw_networkx_edge_labels() missing 1 required positional argument: 'pos'

### Question 1

Using NetworkX, load in the bipartite graph from `Employee_Movie_Choices.txt` and return that graph.

*This function should return a networkx graph with 19 nodes and 24 edges*

In [49]:
def answer_one():
        
    G = nx.read_edgelist('Employee_Movie_Choices.txt', delimiter = '\t')
    
    return G

answer_one()

<networkx.classes.graph.Graph at 0x1fd2b6056c8>

In [50]:
plot_graph(answer_one(), weight_name = None)

<IPython.core.display.Javascript object>

### Question 2

Using the graph from the previous question, add nodes attributes named `'type'` where movies have the value `'movie'` and employees have the value `'employee'` and return that graph.

*This function should return a networkx graph with node attributes `{'type': 'movie'}` or `{'type': 'employee'}`*

In [51]:
def answer_two():
    
    G= answer_one()
    
    for node in G.nodes():
        if node in employees:
            G.add_node(node, type = 'employee')
        else:
            G.add_node(node, type = 'movie')
            
    return G

answer_two()

<networkx.classes.graph.Graph at 0x1fd2998a688>

In [52]:
plot_graph(answer_two(), weight_name = None)

<IPython.core.display.Javascript object>

### Question 3

Find a weighted projection of the graph from `answer_two` which tells us how many movies different pairs of employees have in common.

*This function should return a weighted projected graph.*

In [53]:
def answer_three():
        
    B = answer_two()
    weighted_projection = bipartite.weighted_projected_graph(B, employees)
    
    return weighted_projection

In [54]:
plot_graph(answer_three(), weight_name = None)

<IPython.core.display.Javascript object>

### Question 4

Suppose you'd like to find out if people that have a high relationship score also like the same types of movies.

Find the Pearson correlation ( using `DataFrame.corr()` ) between employee relationship scores and the number of movies they have in common. If two employees have no movies in common it should be treated as a 0, not a missing value, and should be included in the correlation calculation.

*This function should return a float.*

In [55]:
def answer_four():
        
    # Your Code Here
    
    return # Your Answer Here

In [56]:
Rel = nx.read_edgelist('Employee_Relationships.txt', data = [('relationship_score', int)])
Rel_df = pd.DataFrame(Rel.edges(data = True), columns = ['From', 'To', 'relationship_score'])

In [57]:
Rel_df

Unnamed: 0,From,To,relationship_score
0,Andy,Claude,{'relationship_score': 0}
1,Andy,Frida,{'relationship_score': 20}
2,Andy,Georgia,{'relationship_score': -10}
3,Andy,Joan,{'relationship_score': 30}
4,Andy,Lee,{'relationship_score': -10}
5,Andy,Pablo,{'relationship_score': -10}
6,Andy,Vincent,{'relationship_score': 20}
7,Claude,Frida,{'relationship_score': 0}
8,Claude,Georgia,{'relationship_score': 90}
9,Claude,Joan,{'relationship_score': 0}


In [58]:
G = answer_three()
G_df = pd.DataFrame(G.edges(data = True), columns = ['From', 'To', 'movie_score'])

In [59]:
G_df

Unnamed: 0,From,To,movie_score
0,Georgia,Andy,{'weight': 1}
1,Georgia,Claude,{'weight': 3}
2,Vincent,Frida,{'weight': 2}
3,Vincent,Pablo,{'weight': 1}
4,Lee,Andy,{'weight': 1}
5,Lee,Joan,{'weight': 3}
6,Andy,Joan,{'weight': 1}
7,Andy,Claude,{'weight': 1}
8,Andy,Frida,{'weight': 1}
9,Andy,Pablo,{'weight': 1}


In [28]:
G_copy_df = G_df.copy()
G_copy_df.rename(columns = {'From':'From_', 'To':'From'}, inplace = True)
G_copy_df.rename(columns = {'From_':'To'}, inplace = True)
G_final_df = pd.concat([G_df, G_copy_df])

In [60]:
G_final_df

Unnamed: 0,From,To,movie_score
0,Georgia,Andy,{'weight': 1}
1,Georgia,Claude,{'weight': 3}
2,Vincent,Frida,{'weight': 2}
3,Vincent,Pablo,{'weight': 1}
4,Lee,Andy,{'weight': 1}
5,Lee,Joan,{'weight': 3}
6,Andy,Joan,{'weight': 1}
7,Andy,Claude,{'weight': 1}
8,Andy,Frida,{'weight': 1}
9,Andy,Pablo,{'weight': 1}


In [61]:
final_df = pd.merge(G_final_df, Rel_df, on = ['From', 'To'], how = 'right')

In [62]:
final_df

Unnamed: 0,From,To,movie_score,relationship_score
0,Andy,Joan,{'weight': 1},{'relationship_score': 30}
1,Andy,Claude,{'weight': 1},{'relationship_score': 0}
2,Andy,Frida,{'weight': 1},{'relationship_score': 20}
3,Andy,Pablo,{'weight': 1},{'relationship_score': -10}
4,Frida,Pablo,{'weight': 2},{'relationship_score': 50}
5,Andy,Georgia,{'weight': 1},{'relationship_score': -10}
6,Claude,Georgia,{'weight': 3},{'relationship_score': 90}
7,Frida,Vincent,{'weight': 2},{'relationship_score': 60}
8,Pablo,Vincent,{'weight': 1},{'relationship_score': -20}
9,Andy,Lee,{'weight': 1},{'relationship_score': -10}


In [63]:
final_df['movie_score'] = final_df['movie_score'].map(lambda x : x['weight'] if type(x) == dict else None)
# final_df['movie_score'].fillna(value = 0, inplace = True)

In [64]:
final_df

Unnamed: 0,From,To,movie_score,relationship_score
0,Andy,Joan,1.0,{'relationship_score': 30}
1,Andy,Claude,1.0,{'relationship_score': 0}
2,Andy,Frida,1.0,{'relationship_score': 20}
3,Andy,Pablo,1.0,{'relationship_score': -10}
4,Frida,Pablo,2.0,{'relationship_score': 50}
5,Andy,Georgia,1.0,{'relationship_score': -10}
6,Claude,Georgia,3.0,{'relationship_score': 90}
7,Frida,Vincent,2.0,{'relationship_score': 60}
8,Pablo,Vincent,1.0,{'relationship_score': -20}
9,Andy,Lee,1.0,{'relationship_score': -10}


In [65]:
final_df['relationship_score'] = final_df['relationship_score'].map(lambda x : x['relationship_score'])

In [66]:
final_df

Unnamed: 0,From,To,movie_score,relationship_score
0,Andy,Joan,1.0,30
1,Andy,Claude,1.0,0
2,Andy,Frida,1.0,20
3,Andy,Pablo,1.0,-10
4,Frida,Pablo,2.0,50
5,Andy,Georgia,1.0,-10
6,Claude,Georgia,3.0,90
7,Frida,Vincent,2.0,60
8,Pablo,Vincent,1.0,-20
9,Andy,Lee,1.0,-10


In [67]:
final_df['movie_score'].fillna(value = 0, inplace = True)

In [68]:
final_df

Unnamed: 0,From,To,movie_score,relationship_score
0,Andy,Joan,1.0,30
1,Andy,Claude,1.0,0
2,Andy,Frida,1.0,20
3,Andy,Pablo,1.0,-10
4,Frida,Pablo,2.0,50
5,Andy,Georgia,1.0,-10
6,Claude,Georgia,3.0,90
7,Frida,Vincent,2.0,60
8,Pablo,Vincent,1.0,-20
9,Andy,Lee,1.0,-10


In [69]:
final_df['movie_score'].corr(final_df['relationship_score'])

0.7883962221733474