In [1]:
import networkx as nx
from numpy.linalg import eig
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from sentiment_anal import Model
from part2 import trainModel
from functools import reduce

In [6]:
def get_score(name, G):
    friends = G.edges([name])
    score = G.node[name]['score']
    if score is None:
        alpha = lambda friend: 10.0 if G.node[name]["cluster"] != G.node[friend]["cluster"] else 1.0
        beta = lambda friend: 10.0 if friend == "kyle" else 1.0
        scores = map(lambda friend: alpha(friend[1]) * beta(friend[1]) * G.node[friend[1]]['score'], filter(lambda friend: G.node[friend[1]].get("score") is not None, friends))
        len_list = len(list(scores))
        if len_list != 0:
            calc_score = reduce(lambda x, y: x + y, scores, 0) / len_list
        else:
            calc_score = 0
    
        G.node[name]['score'] = calc_score
        return calc_score
    else:
        return score

In [3]:


color_map = {
        0:'y',
        1:'r',
        2:'b',
        3:'g',
        4:'m',
        5:'c',
        6:'k',
        7:'yellow',
        8:'brown',
            } 

# Laplacian method
def find_communities(G):
    L = nx.laplacian_matrix(G).todense()
    eig_values, eig_matrix = eig(L)
    
    # find second minimum in eigen values
    second_min_idx, second_min = sorted(enumerate(eig_values), key=lambda x: x[1])[1]

    print(second_min)
    print(second_min_idx)

    # find the correct coloumn
    target_coloumn = eig_matrix.transpose()[second_min_idx].transpose()
    for x in target_coloumn:
        x = x.real
    
    # pair graph node with eigen vector value
    paired_target_coloumn = zip(G.nodes(), target_coloumn)

    k = 4
    # cluster using knn
    km = KMeans(n_clusters=k, init='k-means++', max_iter=100, n_init=1)

    km.fit(target_coloumn)
    
    communities = {}
    for node, cluster in zip(G.nodes(), km.labels_):
        #print(color_map[cluster])
        G.node[node]['cluster'] = cluster
        lst = communities.get(cluster, [])
        lst.append(node)
        communities[cluster] = lst
        #print(cluster)

    return communities


def chunks(l, n):
    """Yield successive n-sized chunks from l."""
    for i in range(0, len(l), n):
        yield l[i:i + n]



with open("friendships.reviews.txt") as inFile:
    lines = inFile.read().split("\n")

    data_rows = [x for x in chunks(lines, 5)]

    print("Making model")
    model = trainModel()
    
    # populate graph
    G = nx.Graph()

    data = []
    for row in data_rows:
        name = row[0].split()[1]
        friends = row[1].split("\t")[1:]
        review = row[3].split()[1:][0]
        score = None
        if review != "*":
            score = model.predict(review)
        for friend in friends:
            G.add_edge(name, friend)
        G.node[name]['score'] = score
        data.append((name, friends, review))

    

Making model
Counter({1: 436161, 0: 122293})
done loading file, starting training
lol
436161.0
122293.0
haha
roflcopter
asdasd
lmao
rofl
yolo


In [4]:
print("aleins")
communities = find_communities(G)



aleins
0.0178874309235
16


In [8]:

for (name, friends, review) in data:
    if review == "*":
        get_score(name, G)
    print(name, G.node[name]["score"])

#print(communities)
#nx.draw_spring(G, node_color=[color_map[G.node[node]['cluster']] for node in G])

#plt.show()
#plt.savefig("res.png")

abagael 0.0
abbey 0.0
abbie 1
Abdul 1
Abe 0.0
Abel 1
abigael 1
abigail 0.0
abigale 0.0
abra 1
Abraham 0.0
Abram 0.0
ada 0.0
Adalberto 0.0
Adam 1
adan 1
Adan 1
adara 0.0
addi 0.0
addia 1
addie 0.0
addy 0.0
adela 0.0
adelaida 0.0
adelaide 0.0
adelheid 1
adelice 0.0
adelina 0.0
adelind 1
adey 1
adi 0.0
adiana 0.0
Adolfo 1
Adolph 0.0
adore 0.0
adoree 0.0
adriaens 0.0
adrian 1
Adrian 1
adriana 1
adrianna 0.0
adrianne 1
adriena 0.0
adrienne 1
aeriel 1
aeriela 1
aeriell 0.0
afton 0.0
ag 1
agace 0.0
agatha 0.0
agathe 0.0
aggy 1
agna 1
agnella 1
agnese 1
agnesse 0.0
agneta 1
agnola 0.0
agretha 0.0
Agustin 0.0
Ahmad 1
Ahmed 1
aida 0.0
aidan 0.0
aigneis 0.0
ailee 0.0
ailene 0.0
aili 1
ailina 0.0
ailis 1
ailyn 0.0
aimee 1
aimil 0.0
aindrea 1
ainslee 1
ainsley 0.0
ainslie 1
ajay 1
Al 0.0
alaine 0.0
alameda 0.0
Alan 0.0
alanah 0.0
alane 0.0
alanna 1
alayne 0.0
Albert 1
alberta 0.0
Alberto 0.0
albina 1
Alden 1
Aldo 0.0
Alec 0.0
aleda 1
aleece 1
alejandra 0.0
Alejandro 1
alena 0.0
alessandra 0.0
aleta