In [1]:
import csv
import re
import json
import matplotlib.pyplot as plt
import pandas as pd
import networkx as nx
from collections import OrderedDict
import itertools

In [2]:
def create_friends_dict():
    """Read 'data/friends.csv' into a mapping of user ID -> list of friend IDs.

    Only rows with exactly two columns are used; the first column is the user
    and the second is one of that user's friends. IDs are kept as the strings
    read from the file, and friends are stored in file order.

    Returns:
        dict[str, list[str]]: friend IDs grouped by user ID.
    """
    adjacency = {}
    with open('data/friends.csv', 'r') as handle:
        for row in csv.reader(handle):
            # Skip blank or malformed rows that are not a (user, friend) pair.
            if len(row) != 2:
                continue
            user_id, friend_id = row
            # Start a new list on first sight of the user, then append.
            adjacency.setdefault(user_id, []).append(friend_id)
    return adjacency

# Load the friendship lists once; later cells read this module-level dict.
friends_dict = create_friends_dict()


# Spot check: show the friend IDs stored for user '360'.
print(friends_dict['360'])


['4719', '4720', '4721', '4722', '14', '4723', '1020', '4724', '4725', '4726', '4727', '4728', '4729', '1286', '4730', '4731', '4732', '4733', '4734', '4735', '4736', '4737', '1075']


In [3]:
def create_concerts_amount_dict(path='data/n_concerts.txt'):
    """Parse the per-genre concert counts file into a dictionary.

    Every line is searched for ``<genre>:<digits>``. The genre part may contain
    word characters, spaces, '/', '&', '|' and '-'. Lines without such a match
    are ignored; if a genre appears more than once, the last line wins.

    Args:
        path: location of the counts file (defaults to the notebook's data file).

    Returns:
        dict[str, str]: genre name -> concert count, kept as the digit string
        read from the file (callers convert it with ``float``).
    """
    # Raw string: "\w" in a normal string literal is an invalid escape sequence.
    # The character class matches exactly what the original
    # '[\w| |/|&|-]' matched, without the repeated '|' separators.
    genre_count = re.compile(r'([\w /&|-]+):([0-9]+)')

    concert_numbers = {}
    with open(path, 'r') as f:
        for line in f:
            match = genre_count.search(line)
            if match:
                concert_numbers[match.group(1)] = match.group(2)
    return concert_numbers

# Parse the counts once; concerts_of_friends looks genres up in this
# module-level dict. Values are digit strings, not numbers.
concert_numbers = create_concerts_amount_dict()
# call e.g: print('number of classical concerts: ',concert_numbers['Classical'])

In [4]:
# Load the per-user preference table. genres_of_id indexes it with str(user id)
# and treats a '1' at position i as "likes genre i".
with open('data/preferences.json', 'r') as f:
  preferences_data = json.load(f)

In [5]:
# Keep the description file as raw lines. genres_of_id reads the name of
# genre i from line i + 2 (presumably the first two lines are a header;
# confirm against the file).
with open('data/preferences_description.txt') as f:
  pref_des_data_lines = f.readlines()

In [6]:
# get the genres a user likes using their ID
def genres_of_id(id):
    """Return the names of the genres a user likes.

    The user's entry in ``preferences_data`` is scanned for positions holding
    the string '1'. Each such position ``p`` is translated to a genre name by
    taking line ``p + 2`` of ``pref_des_data_lines`` with trailing whitespace
    removed.

    Args:
        id: user ID; converted with ``str`` before the lookup.

    Returns:
        list[str]: liked genre names, in position order.
    """
    flags = preferences_data[str(id)]
    liked_positions = [pos for pos, flag in enumerate(flags) if flag == '1']
    return [pref_des_data_lines[pos + 2].rstrip() for pos in liked_positions]
  
# Example: display the genres liked by user 1 (the cell's last expression).
genres_of_id(1)
# call: genres_of_id(1612)

['Indie Rock/Rock pop', 'Baroque', 'Nursery Rhymes', 'Hard Rock']

In [7]:
# Calculates the number of concerts two friends (id1, id2) are attending together
def concerts_of_friends(id1, id2, both_weight=0.393, one_weight=0.018, none_weight=0.002):
    """Weighted number of concerts for a pair of users.

    Every genre in ``concert_numbers`` contributes its concert count multiplied
    by one of three weights, depending on how many of the two users like it.

    Args:
        id1: first user ID (passed to ``genres_of_id``).
        id2: second user ID (passed to ``genres_of_id``).
        both_weight: factor for genres both users like.
        one_weight: factor for genres exactly one of them likes.
        none_weight: factor for genres neither of them likes.

    Returns:
        The weighted sum over all genres (a float, or 0 if nothing was added).

    Raises:
        KeyError: if a liked genre has no entry in ``concert_numbers``.
    """
    # Look each user's genres up once instead of once per compared element.
    genres_1 = genres_of_id(id1)
    genres_2 = genres_of_id(id2)
    liked_by_2 = set(genres_2)

    # Genres both like, in id1's order, so the summation order is unchanged.
    genres_both_like = [g for g in genres_1 if g in liked_by_2]
    # Genres exactly one of them likes.
    genres_one_likes = list(set(genres_1).symmetric_difference(genres_2))
    # Built once: everything liked by at least one of the two users.
    liked_by_any = set(genres_both_like) | set(genres_one_likes)

    total_number_of_concert = 0
    for g in genres_both_like:
        total_number_of_concert += both_weight * float(concert_numbers[g])
    for g in genres_one_likes:
        total_number_of_concert += one_weight * float(concert_numbers[g])
    # Remaining genres, in the dict's own order.
    for g in concert_numbers:
        if g not in liked_by_any:
            total_number_of_concert += none_weight * float(concert_numbers[g])
    return total_number_of_concert

# Example: user 4 paired with themselves, so no genre falls in the
# "only one likes it" group.
concerts_of_friends(4,4)
# call: concerts_of_friends(360,4719)

33.91200000000002

In [8]:
def all_concerts_of_user(id):
    """Add up ``concerts_of_friends(id, friend)`` over all friends of a user.

    Args:
        id: user ID; its string form must be a key of ``friends_dict``.

    Returns:
        The accumulated value (0 when the friend list is empty).
    """
    friend_ids = friends_dict[str(id)]
    total = 0
    # Explicit left-to-right accumulation over the stored friend order.
    for friend_id in friend_ids:
        total = total + concerts_of_friends(id, int(friend_id))
    return total
  
# Example: total over all friends of user 360.
all_concerts_of_user(360)

212.222

In [9]:
def get_concert_number(concert):
    """Return the concert count recorded for one genre.

    The counts file is parsed again on every call, so the value always
    reflects the file's current content.

    Args:
        concert: genre name exactly as it appears in the counts file.

    Returns:
        str: the count as the digit string read from the file.

    Raises:
        KeyError: if the genre is not present in the file.
    """
    # Reuse the notebook's parser rather than keeping a second copy of the
    # file-reading loop and regular expression in sync by hand.
    return create_concerts_amount_dict()[concert]
# Example: print the count stored for the 'Classical' genre.
print(get_concert_number('Classical'))

24


In [10]:
def create_textfile_of_percentage_with(unsorted_dict, type, percentage):
    """Write the top-scoring share of users to '<type>_team_8.txt'.

    Users are ordered by score, highest first (ties keep the dictionary's
    order), and the leading ``int(len * percentage)`` of them are written one
    per line.

    Args:
        unsorted_dict: mapping of user -> numeric score.
        type: prefix of the output file name.
        percentage: fraction of users to keep. Values above 1 keep everyone
            instead of failing with an IndexError; values below 0 keep no one.
    """
    # Users ranked by score, best first.
    ranked = sorted(unsorted_dict.items(), key=lambda item: item[1], reverse=True)

    # Number of users to keep, clamped to the valid range.
    keep = max(0, min(len(ranked), int(len(ranked) * percentage)))

    filename = type + '_team_8.txt'
    # The with-block closes the file; no explicit close() is needed.
    with open(filename, 'w') as f:
        f.writelines(str(user) + "\n" for user, _score in ranked[:keep])



In [11]:
def create_network():
    """Build the undirected friendship graph from ``friends_dict``.

    Every key of ``friends_dict`` becomes a node. Every (user, friend) pair
    becomes an edge whose ``'weight'`` attribute is
    ``1 / concerts_of_friends(user, friend)``. Friends without their own
    entry in ``friends_dict`` become nodes when their edge is added.

    Prints two progress markers (1 after the nodes, 2 after the edges) and the
    final node and edge counts.

    Returns:
        networkx.Graph: the weighted friendship graph.

    Raises:
        ZeroDivisionError: if ``concerts_of_friends`` returns 0 for a pair.
    """
    G = nx.Graph()

    # One node per user that has a friends list, in dictionary order.
    # (The former early-exit counter could never trigger and was removed.)
    G.add_nodes_from(friends_dict.keys())
    print(1)

    for user, friends_of_user in friends_dict.items():
        for friend in friends_of_user:
            # Undirected graph: skip pairs already added from the other side
            # so the pair value is computed only once.
            if not G.has_edge(user, friend):
                weight = concerts_of_friends(int(user), int(friend))
                # Stored inverted: a larger pair value gives a smaller edge weight.
                G.add_edge(user, friend, weight=1/weight)

    print(2)
    print('num of nodes',G.number_of_nodes())
    print('num of edges',G.number_of_edges())

    return G

In [12]:
# Build the graph once; the centrality cells below all receive this object.
G = create_network()

1
2
num of nodes 8311
num of edges 55483


In [13]:
def create_naive_userlist(percent):
    """Rank users by their summed concerts with friends and write the 'naive' file.

    Each key of ``friends_dict`` is scored with ``all_concerts_of_user``; the
    scores are handed to ``create_textfile_of_percentage_with`` under the file
    prefix 'naive'.

    Args:
        percent: fraction of users to write, forwarded unchanged.
    """
    # Score every user, preserving the order of friends_dict.
    scores = OrderedDict(
        (user, all_concerts_of_user(int(user))) for user in friends_dict
    )
    create_textfile_of_percentage_with(scores, 'naive', percent)


In [14]:
def create_closeness_centrality_userlist(graph, percent):
    """Rank users by closeness centrality and write the 'closeness_centrality' file.

    The edge attribute 'weight' is used as the distance between two users.

    Args:
        graph: networkx graph to analyse.
        percent: fraction of users to write, forwarded unchanged.
    """
    scores = nx.closeness_centrality(graph, distance='weight')
    create_textfile_of_percentage_with(scores, 'closeness_centrality', percent)


In [22]:
def create_betweenness_centrality_userlist(graph, percent):
    """Rank users by betweenness centrality and write the 'betweenness_centrality' file.

    The edge attribute 'weight' is passed as the weight for the shortest-path
    computation.

    Args:
        graph: networkx graph to analyse.
        percent: fraction of users to write, forwarded unchanged.
    """
    scores = nx.betweenness_centrality(graph, weight='weight')
    create_textfile_of_percentage_with(scores, 'betweenness_centrality', percent)

In [27]:
def create_eigenvector_centrality_userlist(graph, percent):
    """Rank users by eigenvector centrality and write the 'eigenvector_centrality' file.

    ``nx.eigenvector_centrality`` interprets an edge weight as connection
    strength. The 'weight' attribute used in this notebook is the opposite:
    ``create_network`` stores ``1 / concerts_of_friends(...)``, and the closeness
    and betweenness helpers consume it as a distance. Passing it through
    unchanged would favour pairs with the smallest pair value, so the weight is
    inverted back into a strength before the centrality is computed.

    Args:
        graph: networkx graph whose edges carry a non-zero 'weight' distance
            (edges without the attribute count as distance 1).
        percent: fraction of users to write, forwarded unchanged.

    Raises:
        ZeroDivisionError: if an edge has weight 0.
    """
    # Same graph type and node order as the input, so ties rank as before.
    strength_graph = graph.__class__()
    strength_graph.add_nodes_from(graph.nodes())
    strength_graph.add_weighted_edges_from(
        ((u, v, 1 / distance)
         for u, v, distance in graph.edges(data='weight', default=1)),
        weight='strength',
    )

    eigenvector_centrality = nx.eigenvector_centrality(
        strength_graph, max_iter=600, weight='strength'
    )
    create_textfile_of_percentage_with(eigenvector_centrality, 'eigenvector_centrality', percent)

In [17]:
# percent=1.00 keeps every user, so the file holds the complete naive ranking.
create_naive_userlist(percent=1.00)

In [18]:
# Full closeness-centrality ranking of graph G (percent=1.00 keeps every user).
create_closeness_centrality_userlist(graph = G, percent=1.00)

In [24]:
# Full betweenness-centrality ranking of graph G (percent=1.00 keeps every user).
create_betweenness_centrality_userlist(graph = G, percent=1.00)

In [28]:
# Full eigenvector-centrality ranking of graph G (percent=1.00 keeps every user).
create_eigenvector_centrality_userlist(graph = G, percent=1.00)