In [2]:
import numpy as np
import pandas as pd
import networkx as nx
import os
import matplotlib.pyplot as plt
from operator import itemgetter
import math
pd.options.mode.chained_assignment = None 
import random 

In [3]:
def create_results_game_file():
    """Derive win/loss edges from the per-season game logs and write them to disk.

    Every file whose name contains ``players`` under ``2019/Games``,
    ``2020/Games`` and ``2021/Games`` is read as a CSV with ``team`` and
    ``points`` columns.  Games are processed in file-name order.  The first two
    distinct ``team`` values are labelled home and away; the side with the
    larger sum of non-missing ``points`` wins, and a tie is credited to the
    away side.

    One ``winner-loser-weight-season`` line per game is written to each file:

    * ``edges.txt``   -- weight ``1`` for every game.
    * ``edges_1.txt`` -- ``2 * (i + 1) / n_games`` for every game, where ``i``
      is the game's position in the sorted list.
    * ``edges_2.txt`` -- that same ramp for 2019, ``1`` for 2020, ``0`` for 2021.
    * ``edges_3.txt`` -- ``1`` for 2019, ``0`` for 2020 and 2021.

    The output files are opened with a context manager so they are closed even
    when reading a game log fails part-way through.
    """
    gameids = []
    for season in ["2019", "2020", "2021"]:
        for file in os.listdir(f"{season}/Games"):
            if 'players' in file:
                gameids.append([file, season])
    gameids = sorted(gameids, key=itemgetter(0))
    n_games = len(gameids)

    with open("edges.txt", "w") as basicedge_file, \
            open("edges_1.txt", "w") as edge_file, \
            open("edges_2.txt", "w") as edge_file2, \
            open("edges_3.txt", "w") as edge_file3:
        for i, (file, season) in enumerate(gameids):
            df = pd.read_csv(f"{season}/Games/{file}")
            teams = df.team.unique()
            hometeam = teams[0]
            awayteam = teams[1]
            # Rows without a points value are ignored when totalling a side.
            homepts = df.loc[df["team"] == hometeam, "points"].dropna().sum()
            awaypts = df.loc[df["team"] == awayteam, "points"].dropna().sum()
            if homepts > awaypts:
                winning_team, losing_team = hometeam, awayteam
            else:
                winning_team, losing_team = awayteam, hometeam

            # Weight that grows linearly with the game's position in the sorted list.
            ramp = 2 * (i + 1) / n_games
            # (edges_1, edges_2, edges_3) weights for each season.
            weights_by_season = {
                "2019": (ramp, ramp, 1),
                "2020": (ramp, 1, 0),
                "2021": (ramp, 0, 0),
            }
            weight_1, weight_2, weight_3 = weights_by_season[season]

            basicedge_file.write(f"{winning_team}-{losing_team}-{1}-{season}\n")
            edge_file.write(f"{winning_team}-{losing_team}-{weight_1}-{season}\n")
            edge_file2.write(f"{winning_team}-{losing_team}-{weight_2}-{season}\n")
            edge_file3.write(f"{winning_team}-{losing_team}-{weight_3}-{season}\n")

In [4]:
def create_network_from_edge_file(edge_file):
    """Parse a ``source-target-constant-season`` edge file into a directed graph.

    Team names are mapped to consecutive integer ids in order of first
    appearance (source before target on each line).

    Args:
        edge_file: path of a text file with one ``-``-separated edge per line.
            Names that themselves contain ``-`` would be split incorrectly.

    Returns:
        ``(id_to_team, team_to_id, graph, edge_list)`` where ``graph`` is a
        ``networkx.DiGraph`` over the integer ids and ``edge_list`` holds one
        ``(source_id, target_id, constant, season)`` tuple per line, with
        ``constant`` and ``season`` kept as the strings read from the file.
    """
    graph = nx.DiGraph()
    edge_list = []
    team_to_id = {}
    id_to_team = {}

    def _id_for(team):
        # Register a team the first time it is seen; ids follow insertion order.
        if team not in team_to_id:
            new_id = len(team_to_id)
            team_to_id[team] = new_id
            id_to_team[new_id] = team
            graph.add_node(new_id)
        return team_to_id[team]

    # The context manager closes the file even if a line is malformed.
    with open(edge_file) as handle:
        for line in handle:
            stripped = line.strip()
            if not stripped:
                # Tolerate blank lines (e.g. a trailing newline) instead of crashing.
                continue
            fields = stripped.split("-")
            source_id = _id_for(fields[0])
            target_id = _id_for(fields[1])
            constant = fields[2]
            season = fields[3]
            graph.add_edge(source_id, target_id)
            edge_list.append((source_id, target_id, constant, season))
    return id_to_team, team_to_id, graph, edge_list

In [5]:
def generate_optimal_rank(alpha, edge_list, node_names):
    """Compute regularised spring-model ranks from a weighted edge list.

    Solves ``(D_out + D_in - (A + A^T) + alpha * I) s = (D_out - D_in) 1`` where
    ``A[i, j]`` accumulates the spring constants of all edges ``i -> j`` and
    ``D_out`` / ``D_in`` are the diagonal matrices of weighted out/in degree.

    Args:
        alpha: regularisation parameter added on the diagonal.
        edge_list: iterable of ``(source, target, spring_constant, season)``
            with integer node ids; ``spring_constant`` is passed through
            ``float``, so numeric strings are accepted.
        node_names: sized container with one entry per node; only its length
            is used, to fix the matrix dimension.

    Returns:
        ``(A, rank_dict, opt_rank)``: the weighted adjacency matrix, a dict
        mapping node id to rank, and the ranks as an ``(N, 1)`` array.
    """
    N = len(node_names)
    A = np.zeros((N, N))
    D_out = np.zeros((N, N), dtype=float)
    D_in = np.zeros((N, N), dtype=float)
    for source, target, spring_constant, _season in edge_list:
        weight = float(spring_constant)
        A[source, target] += weight
        D_out[source, source] += weight
        D_in[target, target] += weight

    system = D_out + D_in - (A + A.T) + alpha * np.eye(N)
    net_flow = np.matmul(D_out - D_in, np.ones((N, 1)))
    # Solve the linear system directly; this avoids forming the explicit
    # inverse, which is slower and less accurate.
    opt_rank = np.linalg.solve(system, net_flow)

    rank_dict = {i: opt_rank[i][0] for i in range(len(opt_rank))}
    return A, rank_dict, opt_rank

In [6]:
def calculate_energy_graph(edge_list, node_names):
    """Return the spring energy of a graph at its optimal ranks (alpha = 1).

    Ranks come from ``generate_optimal_rank(1, edge_list, node_names)``.  The
    energy is ``sum over edges of 0.5 * c * (r_source - r_target - 1) ** 2``.
    The spring constant ``c`` is included so the energy agrees with the
    weighted matrix the ranks were fitted on; for unit-weight edge lists the
    value is the same as the unweighted sum.

    Args:
        edge_list: iterable of ``(source, target, c, season)``; ``c`` is passed
            through ``float``.
        node_names: forwarded to ``generate_optimal_rank``.
    """
    _, rank_dict, _ = generate_optimal_rank(1, edge_list, node_names)
    total_energy = 0.0
    for source, target, c, _season in edge_list:
        stretch = rank_dict[source] - rank_dict[target] - 1
        total_energy += 0.5 * float(c) * stretch * stretch
    return total_energy

In [7]:
def null_model_test():
    """Compare the observed graph's energy with a direction-randomised null model.

    Reads ``edges.txt``, computes the energy of the observed edge list, then
    builds 5000 random graphs in which every game between two teams keeps its
    pair but gets a uniformly random direction.  A histogram of the null
    energies, with the observed energy drawn as a vertical line, is saved to
    ``null_model_plot.png``.

    Each unordered pair is visited once (``i < j``).  Visiting both ``(i, j)``
    and ``(j, i)`` would give every random graph twice as many games as the
    observed one and make the energies incomparable.  Self-loops, if any were
    present, are not randomised.
    """
    id_to_team, team_to_id, graph, edge_list = create_network_from_edge_file("edges.txt")
    N = len(team_to_id)
    A = np.zeros((N, N))
    for source, target, _spring_constant, _season in edge_list:
        A[source, target] += 1

    base_energy = calculate_energy_graph(edge_list, id_to_team)

    # Number of games per unordered pair; loop-invariant, so computed once.
    pair_games = [
        (i, j, int(A[i, j] + A[j, i]))
        for i in range(N)
        for j in range(i + 1, N)
    ]

    energies = []
    for _ in range(5000):
        new_edge_list = []
        for i, j, n_games in pair_games:
            for _game in range(n_games):
                if random.random() > 0.5:
                    new_edge_list.append((i, j, 1, "2021"))
                else:
                    new_edge_list.append((j, i, 1, "2021"))
        # Energy of the randomised graph at its own optimal ranks.
        energies.append(calculate_energy_graph(new_edge_list, id_to_team))

    plt.hist(energies, bins=30, label="H 5000 simulations")
    plt.axvline(x=base_energy, color="r", linestyle="--", label="Ground State energy")
    plt.xlabel("Energy")
    plt.ylabel("Histogram")
    plt.legend()
    plt.savefig('null_model_plot.png')

    
    


In [8]:
def edge_prediction_accuracy_bitwise(edge_list_1,edge_list_2,edge_list_3, id_to_team, alpha, training_value):
    """Estimate how often spring ranks order a held-out game correctly.

    The three edge lists are parallel (same games, different spring
    constants).  In each of 100 rounds they are shuffled together, the first
    ``training_value`` fraction is used to fit one ranking per list, and every
    remaining game is scored: a prediction is correct when the source's rank is
    ``>=`` the target's.  The ranking used depends on the game's season:
    list 3 for ``"2019"``, list 2 for ``"2020"``, list 1 otherwise.

    The test split is ``[cutoff:]``, so the game at index ``cutoff`` is no
    longer silently dropped from both splits.

    Args:
        edge_list_1, edge_list_2, edge_list_3: parallel lists of
            ``(source, target, constant, season)``.
        id_to_team: passed to ``generate_optimal_rank`` to size the matrices.
        alpha: regularisation parameter for the rank computation.
        training_value: fraction of games used for training.

    Returns:
        Fraction of correct predictions over all rounds.

    Raises:
        ZeroDivisionError: if the test split is empty.
    """
    paired = list(zip(edge_list_1, edge_list_2, edge_list_3))
    cutoff = int(training_value * len(paired))
    accurate = 0
    total = 0
    for _ in range(100):
        # Shuffle the triples so the three lists stay aligned game by game.
        random.shuffle(paired)
        training = paired[:cutoff]
        testing_edges = [trio[0] for trio in paired[cutoff:]]

        _, rank_dict_1, _ = generate_optimal_rank(alpha, [trio[0] for trio in training], id_to_team)
        _, rank_dict_2, _ = generate_optimal_rank(alpha, [trio[1] for trio in training], id_to_team)
        _, rank_dict_3, _ = generate_optimal_rank(alpha, [trio[2] for trio in training], id_to_team)
        ranks_by_season = {"2019": rank_dict_3, "2020": rank_dict_2}

        for source, target, _constant, season in testing_edges:
            ranks = ranks_by_season.get(season, rank_dict_1)
            if ranks[source] >= ranks[target]:
                accurate += 1
            total += 1
    accuracy = accurate/total
    return accuracy

In [9]:
def edge_prediction_accuracy_probabilistic(edge_list_1,edge_list_2,edge_list_3, id_to_team, alpha, beta,training_value):
    """Average predicted probability of the observed outcome on held-out games.

    The three edge lists are parallel (same games, different spring
    constants).  In each of 50 rounds they are shuffled together, the first
    ``training_value`` fraction fits one ranking per list, and every remaining
    game contributes ``1 / (1 + exp(-2 * beta * (r_source - r_target)))``.
    The ranking used depends on the game's season: list 3 for ``"2019"``,
    list 2 for ``"2020"``, list 1 otherwise.

    The test split is ``[cutoff:]``, so the game at index ``cutoff`` is no
    longer silently dropped from both splits.

    Args:
        edge_list_1, edge_list_2, edge_list_3: parallel lists of
            ``(source, target, constant, season)``.
        id_to_team: passed to ``generate_optimal_rank`` to size the matrices.
        alpha: regularisation parameter for the rank computation.
        beta: steepness of the logistic link.
        training_value: fraction of games used for training.

    Returns:
        Mean over the 50 rounds of the per-round average probability.

    Raises:
        ZeroDivisionError: if the test split is empty.
    """
    def _win_probability(gap):
        try:
            return 1/(1+math.exp(-2*beta*gap))
        except OverflowError:
            # exp() overflowed: keep the original fallback of 1 for a positive
            # gap and 0 otherwise.
            return 1 if gap > 0 else 0

    paired = list(zip(edge_list_1, edge_list_2, edge_list_3))
    cutoff = int(training_value * len(paired))
    accuracy = 0
    for _ in range(50):
        # Shuffle the triples so the three lists stay aligned game by game.
        random.shuffle(paired)
        training = paired[:cutoff]
        testing_edges = [trio[0] for trio in paired[cutoff:]]

        _, rank_dict_1, _ = generate_optimal_rank(alpha, [trio[0] for trio in training], id_to_team)
        _, rank_dict_2, _ = generate_optimal_rank(alpha, [trio[1] for trio in training], id_to_team)
        _, rank_dict_3, _ = generate_optimal_rank(alpha, [trio[2] for trio in training], id_to_team)
        ranks_by_season = {"2019": rank_dict_3, "2020": rank_dict_2}

        total = 0
        for source, target, _constant, season in testing_edges:
            ranks = ranks_by_season.get(season, rank_dict_1)
            total += _win_probability(ranks[source] - ranks[target])

        accuracy += float(total/len(testing_edges))
    return accuracy/50

In [10]:
def BTL_model_bitwise( edge_list,id_to_team,training_value):
    """Bitwise prediction accuracy of iteratively fitted Bradley-Terry-Luce strengths.

    In each of 100 rounds the games are shuffled, the first ``training_value``
    fraction is used to fit one strength per team with the fixed-point update
    ``p_i <- W_i / sum_j (n_ij / (p_i + p_j))`` (``W_i`` = wins of ``i``,
    ``n_ij`` = games between ``i`` and ``j``), renormalised to sum to 1, for at
    most 500 iterations.  A held-out game is predicted correctly when the
    source's strength is ``>=`` the target's.

    Changes from the earlier version:

    * Pairs that never met are skipped in the denominator and teams without
      any training game get strength 0.  Previously one winless team produced
      ``0 / 0`` and turned every strength into NaN.
    * The test split is ``[cutoff:]``, so the game at index ``cutoff`` is no
      longer dropped from both splits.
    * A copy of ``edge_list`` is shuffled, so the caller's list is left as is.

    Args:
        edge_list: sequence of ``(source, target, constant, season)`` with
            integer team ids; the constant is ignored (every game counts 1).
        id_to_team: sized container with one entry per team.
        training_value: fraction of games used for training.

    Returns:
        Fraction of correct predictions over all rounds.

    Raises:
        ZeroDivisionError: if the test split is empty.
    """
    edges = list(edge_list)
    cutoff = int(training_value * len(edges))
    n_teams = len(id_to_team)
    accurate = 0
    total = 0
    for _ in range(100):
        random.shuffle(edges)
        training_edges = edges[:cutoff]
        testing_edges = edges[cutoff:]

        wins = np.zeros((n_teams, n_teams))
        for source, target, _constant, _season in training_edges:
            wins[source, target] += 1
        games = wins + wins.T          # n_ij: games played between i and j
        total_wins = wins.sum(axis=1)  # W_i

        p = np.ones(n_teams) / n_teams
        for _iteration in range(500):
            pair_strength = p[:, None] + p[None, :]
            # Only pairs that actually met contribute; this also avoids 0/0.
            ratios = np.divide(games, pair_strength, out=np.zeros_like(games),
                               where=(games > 0) & (pair_strength > 0))
            denom = ratios.sum(axis=1)
            newp = np.divide(total_wins, denom, out=np.zeros(n_teams), where=denom > 0)
            normaliser = newp.sum()
            if normaliser == 0:
                # No wins recorded at all: keep the current strengths.
                break
            newp = newp / normaliser
            converged = np.linalg.norm(newp - p) < 1e-15
            p = newp
            if converged:
                break

        for source, target, _constant, _season in testing_edges:
            if p[source] >= p[target]:
                accurate += 1
            total += 1
    accuracy = accurate/total
    return accuracy
    
    

In [11]:
def BTL_model_probabilistic(edge_list,id_to_team, beta,training_value):
    """Average predicted probability of held-out outcomes under fitted BTL strengths.

    In each of 50 rounds the games are shuffled, the first ``training_value``
    fraction is used to fit one strength per team with the fixed-point update
    ``p_i <- W_i / sum_j (n_ij / (p_i + p_j))`` (renormalised to sum to 1, at
    most 500 iterations), and every held-out game contributes
    ``1 / (1 + exp(-2 * beta * (p_source - p_target)))``.

    Changes from the earlier version:

    * Pairs that never met are skipped in the denominator and teams without
      any training game get strength 0.  Previously one winless team produced
      ``0 / 0`` and turned every strength into NaN.
    * The test split is ``[cutoff:]``, so the game at index ``cutoff`` is no
      longer dropped from both splits.
    * A copy of ``edge_list`` is shuffled, so the caller's list is left as is.
    * Iteration stops early once the strengths change by less than ``1e-15``,
      as in ``BTL_model_bitwise``.
    * Only ``OverflowError`` from ``math.exp`` is caught.

    Args:
        edge_list: sequence of ``(source, target, constant, season)`` with
            integer team ids; the constant is ignored (every game counts 1).
        id_to_team: sized container with one entry per team.
        beta: steepness of the logistic link.
        training_value: fraction of games used for training.

    Returns:
        Mean over the 50 rounds of the per-round average probability.

    Raises:
        ZeroDivisionError: if the test split is empty.
    """
    edges = list(edge_list)
    cutoff = int(training_value * len(edges))
    n_teams = len(id_to_team)
    accuracy = 0
    for _ in range(50):
        random.shuffle(edges)
        training_edges = edges[:cutoff]
        testing_edges = edges[cutoff:]

        wins = np.zeros((n_teams, n_teams))
        for source, target, _constant, _season in training_edges:
            wins[source, target] += 1
        games = wins + wins.T          # n_ij: games played between i and j
        total_wins = wins.sum(axis=1)  # W_i

        p = np.ones(n_teams) / n_teams
        for _iteration in range(500):
            pair_strength = p[:, None] + p[None, :]
            # Only pairs that actually met contribute; this also avoids 0/0.
            ratios = np.divide(games, pair_strength, out=np.zeros_like(games),
                               where=(games > 0) & (pair_strength > 0))
            denom = ratios.sum(axis=1)
            newp = np.divide(total_wins, denom, out=np.zeros(n_teams), where=denom > 0)
            normaliser = newp.sum()
            if normaliser == 0:
                # No wins recorded at all: keep the current strengths.
                break
            newp = newp / normaliser
            converged = np.linalg.norm(newp - p) < 1e-15
            p = newp
            if converged:
                break

        total = 0
        for source, target, _constant, _season in testing_edges:
            gap = p[source] - p[target]
            try:
                total += 1/(1+math.exp(-2*beta*gap))
            except OverflowError:
                if gap > 0:
                    total += 1

        accuracy += float(total/len(testing_edges))

    return accuracy/50
    

In [12]:
def Pagerank_bitwise_accuracy(alpha,training_value):
    """Bitwise prediction accuracy of PageRank scores on held-out games.

    Reads ``edges.txt``.  In each of 100 rounds the games are shuffled, a
    directed graph is built from the first ``training_value`` fraction, and
    ``networkx.pagerank`` scores it.  A held-out game is predicted correctly
    when the source's score is strictly greater than the target's.

    Changes from the earlier version:

    * The test split is ``[cutoff:]``, so the game at index ``cutoff`` is no
      longer dropped from both splits.
    * A team that never appears in the training split is scored 0 instead of
      raising ``KeyError``.

    Args:
        alpha: damping parameter forwarded to ``networkx.pagerank``.
        training_value: fraction of games used for training.

    Returns:
        Fraction of correct predictions over all rounds.

    Raises:
        ZeroDivisionError: if the test split is empty.
    """
    id_to_team, team_to_id, graph, edge_list = create_network_from_edge_file("edges.txt")
    accurate = 0
    total = 0
    cutoff = int(training_value*len(edge_list))
    for _ in range(100):
        random.shuffle(edge_list)
        training_edges = edge_list[:cutoff]
        testing_edges = edge_list[cutoff:]

        G = nx.DiGraph()
        for source, target, _constant, _season in training_edges:
            # add_edge registers both endpoints, so no separate node bookkeeping.
            G.add_edge(source, target)

        pr = nx.pagerank(G, alpha=alpha, max_iter=10000)
        for source, target, _constant, _season in testing_edges:
            if pr.get(source, 0.0) > pr.get(target, 0.0):
                accurate += 1
            total += 1
    accuracy = accurate/total
    return accuracy

In [13]:
def Pagerank_probabilistic_accuracy(alpha, edge_list,id_to_team, beta,training_value):
    """Average predicted probability of held-out outcomes under PageRank scores.

    In each of 50 rounds the games are shuffled, a directed graph is built
    from the first ``training_value`` fraction and scored with
    ``networkx.pagerank``; every held-out game contributes
    ``1 / (1 + exp(-2 * beta * (pr_source - pr_target)))``.

    Changes from the earlier version:

    * Removed an unused matrix allocation that referenced the undefined name
      ``A`` and raised ``NameError``.
    * The test split is ``[cutoff:]``, so the game at index ``cutoff`` is no
      longer dropped from both splits.
    * A team that never appears in the training split is scored 0 instead of
      raising ``KeyError``.
    * A copy of ``edge_list`` is shuffled, so the caller's list is left as is.
    * Only ``OverflowError`` from ``math.exp`` is caught.

    Args:
        alpha: damping parameter forwarded to ``networkx.pagerank``.
        edge_list: sequence of ``(source, target, constant, season)``.
        id_to_team: accepted for signature parity with the other models; unused.
        beta: steepness of the logistic link.
        training_value: fraction of games used for training.

    Returns:
        Mean over the 50 rounds of the per-round average probability.

    Raises:
        ZeroDivisionError: if the test split is empty.
    """
    edges = list(edge_list)
    cutoff = int(training_value*len(edges))
    accuracy = 0
    for _ in range(50):
        random.shuffle(edges)
        training_edges = edges[:cutoff]
        testing_edges = edges[cutoff:]

        G = nx.DiGraph()
        for source, target, _constant, _season in training_edges:
            # add_edge registers both endpoints, so no separate node bookkeeping.
            G.add_edge(source, target)

        pr = nx.pagerank(G, alpha=alpha, max_iter=10000)
        total = 0
        for source, target, _constant, _season in testing_edges:
            gap = pr.get(source, 0.0) - pr.get(target, 0.0)
            try:
                total += 1/(1+math.exp(-2*beta*gap))
            except OverflowError:
                if gap > 0:
                    total += 1

        accuracy += float(total/len(testing_edges))

    return accuracy/50
    

In [14]:
def RWR_bitwise_accuracy(c, tol,training_value):
    """Bitwise prediction accuracy of random-walk-with-restart scores.

    Reads ``edges.txt``.  In each of 100 rounds the games are shuffled and a
    column-normalised count matrix ``A`` is built from the first
    ``training_value`` fraction.  Starting from the indicator of node 0, the
    update ``u <- (1 - c) * A u + c * v`` runs until the change in ``u`` has
    norm below ``tol`` or 10000 iterations have elapsed.  A held-out game is
    predicted correctly when ``u[source] > u[target]``.

    Changes from the earlier version:

    * Columns with no entries are left at zero instead of being divided by
      zero, which filled the scores with NaN.
    * The test split is ``[cutoff:]``, so the game at index ``cutoff`` is no
      longer dropped from both splits.

    Args:
        c: restart probability (passed through ``float``).
        tol: convergence threshold on the norm of the update.
        training_value: fraction of games used for training.

    Returns:
        Fraction of correct predictions over all rounds.

    Raises:
        ZeroDivisionError: if the test split is empty.
    """
    id_to_team, team_to_id, graph, edge_list = create_network_from_edge_file("edges.txt")
    restart = float(c)
    n_teams = len(id_to_team)
    cutoff = int(training_value*len(edge_list))
    accurate = 0
    total = 0
    for _ in range(100):
        random.shuffle(edge_list)
        training_edges = edge_list[:cutoff]
        testing_edges = edge_list[cutoff:]

        A = np.zeros((n_teams, n_teams))
        for source, target, _constant, _season in training_edges:
            A[source, target] += 1

        # Normalise each column to sum to 1; empty columns stay all-zero.
        column_sums = A.sum(axis=0)
        A = np.divide(A, column_sums, out=np.zeros_like(A), where=column_sums > 0)

        v = np.zeros(n_teams)
        v[0] = 1  # the walk always restarts at node 0
        u = v.copy()
        for _step in range(10000):
            old_u = u
            u = (1-restart)*np.matmul(A, u) + restart*v
            if np.linalg.norm(old_u-u) < tol:
                break

        for source, target, _constant, _season in testing_edges:
            if u[source] > u[target]:
                accurate += 1
            total += 1
    return accurate/total


In [15]:
def RWR_probabilistic_accuracy(c, tol, edge_list,id_to_team, beta,training_value):
    """Average predicted probability of held-out outcomes under RWR scores.

    In each of 50 rounds the games are shuffled and a column-normalised count
    matrix ``A`` is built from the first ``training_value`` fraction.  Starting
    from the indicator of node 0, the update ``u <- (1 - c) * A u + c * v``
    runs until the change in ``u`` has norm below ``tol`` or 10000 iterations
    have elapsed.  Every held-out game contributes
    ``1 / (1 + exp(-2 * beta * (u_source - u_target)))``.

    Changes from the earlier version:

    * The test loop no longer reuses the name ``c`` for the edge constant.
      That overwrote the restart probability, so rounds 2..50 ran with the
      last test edge's constant as ``c``.
    * The iteration is capped at 10000 steps (as in ``RWR_bitwise_accuracy``);
      the uncapped ``while True`` never terminated once NaN entered ``u``.
    * Columns with no entries are left at zero instead of being divided by
      zero.
    * The test split is ``[cutoff:]``, so the game at index ``cutoff`` is no
      longer dropped from both splits.
    * A copy of ``edge_list`` is shuffled, so the caller's list is left as is.
    * Only ``OverflowError`` from ``math.exp`` is caught.

    Args:
        c: restart probability (passed through ``float``).
        tol: convergence threshold on the norm of the update.
        edge_list: sequence of ``(source, target, constant, season)`` with
            integer team ids; the constant is ignored (every game counts 1).
        id_to_team: sized container with one entry per team.
        beta: steepness of the logistic link.
        training_value: fraction of games used for training.

    Returns:
        Mean over the 50 rounds of the per-round average probability.

    Raises:
        ZeroDivisionError: if the test split is empty.
    """
    edges = list(edge_list)
    restart = float(c)
    n_teams = len(id_to_team)
    cutoff = int(training_value*len(edges))
    accuracy = 0
    for _ in range(50):
        random.shuffle(edges)
        training_edges = edges[:cutoff]
        testing_edges = edges[cutoff:]

        A = np.zeros((n_teams, n_teams))
        for source, target, _constant, _season in training_edges:
            A[source, target] += 1

        # Normalise each column to sum to 1; empty columns stay all-zero.
        column_sums = A.sum(axis=0)
        A = np.divide(A, column_sums, out=np.zeros_like(A), where=column_sums > 0)

        v = np.zeros(n_teams)
        v[0] = 1  # the walk always restarts at node 0
        u = v.copy()
        for _step in range(10000):
            old_u = u
            u = (1-restart)*np.matmul(A, u) + restart*v
            if np.linalg.norm(old_u-u) < tol:
                break

        total = 0
        for source, target, _constant, _season in testing_edges:
            gap = u[source] - u[target]
            try:
                total += 1/(1+math.exp(-2*beta*gap))
            except OverflowError:
                if gap > 0:
                    total += 1

        accuracy += float(total/len(testing_edges))

    return accuracy/50
    

In [16]:
def similarity_generative_SR():
    """Plot how closely annealed generative-model ranks track the optimal ranks.

    The optimal ranks are computed from ``edges.txt`` with ``alpha = 2``.  For
    each beta in a fixed grid, ``simmulated_annealing`` is run 10 times (5000
    iterations each) and the Pearson and Kendall correlations between its
    ranks and the optimal ranks are averaged.  Both averages are scattered
    against ``log10(beta)`` and saved to ``similarity_rank_plot.png``.
    """
    id_to_team, team_to_id, graph, edge_list = create_network_from_edge_file("edges.txt")

    # Reference ranking that the generative ranks are compared against.
    A, rank_dict, opt_rank = generate_optimal_rank(2, edge_list, id_to_team)
    ranks = pd.DataFrame()
    ranks["optimal"] = [row[0] for row in opt_rank]

    betas = [1, 3, 5, 10, 25, 50,75,90, 100, 110, 125,150,200,225, 250, 300, 400, 500,600,800,1000,2000,3000,4000,5000]
    n_repeats = 10
    n_edges = len(edge_list)
    pearson_list = []
    kendall_list = []
    for beta in betas:
        print(beta)
        pearson_sum = 0
        kendall_sum = 0
        for _ in range(n_repeats):
            ranks["generative"] = simmulated_annealing(5000, A, beta, n_edges)
            pearson_sum += ranks.corr(method='pearson').at["optimal", "generative"]
            kendall_sum += ranks.corr(method='kendall').at["optimal", "generative"]
        pearson_list.append(pearson_sum/10)
        kendall_list.append(kendall_sum/10)

    lbetas = [math.log(value, 10) for value in betas]
    plt.scatter(lbetas, pearson_list, c='r', label="pearson correlation")
    plt.scatter(lbetas, kendall_list, c='b', label="kendall correlation")
    plt.xlabel("log Beta")
    plt.legend()
    plt.savefig('similarity_rank_plot.png')


    
    
def simmulated_annealing(num_iter, A, beta,M):
    """Search for ranks that maximise ``objective_function`` by simulated annealing.

    Starts from ranks drawn uniformly in ``[-1, 1]``.  Each iteration proposes
    a neighbour of the current point, records it if it beats the best score so
    far, and moves to it with probability ``acceptance_probability``.  After 20
    consecutive iterations without a new best, the walk restarts either from a
    fresh random point or from the best point found (50/50).

    On a restart ``curr_score`` is now refreshed to match the new ``curr``.
    Previously it kept the score of the abandoned point, so the following
    acceptance tests compared candidates against the wrong value.

    Args:
        num_iter: number of annealing iterations.
        A: square matrix forwarded to the objective; its length fixes the
            number of ranks.
        beta, M: forwarded unchanged to ``objective_function`` and
            ``select_neighbour``.

    Returns:
        Array with the best-scoring ranks encountered.
    """
    n_nodes = len(A)
    best_ranks = np.zeros(n_nodes)
    for k in range(n_nodes):
        best_ranks[k] = random.uniform(-1, 1)
    best_score = objective_function(best_ranks, beta, M, A)

    curr, curr_score = best_ranks, best_score
    num_since_change = 0
    for step in range(num_iter):
        T = temperature(step / float(num_iter))
        # Propose a move from the current point and score it.
        candidate = select_neighbour(curr.copy(), T, beta, M, A)
        candidate_score = objective_function(candidate, beta, M, A)

        if candidate_score > best_score:
            num_since_change = 0
            best_score = candidate_score
            best_ranks = candidate.copy()
        else:
            num_since_change += 1

        if acceptance_probability(curr_score, candidate_score, T) > random.random():
            curr, curr_score = candidate, candidate_score

        if num_since_change >= 20:
            # Stalled: restart from a random point or from the best so far.
            num_since_change = 0
            if random.random() >= 0.5:
                curr = np.zeros(n_nodes)
                for k in range(n_nodes):
                    curr[k] = random.uniform(-1, 1)
                curr_score = objective_function(curr, beta, M, A)
            else:
                curr = best_ranks.copy()
                curr_score = best_score

    return best_ranks

def select_neighbour(curr, T, beta, M , A):
    """Return a copy of ``curr`` with one randomly chosen entry nudged up or down.

    The step size is ``0.1 * max(0, 1 - T)``, and the direction is chosen with
    a single ``random.random()`` draw (down when the draw exceeds 0.5).
    ``beta``, ``M`` and ``A`` are accepted but not used.
    """
    idx = random.randint(0, len(curr)-1)
    neighbour = curr.copy()
    move_down = random.random() > 0.5
    step = 0.1*max(0,1-T)
    if move_down:
        neighbour[idx] = neighbour[idx] - step
    else:
        neighbour[idx] = neighbour[idx] + step
    return neighbour
            
    
def objective_function(s, beta, M, A):
    """Score a rank vector ``s``; larger is better.

    The score is the sum of two terms taken over all ordered pairs ``(i, j)``:

    * ``-0.5 * sum A[i, j] * (s[i] - s[j] - 1) ** 2``
    * ``-(M / beta) * log(Z)`` with
      ``Z = sum exp(-0.5 * beta * (s[i] - s[j] - 1) ** 2)``.  When ``Z <= 0.1``
      the expression ``(Z ** Z - 1) / Z`` is used in place of ``log(Z)``.
    """
    n = len(s)
    weighted_stretch = 0
    partition = 0
    for a in range(n):
        for b in range(n):
            squared_gap = math.pow((s[a]-s[b] - 1), 2)
            weighted_stretch += A[a,b]*squared_gap
            partition += math.exp(-0.5*beta*squared_gap)

    energy_term = -0.5*weighted_stretch
    if partition <= 0.1:
        # Small-Z branch: substitute for log(Z) kept exactly as written.
        log_term = - (M/beta) * (math.pow(partition, partition) - 1)/partition
    else:
        log_term = - (M/beta)*math.log(partition)

    return energy_term + log_term


def acceptance_probability(cost, new_cost, temperature):
    """Probability of moving from a point scoring ``cost`` to one scoring ``new_cost``.

    A candidate that scores at least as well is always accepted (returns 1).
    A worse one is accepted with probability ``(1 - temperature) / 2``, which
    does not depend on how much worse it is.
    """
    not_worse = new_cost >= cost
    return 1 if not_worse else (1-temperature)/2

def temperature(fraction):
    """Return ``fraction`` clamped from below at 0.001."""
    floor = 0.001
    return fraction if fraction > floor else floor


In [17]:
def create_graph_comparison_accuracy(training_value):
    """Compare probabilistic accuracy of the spring-rank variants against BTL.

    For a grid of betas the best probabilistic accuracy of BTL, RWR and
    PageRank is printed.  Then, for each alpha, the best accuracy over the
    beta grid is computed for spring ranks fitted on the varying-constant
    edge files (``edges_1/2/3.txt``) and on the unit-constant file
    (``edges.txt``).  The differences to the best BTL accuracy (in percentage
    points) are scattered against alpha and saved to
    ``Comparison_probabilistic_accuracy_<training_value>.png``.

    The overall unit-constant maximum (printed as ``SR``) now records
    ``acc2``; it previously stored ``acc1``, the varying-constant accuracy.

    Args:
        training_value: fraction of games used for training in every model.
    """
    id_to_team, team_to_id, graph, edge_list = create_network_from_edge_file("edges.txt")
    id_to_team, team_to_id, graph, edge_list_1 = create_network_from_edge_file("edges_1.txt")
    id_to_team, team_to_id, graph, edge_list_2 = create_network_from_edge_file("edges_2.txt")
    id_to_team, team_to_id, graph, edge_list_3 = create_network_from_edge_file("edges_3.txt")

    betas = np.concatenate((np.linspace(15,100, 10), np.linspace(110, 1000, 20)))
    alphas = np.arange(0.1, 10.6, 0.5)
    differences1 = []
    differences2 = []
    baseline = []

    # Baselines: best accuracy of each competing model over the beta grid.
    max_acc_btl = 0
    max_acc_rwr = 0
    max_acc_pagerank = 0
    for beta in betas:
        print(beta)
        acc_btl = BTL_model_probabilistic(edge_list, id_to_team, beta,training_value)
        acc_rwr = RWR_probabilistic_accuracy(0.01, 1e-10, edge_list,id_to_team, beta,training_value)
        acc_pagerank = Pagerank_probabilistic_accuracy(0.9, edge_list,id_to_team, beta,training_value)
        if acc_btl > max_acc_btl:
            max_acc_btl = acc_btl
        if acc_rwr > max_acc_rwr:
            max_acc_rwr = acc_rwr
        if acc_pagerank > max_acc_pagerank:
            max_acc_pagerank = acc_pagerank

    print(f"BTL : max prob accuracy {max_acc_btl}, tv : {training_value}")
    print(f"RWR : max prob accuracy {max_acc_rwr}, tv : {training_value}")
    print(f"Pagerank : max prob accuracy {max_acc_pagerank}, tv : {training_value}")

    max_acc_SR = 0   # best unit-constant accuracy over all (alpha, beta)
    max_acc_SR2 = 0  # best varying-constant accuracy over all (alpha, beta)
    for alpha in alphas:
        max_acc_1 = 0
        max_acc_2 = 0
        for beta in betas:
            acc1 = edge_prediction_accuracy_probabilistic(edge_list_1, edge_list_2,edge_list_3, id_to_team, alpha, beta, training_value)
            if acc1 > max_acc_1:
                max_acc_1 = acc1
            if acc1 > max_acc_SR2:
                max_acc_SR2 = acc1

            acc2 = edge_prediction_accuracy_probabilistic(edge_list, edge_list,edge_list, id_to_team,  alpha, beta, training_value)
            if acc2 > max_acc_2:
                max_acc_2 = acc2
            if acc2 > max_acc_SR:
                max_acc_SR = acc2

        differences1.append((max_acc_1 - max_acc_btl)*100)
        differences2.append((max_acc_2 - max_acc_btl)*100)
        baseline.append(0)

    print(f"SR : max prob accuracy {max_acc_SR}, tv : {training_value}")
    print(f"SR2 : max prob accuracy {max_acc_SR2}, tv : {training_value}")

    plt.figure(figsize=(10,8))
    plt.scatter(alphas, differences1, c='b', label="Varying spring constants")
    plt.scatter(alphas, differences2, c='r', label="Spring constants = 1 ")
    plt.plot(alphas, baseline, c="k", linestyle="--")
    plt.xlabel("Alpha regularization value")
    plt.ylabel("Accuracy improvement over BTL")
    plt.legend(bbox_to_anchor=(0,1.02,1,0.2), loc="lower left", mode="expand", ncol=3)
    plt.savefig(f"Comparison_probabilistic_accuracy_{training_value}.png")

    
    
def create_graph_comparison_bitwise_accuracy(training_value):
    """Compare bitwise accuracy of the spring-rank variants against BTL.

    For each alpha in a grid, the bitwise accuracy of spring ranks fitted on
    the varying-constant edge files (``edges_1/2/3.txt``) and on the
    unit-constant file (``edges.txt``) is computed.  The differences to the BTL
    accuracy (in percentage points) are scattered against alpha and saved to
    ``Comparison_bitwise_accuracy_<training_value>.png``.  The best accuracy of
    every model, including PageRank and RWR sweeps, is printed.

    Args:
        training_value: fraction of games used for training in every model.
    """
    id_to_team, team_to_id, graph, edge_list = create_network_from_edge_file("edges.txt")
    id_to_team, team_to_id, graph, edge_list_1 = create_network_from_edge_file("edges_1.txt")
    id_to_team, team_to_id, graph, edge_list_2 = create_network_from_edge_file("edges_2.txt")
    id_to_team, team_to_id, graph, edge_list_3 = create_network_from_edge_file("edges_3.txt")

    alphas = np.arange(0.1, 30, 0.5)

    best_sr = 0
    best_sr2 = 0
    btl_accuracy = BTL_model_bitwise(edge_list, id_to_team, training_value)

    differences1 = []
    differences2 = []
    for alpha in alphas:
        varying = edge_prediction_accuracy_bitwise(edge_list_1, edge_list_2,edge_list_3, id_to_team, alpha, training_value)
        best_sr2 = max(best_sr2, varying)
        unit = edge_prediction_accuracy_bitwise(edge_list, edge_list,edge_list, id_to_team,  alpha, training_value)
        best_sr = max(best_sr, unit)

        differences1.append((varying - btl_accuracy)*100)
        differences2.append((unit - btl_accuracy)*100)
    baseline = [0] * len(alphas)

    # Sweep the PageRank damping parameter and keep the best accuracy.
    best_pagerank = 0
    print("pagerank")
    for damping in np.arange(0.01,3, 0.05):
        best_pagerank = max(best_pagerank, Pagerank_bitwise_accuracy(damping, training_value))

    # Sweep the RWR restart probability and keep the best accuracy.
    best_rwr = 0
    print("rwr")
    for restart in np.arange(0.001, 0.9, 0.05):
        best_rwr = max(best_rwr, RWR_bitwise_accuracy(restart, 1e-10,training_value))

    print(f"SR : max accuracy {best_sr} tv : {training_value}")
    print(f"SR2 : max accuracy {best_sr2},  tv : {training_value}")
    print(f"BTL : max accuracy {btl_accuracy},  tv : {training_value}")
    print(f"pagerank : max accuracy {best_pagerank},  tv : {training_value}")
    print(f"rwr : max accuracy {best_rwr},  tv : {training_value}")

    plt.figure(figsize=(10,8))
    plt.scatter(alphas, differences1, c='b', label="Varying spring constants")
    plt.scatter(alphas, differences2, c='r', label="Spring constants = 1 ")
    plt.plot(alphas, baseline, c="k", linestyle="--")
    plt.xlabel("Alpha regularization value")
    plt.ylabel("bitwise accuracy improvement over BTL")
    plt.legend(bbox_to_anchor=(0,1.02,1,0.2), loc="lower left", mode="expand", ncol=3)
    plt.savefig(f"Comparison_bitwise_accuracy_{training_value}.png")


In [18]:
def genereate_bitwise_accuracy_graph_comp():
    """Plot bitwise accuracy of every model against the training fraction.

    Training fractions run from 0.975 down to 0.05 in steps of 0.025.  For
    each one, the spring-rank accuracies (unit and varying constants) are
    maximised over an alpha grid, BTL is evaluated once, and RWR is taken as
    the better of restart probabilities 0.01 and 0.1 (a failing RWR run counts
    as 0).  The curves are saved to
    ``Comparison_bitwise_accuracy_all_models.png``.

    A leftover statement that re-ran both RWR evaluations outside the ``try``
    guards and discarded the result has been removed: it doubled the RWR cost
    and let the exceptions the guards were meant to absorb escape.
    """
    id_to_team, team_to_id, graph, edge_list = create_network_from_edge_file("edges.txt")
    id_to_team, team_to_id, graph, edge_list_1 = create_network_from_edge_file("edges_1.txt")
    id_to_team, team_to_id, graph, edge_list_2 = create_network_from_edge_file("edges_2.txt")
    id_to_team, team_to_id, graph, edge_list_3 = create_network_from_edge_file("edges_3.txt")
    percent_training = (np.arange(0.05,1, 0.025)).tolist()
    percent_training.reverse()
    sr = []
    sr2 = []
    btl = []
    rwr = []
    for tv in percent_training:
        print(tv)
        max_sr = 0
        max_sr2 = 0
        for alpha in np.arange(0.1, 20, 1):
            acc_sr1 = edge_prediction_accuracy_bitwise(edge_list, edge_list,edge_list, id_to_team, alpha, tv)
            if acc_sr1 > max_sr:
                max_sr = acc_sr1
            acc_sr2 = edge_prediction_accuracy_bitwise(edge_list_1, edge_list_2,edge_list_3, id_to_team, alpha, tv)
            if acc_sr2 > max_sr2:
                max_sr2 = acc_sr2
        acc_btl = BTL_model_bitwise(edge_list, id_to_team, tv)

        # Best RWR accuracy over the two restart probabilities; a run that
        # raises is scored 0 so one bad split does not abort the sweep.
        rwr_scores = []
        for restart in (0.01, 0.1):
            try:
                rwr_scores.append(RWR_bitwise_accuracy(restart, 1e-10, tv))
            except Exception:
                rwr_scores.append(0)
        max_acc_rwr = max(rwr_scores)

        sr.append(max_sr)
        sr2.append(max_sr2)
        btl.append(acc_btl)
        rwr.append(max_acc_rwr)
    percent_training = [100*x for x in percent_training]
    plt.plot(percent_training, sr, label="SpringRank")
    plt.plot(percent_training, sr2, label="SpringRank*")
    plt.plot(percent_training, btl, label="BTL")
    plt.plot(percent_training, rwr, label="RWR")
    plt.legend()
    plt.ylabel("bitwise prediction accuracy")
    plt.xlabel("% of data used as training")
    plt.savefig(f"Comparison_bitwise_accuracy_all_models.png")

    
def genereate_prob_accuracy_graph_comp():
    """Plot probabilistic accuracy of every model against the training fraction.

    Training fractions run from 0.975 down to 0.05 in steps of 0.025.  For
    each one, and for each beta in a fixed grid, the spring-rank accuracies
    (unit and varying constants) are maximised over an alpha grid, and BTL and
    RWR (better of restart probabilities 0.01 and 0.1, a failing run counting
    as 0) are evaluated.  The per-fraction maxima over beta are plotted and
    saved to ``Comparison_prob_accuracy_all_models_3Seasons.png``.

    Fixes relative to the earlier version:

    * The RWR maximum was assigned to a misspelt name (``mac_rwr``), so the
      plotted RWR curve stayed at 0.
    * The BTL curve recorded the accuracy at the last beta instead of the
      maximum over beta that was being tracked.
    """
    id_to_team, team_to_id, graph, edge_list = create_network_from_edge_file("edges.txt")
    id_to_team, team_to_id, graph, edge_list_1 = create_network_from_edge_file("edges_1.txt")
    id_to_team, team_to_id, graph, edge_list_2 = create_network_from_edge_file("edges_2.txt")
    id_to_team, team_to_id, graph, edge_list_3 = create_network_from_edge_file("edges_3.txt")
    betas = np.concatenate((np.linspace(10,100, 5), np.linspace(110, 1000, 10)))
    percent_training = (np.arange(0.05,1, 0.025)).tolist()
    percent_training.reverse()
    sr = []
    sr2 = []
    btl = []
    rwr = []
    for tv in percent_training:
        print(f"tv:{tv}")
        max_sr = 0
        max_sr2 = 0
        max_btl = 0
        max_rwr = 0
        for beta in betas:
            print(beta)
            for alpha in np.arange(0.1, 16.1, 2):
                acc_sr1 = edge_prediction_accuracy_probabilistic(edge_list, edge_list,edge_list, id_to_team, alpha,beta, tv)
                if acc_sr1 > max_sr:
                    max_sr = acc_sr1
                acc_sr2 = edge_prediction_accuracy_probabilistic(edge_list_1, edge_list_2,edge_list_3, id_to_team, alpha, beta, tv)
                if acc_sr2 > max_sr2:
                    max_sr2 = acc_sr2
            acc_btl = BTL_model_probabilistic(edge_list, id_to_team, beta, tv)
            if acc_btl > max_btl:
                max_btl = acc_btl

            # Best RWR accuracy over the two restart probabilities; a run that
            # raises is scored 0 so one bad split does not abort the sweep.
            rwr_scores = []
            for restart in (0.01, 0.1):
                try:
                    rwr_scores.append(RWR_probabilistic_accuracy(restart, 1e-10, edge_list, id_to_team, beta, tv))
                except Exception:
                    rwr_scores.append(0)
            acc_rwr = max(rwr_scores)
            if acc_rwr > max_rwr:
                max_rwr = acc_rwr

        sr.append(max_sr)
        sr2.append(max_sr2)
        btl.append(max_btl)
        rwr.append(max_rwr)
    percent_training = [100*x for x in percent_training]
    plt.plot(percent_training, sr, label="SpringRank")
    plt.plot(percent_training, sr2, label="SpringRank*")
    plt.plot(percent_training, btl, label="BTL")
    plt.plot(percent_training, rwr, label="RWR")
    plt.legend()
    plt.ylabel("bitwise prediction accuracy")
    plt.xlabel("% of data used as training")
    plt.savefig(f"Comparison_prob_accuracy_all_models_3Seasons.png")

