In [1]:
import networkx as nx
import numpy as np
import pandas as pd

In [2]:
# Path to the edge list; each non-empty line holds whitespace-separated node ids.
graph_path = "graph.net"
# Read inside a context manager so the file handle is closed deterministically
# rather than being left open until garbage collection.
with open(graph_path) as edge_file:
    edges = [line.strip().split() for line in edge_file]
edges[:10]

[['1', '88160'],
 ['1', '118052'],
 ['1', '161555'],
 ['1', '244916'],
 ['1', '346495'],
 ['1', '444232'],
 ['1', '447165'],
 ['1', '500600'],
 ['2', '27133'],
 ['2', '62291']]

In [3]:
# Convert the two string endpoints of every edge into integer node ids.
edges_num = [[int(pair[0]), int(pair[1])] for pair in edges]

In [4]:
# Build the undirected graph directly from the integer edge list.
G = nx.Graph(edges_num)

In [5]:
import community as community_louvain

In [None]:
# Louvain is a randomized heuristic: pin the seed so the community labels (and
# everything derived from them below) are reproducible across kernel restarts.
partition = community_louvain.best_partition(G, random_state=42)

In [None]:
# Count the distinct community labels assigned by the partition.
num_communities = len({*partition.values()})
num_communities

In [None]:
# Modularity score of `partition` on G; displayed as the cell output.
modularity = community_louvain.modularity(partition, G)
modularity

In [None]:
# Largest community label present in the partition.
max(partition.values())

In [None]:
# Invert the node -> community mapping into community -> [member nodes],
# keeping members in the iteration order of `partition`.
communities = {}
for node, label in partition.items():
    communities.setdefault(label, []).append(node)

# Number of blocks (communities).
# Assumes community labels are the contiguous integers 0..max — TODO confirm.
num_blocks = max(partition.values()) + 1

# Initialize edge-propensity parameters.
# NOTE(review): this module-level `theta` is not read by any later cell visible
# here; em_algorithm creates its own matrix when called without theta_init.
theta = np.ones((num_blocks, num_blocks))

# Block edge-propensity estimation (kept under its original "EM" name)
def em_algorithm(G, partition, num_blocks, theta_init=None, max_iter=100):
    """Estimate a row-normalized block-to-block edge-propensity matrix.

    The partition is held fixed, so despite the name no community assignments
    are re-estimated. Each iteration adds the block-pair edge counts to
    ``theta`` and then renormalizes every row to sum to 1; repeated, this
    converges towards the row-normalized edge-count matrix.

    Parameters
    ----------
    G : graph-like
        Object whose ``edges()`` yields ``(u, v)`` node pairs.
    partition : mapping
        Maps each node to an integer block index in ``range(num_blocks)``.
    num_blocks : int
        Number of blocks; the result has shape ``(num_blocks, num_blocks)``.
    theta_init : array-like, optional
        Starting matrix. It is copied (as float) and never modified in place.
        Defaults to a matrix of ones.
    max_iter : int, optional
        Number of accumulate/normalize rounds (default 100).

    Returns
    -------
    numpy.ndarray
        Float matrix whose rows with a positive sum add up to 1.
    """
    if theta_init is None:
        theta = np.ones((num_blocks, num_blocks), dtype=float)
    else:
        # Copy as float: an integer array would truncate during normalization.
        theta = np.array(theta_init, dtype=float)

    # The edge counts per (block of u, block of v) do not change between
    # iterations, so scan the edge list once instead of once per iteration.
    # NOTE(review): counts follow the (u, v) orientation reported by G.edges()
    # and are not symmetrized — confirm this is intended for an undirected graph.
    counts = np.zeros((num_blocks, num_blocks), dtype=float)
    for u, v in G.edges():
        counts[partition[u], partition[v]] += 1

    for _ in range(max_iter):
        # Accumulate the observed block-pair counts.
        theta += counts

        # Normalize each row by its sum, computed ONCE per row. Dividing entry
        # by entry against a freshly recomputed sum would shrink the
        # denominator as the row is processed and leave rows unnormalized.
        row_sums = theta.sum(axis=1, keepdims=True)
        np.divide(theta, row_sums, out=theta, where=row_sums > 0)

    return theta

# Fit the block-to-block propensity matrix for the current partition.
optimal_theta = em_algorithm(G, partition, num_blocks)

optimal_theta.shape

# Score every edge: block-pair propensity damped by the product of the two
# endpoint degrees (example scaling; adjust as needed).
purchase_probabilities = {}
for customer, product in G.edges():
    block_pair = (partition[customer], partition[product])
    degree_product = G.degree(customer) * G.degree(product)
    edge_scale = 1.0 / degree_product
    purchase_probabilities[(customer, product)] = optimal_theta[block_pair] * edge_scale

list(purchase_probabilities.keys())[:2]

In [None]:
# Each line of the file is one item set: whitespace-separated integer ids.
with open('item_sets.txt') as f:
    list_items = [[int(token) for token in line.split()] for line in f]
list_items[0]

In [None]:
def GetGlobalCommunity(partition, items):
    """Return the community label held by the most members of ``items``.

    Each item is looked up in ``partition`` and the labels are tallied; on a
    tie the label that was encountered first wins.
    """
    tally = {}
    for node in items:
        label = partition[node]
        tally[label] = tally.get(label, 0) + 1
    return max(tally, key=tally.get)

In [None]:
# Spot check: dominant community label of the first item set.
GetGlobalCommunity(partition, list_items[0])

In [None]:
def GetItems(G, items):
    """Return the outside neighbors adjacent to the most members of ``items``.

    Parameters
    ----------
    G : graph-like
        Object whose ``neighbors(node)`` yields the nodes adjacent to ``node``.
    items : iterable
        Hashable node ids forming the item set.

    Returns
    -------
    list
        Every node that is not in ``items`` and is reached from the largest
        number of members of ``items`` (all tied nodes, in first-seen order).
        Empty when no member has a neighbor outside the set.
    """
    items = list(items)
    # Set membership is O(1); testing against the list rescanned it per neighbor.
    excluded = set(items)

    hits = {}
    for item in items:
        for neighbor in G.neighbors(item):
            if neighbor not in excluded:
                hits[neighbor] = hits.get(neighbor, 0) + 1

    if not hits:
        return []

    # Compute the maximum once rather than once per candidate.
    top_count = max(hits.values())
    return [node for node, count in hits.items() if count == top_count]

In [None]:
# Spot check: most frequently shared outside neighbors of the first item set.
GetItems(G, list_items[0])

In [None]:
def PredictItem(G, partition, communities, items):
    """Pick the community member that is closest, on average, to ``items``.

    The dominant community of ``items`` is determined with
    ``GetGlobalCommunity``. Candidates are the members of that community whose
    id is strictly greater than ``max(items)``. The candidate with the smallest
    mean shortest-path length to the reachable members of ``items`` is returned;
    ties keep the earliest candidate.

    Parameters
    ----------
    G : networkx graph
        Graph used for the shortest-path queries.
    partition : mapping
        Node -> community label.
    communities : mapping
        Community label -> list of member nodes.
    items : sequence
        Non-empty item set.

    Returns
    -------
    The chosen node. If there are no candidates, the first member of the
    dominant community; if no candidate can reach any item, the first candidate.
    """
    comm = GetGlobalCommunity(partition, items)
    members = communities[comm]

    # Hoisted: the threshold does not depend on the candidate being tested.
    threshold = max(items)
    cand = [c for c in members if c > threshold]

    # `comm` is a label, not a list: the fallback must index the community's
    # member list (subscripting the integer label raises TypeError).
    best_item = cand[0] if cand else members[0]

    best_avg = None  # None means "no candidate scored yet"
    for c in cand:
        lengths = []
        for item in items:
            try:
                lengths.append(nx.shortest_path_length(G, c, item))
            except nx.NetworkXNoPath:
                # Deliberately best-effort: unreachable items are left out of
                # the candidate's average instead of disqualifying it.
                continue
        if not lengths:
            continue
        avg_path = sum(lengths) / len(lengths)
        if best_avg is None or avg_path < best_avg:
            best_avg = avg_path
            best_item = c
    return best_item

In [None]:
# Spot check: predicted item for the first item set.
PredictItem(G, partition, communities, list_items[0])

In [None]:
from tqdm import tqdm

In [None]:
# Predict one item per item set. The original `range()` call was missing its
# argument (a syntax error); iterating the item sets directly covers all of them.
result = []
for items in tqdm(list_items, desc="predicting"):
    result.append(PredictItem(G, partition, communities, items))

In [None]:
# Number of predictions produced so far.
len(result)

In [None]:
# Preview only the first predictions; displaying the whole list floods the output.
result[:10]

In [None]:
# Pad with 0 so there is exactly one target per item set even when only a
# prefix of the item sets was predicted.
new_result = result + [0] * (len(list_items) - len(result))
# Preview only; displaying the whole list floods the output.
new_result[:10]

In [None]:
# Submission table: 1-based item-set id alongside the predicted target.
n_item_sets = len(list_items)
d_fin = dict(id=[k + 1 for k in range(n_item_sets)], target=new_result)
df = pd.DataFrame(data=d_fin)
df.head()

In [None]:
# Write the submission to disk without the DataFrame index column.
df.to_csv("submission_late_02.csv", index=False)

In [None]:
# Total number of item sets loaded from the input file.
len(list_items)

In [None]:
# Number of predictions available, for comparison with the item-set count.
len(result)

In [None]:
# NOTE(review): undocumented magic numbers. Presumably a reported score (0.0145),
# the total number of item sets (75149) and the number actually predicted (2017),
# rescaling the score to the predicted subset — confirm and name these constants.
correct = 0.0145 * 75149 / 2017
correct