In [None]:
import numpy as np
import pandas as pd
import os
from matplotlib import pyplot as plt
from scipy.spatial.distance import pdist, squareform

%matplotlib inline

In [None]:
# The processed dataset is read from the current working directory.
data_path = os.getcwd() + '/processed_data.csv'
pd_data = pd.read_csv(data_path)

In [None]:
# Columns that are used neither as node attributes nor as similarity features
# in the cells below.
columns_to_drop = [
    'release_date', 'steam_appid', 'platforms', 'categories', 'min_RAM',
    'positive_ratings', 'negative_ratings', 'steamspy_tags', 'english',
    'pc_requirements', 'mac_requirements', 'achievements',
    'linux_requirements', 'minimum', 'recommended',
]
# Remove those columns, then discard every row that still has a missing value.
pd_data = pd_data.drop(columns=columns_to_drop).dropna()

## Setting the threshold 

In [None]:
# Maximum number of rows kept when the `nodes` and `edges` tables are sliced below.
thresh = 5000

In [None]:
# Columns kept as per-node metadata.
node_columns = ['developer', 'publisher', 'name', 'genres', 'owners']
# Columns fed to the pairwise-distance computation (presumably all numeric --
# confirm against the CSV). Despite the variable name `edges`, this table holds
# per-row features, not an edge list.
feature_columns = ['average_playtime', 'median_playtime', 'price', 'grade', 'month', 'year', 'num_systems']

# Keep only the first `thresh` rows (positional slice) of each table.
nodes = pd_data[node_columns].iloc[:thresh]
edges = pd_data[feature_columns].iloc[:thresh]

In [None]:
# One graph node per retained row.
n_nodes = nodes.shape[0]
# Placeholder all-zero integer adjacency matrix; it is overwritten once the
# kernel weights are computed further down.
adjacency = np.zeros(shape=(n_nodes, n_nodes), dtype=int)

In [None]:
# Display the number of nodes (at most `thresh`, since `nodes` was sliced with [:thresh]).
n_nodes

In [None]:
def epsilon_similarity_graph(X: np.ndarray, sigma=1, epsilon=0):
    """Build a thresholded Gaussian-kernel similarity graph.

    The weight between points i and j is exp(-||x_i - x_j||^2 / (2 * sigma^2)).
    The diagonal is zero (no self-loops) and every weight strictly below
    ``epsilon`` is set to zero.

    Args:
        X (n x d): coordinates of the n data points in R^d.
        sigma (float): width of the kernel.
        epsilon (float): threshold; weights smaller than this are zeroed.

    Return:
        adjacency (n x n ndarray): adjacency matrix of the graph.
    """
    # pdist returns the condensed vector of (unsquared) pairwise distances;
    # squareform expands it to the full symmetric n x n matrix.
    distances = squareform(pdist(X))
    squared_distances = distances**2

    # The identity must be subtracted AFTER exponentiation: exp(0) - 1 == 0 on
    # the diagonal. Subtracting it inside the exponent would leave exp(-1)
    # self-loops instead.
    adjacency = np.exp(-squared_distances / (2 * sigma**2)) - np.identity(len(squared_distances))

    # Sparsify: drop every edge whose weight is below the threshold.
    adjacency[adjacency < epsilon] = 0

    return adjacency

In [None]:
# Condensed vector of pairwise distances between the rows of `edges`.
# Despite its name, `sq_dist` holds plain (unsquared) distances; they are
# squared two lines below.
sq_dist = pdist(edges)
# Use the spread of the pairwise distances as the Gaussian kernel width.
sigma = np.std(sq_dist)
# Print sigma itself: np.std of a scalar is always 0, so wrapping it in np.std
# would report 0 regardless of the data.
print(f'The value of sigma is {sigma}')
pairwise_dists = squareform(sq_dist)**2
# Gaussian kernel weights; subtracting the identity zeroes the diagonal so the
# graph has no self-loops.
adjacency = np.exp( -pairwise_dists / sigma**2 / 2) - np.identity(len(pairwise_dists))
# Histogram of all kernel weights (before any thresholding).
plt.hist(adjacency.flatten())
plt.show()

In [None]:
# Base sparsification threshold. Note that the graph-building cell below passes
# epsilon + 0.1 (not epsilon itself) to epsilon_similarity_graph.
epsilon = 0.8

In [None]:
# Build the sparsified similarity graph: kernel weights below the cutoff
# (epsilon + 0.1) are set to zero inside epsilon_similarity_graph.
similarity_cutoff = epsilon + 0.1
adjacency = epsilon_similarity_graph(edges, sigma=sigma, epsilon=similarity_cutoff)

In [None]:
# Visualise the sparsity pattern of the thresholded adjacency matrix
# (which entries are non-zero).
plt.spy(adjacency)
plt.show()

In [15]:
def compute_laplacian(adjacency: np.ndarray, normalize: bool):
    """Compute the graph Laplacian of a weighted adjacency matrix.

    Args:
        adjacency (n x n ndarray): adjacency matrix of the graph.
        normalize (bool): if False, return the combinatorial Laplacian
            L = D - A; if True, return the symmetric normalized Laplacian
            D^{-1/2} (D - A) D^{-1/2}.

    Return:
        L (n x n ndarray): combinatorial or symmetric normalized Laplacian.
    """
    # Node degrees are the column sums of the adjacency matrix.
    degrees = np.sum(adjacency, 0)
    laplacian = np.diag(degrees) - adjacency

    if normalize:
        # Scale entry (i, j) by 1 / sqrt(d_i * d_j). Nodes with zero degree
        # get a scaling factor of 0 instead of triggering a 0/0 division;
        # their Laplacian row and column are then all zeros.
        inv_sqrt_degrees = np.zeros(degrees.shape, dtype=float)
        has_edges = degrees > 0
        inv_sqrt_degrees[has_edges] = 1.0 / np.sqrt(degrees[has_edges])
        # Build a new float array rather than assigning element-wise in place,
        # so integer adjacency inputs are not truncated.
        laplacian = laplacian * inv_sqrt_degrees[:, np.newaxis] * inv_sqrt_degrees[np.newaxis, :]

    return laplacian

In [113]:
# Column sums of the adjacency matrix (node degrees), kept as a 1 x n array.
dist = np.sum(adjacency, axis=0).reshape(1, -1)
# Combinatorial Laplacian: diagonal degree matrix minus the adjacency matrix.
np.diag(dist[0]) - adjacency

array([[ 9.67055658e-01,  0.00000000e+00,  0.00000000e+00, ...,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  2.89303265e+03, -9.99898805e-01, ...,
        -9.99081608e-01, -9.99080929e-01, -9.99713098e-01],
       [ 0.00000000e+00, -9.99898805e-01,  2.89114184e+03, ...,
        -9.99587793e-01, -9.99587227e-01, -9.99846717e-01],
       ...,
       [ 0.00000000e+00, -9.99081608e-01, -9.99587793e-01, ...,
         2.88584774e+03, -9.99998728e-01, -9.99560189e-01],
       [ 0.00000000e+00, -9.99080929e-01, -9.99587227e-01, ...,
        -9.99998728e-01,  2.88584978e+03, -9.99560574e-01],
       [ 0.00000000e+00, -9.99713098e-01, -9.99846717e-01, ...,
        -9.99560189e-01, -9.99560574e-01,  2.89176771e+03]])

In [114]:
#adjacency = adjacency[adjacency > 0]

In [115]:
#adjacency[[True,True],[True, False]]

## Graph building and visualization 

In [None]:
import networkx as nx

In [None]:
# Build a networkx graph from the thresholded adjacency matrix computed above.
graph = nx.from_numpy_array(adjacency)

In [None]:
# `nodes` still carries the row labels of pd_data, which have gaps after
# dropna(), whereas the graph built from the adjacency matrix labels its nodes
# 0..n-1 by position. Re-index positionally before converting so that each
# attribute is keyed by the graph node built from the same row.
# Result layout: {column name: {node id: value}}.
node_props = nodes.reset_index(drop=True).to_dict()

In [None]:
# Attach every column of the node table to the graph as a node attribute that
# carries the column's name.
for attribute_name, values_by_node in node_props.items():
    nx.set_node_attributes(graph, values_by_node, attribute_name)

In [None]:
# Inspect the attributes attached to node 1. `Graph.node` was removed in
# networkx 2.4; `Graph.nodes` is the supported accessor.
graph.nodes[1]

In [None]:
# Draw the graph with networkx's spectral layout.
nx.draw_spectral(graph)