# Jaccard and word2vec matrices

In [1]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import scipy.spatial as sp

import matrices.loading as mload

In [2]:
# Notebook-wide matplotlib defaults, applied in a single call: 12x12 figures
# and the 'jet' colormap for image plots. plt.rcParams and mpl.rcParams refer
# to the same settings object, so updating mpl.rcParams covers both entries.
mpl.rcParams.update({
    "figure.figsize": (12, 12),
    "image.cmap": "jet",
})

In [3]:
# Location of the aminer v1 resources handed to the matrix loaders
# (original note: 629814 documents).
data_path = "resources/aminer/v1"

Load the sparse matrix of Jaccard similarities.

In [4]:
# Read the Jaccard similarity matrix through the project loader
# (original note: 998 documents sample).
matrix_jaccard = mload.load_matrix_jaccard_sim(data_path)

# Quick sanity check: number of dimensions, then the shape.
print(
    matrix_jaccard.ndim,
    matrix_jaccard.shape,
)

Loading saved matrix ...
2 (998, 998)


Load the sparse matrix of word2vec similarities.

In [5]:
# Read the word2vec similarity matrix through the project loader
# (original note: 998 documents sample).
matrix_word2vec = mload.load_matrix_word2vec_sim(data_path)

# Quick sanity check: number of dimensions, then the shape.
print(
    matrix_word2vec.ndim,
    matrix_word2vec.shape,
)

Loading saved matrix ...
2 (998, 998)


In [6]:
# Number of documents, taken from the row count of the Jaccard matrix.
N = matrix_jaccard.shape[0]

# Both loaders are noted as returning the same document sample, so each
# similarity matrix is expected to be N x N. Fail here, with a clear message,
# rather than plotting matrices that silently disagree in size.
assert matrix_jaccard.shape == (N, N), "Jaccard matrix is not square"
assert matrix_word2vec.shape == (N, N), (
    "word2vec matrix shape does not match the Jaccard matrix"
)

print("Number of documents", N, matrix_jaccard.shape)

Number of documents 998 (998, 998)


## Jaccard similarity

In [12]:
# Heatmap of the Jaccard similarity matrix; the image handle is passed to the
# colorbar explicitly so the colour scale is tied to this image.
jaccard_image = plt.imshow(matrix_jaccard)
plt.colorbar(jaccard_image)

<matplotlib.colorbar.Colorbar at 0x7ff2c8af50b8>

## Dice from Jaccard

In [8]:
# Dice from Jaccard, element-wise: D = 2J / (1 + J), spelled out with NumPy
# ufunc calls.
matrix_dice = np.divide(
    np.multiply(2, matrix_jaccard),
    np.add(1, matrix_jaccard),
)
print(matrix_dice.shape)

# Heatmap of the Dice matrix with its colour scale.
dice_image = plt.matshow(matrix_dice)
plt.colorbar(dice_image)

(998, 998)


<matplotlib.colorbar.Colorbar at 0x7ff2c8d344a8>

In [9]:
# Same Dice-from-Jaccard transform, D = 2J / (1 + J), written with plain
# arithmetic operators instead of NumPy ufunc calls.
matrix_dice = (2 * matrix_jaccard) / (matrix_jaccard + 1)
print(matrix_dice.shape)

# Heatmap of the Dice matrix with its colour scale.
dice_image = plt.matshow(matrix_dice)
plt.colorbar(dice_image)

(998, 998)


<matplotlib.colorbar.Colorbar at 0x7ff2c8c62470>

## Jaccard dissimilarity

In [10]:
# Jaccard dissimilarity is the element-wise complement of the similarity: 1 - J.
matrix_jaccard_dis = 1 - matrix_jaccard
print(matrix_jaccard_dis.shape)

# Heatmap of the dissimilarity matrix with its colour scale.
jaccard_dis_image = plt.matshow(matrix_jaccard_dis)
plt.colorbar(jaccard_dis_image)

(998, 998)


<matplotlib.colorbar.Colorbar at 0x7ff2c8bf42e8>

## word2vec similarity

In [11]:
# Heatmap of the word2vec similarity matrix; the image handle is passed to the
# colorbar explicitly so the colour scale is tied to this image.
word2vec_image = plt.matshow(matrix_word2vec)
plt.colorbar(word2vec_image)

<matplotlib.colorbar.Colorbar at 0x7ff2c8b58710>