In [2]:
import numpy as np
import scipy.sparse
import matplotlib.pyplot as plt
import umap
from sklearn.manifold import TSNE

In [3]:
# Load the data file and turn it into a sparse indicator matrix.
file_name = "first_10000_integers.txt"

# Every line of the file becomes one row. A line is a whitespace-separated list of
# tokens, and each token is a comma-separated pair of integers "p,e".
data = []
with open(file_name) as f:
    for text_line in f:
        pairs = []
        for token in text_line.split():
            pairs.append([np.int32(field) for field in token.split(',')])
        data.append(pairs)

# One matrix row per input line; the column count is the largest first-field value plus one.
R = len(data)
C = 1 + max(col for pairs_in_row in data for col, _ in pairs_in_row)
M = scipy.sparse.lil_matrix((R, C), dtype=np.uint32)
for i, row in enumerate(data):
    for j, e in row:
        # Only the presence of column j is recorded; the second field e is ignored.
        M[i, j] = 1

# Open question: should the stored value be e or 1? Using 1 is in line with
# previous work and seems to make nicer pictures.

# If you do not want a sparse matrix, use this code instead
# (note that it uses uint64 rather than uint32):

# R = len(data)
# C = max(p for A in data for p,e in A)+1
# M = np.zeros((R,C),dtype=np.uint64)
# for i,row in enumerate(data):
#     for j,e in row:
#         M[i,j] = 1

M

<159118x9636 sparse matrix of type '<class 'numpy.uint32'>'
	with 476872 stored elements in List of Lists format>

In [None]:
%%time
# Reducing via UMAP
reducer = umap.UMAP(metric='cosine', n_epochs=500)
umap_embedding = reducer.fit_transform(M)

#Ploting and saving image
fig = plt.figure(figsize=(8,8))
fig.patch.set_facecolor('black')
plt.scatter(umap_embedding[:,0], umap_embedding[:,1], marker='o', s=0.5, edgecolor='',
            c=np.arange(umap_embedding.shape[0]), cmap="magma")

plt.axis("off")
plt.savefig(f"{file_name}_UMAP_visualization.png", dpi=400, facecolor='black')

In [None]:
%%time
# This takes much longer than UMAP and it's re
# Reducing via t-SNE
tsne = TSNE(2,perplexity=300)
tsne_embedding = tsne.fit_transform(M)

#Ploting and saving image
fig = plt.figure(figsize=(8,8))
fig.patch.set_facecolor('black')
plt.scatter(tsne_embedding[:,0], tsne_embedding[:,1], marker='o', s=0.5, edgecolor='',
            c=np.arange(tsne_embedding.shape[0]), cmap="magma")

plt.axis("off")
plt.savefig(f"{file_name}_t-SNE_visualization.png", dpi=400, facecolor='black')