In [31]:
import csv
import sys
import os
import math
import numpy as np
import numpy.linalg as npla
import scipy
from scipy import sparse
from scipy import linalg
import scipy.sparse.linalg as spla
from scipy.spatial import distance
import matplotlib.pyplot as plt
from matplotlib import cm
import matplotlib.mlab as mlab
from mpl_toolkits.mplot3d import axes3d
%matplotlib tk

In [46]:
# https://stackoverflow.com/questions/7685128/sparse-3d-matrix-array-in-python
class MultiDSparseMatrix:
  """Sparse N-dimensional container backed by a plain dict.

  Keys are index tuples (any hashable works); only cells that were
  explicitly written are stored, and every other cell reads back as 0.
  """

  def __init__(self):
    # Maps key -> stored value; starts out empty.
    self.elements = dict()

  def addValue(self, tuple, value):
    """Store ``value`` under the key ``tuple``, replacing any earlier entry."""
    self.elements.update({tuple: value})

  def readValue(self, tuple):
    """Return the value stored under ``tuple``, or 0 when nothing is stored."""
    # 0 is the implicit fill value (could also be 0.0 if using floats).
    return self.elements.get(tuple, 0)

In [2]:
# Load the precomputed arrays from .npy files in the current working directory.
# Presumably row i of user_project_matrix corresponds to user_array[i] and
# column j to project_ids_array[j] (later cells index it that way) — TODO confirm.
user_project_matrix = np.load("user_project_matrix.npy")
user_array = np.load("users_array.npy")
project_ids_array = np.load("project_ids_array.npy")

In [3]:
user_array

array(['H6VMj6yGAtxAA2Yl9p3PJnSGMws=', 'Irk0w1LDU3L8M1cB6e1mZ7tW5y8=',
       'sniZjTr+J6s/QBchnSZqQAfQj7A=', ...,
       'XOm+opbPT2KK9wUuWWkuvOx85Qc=', 'BJ65LGRD1s8qa2+mJFYI8bd7LlU=',
       'ucWofoWYXhE+gMm6qihII4iujWA='], dtype='<U28')

In [4]:
project_ids_array

array([   0,    1,    2, ..., 1928, 1929, 1930])

In [12]:
# Convert user_project_matrix to SciPy CSR (compressed sparse row) format.
s = sparse.csr_matrix(user_project_matrix)

In [13]:
print(s)

  (0, 59)	10.0
  (0, 344)	1.0
  (0, 393)	10.0
  (0, 697)	8.0
  (0, 769)	11.0
  (0, 1238)	1.0
  (0, 1611)	11.0
  (0, 1890)	11.0
  (1, 28)	6.0
  (1, 29)	1.0
  (1, 471)	4.0
  (1, 644)	2.0
  (1, 701)	2.0
  (1, 735)	6.0
  (1, 769)	17.0
  (1, 810)	4.0
  (1, 849)	2.0
  (1, 880)	6.0
  (1, 1004)	4.0
  (1, 1065)	6.0
  (1, 1683)	1.0
  (1, 1720)	4.0
  (1, 1866)	6.0
  (1, 1890)	17.0
  (2, 26)	4.0
  :	:
  (263808, 370)	1.0
  (263808, 769)	1.0
  (263808, 1890)	1.0
  (263809, 724)	1.0
  (263809, 769)	1.0
  (263809, 1002)	1.0
  (263809, 1047)	1.0
  (263809, 1890)	1.0
  (263810, 297)	1.0
  (263810, 370)	1.0
  (263810, 769)	1.0
  (263810, 1890)	1.0
  (263811, 297)	1.0
  (263811, 370)	1.0
  (263811, 769)	1.0
  (263811, 929)	1.0
  (263811, 1890)	1.0
  (263812, 769)	1.0
  (263812, 1890)	1.0
  (263813, 297)	1.0
  (263813, 370)	1.0
  (263813, 769)	1.0
  (263813, 1890)	1.0
  (263814, 769)	1.0
  (263814, 1890)	1.0


In [24]:
# Spot check: look up one matrix cell by user id and project id.
# The row is the first position of the user id in user_array and the column is
# the first position of project id 777 in project_ids_array; [0][0] raises
# IndexError when the id is not present.
user_project_matrix[np.where(user_array == "Irk0w1LDU3L8M1cB6e1mZ7tW5y8=")[0][0]][np.where(project_ids_array == 777)[0][0]]

17.0

In [27]:
# Row of user_project_matrix for this user id (first match in user_array).
a = user_project_matrix[np.where(user_array == "Irk0w1LDU3L8M1cB6e1mZ7tW5y8=")[0][0]]

In [28]:
# Row of user_project_matrix for a second user id (first match in user_array).
b = user_project_matrix[np.where(user_array == "BJ65LGRD1s8qa2+mJFYI8bd7LlU=")[0][0]]

In [32]:
# Euclidean distance between the two user rows a and b.
euc_dist = distance.euclidean(a,b)

In [35]:
print(euc_dist)

27.784887978899608


In [36]:
# Cosine distance between a and b (SciPy defines it as 1 - cosine similarity).
cos_dist = distance.cosine(a,b)

In [38]:
print(cos_dist)

0.4120427057138366


In [50]:
# Container for the pairwise user distances computed per project:
#   key   -> (pic_id, user1, user2)
#   value -> (euclidean distance, cosine distance)
# Re-running this cell discards anything stored so far.
vector_distances_array = MultiDSparseMatrix()
print(vector_distances_array.elements)

{}


In [54]:

# Compute distances between consecutive users listed for each project.
#
# Each CSV row is read as: row[0] = project/picture id, row[1] = number of
# users, row[2:] = the user ids. For every adjacent pair of user ids that are
# both present in user_array, the Euclidean and cosine distances between their
# rows of user_project_matrix are stored in vector_distances_array under the
# key (pic_id, user1, user2).

# Map each user id to the first row where it occurs (same row that
# np.where(user_array == uid)[0][0] would pick). Built once so the loop does a
# dict lookup per user instead of scanning the whole user_array.
user_row_index = {}
for row_idx, user_id in enumerate(user_array.tolist()):
    user_row_index.setdefault(user_id, row_idx)

# newline="" is what the csv module asks for so quoted fields with embedded
# newlines are parsed correctly.
with open("final_canvas_users_per_project.csv", "r", newline="") as file:
    reader = csv.reader(file, delimiter=",")
    # Skip first line (header row); advance the reader rather than the raw
    # file handle so the csv parser stays in charge of record boundaries.
    next(reader, None)

    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines; nothing to process.
            continue

        pic_id = int(row[0])
        num_users = int(row[1])

        # User ids occupy row[2 : 2 + num_users]. A row shorter than its
        # declared count is processed up to the ids actually present.
        users = row[2:2 + num_users]

        for user1, user2 in zip(users, users[1:]):
            idx1 = user_row_index.get(user1)
            idx2 = user_row_index.get(user2)
            if idx1 is None or idx2 is None:
                # At least one user has no row in the matrix; skip the pair.
                continue

            user_vec1 = user_project_matrix[idx1]
            user_vec2 = user_project_matrix[idx2]

            euc_dist = distance.euclidean(user_vec1, user_vec2)
            cos_dist = distance.cosine(user_vec1, user_vec2)

            vector_distances_array.addValue((pic_id, user1, user2), (euc_dist, cos_dist))

In [56]:
vector_distances_array.elements

{(1921,
  '0OaZdogTkz5JNhjGO4sLJUgFkm0=',
  'zM7Afar4Z8dQJgjSpdHfNsce7cA='): (7.483314773547883, 0.24963352062313315),
 (1921,
  'zM7Afar4Z8dQJgjSpdHfNsce7cA=',
  '7rblw/26fTvJ37x24B8bfyBWwuo='): (7.54983443527075, 0.28157879189290025),
 (1921,
  'bqdHryxNnmZxF0PqJs8r2/MEzzQ=',
  'uiS35vYD3VUb8DA5T3rwep9bmUM='): (24.474476501040833, 0.27768488148538484),
 (1921,
  'j63llT/+D9vWiiagMECnkdMR4zM=',
  '0iBIyYrBxCF24M+4ux+QXt7qbpQ='): (19.87460691435179, 0.4725903683660774),
 (1921,
  'BZxb/t/dXlHcvZB9cRiyh3OE0RM=',
  '5lYOIuKTqLdkAK6hpAZ33xCwdGY='): (26.551836094703507, 0.3058564665381345),
 (1921,
  'EHY7+OHizBn4IMkYC+wGQ5wWJbU=',
  'i2G3Prk6jG0XnMfIDnmQurAn4Vg='): (29.614185789921695, 0.3731625648798349),
 (1921,
  'i2G3Prk6jG0XnMfIDnmQurAn4Vg=',
  'obPomZkRJIin5tdSeI/3uF/De94='): (33.015148038438355, 0.4733512142068198),
 (1921,
  'obPomZkRJIin5tdSeI/3uF/De94=',
  'y+n4zJckKQw4LXnC8wGGScfZALA='): (8.602325267042627, 0.5886604026167513),
 (1921,
  'T55zV1GkXZccIqGNnX1Z28jh8Lk=',
  'V4mSS

In [None]:
# NOTE(review): vector_distances_array is a MultiDSparseMatrix, not an ndarray,
# so np.save wraps it in a 0-d object array and pickles it. Reading it back
# needs np.load("vector_distances_array.npy", allow_pickle=True).item() with the
# MultiDSparseMatrix class defined in the loading session.
np.save("vector_distances_array.npy", vector_distances_array)