In [1]:
import scipy.sparse as sp 
import pandas as pd 
import numpy as np 
import torch
import gc 
import os 

In [2]:

user_size=None
item_size=None
dataset=None

smooth_ratio = 0.1 # only top 20% eigenvalues will be choosed to create smooth graph
rough_ratio = 0.002 # only last 0.2% eigenvalues will be choosed to create rough graph

cache_file = os.path.abspath('./cache')
dataset = 'pinterest'

In [3]:
def cal_spectral_feature(Adj:torch.tensor,#邻接矩阵
                         size:int,#topk 个
                         side:str = 'user', # 生成用户、产品侧
                         largest:bool = True, # 提取Smooth、rough 子图
                         niter = 5
                         ):
    '''
    通过Lobpcg 算法 获取前(后)k 个特征向量、特征值
    '''
    print('进行谱分解')
    value,vector = torch.lobpcg(Adj,k=size,largest=largest,niter=niter)
    
    if largest:
        feature_file_name = os.path.join(cache_file,f'{dataset}_smooth_{side}_features.pt')
        value_file_name = os.path.join(cache_file,f'{dataset}_smooth_{side}_values.pt')
    else:
        feature_file_name = os.path.join(cache_file,f'{dataset}_rough_{side}_features.pt')
        value_file_name = os.path.join(cache_file,f'{dataset}_rough_{side}_values.pt')
    print('输出文件')
    torch.save(vector,feature_file_name)
    torch.save(value,value_file_name)

In [4]:
df = pd.read_csv('/Users/yangbichen/Documents/ML算法code/图/GnnCode/datasets/pinterest/train_sparse.csv')
df = df.drop_duplicates()
col = df.iloc[:,0]
row = df.iloc[:,1]
R = sp.csr_matrix(([1]*df.shape[0],(col,row)))
torch.save(torch.Tensor(R.todense()),f'{cache_file}/pinterest/train_rate_tensor.pkl')

user_degree = np.power(R.sum(1),-0.5).flatten()
item_degree = np.power(R.sum(0),-0.5).flatten()
user_degree[np.isinf(user_degree)]=0
item_degree[np.isinf(item_degree)]=0
Du = sp.diags(np.array(user_degree).flatten(),0)
Di = sp.diags(np.array(item_degree).flatten(),0)
Au = Du.dot(R).dot(Di.power(2)).dot(R.transpose()).dot(Du)
Ai = Di.dot(R.transpose()).dot(Du.power(2)).dot(R).dot(Di)


Au = Au.tocoo()
Ai = Ai.tocoo()

Au_sp=torch.sparse_coo_tensor(torch.Tensor([Au.row.tolist(),Au.col.tolist()]),
                            torch.Tensor(Au.data))
Ai_sp=torch.sparse_coo_tensor(torch.Tensor([Ai.row.tolist(),Ai.col.tolist()]),
                            torch.Tensor(Ai.data))


print(f'User Side Adjancy Marix shape {Au.shape}')
print(f'Item Side Adjancy Marix shape {Ai.shape}')

del df,R,user_degree,item_degree,Du,Di,Au,Ai
gc.collect()




  item_degree = np.power(R.sum(0),-0.5).flatten()


User Side Adjancy Marix shape (37501, 37501)
Item Side Adjancy Marix shape (9831, 9831)


0

In [5]:
from utils import checkAndGetNodeSize
user_size,item_size = checkAndGetNodeSize('pinterest')
print('cal user side smooth spectral feature')
cal_spectral_feature(Au_sp,int(smooth_ratio*user_size),'user',True)
if rough_ratio!=0:
    print('cal user side rough spectral feature')
    cal_spectral_feature(Au_sp,int(rough_ratio*user_size),'user',False)

cal user side smooth spectral feature
进行谱分解


torch.linalg.solve_triangular has its arguments reversed and does not return a copy of one of the inputs.
X = torch.triangular_solve(B, A).solution
should be replaced with
X = torch.linalg.solve_triangular(A, B). (Triggered internally at  /Users/distiller/project/pytorch/aten/src/ATen/native/BatchLinearAlgebra.cpp:1672.)
  Rinv = torch.triangular_solve(Id, R, upper=True).solution


输出文件
cal user side rough spectral feature
进行谱分解
输出文件


In [6]:
print('cal item side smooth spectral feature')
cal_spectral_feature(Ai_sp,int(smooth_ratio*item_size),'item',True)
if rough_ratio!=0:
    print('cal item side rough spectral feature')
    cal_spectral_feature(Ai_sp,int(rough_ratio*item_size),'item',False)

del Au_sp,Ai_sp 
gc.collect()

cal item side smooth spectral feature
进行谱分解
输出文件
cal item side rough spectral feature
进行谱分解
输出文件


0

In [5]:
value,vector = torch.lobpcg(Au_sp,k=5,largest=True,niter=5)

torch.linalg.solve_triangular has its arguments reversed and does not return a copy of one of the inputs.
X = torch.triangular_solve(B, A).solution
should be replaced with
X = torch.linalg.solve_triangular(A, B). (Triggered internally at  /Users/distiller/project/pytorch/aten/src/ATen/native/BatchLinearAlgebra.cpp:1672.)
  Rinv = torch.triangular_solve(Id, R, upper=True).solution


In [42]:
Au_tensor=torch.sparse_coo_tensor(torch.Tensor([Au.row.tolist(),Au.col.tolist()]),
                             torch.Tensor(Au.data))
value,vector = torch.lobpcg(Au_tensor,k=5,largest=True,niter=5)

torch.linalg.solve_triangular has its arguments reversed and does not return a copy of one of the inputs.
X = torch.triangular_solve(B, A).solution
should be replaced with
X = torch.linalg.solve_triangular(A, B). (Triggered internally at  /Users/distiller/project/pytorch/aten/src/ATen/native/BatchLinearAlgebra.cpp:1672.)
  Rinv = torch.triangular_solve(Id, R, upper=True).solution


In [44]:
vector

tensor([[ 0.0020,  0.0022,  0.0026, -0.0006, -0.0004],
        [ 0.0026,  0.0037,  0.0014, -0.0035, -0.0016],
        [ 0.0014,  0.0038, -0.0030, -0.0063,  0.0051],
        ...,
        [ 0.0056,  0.0083,  0.0021,  0.0023, -0.0027],
        [ 0.0036,  0.0078,  0.0028,  0.0026,  0.0018],
        [ 0.0018,  0.0020,  0.0023, -0.0052,  0.0009]])

In [5]:
from utils import checkAndGetNodeSize
user_size,item_size = checkAndGetNodeSize('pinterest')

In [6]:

print('cal user side smooth spectral feature')
cal_spectral_feature(Au,int(smooth_ratio*user_size),'user',True)


cal user side smooth spectral feature


In [None]:
if rough_ratio!=0:
    print('cal user side rough spectral feature')
    cal_spectral_feature(Au,int(rough_ratio*user_size),'user',False)

print('cal item side smooth spectral feature')
cal_spectral_feature(Ai,int(smooth_ratio*item_size),'item',True)
if rough_ratio!=0:
    print('cal item side rough spectral feature')
    cal_spectral_feature(Ai,int(rough_ratio*item_size),'item',False)

del Au,Ai 
gc.collect()

In [12]:
R = torch.load('cache/pinterest/train_rate_tensor.pkl')

In [13]:
R = torch.Tensor(R)

In [15]:
torch.save(R,'cache/pinterest/train_rate_tensor.pkl')

In [22]:
df = pd.read_csv('/Users/yangbichen/Documents/ML算法code/图/GnnCode/datasets/pinterest/train_sparse.csv')
df.item.max()

9830

In [24]:
df.item.max()

9830

In [None]:
df = df.drop_duplicates()

In [25]:
0==0.

True