## Reading files

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx

import pickle
import random

In [2]:
def read_g_obj(file="adj_matrices/G_hci.pkl", min_out_degree=10):
    """Load a pickled directed graph and keep only its sufficiently active nodes.

    Parameters
    ----------
    file : str
        Path to a pickle file holding a graph object that exposes
        ``out_degree()`` (e.g. a ``networkx.DiGraph``).
    min_out_degree : int, default 10
        Minimum out-degree, measured on the full graph, that a node needs
        in order to be kept.

    Returns
    -------
    The subgraph of the loaded graph induced on the retained nodes, as
    produced by ``nx.subgraph``.

    Notes
    -----
    Degrees are measured before filtering, so a retained node may have fewer
    than ``min_out_degree`` out-edges inside the returned subgraph.
    ``pickle.load`` can execute arbitrary code: only pass trusted files.
    """
    with open(file, "rb") as pfile:
        G = pickle.load(pfile)

    active_nodes = [
        node for node, out_degree in G.out_degree() if out_degree >= min_out_degree
    ]

    return nx.subgraph(G, active_nodes)

In [3]:
# Load the pickled graph, keeping only accounts that follow at least 10 others.
subgraph_hci = read_g_obj(file="adj_matrices/G_hci.pkl")

In [4]:
# Held-out (follower, following) pairs.
with open("train_test/test.pkl", mode="rb") as pfile:
    test = pickle.load(pfile)

In [5]:
# Training (follower, following) pairs.
with open("train_test/train.pkl", mode="rb") as pfile:
    train = pickle.load(pfile)

In [6]:
# Anti-test (follower, following) pairs.
with open("train_test/anti_test.pkl", mode="rb") as pfile:
    anti_test = pickle.load(pfile)

In [7]:
# Attach an implicit-feedback label to every user-user pair: observed follow
# edges get 1, anti-test pairs get 0 so they stay distinguishable from true
# edges once both are merged into the test table.
train_data_extended = [(follower, following, 1) for follower, following in train]
test_data_extended = [(follower, following, 1) for follower, following in test]
# NOTE(review): assumes anti_test holds pairs that are NOT edges of the graph;
# confirm against the code that produced train_test/anti_test.pkl. Ranking
# metrics that ignore the rating column will still count these rows as relevant.
anti_test_data_extended = [(follower, following, 0) for follower, following in anti_test]

In [8]:
# Materialise the labelled training pairs as a (user, item, rating) table and
# persist it; with to_csv defaults the index becomes the first, unnamed column.
train_df = pd.DataFrame(data=train_data_extended, columns=["user", "item", "rating"])
train_df.to_csv(path_or_buf='train_df.csv')

In [9]:
# Evaluation table = held-out pairs followed by the anti-test pairs.
# Build a new list rather than extending test_data_extended in place, so
# re-running this cell cannot append the anti-test rows a second time.
test_with_anti_test = test_data_extended + anti_test_data_extended
test_df = pd.DataFrame(test_with_anti_test, columns=["user", "item", "rating"])
test_df.to_csv('test_df.csv')

### Is there a way to include features?

In [97]:
# Profile table; the lookup in the next cell relies on its 'acct' and 'id' columns.
users_data = pd.read_csv(filepath_or_buffer="data_csv/profiles/extended_df_complete.csv")

In [98]:
# Id of the first row whose 'acct' equals 'Anas' (single .loc lookup instead of chained indexing).
users_data.loc[users_data['acct'] == 'Anas', 'id'].values[0]

109245646986992073

In [127]:
# Preview the first five training rows.
train_df.head(n=5)

Unnamed: 0,user,item,rating
0,cqz,jbigham,1
1,cqz,ryanatkn,1
2,cqz,axz,1
3,cqz,msbernst,1
4,cqz,qli,1


In [128]:
# Check the row count, non-null counts and column dtypes of the training table.
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22480 entries, 0 to 22479
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   user    22480 non-null  object
 1   item    22480 non-null  object
 2   rating  22480 non-null  int64 
dtypes: int64(1), object(2)
memory usage: 527.0+ KB


In [129]:
# Preview the first five test rows.
test_df.head(n=5)

Unnamed: 0,user,item,rating
0,cqz,kentrellowens,1
1,cqz,ruotongw,1
2,cqz,schaferj,1
3,Gillian,kgajos,1
4,Gillian,andreaforte,1


In [130]:
# Check the row count, non-null counts and column dtypes of the test table.
test_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18144 entries, 0 to 18143
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   user    18144 non-null  object
 1   item    18144 non-null  object
 2   rating  18144 non-null  int64 
dtypes: int64(1), object(2)
memory usage: 425.4+ KB


## Model Implementation

In [131]:
import sys

# Add the LightGCN directory to the import search path. Guarded so that
# re-running this cell does not keep appending duplicate entries to sys.path.
# NOTE(review): "/LightGCN" is an absolute path at the filesystem root; confirm
# that is intended rather than a path relative to the notebook.
if "/LightGCN" not in sys.path:
    sys.path.append("/LightGCN")

In [132]:
from LightGCN_modules import LightGCN
from LightGCN_modules import ImplicitCF
from LightGCN_modules import deeprec_utils
from LightGCN_modules import eval_utils

In [133]:
# Model implementation

In [134]:
# Wrap the train/test tables in the dataset object that is handed to LightGCN;
# the column names match the frames built earlier and the seed is fixed at 2023.
data = ImplicitCF.ImplicitCF(
    train=train_df, test=test_df, seed=2023,
    col_user="user", col_item="item", col_rating="rating",
)

In [135]:
# Hyperparameters for the embed_size=32 run: the YAML config path is passed
# together with explicit keyword settings.
hparam32 = deeprec_utils.prepare_hparams(
    "LightGCN_modules/lightgcn.yaml",
    # model
    n_layers=3, embed_size=32,
    # optimisation
    batch_size=512, epochs=10, learning_rate=0.01,
    # evaluation (the training log reports metrics at epochs 5 and 10)
    eval_epoch=5, top_k=10,
)

In [136]:
# Build the model from the hyperparameters and dataset, using the same seed (2023) as the dataset.
model32 = LightGCN.LightGCN(
    hparam32,
    data,
    seed=2023,
)

Already create adjacency matrix.
Already normalize adjacency matrix.
Using xavier initialization.


In [137]:
# Train the model; the loss is logged every epoch, with evaluation metrics added at epochs 5 and 10.
model32.fit()

Epoch 1 (train)0.6s: train loss = 0.50101 = (mf)0.50084 + (embed)0.00017
Epoch 2 (train)0.5s: train loss = 0.34159 = (mf)0.34111 + (embed)0.00049
Epoch 3 (train)0.5s: train loss = 0.34034 = (mf)0.33985 + (embed)0.00050
Epoch 4 (train)0.5s: train loss = 0.32445 = (mf)0.32390 + (embed)0.00055
Epoch 5 (train)0.4s + (eval)0.2s: train loss = 0.30454 = (mf)0.30391 + (embed)0.00064, recall = 0.14680, ndcg = 0.29229, precision = 0.21502, map = 0.09731
Epoch 6 (train)0.4s: train loss = 0.27704 = (mf)0.27624 + (embed)0.00079
Epoch 7 (train)0.5s: train loss = 0.25592 = (mf)0.25496 + (embed)0.00095
Epoch 8 (train)0.4s: train loss = 0.23965 = (mf)0.23852 + (embed)0.00113
Epoch 9 (train)0.4s: train loss = 0.23090 = (mf)0.22961 + (embed)0.00129
Epoch 10 (train)0.5s + (eval)0.2s: train loss = 0.21608 = (mf)0.21464 + (embed)0.00145, recall = 0.15906, ndcg = 0.31409, precision = 0.23475, map = 0.10601


In [138]:
# Top-10 scored items per user in test_df.
# NOTE(review): remove_seen=True presumably drops items already seen in training; confirm in LightGCN_modules.
topk_scores32 = model32.recommend_k_items(
    test_df,
    top_k=10,
    remove_seen=True,
)
topk_scores32.head(n=5)

Unnamed: 0,user,item,prediction
0,cqz,karger,3.905668
1,cqz,andresmh,3.894189
2,cqz,pg,3.859345
3,cqz,alicia,3.36449
4,cqz,bkeegan,3.32129


In [139]:
# NDCG@10 of the top-k recommendations, scored against test_df.
eval_ndcg32 = eval_utils.ndcg_at_k(
    test_df,
    topk_scores32,
    k=10,
)
eval_ndcg32

0.3140908081805034