### 1. Environment setup

#### 1.1. Dealing with dependencies

In [None]:
pip install -r requirements.txt

In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import networkx as nx
from sklearn.metrics import pairwise_distances
from cdlib import algorithms
from functions import (split_in_k_folds, get_number_of_keys, get_user_item_matrices, get_adjacency_matrices)
from surprise import (accuracy, Dataset, Reader, KNNBaseline, NormalPredictor, KNNBasic, KNNWithMeans, KNNWithZScore, 
                            KNNBaseline, SVD, SVDpp, NMF, SlopeOne, CoClustering, BaselineOnly)
from surprise.model_selection import PredefinedKFold

#### 1.2. Set parameters

In [None]:
# Number of folds; reused below when splitting the data and when looping over folds.
folds = 3

# One default-constructed instance of each Surprise algorithm imported above.
surprise_algorithms = [
    SVD(),
    SVDpp(),
    SlopeOne(),
    NMF(),
    NormalPredictor(),
    KNNBaseline(),
    KNNBasic(),
    KNNWithMeans(),
    KNNWithZScore(),
    BaselineOnly(),
    CoClustering(),
]

### 2. Loading data

In [None]:
def read_data_ml100k():
    """Load the ratings file into a DataFrame and count its users and items.

    The file is read from ``Datasets/u.data`` under the current working
    directory and parsed as tab-separated values without a header row
    (presumably the MovieLens 100k ratings file, going by the function
    name -- confirm against the dataset actually shipped).

    Returns:
        tuple: ``(m100k, num_users, num_items)`` where ``m100k`` is the
        DataFrame with columns ``user_id``, ``item_id``, ``rating`` and
        ``timestamp``, and the two counts are the numbers of distinct
        values in ``user_id`` and ``item_id``.
    """
    # os.path.join builds the path portably and avoids a local named `dir`,
    # which would shadow the builtin.
    data_path = os.path.join(os.getcwd(), 'Datasets', 'u.data')
    names = ['user_id', 'item_id', 'rating', 'timestamp']
    m100k = pd.read_table(
        data_path,
        sep='\t',
        header=None,
        names=names,
        engine='python',
    )
    num_users = m100k['user_id'].unique().shape[0]
    num_items = m100k['item_id'].unique().shape[0]
    return m100k, num_users, num_items
# Load the ratings once and report how many distinct users and items were found.
m100k, num_users, num_items = read_data_ml100k()
print(f'* * * A new file has been loaded. The dataframe contains {num_users} users and {num_items} items')

In [None]:
# Display the first rows of the loaded ratings table.
m100k.head()

### 3. Splitting data

In [None]:
# The (user_id, item_id) columns and the rating column are passed separately.
# NOTE(review): test_ratio=0.3 presumably holds out 30% of the rows per fold --
# confirm against split_in_k_folds in functions.py.
splits = split_in_k_folds(
    m100k[['user_id', 'item_id']],
    m100k['rating'],
    k=folds,
    test_ratio=0.3,
)

### 4. Pre-modeling

# Derive the per-split user-item matrices, then the adjacency matrices built from them.
# NOTE(review): the meaning of threshold=75 (percentile vs. absolute cut-off) is
# defined in get_adjacency_matrices -- confirm there.
rating_matrices = get_user_item_matrices(splits)
adjacency_matrices = get_adjacency_matrices(
    rating_matrices,
    metric_dist="cosine",
    threshold=75,
)

In [None]:
# Build one weighted graph per adjacency matrix, stored under 'graph-<i>'.
G = {}
for i in range(len(adjacency_matrices)):
    # The key must be formatted with the loop index: a literal 0 here would
    # build every graph from the first matrix only.
    # from_numpy_array replaces from_numpy_matrix, which was removed in
    # networkx 3.0; np.asarray accepts ndarray, np.matrix or DataFrame input.
    G[f'graph-{i}'] = nx.from_numpy_array(np.asarray(adjacency_matrices[f'AM{i}']))

In [None]:
# Run Louvain community detection on every graph; results are keyed by the
# integer graph index (0, 1, ...), not by the 'graph-<n>' name.
coms_louvain = {
    graph_idx: algorithms.louvain(
        G[f'graph-{graph_idx}'],
        weight='weight',
        resolution=1.,
        randomize=False,
    )
    for graph_idx in range(len(G))
}
# Bare expression so the notebook displays the resulting clusterings.
coms_louvain

In [None]:
# Label every training row with the index of the Louvain community that
# contains the row's user, storing it in a new 'community' column.
for i in range(len(coms_louvain)):
    # Build a node -> community-index lookup in one pass over the partition,
    # instead of scanning every community for every training row.
    # setdefault keeps the first community that lists a node.
    node_to_community = {}
    for community_idx, members in enumerate(coms_louvain[i].communities):
        for node in members:
            node_to_community.setdefault(node, community_idx)

    # user_id - 1 converts a user id into its graph node label (presumably ids
    # are 1-based and nodes are 0-based matrix positions -- confirm against
    # get_user_item_matrices).
    nodes = splits[f'X_train{i}']['user_id'] - 1
    labels = nodes.map(node_to_community)

    # Fail with a clear message rather than an opaque length-mismatch error
    # when some user is not covered by the partition.
    unmatched = labels.isna()
    if unmatched.any():
        missing_users = sorted(set((nodes[unmatched] + 1).tolist()))
        raise ValueError(
            f'Fold {i}: {len(missing_users)} user(s) are not in any community, '
            f'e.g. {missing_users[:10]}'
        )

    # Assign positionally (plain array) so no index alignment is involved.
    splits[f'X_train{i}']['community'] = labels.astype(int).to_numpy()

### 5. Recommender System

#### 5.1. Create surprise train/test objects

In [None]:


# For every fold and every subcommunity found in that fold's training data,
# build a Surprise trainset and the matching list of (user, item, rating)
# test tuples. Results are stored under 'train_fd-<fold>_sb-<community>' and
# 'test_fd-<fold>_sb-<community>'.
trainset = {}
testset = {}

reader = Reader(rating_scale=(1, 5))
rating_cols = ['user_id', 'item_id', 'rating']

for i in range(folds):

    # Original author's note on this line (translated): "it is overwriting".
    print(f'*-- Executing process for fold 0{i}')

    # Attach the ratings to the feature frames; assign() aligns on the index
    # and returns new frames, so the objects stored in `splits` are untouched.
    train = splits[f'X_train{i}'].assign(rating=splits[f'y_train{i}'])
    test = splits[f'X_test{i}'].assign(rating=splits[f'y_test{i}'])

    # Route each test row to the community its user was given in training.
    user_community = (
        train[['user_id', 'community']]
        .drop_duplicates(subset='user_id')
        .set_index('user_id')['community']
    )
    test['community'] = test['user_id'].map(user_community)
    # Users that never appear in the training data have no community and thus
    # no model to be scored with; drop their rows instead of failing.
    test = test.dropna(subset=['community'])

    cmts = train['community'].unique().tolist()
    print(f'---*-- {len(cmts)} subcommunities identified.')
    for cmt in cmts:
        print(f'------*-- Executing process for subcommunity 0{cmt}')
        train_ = train.loc[train['community'] == cmt, rating_cols]
        data = Dataset.load_from_df(train_, reader)
        trainset[f'train_fd-{i}_sb-{cmt}'] = data.build_full_trainset()

        test_ = test.loc[test['community'] == cmt, rating_cols]
        # Keep the raw ids exactly as they are in the training frame. Surprise
        # looks test ids up among the trainset's raw ids, so casting them to
        # str (while the trainset holds integers) makes every user and item
        # "unknown" and every prediction falls back to the same default value.
        testset[f'test_fd-{i}_sb-{cmt}'] = [
            (uid, iid, float(rating))
            for uid, iid, rating in test_.itertuples(index=False, name=None)
        ]
        print('**********')

In [None]:
# Open question from the author (translated): all y_pred values come out equal -- why?
# NOTE(review): worth checking that the raw user/item ids in the test tuples have
# the same type as the ids the trainset was built from.
# Fit a default SVD on one hard-coded fold/subcommunity pair (fold 0, subcommunity 3)
# and display its predictions for the matching test tuples.
train = trainset['train_fd-0_sb-3']
test = testset['test_fd-0_sb-3']
algo = SVD()
algo.fit(train)
predictions = algo.test(test)
predictions