In [1]:
import itertools

import pandas as pd

### Import Data

In [2]:
# Read the sampled ratings and discard the first CSV column in one step
# (presumably an index column saved alongside the data -- confirm).
df_sample_train = pd.read_csv('sample_train.csv').iloc[:, 1:]

In [3]:
# Preview the loaded ratings frame (the bare expression is rendered as the
# cell output).
df_sample_train

Unnamed: 0,user_id,item_id,rating
0,2,2,4.0
1,2,3726,3.0
2,2,1499,3.0
3,2,3595,4.0
4,2,1488,1.0
...,...,...,...
5970580,162536,151,4.0
5970581,162536,855,3.0
5970582,162536,546,2.0
5970583,162536,1577,4.0


In [4]:
from surprise import Reader, Dataset
from surprise import SVD
from surprise.model_selection import cross_validate

In [5]:
# Declare the rating scale (1 to 5) and wrap the ratings frame in a Surprise
# dataset so it can be passed to the cross-validation helpers.
reader = Reader(rating_scale=(1, 5))
data_train = Dataset.load_from_df(df=df_sample_train, reader=reader)

### Hyperparameter Tuning On Random Sample

In [6]:
import time

In [7]:
# Candidate values for each SVD hyper-parameter explored by the grid search
# (3 values per parameter -> 81 combinations).
svd_param_grid = dict(
    n_factors=[20, 50, 80],
    n_epochs=[40, 60, 80],
    lr_all=[0.002, 0.005, 0.007],
    reg_all=[0.02, 0.05, 0.1],
)

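Hyperparameter tuning is done "manually" (with explicit loops over the grid) because the built-in search function (e.g. Surprise's `GridSearchCV`) offers no checkpointing: if the run is interrupted, the results of the combinations already evaluated would be lost.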

In [None]:
# Manual grid search over every combination in `svd_param_grid`.
#
# Each combination is evaluated with 3-fold cross-validation (RMSE). A finished
# run is stored in `models` immediately, so partial results remain available in
# the kernel if the loop is interrupted. Combinations that raise are recorded
# in `fails` (together with the error) and the search moves on.
models = {}  # model_count -> record of one successful run

model_template = {
    'status':False,
    'model':'',
    'n_factors':'',
    'n_epochs':'',
    'lr_all':'',
    'reg_all':'',
    'result':''}

fails = {}  # fail_count (1-based) -> parameters of a combination that raised
fail_count = 0
fail_template = {
    'n_factors':'',
    'n_epochs':'',
    'lr_all':'',
    'reg_all':'',
    'error':''
}


model_count = 0
start_time_svd_tune = time.time()

# itertools.product iterates in the same order as the equivalent nested loops
# (the right-most parameter varies fastest).
for factor, epoch, lr, reg in itertools.product(
    svd_param_grid['n_factors'],
    svd_param_grid['n_epochs'],
    svd_param_grid['lr_all'],
    svd_param_grid['reg_all'],
):
    params = {'n_factors': factor, 'n_epochs': epoch, 'lr_all': lr, 'reg_all': reg}

    # Only the model fit/evaluation is guarded. `except Exception` (rather than
    # a bare `except`) lets KeyboardInterrupt stop the loop and keeps
    # bookkeeping bugs from being silently counted as model failures.
    try:
        print('running model ',model_count)
        start_one = time.time()
        current_svd = SVD(**params)
        result = cross_validate(current_svd, data_train, measures=["RMSE"], cv=3, verbose=True)
        end_one = time.time()
    except Exception as exc:
        fail_count +=1
        print('# of fails : ', fail_count, ' - ', repr(exc))
        fail_template_current = fail_template.copy()
        fail_template_current.update(params)
        fail_template_current['error'] = repr(exc)

        fails[fail_count] = fail_template_current
        continue

    model_template_current = model_template.copy()
    model_template_current.update(params)
    model_template_current['status'] = True
    model_template_current['result'] = result
    model_template_current['model'] = model_count
    models[model_count] = model_template_current

    # Total time elapsed since the search started. `end_time_svd_tune` is only
    # assigned after the loop, so it must not be read here.
    print(model_count,' training done ', time.strftime('%H:%M:%S', time.gmtime(end_one - start_time_svd_tune)))
    model_count += 1

end_time_svd_tune = time.time()
print(time.strftime('%H:%M:%S', time.gmtime(end_time_svd_tune - start_time_svd_tune)))