In [1]:
import os
import pandas as pd
import numpy as np
from time import time
from tqdm.notebook import tqdm
import pickle

from scipy.sparse import coo_matrix, csr_matrix

from lightfm.cross_validation import random_train_test_split
from lightfm import LightFM
from lightfm.data import Dataset
from lightfm.evaluation import precision_at_k, auc_score, recall_at_k

from hyperopt import fmin, hp, tpe, Trials

from sklearn.base import clone

## Normalization
from sklearn.preprocessing import LabelEncoder, MinMaxScaler




In [2]:
# item_features = pd.read_csv("dataset/min_max_item_features.csv", index_col=0, encoding="UTF-8")
user_features = pd.read_csv("dataset/min_max_user_features.csv", index_col=0, encoding="UTF-8")
rating = pd.read_csv("dataset/rating.csv", index_col=0, encoding="UTF-8")
whisky = pd.read_csv("dataset/whisky.csv", index_col=0, encoding="UTF-8")

# Item Feature 재정의
---
- 다른 feature도 추가해보자

In [3]:
whisky

Unnamed: 0,whisky_id,link,image,name,avr_rating,category,location,total_rating,cost_rank,abv,...,herbal,oily,full_bodied,rich,sweet,salty,vanilla,tart,fruity,floral
0,0,/spirits/hibiki-21-year,https://ip-distiller.imgix.net/images/spirits/...,Hibiki 21 Year,9.01,Blended,Japan,861.0,5,43.00,...,30,20,80,80,85,15,20,25,85,50
1,1,/spirits/highland-park-18,https://ip-distiller.imgix.net/images/spirits/...,Highland Park 18 Year,8.89,Peated Single Malt,"Islands, Scotland",2988.0,4,43.00,...,20,40,70,80,70,40,50,50,70,20
2,2,/spirits/michter-s-20-year-kentucky-straight-b...,https://ip-distiller.imgix.net/images/spirits/...,Michter's 20 Year Kentucky Straight Bourbon (2...,9.00,Bourbon,"Kentucky, USA",10.0,5,57.10,...,0,15,80,90,85,5,30,25,35,0
3,3,/spirits/george-t-stagg-bourbon-fall-2019,https://ip-distiller.imgix.net/images/spirits/...,George T. Stagg Bourbon (Fall 2019),9.06,Bourbon,"Kentucky, USA",629.0,4,58.45,...,50,20,60,60,45,0,60,60,45,0
4,4,/spirits/bowmore-mizunara-cask-finish,https://ip-distiller.imgix.net/images/spirits/...,Bowmore Mizunara Cask Finish,7.86,Peated Single Malt,"Islay, Scotland",22.0,5,53.90,...,30,10,75,75,60,20,30,20,50,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3530,3530,/spirits/wild-turkey-spiced,https://ip-distiller.imgix.net/images/spirits/...,Wild Turkey Spiced,5.92,Flavored Whiskey,"Kentucky, USA",13.0,1,43.00,...,0,0,40,55,85,0,25,10,10,0
3531,3531,/spirits/seagram-s-seven-crown-american-blende...,https://ip-distiller.imgix.net/images/spirits/...,Seagram's 7 Crown American Blended Whiskey,4.31,Blended American Whiskey,USA,237.0,1,40.00,...,20,0,30,0,100,0,100,0,60,10
3532,3532,/spirits/11-wells-single-malt-whiskey,https://ip-distiller.imgix.net/images/spirits/...,11 Wells Single Malt Whiskey,6.00,American Single Malt,"Minnesota, USA",2.0,3,42.00,...,10,40,10,10,90,0,50,20,30,10
3533,3533,/spirits/immortal-spirits-early-whiskey,https://ip-distiller.imgix.net/images/spirits/...,Immortal Spirits Early Whiskey,2.00,Other Whiskey,"Oregon, USA",1.0,2,44.50,...,100,80,0,0,40,0,20,0,0,0


In [4]:
cols = whisky.columns.tolist()
cols = [cols[5]] + cols[8:10] + cols[11:] 
cols

['category',
 'cost_rank',
 'abv',
 'smoky',
 'peaty',
 'spicy',
 'herbal',
 'oily',
 'full_bodied',
 'rich',
 'sweet',
 'salty',
 'vanilla',
 'tart',
 'fruity',
 'floral']

## whisky 

In [5]:
numeric_cols = cols[2:]
categorical_cols = cols[:2]

In [6]:
cate_df = whisky[categorical_cols]
numeric_df = whisky[numeric_cols]

### categorical variable
---
- category
- const_rank

In [7]:
le_category = LabelEncoder()
le_cost_rank = LabelEncoder()

cate_df[categorical_cols[0]] = le_category.fit_transform(cate_df[categorical_cols[0]])
cate_df[categorical_cols[1]] = le_cost_rank.fit_transform(cate_df[categorical_cols[1]])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cate_df[categorical_cols[0]] = le_category.fit_transform(cate_df[categorical_cols[0]])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cate_df[categorical_cols[1]] = le_cost_rank.fit_transform(cate_df[categorical_cols[1]])


### numeric variable
---
- abv ~ floral

In [8]:
# Define the scalers
min_max_scaler = MinMaxScaler()

# Apply Min-Max normalization
numeric_df[numeric_cols] = min_max_scaler.fit_transform(whisky[numeric_cols])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  numeric_df[numeric_cols] = min_max_scaler.fit_transform(whisky[numeric_cols])


In [9]:
numeric_df

Unnamed: 0,abv,smoky,peaty,spicy,herbal,oily,full_bodied,rich,sweet,salty,vanilla,tart,fruity,floral
0,0.260,0.30,0.85,0.50,0.30,0.20,0.80,0.80,0.85,0.166667,0.20,0.25,0.85,0.526316
1,0.260,0.40,0.30,0.40,0.20,0.40,0.70,0.80,0.70,0.444444,0.50,0.50,0.70,0.210526
2,0.542,0.15,0.00,0.20,0.00,0.15,0.80,0.90,0.85,0.055556,0.30,0.25,0.35,0.000000
3,0.569,0.40,0.00,0.65,0.50,0.20,0.60,0.60,0.45,0.000000,0.60,0.60,0.45,0.000000
4,0.478,0.30,0.20,0.40,0.30,0.10,0.75,0.75,0.60,0.222222,0.30,0.20,0.50,0.052632
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3530,0.260,0.35,0.00,0.20,0.00,0.00,0.40,0.55,0.85,0.000000,0.25,0.10,0.10,0.000000
3531,0.200,0.00,0.00,0.40,0.20,0.00,0.30,0.00,1.00,0.000000,1.00,0.00,0.60,0.105263
3532,0.240,0.90,0.10,0.10,0.10,0.40,0.10,0.10,0.90,0.000000,0.50,0.20,0.30,0.105263
3533,0.290,0.00,0.00,0.10,1.00,0.80,0.00,0.00,0.40,0.000000,0.20,0.00,0.00,0.000000


In [10]:
numeric_df.describe()

Unnamed: 0,abv,smoky,peaty,spicy,herbal,oily,full_bodied,rich,sweet,salty,vanilla,tart,fruity,floral
count,3535.0,3535.0,3535.0,3535.0,3535.0,3535.0,3535.0,3535.0,3535.0,3535.0,3535.0,3535.0,3535.0,3535.0
mean,0.350434,0.223089,0.087644,0.459165,0.262068,0.285881,0.546673,0.54432,0.543825,0.10617,0.429819,0.252537,0.442031,0.217884
std,0.128288,0.205434,0.194206,0.204274,0.1945,0.196807,0.186168,0.191708,0.183895,0.164596,0.210966,0.183111,0.226912,0.2131
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.26,0.05,0.0,0.3,0.1,0.15,0.4,0.4,0.4,0.0,0.25,0.1,0.25,0.042105
50%,0.32,0.2,0.0,0.45,0.2,0.25,0.55,0.55,0.6,0.0,0.4,0.2,0.4,0.157895
75%,0.4,0.3,0.0,0.6,0.4,0.4,0.7,0.7,0.7,0.166667,0.6,0.4,0.6,0.315789
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [11]:
item_features = pd.concat([cate_df, numeric_df], axis=1)

In [12]:
item_features

Unnamed: 0,category,cost_rank,abv,smoky,peaty,spicy,herbal,oily,full_bodied,rich,sweet,salty,vanilla,tart,fruity,floral
0,1,4,0.260,0.30,0.85,0.50,0.30,0.20,0.80,0.80,0.85,0.166667,0.20,0.25,0.85,0.526316
1,12,3,0.260,0.40,0.30,0.40,0.20,0.40,0.70,0.80,0.70,0.444444,0.50,0.50,0.70,0.210526
2,5,4,0.542,0.15,0.00,0.20,0.00,0.15,0.80,0.90,0.85,0.055556,0.30,0.25,0.35,0.000000
3,5,3,0.569,0.40,0.00,0.65,0.50,0.20,0.60,0.60,0.45,0.000000,0.60,0.60,0.45,0.000000
4,12,4,0.478,0.30,0.20,0.40,0.30,0.10,0.75,0.75,0.60,0.222222,0.30,0.20,0.50,0.052632
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3530,8,0,0.260,0.35,0.00,0.20,0.00,0.00,0.40,0.55,0.85,0.000000,0.25,0.10,0.10,0.000000
3531,2,0,0.200,0.00,0.00,0.40,0.20,0.00,0.30,0.00,1.00,0.000000,1.00,0.00,0.60,0.105263
3532,0,2,0.240,0.90,0.10,0.10,0.10,0.40,0.10,0.10,0.90,0.000000,0.50,0.20,0.30,0.105263
3533,9,1,0.290,0.00,0.00,0.10,1.00,0.80,0.00,0.00,0.40,0.000000,0.20,0.00,0.00,0.000000


## user_features 재정의

In [13]:
user_features

Unnamed: 0,cost_rank,smoky,peaty,spicy,herbal,oily,full_bodied,rich,sweet,salty,vanilla,tart,fruity,floral
0,4.0,0.20,0.30,0.4,0.20,0.1,0.6,0.5,0.70,0.222222,0.70,0.60,0.70,0.526316
1,5.0,0.30,0.30,0.6,0.40,0.7,0.8,0.7,0.70,0.444444,0.60,0.40,0.70,0.421053
2,5.0,0.30,0.85,0.5,0.30,0.2,0.8,0.8,0.85,0.166667,0.20,0.25,0.85,0.526316
3,5.0,0.20,0.00,0.6,0.40,0.3,0.7,0.7,0.60,0.111111,0.40,0.50,0.80,0.315789
4,4.0,0.00,0.00,0.2,0.40,0.6,0.6,0.8,0.70,0.000000,0.30,0.20,0.80,0.736842
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
119510,1.0,0.00,0.00,0.4,0.20,0.0,0.3,0.0,1.00,0.000000,1.00,0.00,0.60,0.105263
119511,1.0,0.00,0.00,0.4,0.20,0.0,0.3,0.0,1.00,0.000000,1.00,0.00,0.60,0.105263
119512,1.0,0.31,0.00,0.4,0.29,0.0,0.3,0.4,1.00,0.000000,0.55,0.00,0.30,0.105263
119513,1.0,0.31,0.00,0.4,0.29,0.0,0.3,0.4,1.00,0.000000,0.55,0.00,0.30,0.105263


## user_features & item_features 정의

In [14]:
item_features = csr_matrix(item_features)
user_features = csr_matrix(user_features)

In [15]:
print(user_features)
user_features

  (0, 0)	4.0
  (0, 1)	0.2
  (0, 2)	0.3
  (0, 3)	0.4
  (0, 4)	0.2
  (0, 5)	0.1
  (0, 6)	0.6
  (0, 7)	0.5
  (0, 8)	0.7000000000000001
  (0, 9)	0.2222222222222222
  (0, 10)	0.7000000000000001
  (0, 11)	0.6
  (0, 12)	0.7000000000000001
  (0, 13)	0.5263157894736842
  (1, 0)	5.0
  (1, 1)	0.3
  (1, 2)	0.3
  (1, 3)	0.6
  (1, 4)	0.4
  (1, 5)	0.7000000000000001
  (1, 6)	0.8
  (1, 7)	0.7000000000000001
  (1, 8)	0.7000000000000001
  (1, 9)	0.4444444444444445
  (1, 10)	0.6
  :	:
  (119512, 7)	0.4
  (119512, 8)	1.0
  (119512, 10)	0.55
  (119512, 12)	0.3
  (119512, 13)	0.1052631578947368
  (119513, 0)	1.0
  (119513, 1)	0.31
  (119513, 3)	0.4
  (119513, 4)	0.29
  (119513, 6)	0.3
  (119513, 7)	0.4
  (119513, 8)	1.0
  (119513, 10)	0.55
  (119513, 12)	0.3
  (119513, 13)	0.1052631578947368
  (119514, 0)	1.0
  (119514, 1)	0.31
  (119514, 3)	0.4
  (119514, 4)	0.29
  (119514, 6)	0.3
  (119514, 7)	0.4
  (119514, 8)	1.0
  (119514, 10)	0.55
  (119514, 12)	0.3
  (119514, 13)	0.1052631578947368


<119515x14 sparse matrix of type '<class 'numpy.float64'>'
	with 1477379 stored elements in Compressed Sparse Row format>

In [16]:
unique_user = rating["user_id"].unique()

In [17]:
unique_user

array([     0,      1,      2, ..., 119512, 119513, 119514], dtype=int64)

### make Interactions 

In [24]:
rating.shape

(908176, 3)

In [25]:
def create_user_item_interactions(rating, n_users, n_items):
    interactions = coo_matrix(
        (
            rating['rating'].values,
            (rating['user_id'].values, rating["whisky_id"].values)
        ),
        shape=(n_users, n_items)
    )
    return interactions

In [26]:
interactions = create_user_item_interactions(rating, rating["user_id"].nunique(), whisky["whisky_id"].nunique())

In [27]:
print(interactions)
interactions

  (0, 0)	9.0
  (1, 0)	7.0
  (2, 0)	9.0
  (3, 0)	10.0
  (4, 0)	9.0
  (5, 0)	7.0
  (6, 0)	10.0
  (7, 0)	7.0
  (8, 0)	10.0
  (9, 0)	10.0
  (10, 0)	9.0
  (11, 0)	9.0
  (12, 0)	8.0
  (13, 0)	8.0
  (14, 0)	10.0
  (15, 0)	10.0
  (16, 0)	9.0
  (17, 0)	9.0
  (18, 0)	8.0
  (19, 0)	9.0
  (20, 0)	9.0
  (21, 0)	10.0
  (22, 0)	9.0
  (23, 0)	9.0
  (24, 0)	9.0
  :	:
  (7636, 3531)	4.0
  (56292, 3531)	2.0
  (2670, 3531)	2.0
  (786, 3531)	2.0
  (5942, 3531)	6.0
  (15561, 3531)	2.0
  (25623, 3532)	6.0
  (2885, 3532)	6.0
  (7368, 3533)	2.0
  (69554, 3534)	10.0
  (4484, 3534)	2.0
  (85727, 3534)	5.0
  (43900, 3534)	4.0
  (16070, 3534)	8.0
  (29998, 3534)	8.0
  (7895, 3534)	4.0
  (104052, 3534)	5.0
  (31152, 3534)	2.0
  (119512, 3534)	6.0
  (2509, 3534)	4.0
  (4828, 3534)	4.0
  (119513, 3534)	2.0
  (119514, 3534)	8.0
  (3123, 3534)	4.0
  (95648, 3534)	2.0


<119515x3535 sparse matrix of type '<class 'numpy.float64'>'
	with 908176 stored elements in COOrdinate format>

## Train_Test data split

In [28]:
train_interactions, test_interactions = random_train_test_split(interactions, test_percentage=0.2, random_state=42)

In [29]:
train_interactions, valid_interactions = random_train_test_split(train_interactions, test_percentage=0.2, random_state=42)

## Model Define

In [19]:
class LightFMResizable(LightFM):
    """A LightFM that resizes the model to accomodate new users,
    items, and features"""

    def fit_partial(
        self,
        interactions,
        user_features=None,
        item_features=None,
        sample_weight=None,
        epochs=1,
        num_threads=1,
        verbose=False,
    ):
        try:
            self._check_initialized()
            self._resize(interactions, user_features, item_features)
        except ValueError:
            # This is the first call so just fit without resizing
            pass

        super().fit_partial(
            interactions,
            user_features,
            item_features,
            sample_weight,
            epochs,
            num_threads,
            verbose,
        )

        return self

    def _resize(self, interactions, user_features=None, item_features=None):
        """Resizes the model to accommodate new users/items/features"""

        no_components = self.no_components
        no_user_features, no_item_features = interactions.shape  # default

        if hasattr(user_features, "shape"):
            no_user_features = user_features.shape[-1]
        if hasattr(item_features, "shape"):
            no_item_features = item_features.shape[-1]

        if (
            no_user_features == self.user_embeddings.shape[0]
            and no_item_features == self.item_embeddings.shape[0]
        ):
            return self

        new_model = clone(self)
        new_model._initialize(no_components, no_item_features, no_user_features)

        # update all attributes from self._check_initialized
        for attr in (
            "item_embeddings",
            "item_embedding_gradients",
            "item_embedding_momentum",
            "item_biases",
            "item_bias_gradients",
            "item_bias_momentum",
            "user_embeddings",
            "user_embedding_gradients",
            "user_embedding_momentum",
            "user_biases",
            "user_bias_gradients",
            "user_bias_momentum",
        ):
            # extend attribute matrices with new rows/cols from
            # freshly initialized model with right shape
            old_array = getattr(self, attr)
            old_slice = [slice(None, i) for i in old_array.shape]
            new_array = getattr(new_model, attr)
            new_array[tuple(old_slice)] = old_array
            setattr(self, attr, new_array)

        return self

In [None]:
import 

In [None]:
class WhizzleDataset(Dataset):
        """
        Add Normalization and 
        """

### Evaluation
---
- 1차 최적화 hyper parameter 결과
- Best Params : {'no_components': 40, 'learning_rate': 0.05, 'item_alpha': 0.01, 'user_alpha': 0.001}
- Best Values : 0.8078765869140625

In [47]:
model = LightFMResizable(
    no_components=40, learning_rate=0.05000, item_alpha=0.00500, user_alpha=0.00050,learning_schedule='adagrad',loss="warp", random_state=42
)
%time model.fit(interactions=train_interactions, user_features=user_features, item_features=item_features, epochs=10, verbose=True)

Epoch: 100%|███████████████████████████████████████████████████████████████████████████| 10/10 [05:34<00:00, 33.50s/it]

CPU times: total: 5min 34s
Wall time: 5min 35s





<__main__.LightFMResizable at 0x1d4c2d32f40>

In [48]:
test_precision = precision_at_k(model, test_interactions, user_features=user_features, item_features=item_features, k=9).mean()
test_recall = recall_at_k(model, test_interactions,  user_features=user_features, item_features=item_features, k=9).mean()
test_auc = auc_score(model, test_interactions, user_features=user_features, item_features=item_features).mean()

print("Train precision: %.5f" % test_precision)
print("Test precision: %.5f" % test_recall)
print("Test AUC Score : %.5f" % test_auc)

Train precision: 0.00519
Test precision: 0.02467
Test AUC Score : 0.81449


### save

In [51]:
# save the model to a file
with open('min_max_lightfm.pkl', 'wb') as f:
    pickle.dump(model, f)

### Dataset 

In [48]:
rating

Unnamed: 0,user_id,whisky_id,rating
0,0,0,9.0
1,1,0,7.0
2,2,0,9.0
3,3,0,10.0
4,4,0,9.0
...,...,...,...
908171,4828,3534,4.0
908172,119513,3534,2.0
908173,119514,3534,8.0
908174,3123,3534,4.0


In [56]:
from sklearn.model_selection import train_test_split

In [106]:
a_rating, b_rating = train_test_split(rating, test_size=0.2, random_state=42)

In [70]:
a_rating.shape

(726540, 3)

In [71]:
b_rating.shape

(181636, 3)

In [107]:
dataset = Dataset()

In [103]:
whisky.whisky_id.unique()

array([   0,    1,    2, ..., 3532, 3533, 3534], dtype=int64)

In [108]:
dataset.fit(
    users=rating["user_id"].unique(),
    items=whisky["whisky_id"].unique()
)

In [78]:
a_rating = a_rating.rename(columns={'whisky_id': 'item_id', "rating": "weight"})

In [None]:
def make_iter_form(data):
    data = []
    for i, row in data.iterrows():
        

In [99]:
print(a_rating)

        user_id  item_id  weight
267858     3548      981     7.0
41052     24671      108     9.0
304281     5515     1085     6.0
542634    18673     1987    10.0
561123     1167     2053     8.0
...         ...      ...     ...
259178    10270      889     8.0
365838    14215     1296     6.0
131932     3664      584     7.0
671155     5194     2468     4.0
121958    45578      557     9.0

[726540 rows x 3 columns]


In [123]:
[(row['user_id'], row['whisky_id'], row['rating']) for index, row in a_rating.iterrows()]

[(3548.0, 981.0, 7.0),
 (24671.0, 108.0, 9.0),
 (5515.0, 1085.0, 6.0),
 (18673.0, 1987.0, 10.0),
 (1167.0, 2053.0, 8.0),
 (72820.0, 1031.0, 8.0),
 (62124.0, 3222.0, 6.0),
 (16412.0, 1053.0, 7.0),
 (77546.0, 1097.0, 4.0),
 (1318.0, 1976.0, 8.0),
 (41126.0, 1217.0, 10.0),
 (16546.0, 94.0, 10.0),
 (24390.0, 2254.0, 8.0),
 (17195.0, 1502.0, 8.0),
 (96941.0, 3349.0, 8.0),
 (15621.0, 609.0, 6.0),
 (8858.0, 2254.0, 7.0),
 (61799.0, 3223.0, 7.0),
 (20573.0, 752.0, 8.0),
 (106204.0, 2462.0, 8.0),
 (57468.0, 1983.0, 8.0),
 (1272.0, 2252.0, 7.0),
 (5658.0, 1968.0, 10.0),
 (10701.0, 801.0, 10.0),
 (13100.0, 1361.0, 7.0),
 (18258.0, 2241.0, 6.0),
 (8008.0, 1644.0, 7.0),
 (48321.0, 2386.0, 8.0),
 (10484.0, 1335.0, 8.0),
 (3408.0, 3214.0, 8.0),
 (4245.0, 2123.0, 8.0),
 (77569.0, 1097.0, 8.0),
 (109182.0, 2530.0, 5.0),
 (22704.0, 2358.0, 7.0),
 (83503.0, 3021.0, 6.0),
 (29777.0, 290.0, 9.0),
 (13065.0, 1987.0, 8.0),
 (29708.0, 1206.0, 9.0),
 (43500.0, 460.0, 9.0),
 (98258.0, 2486.0, 6.0),
 (91139.0, 2

In [109]:
%time a_interactions, a_weights = dataset.build_interactions((row['user_id'], row['whisky_id'], row['rating']) for index, row in a_rating.iterrows())

CPU times: total: 29.9 s
Wall time: 30.4 s


In [111]:
b_rating.loc[b_rating["user_id"]==0]

Unnamed: 0,user_id,whisky_id,rating
0,0,0,9.0
231208,0,811,9.0
120472,0,546,8.0


In [121]:
a_rating.loc[a_rating["user_id"]==119513]

Unnamed: 0,user_id,whisky_id,rating
908172,119513,3534,2.0


In [110]:
print(a_rating.sort_values(by="user_id"))

        user_id  whisky_id  rating
419372        0       1370     8.0
548651        0       1994     6.0
656523        0       2451     3.0
491750        0       1769     8.0
300209        1       1081     9.0
...         ...        ...     ...
908039   119509       3531     3.0
908057   119510       3531    10.0
908128   119511       3531     8.0
908169   119512       3534     6.0
908172   119513       3534     2.0

[726540 rows x 3 columns]


In [113]:
a_rating

Unnamed: 0,user_id,whisky_id,rating
267858,3548,981,7.0
41052,24671,108,9.0
304281,5515,1085,6.0
542634,18673,1987,10.0
561123,1167,2053,8.0
...,...,...,...
259178,10270,889,8.0
365838,14215,1296,6.0
131932,3664,584,7.0
671155,5194,2468,4.0


In [119]:
a_weights.toarray()

array([[0., 0., 0., ..., 0., 0., 0.],
       [7., 0., 0., ..., 0., 0., 0.],
       [9., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 6.],
       [0., 0., 0., ..., 0., 0., 2.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

### b_rating 추가

In [None]:
dataset.fit_partial(users=)

### load

In [20]:
 mm_lfm = pickle.load(open('min_max_lightfm.pkl', 'rb'))

In [85]:
whisky.columns.tolist()

['whisky_id',
 'link',
 'image',
 'name',
 'avr_rating',
 'category',
 'location',
 'total_rating',
 'cost_rank',
 'abv',
 'cask_type',
 'smoky',
 'peaty',
 'spicy',
 'herbal',
 'oily',
 'full_bodied',
 'rich',
 'sweet',
 'salty',
 'vanilla',
 'tart',
 'fruity',
 'floral']

In [88]:
cols = ["name","category", "cost_rank", "abv", 'smoky','peaty','spicy','herbal','oily','full_bodied','rich','sweet','salty','vanilla','tart','fruity', 'floral']

In [90]:
wi = rating.loc[rating["user_id"] == 4828].sort_values(by="rating", ascending=False).whisky_id
whisky.loc[wi, cols]

Unnamed: 0,name,category,cost_rank,abv,smoky,peaty,spicy,herbal,oily,full_bodied,rich,sweet,salty,vanilla,tart,fruity,floral
10,"Booker's Bourbon Batch 2015-01 ""Big Man, Small...",Bourbon,3,64.35,15,0,35,5,20,80,80,75,10,30,20,40,0
1089,Glenlivet 18 Year,Single Malt,3,43.0,20,0,50,0,20,70,70,80,0,60,50,80,30
2227,The Irishman Single Malt,Single Malt,2,40.0,20,0,60,30,20,50,50,60,0,40,50,80,30
1105,Maker's Mark 46 French Oaked,Bourbon,2,47.0,30,0,80,20,10,60,60,70,10,80,50,30,20
1994,Maker's Mark Bourbon,Bourbon,2,45.0,10,0,40,30,10,50,70,80,0,90,20,10,0
609,Lagavulin 16 Year,Peated Single Malt,3,43.0,60,80,60,50,70,80,70,40,60,40,20,40,10
3008,Smooth Ambler Old Scout Bourbon 7 Year,Bourbon,2,49.5,0,0,25,15,0,60,60,70,0,15,10,15,0
410,Elijah Craig 12 Year,Bourbon,2,47.0,40,0,50,30,40,60,50,70,50,40,20,40,20
3381,Concannon Irish Whiskey,Blended,1,40.0,54,0,32,85,62,68,42,78,43,75,28,73,60
1656,Tomatin 18 Year Oloroso Sherry Casks,Single Malt,3,46.0,50,20,60,60,20,50,50,60,20,30,30,60,60


In [91]:
np.arange(3535)

array([   0,    1,    2, ..., 3532, 3533, 3534])

In [55]:

## 신규 사용자
mm_lfm.predict(0, np.arange(3535), )

{'loss': 'warp',
 'learning_schedule': 'adagrad',
 'no_components': 40,
 'learning_rate': 0.05,
 'k': 5,
 'n': 10,
 'rho': 0.95,
 'epsilon': 1e-06,
 'max_sampled': 10,
 'item_alpha': 0.005,
 'user_alpha': 0.0005,
 'random_state': RandomState(MT19937) at 0x1D4C2E89A40}