In [1]:
import os
import pandas as pd
import numpy as np
from time import time
from tqdm.notebook import tqdm
import pickle

from scipy.sparse import coo_matrix, csr_matrix

from lightfm.cross_validation import random_train_test_split
from lightfm import LightFM
from lightfm.evaluation import precision_at_k, auc_score, recall_at_k



In [2]:
item_features = pd.read_csv("dataset/item_features.csv", index_col=0, encoding="UTF-8")

In [3]:
item_features

Unnamed: 0,category,price_tier,abv,smoky,peaty,spicy,herbal,oily,body,rich,sweet,salty,vanilla,tart,fruity,floral
0,0,0,0.000,0.00,0.00,0.00,0.00,0.00,0.0,0.00,0.00,0.000000,0.00,0.00,0.00,0.000000
1,1,4,0.260,0.30,0.85,0.50,0.30,0.20,0.8,0.80,0.85,0.166667,0.20,0.25,0.85,0.526316
2,12,3,0.260,0.40,0.30,0.40,0.20,0.40,0.7,0.80,0.70,0.444444,0.50,0.50,0.70,0.210526
3,5,4,0.542,0.15,0.00,0.20,0.00,0.15,0.8,0.90,0.85,0.055556,0.30,0.25,0.35,0.000000
4,5,3,0.569,0.40,0.00,0.65,0.50,0.20,0.6,0.60,0.45,0.000000,0.60,0.60,0.45,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3531,8,0,0.260,0.35,0.00,0.20,0.00,0.00,0.4,0.55,0.85,0.000000,0.25,0.10,0.10,0.000000
3532,2,0,0.200,0.00,0.00,0.40,0.20,0.00,0.3,0.00,1.00,0.000000,1.00,0.00,0.60,0.105263
3533,0,2,0.240,0.90,0.10,0.10,0.10,0.40,0.1,0.10,0.90,0.000000,0.50,0.20,0.30,0.105263
3534,9,1,0.290,0.00,0.00,0.10,1.00,0.80,0.0,0.00,0.40,0.000000,0.20,0.00,0.00,0.000000


2번 위스키
4, 0.15, 0.00, 0.20, 0.00, 0.15, 0.80, 0.90, 0.85, 0.055556, 0.30, 0.25, 0.35, 0.000000
4번 위스키
4, 0.30, 0.20, 0.40, 0.30, 0.10, 0.75, 0.75, 0.60, 0.222222, 0.30, 0.20, 0.50, 0.052632

In [4]:
user_features = pd.read_csv("dataset/user_features.csv", index_col=0, encoding="UTF-8")

In [5]:
user_features

Unnamed: 0,price_tier,smoky,peaty,spicy,herbal,oily,body,rich,sweet,salty,vanilla,tart,fruity,floral
0,0,0.00,0.00,0.00,0.00,0.00,0.0,0.0,0.00,0.000000,0.00,0.00,0.00,0.000000
1,5,0.30,0.85,0.50,0.30,0.20,0.8,0.8,0.85,0.166667,0.20,0.25,0.85,0.526316
2,5,0.30,0.00,0.40,0.10,0.30,0.7,0.8,0.80,0.111111,0.60,0.20,0.90,0.105263
3,5,0.25,0.00,0.45,0.20,0.15,0.5,0.5,0.60,0.000000,0.30,0.55,0.70,0.157895
4,5,0.30,0.85,0.50,0.30,0.20,0.8,0.8,0.85,0.166667,0.20,0.25,0.85,0.526316
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
119511,5,0.00,0.00,0.40,0.20,0.00,0.3,0.0,1.00,0.000000,1.00,0.00,0.60,0.105263
119512,5,0.00,0.00,0.40,0.20,0.00,0.3,0.0,1.00,0.000000,1.00,0.00,0.60,0.105263
119513,5,0.31,0.00,0.40,0.29,0.00,0.3,0.4,1.00,0.000000,0.55,0.00,0.30,0.105263
119514,5,0.31,0.00,0.40,0.29,0.00,0.3,0.4,1.00,0.000000,0.55,0.00,0.30,0.105263


### load model

In [6]:
# Load the saved model
with open('model/rec_model.pkl', 'rb') as f:
    model = pickle.load(f)

In [7]:
def load_rec_model():
    model = pickle.load(open('model/rec_model.pkl', "rb"))
    return model

In [71]:
def make_user_features(preference):
#     my_features = [preference["price_tier"]] + list((preference["flavor"]).values())
#     if preference["user_id"] == 0: 
#         return csr_matrix(my_features)
#     else:
    user_features = pd.read_csv("dataset/user_features.csv", index_col=0, encoding="UTF-8")
#         user_id = 119515+ preference["user_id"]
#         user_features.iloc[0] = my_features
    return csr_matrix(user_features)


In [72]:
def predict_personal_whisky(preference, item_features):
    model = load_rec_model()
    user_features = make_user_features(preference)
    print(user_features)
    item_ids = np.arange(item_features.shape[0])
    print(item_ids)
    scores = model.predict(
        user_ids=preference["user_id"]
        if preference["user_id"] == 0
        else 119515 + preference["user_id"],
        item_ids=item_ids,
        item_features=csr_matrix(item_features),
        user_features=user_features,
    )
    return np.argsort(-scores).tolist()


In [73]:
preference = {
"user_id": -848,
"price_tier": 5,
"flavor": {
    "smoky":0.165,
    "peaty": 0.40,
    "spicy": 0.810,
    "herbal": 0.50,
    "oily":0.05,
    "body":0.950,
    "rich":0.10,
    "sweet":0.85,
    "salty":0.1432,
    "vanilla":0.30,
    "tart":0.25,
    "fruity":0.35,
    "floral":0.000000
    }
}

In [74]:
result = predict_personal_whisky(preference, item_features)

  (1, 0)	5.0
  (1, 1)	0.3
  (1, 2)	0.85
  (1, 3)	0.5
  (1, 4)	0.3
  (1, 5)	0.2
  (1, 6)	0.8
  (1, 7)	0.8
  (1, 8)	0.85
  (1, 9)	0.1666666666666666
  (1, 10)	0.2
  (1, 11)	0.25
  (1, 12)	0.85
  (1, 13)	0.5263157894736842
  (2, 0)	5.0
  (2, 1)	0.3
  (2, 3)	0.4
  (2, 4)	0.1
  (2, 5)	0.3
  (2, 6)	0.7000000000000001
  (2, 7)	0.8
  (2, 8)	0.8
  (2, 9)	0.1111111111111111
  (2, 10)	0.6
  (2, 11)	0.2
  :	:
  (119513, 7)	0.4
  (119513, 8)	1.0
  (119513, 10)	0.55
  (119513, 12)	0.3
  (119513, 13)	0.1052631578947368
  (119514, 0)	5.0
  (119514, 1)	0.31
  (119514, 3)	0.4
  (119514, 4)	0.29
  (119514, 6)	0.3
  (119514, 7)	0.4
  (119514, 8)	1.0
  (119514, 10)	0.55
  (119514, 12)	0.3
  (119514, 13)	0.1052631578947368
  (119515, 0)	5.0
  (119515, 1)	0.31
  (119515, 3)	0.4
  (119515, 4)	0.29
  (119515, 6)	0.3
  (119515, 7)	0.4
  (119515, 8)	1.0
  (119515, 10)	0.55
  (119515, 12)	0.3
  (119515, 13)	0.1052631578947368
[   0    1    2 ... 3533 3534 3535]


In [75]:
result

[2642,
 3124,
 2830,
 3210,
 3222,
 404,
 3442,
 1763,
 2075,
 2433,
 1989,
 675,
 3117,
 1681,
 551,
 3338,
 2637,
 3227,
 3288,
 3382,
 238,
 690,
 1181,
 1087,
 3381,
 3287,
 1990,
 1318,
 2664,
 3340,
 3003,
 3032,
 1682,
 3007,
 3286,
 2471,
 1201,
 1634,
 2168,
 410,
 3334,
 264,
 2636,
 2452,
 1463,
 2559,
 3289,
 1104,
 2829,
 1370,
 1652,
 2972,
 2994,
 1943,
 3001,
 147,
 3387,
 1359,
 2297,
 21,
 950,
 2463,
 2998,
 3379,
 2410,
 779,
 1467,
 33,
 1362,
 2472,
 3135,
 1369,
 1642,
 807,
 954,
 1180,
 2218,
 1215,
 3385,
 1079,
 2784,
 1929,
 2488,
 599,
 2660,
 609,
 2851,
 1341,
 790,
 2368,
 2345,
 2487,
 524,
 2638,
 3516,
 787,
 1749,
 260,
 3363,
 319,
 2438,
 2679,
 2667,
 239,
 3487,
 1032,
 3351,
 3002,
 1363,
 2654,
 3402,
 1569,
 671,
 403,
 2656,
 3103,
 2458,
 1622,
 673,
 1791,
 375,
 1945,
 1946,
 786,
 2714,
 2984,
 1023,
 2852,
 142,
 1345,
 3509,
 1019,
 255,
 2278,
 2213,
 1608,
 1337,
 670,
 2753,
 1565,
 2800,
 2383,
 3284,
 1076,
 3266,
 2137,
 1644,
 16

In [37]:
result

[2642,
 3124,
 2830,
 3210,
 3222,
 404,
 3442,
 1763,
 2075,
 2433,
 1989,
 675,
 3117,
 1681,
 551,
 3338,
 2637,
 3227,
 3288,
 3382,
 238,
 690,
 1181,
 1087,
 3381,
 3287,
 1990,
 1318,
 2664,
 3340,
 3003,
 3032,
 1682,
 3007,
 3286,
 2471,
 1201,
 1634,
 2168,
 410,
 3334,
 264,
 2636,
 2452,
 1463,
 2559,
 3289,
 1104,
 2829,
 1370,
 1652,
 2972,
 2994,
 1943,
 3001,
 147,
 3387,
 1359,
 2297,
 21,
 950,
 2463,
 2998,
 3379,
 2410,
 779,
 1467,
 33,
 1362,
 2472,
 3135,
 1369,
 1642,
 807,
 954,
 1180,
 2218,
 1215,
 3385,
 1079,
 2784,
 1929,
 2488,
 599,
 2660,
 609,
 2851,
 1341,
 790,
 2368,
 2345,
 2487,
 524,
 2638,
 3516,
 787,
 1749,
 260,
 3363,
 319,
 2438,
 2679,
 2667,
 239,
 3487,
 1032,
 3351,
 3002,
 1363,
 2654,
 3402,
 1569,
 671,
 403,
 2656,
 3103,
 2458,
 1622,
 673,
 1791,
 375,
 1945,
 1946,
 786,
 2714,
 2984,
 1023,
 2852,
 142,
 1345,
 3509,
 1019,
 255,
 2278,
 2213,
 1608,
 1337,
 670,
 2753,
 1565,
 2800,
 2383,
 3284,
 1076,
 3266,
 2137,
 1644,
 16

In [93]:
model.

TypeError: 'numpy.ndarray' object is not callable

In [None]:
item_features = csr_matrix(item_features)
user_features = csr_matrix(user_features)

In [None]:
## new user
scores = model.predict(user_ids=4, item_ids=item_ids, item_features=item_features, user_features=user_features)

In [None]:
sorted(scores, reverse=True)

In [None]:
np.argsort(-scores)

In [None]:
a = np.argsort(-scores)[:20]

In [None]:
whisky = pd.read_csv("../dataset/whisky.csv", index_col=0, encoding="UTF-8")

In [None]:
whisky.iloc[2]

In [None]:
whisky.iloc[a].loc[:, ["category", "price_tier","abv", "smoky","peaty","spicy","herbal","oily","body","rich","sweet", "salty","vanilla","tart","fruity","floral"]]

In [None]:
def sample_recommendation(model, data, user_id, item_features, user_features, cost_rank):
    # user_ids, item_ids 는 비교하고자하는 user-item pair를 적용한다.
    # 즉, 우리는 user_id 하나와 all_item or filtering_item_ids 를 비교하면 된다. 
    # item_features, user_features 는 누적이어야 한다.
    n_items = item_features.shape[0]
    
    scores = model.predict(user_ids=user_id, item_ids=np.arange(n_items), item_features=item_features, user_features=user_features)
    print(np.argsort(-scores))
    return np.argsort(-scores)


In [None]:
item_features = pd.read_csv("dataset/item_features.csv", index_col=0, encoding="UTF-8")

In [None]:
item_features

2번 위스키
4, 0.15, 0.00, 0.20, 0.00, 0.15, 0.80, 0.90, 0.85, 0.055556, 0.30, 0.25, 0.35, 0.000000
4번 위스키
4, 0.30, 0.20, 0.40, 0.30, 0.10, 0.75, 0.75, 0.60, 0.222222, 0.30, 0.20, 0.50, 0.052632

In [None]:
user_features = [4, 0.30, 0.20, 0.40, 0.30, 0.10, 0.75, 0.75, 0.60, 0.222222, 0.30, 0.20, 0.50, 0.052632]

In [None]:
item_features = csr_matrix(item_features)
user_features = csr_matrix(user_features)

### load model

In [None]:
# Load the saved model
with open('model/model_v4.pkl', 'rb') as f:
    model1 = pickle.load(f)

with open('model/rec_model.pkl', 'rb') as f:
    model2= pickle.load(f)


In [None]:
model2.get_params()

In [None]:
model1.get_params()

In [None]:
model1.get_item_representations()

In [None]:
model2.get_item_representations()

## Predict

In [None]:
item_ids = np.arange(item_features.shape[0])

In [None]:
## new user
scores = model.predict(user_ids=0, item_ids=item_ids, item_features=item_features, user_features=user_features)

In [None]:
sorted(scores, reverse=True)

In [None]:
np.argsort(-scores)

In [None]:
a = np.argsort(-scores)[:20]

In [None]:
whisky = pd.read_csv("../dataset/whisky.csv", index_col=0, encoding="UTF-8")

In [None]:
whisky.iloc[2]

In [None]:
whisky.iloc[a].loc[:, ["category", "price_tier","abv", "smoky","peaty","spicy","herbal","oily","body","rich","sweet", "salty","vanilla","tart","fruity","floral"]]