In [219]:
import pandas as pd
invest = pd.read_excel('invest_data.xlsx').fillna(0)
invest.head(20)

Unnamed: 0,客戶,基金,基金簡稱,風險等級,金額
0,100066,1,野村優質基金-累積類型新臺幣計價,RR4,3000
1,100066,5,野村 e科技基金,RR5,36079
2,100066,7,野村中小基金-累積類型,RR5,40000
3,100066,9,野村全球高股息基金-累積型新臺幣計價,RR4,3000
4,100066,30,野村精選貨幣市場基金,RR1,0
5,100066,37,野村全球生技醫療基金,RR4,9617
6,100066,46,野村新興傘型之大俄羅斯基金,RR5,54273
7,100066,48,野村中國機會基金,RR5,119385
8,100066,49,野村全球美元投資級公司債基金-累積型,RR2,5000
9,100066,51,野村巴西證券投資信託基金,RR5,88041


In [220]:
invest = invest.infer_objects()

In [221]:
invest.dtypes

客戶       int64
基金      object
基金簡稱    object
風險等級    object
金額       int64
dtype: object

In [222]:
fund = invest[['基金','基金簡稱','風險等級']].drop_duplicates('基金','first')

In [223]:
fund_dict = fund.set_index('基金').T.to_dict()

In [224]:
fund_dict[1]

{'基金簡稱': '野村優質基金-累積類型新臺幣計價', '風險等級': 'RR4'}

In [225]:
invest_sum = invest.groupby(by='客戶', as_index=True,sort=False).sum()

In [226]:
invest_sum.rename(columns={'金額':'總金額'},inplace=True)
invest_sum.head()

Unnamed: 0_level_0,總金額
客戶,Unnamed: 1_level_1
100066,601432
100542,3000
100558,12000
100606,12000
100657,5000


In [227]:
invest = pd.merge(invest, invest_sum, on=['客戶'])

In [228]:
invest.head()

Unnamed: 0,客戶,基金,基金簡稱,風險等級,金額,總金額
0,100066,1,野村優質基金-累積類型新臺幣計價,RR4,3000,601432
1,100066,5,野村 e科技基金,RR5,36079,601432
2,100066,7,野村中小基金-累積類型,RR5,40000,601432
3,100066,9,野村全球高股息基金-累積型新臺幣計價,RR4,3000,601432
4,100066,30,野村精選貨幣市場基金,RR1,0,601432


In [229]:
invest['weight'] = invest['金額']/invest['總金額']

In [230]:
invest.head(20)

Unnamed: 0,客戶,基金,基金簡稱,風險等級,金額,總金額,weight
0,100066,1,野村優質基金-累積類型新臺幣計價,RR4,3000,601432,0.004988
1,100066,5,野村 e科技基金,RR5,36079,601432,0.059988
2,100066,7,野村中小基金-累積類型,RR5,40000,601432,0.066508
3,100066,9,野村全球高股息基金-累積型新臺幣計價,RR4,3000,601432,0.004988
4,100066,30,野村精選貨幣市場基金,RR1,0,601432,0.0
5,100066,37,野村全球生技醫療基金,RR4,9617,601432,0.01599
6,100066,46,野村新興傘型之大俄羅斯基金,RR5,54273,601432,0.09024
7,100066,48,野村中國機會基金,RR5,119385,601432,0.198501
8,100066,49,野村全球美元投資級公司債基金-累積型,RR2,5000,601432,0.008313
9,100066,51,野村巴西證券投資信託基金,RR5,88041,601432,0.146386


In [231]:
from lightfm.data import Dataset

In [232]:
data = zip(list(invest['客戶']),list(invest['基金']),list(invest['weight']))

In [233]:
dataset = Dataset()
user_id = list(set(invest['客戶']))
item_id = list(set(invest['基金']))
dataset.fit(user_id,item_id)
COO = dataset.build_interactions(data)

In [234]:
id_map = dataset.mapping()

In [235]:
user_map = id_map[0]
internal_item_map = id_map[2]

In [236]:
user_map[123855]

181

In [237]:
item_map={}
for k in internal_item_map.keys():
    v = internal_item_map[k]
    item_map[v]=k

In [238]:
item_map[55]

88

In [240]:
from lightfm import LightFM
model = LightFM(loss='warp')
model.fit(COO[0],epochs=100,num_threads=4)

<lightfm.lightfm.LightFM at 0x2eb10be0320>

In [241]:
query_id = 105417

In [242]:
import numpy as np
scores = model.predict(user_map[105417], np.arange(83))

In [245]:
COO[0].shape

(1522, 83)

In [246]:
def recommendation(model, data, user_ids,user_map,item_map):
    n_users, n_items = data.shape
    for user_id in user_ids:
        known_positives = invest[invest['客戶']==user_id]
        scores = model.predict(user_map[user_id], np.arange(n_items))
        top_items = []
        count = 0
        known_positive_list = list(known_positives['基金'])
        for x in np.argsort(-scores):
            if(item_map[x] not in known_positive_list):
                top_items.append(item_map[x])
                count += 1
                if(count==3):
                    break
        print("User %s" % user_id)
        print("Known positives:")
        print(known_positives[['客戶','基金','基金簡稱','風險等級','金額']].head())
        print("Recommended:")
        for x in top_items[:3]:
            print(x,':',fund_dict[x])

In [247]:
recommendation(model,COO[0],[query_id],user_map,item_map)

User 105417
Known positives:
         客戶  基金              基金簡稱 風險等級    金額
125  105417   1  野村優質基金-累積類型新臺幣計價  RR4  3000
126  105417   7       野村中小基金-累積類型  RR5  3000
127  105417  23          野村積極成長基金  RR4  3000
128  105417  28           野村高科技基金  RR5  3000
129  105417  93     野村日本領先基金-累積類型  RR4  3000
Recommended:
36 : {'基金簡稱': '野村台灣高股息基金', '風險等級': 'RR4'}
19 : {'基金簡稱': '野村鴻運基金', '風險等級': 'RR4'}
18 : {'基金簡稱': '野村台灣運籌基金', '風險等級': 'RR4'}
