#### Import libraries

In [26]:
import numpy as np
import pandas as pd

#### Parameters  

In [38]:
# Path of the input CSV file
csv_in = 'dm-end1-3.csv'
# Minimum number of non-null similarities (items in common with the target
# user's ratings) an item must have before a score is predicted for it
min_common_items = 3

# Raise the pandas display limits so that more rows and columns are shown
pd.set_option('display.max_rows', 999)
pd.set_option('display.max_columns', 999)

#### Read CSV file  

In [39]:
# Load the input table. A comma delimiter, no skipped rows and a header in
# the first row are the pandas defaults, so they are not spelled out.
df_orig = pd.read_csv(csv_in)

print(df_orig.shape)
# info() prints its report itself and returns None; wrapping it in print()
# would add a stray "None" line to the output.
df_orig.info()
display(df_orig.head())

(50, 10)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   A       23 non-null     float64
 1   B       24 non-null     float64
 2   C       23 non-null     float64
 3   D       28 non-null     float64
 4   E       28 non-null     float64
 5   F       30 non-null     float64
 6   G       25 non-null     float64
 7   H       25 non-null     float64
 8   I       30 non-null     float64
 9   J       29 non-null     float64
dtypes: float64(10)
memory usage: 4.0 KB
None


Unnamed: 0,A,B,C,D,E,F,G,H,I,J
0,4.0,,,4.0,,,,1.0,0.0,
1,,,,3.0,4.0,0.0,1.0,,2.0,3.0
2,3.0,2.0,0.0,,2.0,4.0,1.0,,,
3,,,0.0,,1.0,1.0,,0.0,,4.0
4,,,,,,3.0,3.0,4.0,1.0,


In [40]:
# Turn every -1 into NaN
# (presumably -1 is the "no rating" marker in the CSV -- TODO confirm)
df_orig = df_orig.replace(to_replace=-1, value=np.nan)
display(df_orig.head())

Unnamed: 0,A,B,C,D,E,F,G,H,I,J
0,4.0,,,4.0,,,,1.0,0.0,
1,,,,3.0,4.0,0.0,1.0,,2.0,3.0
2,3.0,2.0,0.0,,2.0,4.0,1.0,,,
3,,,0.0,,1.0,1.0,,0.0,,4.0
4,,,,,,3.0,3.0,4.0,1.0,


相関行列を計算し、df_corrに格納   
Compute the correlation matrix between the columns and store it in the DataFrame `df_corr`.


In [41]:
# Pairwise Pearson correlation between the columns of df_orig; a pair of
# columns sharing fewer than 12 non-NaN rows is left as NaN.
df_corr = df_orig.corr(method='pearson', min_periods=12)

In [42]:
# Show the whole correlation matrix as this cell's output
df_corr

Unnamed: 0,A,B,C,D,E,F,G,H,I,J
A,1.0,,,0.124781,-0.127381,,,,-0.23059,0.521481
B,,1.0,-0.063112,,-0.297775,,-0.319334,,,0.441975
C,,-0.063112,1.0,,0.376275,-0.34241,-0.050043,,,-0.593291
D,0.124781,,,1.0,-0.102489,0.062943,-0.023618,0.042618,0.159281,0.36445
E,-0.127381,-0.297775,0.376275,-0.102489,1.0,-0.348724,-0.316176,0.29582,-0.380018,0.011155
F,,,-0.34241,0.062943,-0.348724,1.0,0.325913,0.053822,-0.026504,-0.03149
G,,-0.319334,-0.050043,-0.023618,-0.316176,0.325913,1.0,0.191237,-0.17203,0.40901
H,,,,0.042618,0.29582,0.053822,0.191237,1.0,0.300123,0.532423
I,-0.23059,,,0.159281,-0.380018,-0.026504,-0.17203,0.300123,1.0,-0.132229
J,0.521481,0.441975,-0.593291,0.36445,0.011155,-0.03149,0.40901,0.532423,-0.132229,1.0


In [43]:
def predict_scores(df_sim, ser_target, min_common=None):
    """Predict a score for each row item of ``df_sim`` from the target ratings.

    For every row, the prediction is the similarity-weighted average of the
    target ratings, ``sum(sim * rating) / sum(|sim|)``, taken over the columns
    whose similarity is not NaN.

    Parameters
    ----------
    df_sim : pandas.DataFrame
        Similarity values, one row per item to score. Assumes the columns
        carry the same labels as the index of ``ser_target``.
    ser_target : pandas.Series
        Ratings used as the values being averaged, indexed by item label.
    min_common : int, optional
        Minimum number of non-NaN similarities a row needs in order to be
        scored. When omitted, the notebook-level ``min_common_items`` is used.

    Returns
    -------
    pandas.Series
        Predicted scores (float) indexed by row label, sorted in descending
        order. Rows that were skipped are absent; the Series is empty when no
        row qualifies.
    """
    if min_common is None:
        # Keep the original behaviour for callers that do not pass a threshold.
        min_common = min_common_items

    predictions = {}
    for item in df_sim.index:
        sims = df_sim.loc[item]
        known = sims.notnull()
        if known.sum() < min_common:
            continue
        weights = sims[known]
        ratings = ser_target[known]
        norm = np.abs(weights).sum()
        if norm == 0:
            # Every usable similarity is zero: the weighted average is 0/0,
            # so there is no meaningful prediction for this item.
            continue
        predictions[item] = (weights * ratings).sum() / norm

    # An explicit dtype keeps the result a float Series even when it is empty.
    return pd.Series(predictions, dtype=float).sort_values(ascending=False)

Function for item-based collaborative filtering.  

arguments: the item-item similarity DataFrame  
and a dictionary of scores for the target user.  

ex)
```
get_recomm_by_item_sim(df, {'maguro':1, 'ika':1, 'uni':3,
                        'awabi':4, 'hirame':4, 'aoyagi':4})  
```
-> returns a pandas Series of predicted scores sorted in descending order, such as akagai 2.9835603009918303, mirugai 2.945676429588114, ...

In [44]:
def get_recomm_by_item_sim(df, target_dic):
    """Score the items the target user has not rated, using item similarities.

    Parameters
    ----------
    df : pandas.DataFrame
        Item-by-item similarity table; it is indexed by item label on both
        the rows and the columns.
    target_dic : dict
        Maps item label -> score given by the target user.

    Returns
    -------
    pandas.Series
        Whatever ``predict_scores`` returns for the candidate items.
    """
    ratings = pd.Series(target_dic)
    rated_items = ratings.index
    # Keep only the similarity columns of the rated items, then remove the
    # rows of those same items since they are already rated.
    candidates = df[rated_items].drop(index=rated_items)
    return predict_scores(candidates, ratings)

#### Do recommendation  

In [45]:
# Predict scores from the correlation matrix for a user who rated A, B, C and D
recomm = get_recomm_by_item_sim(
    df=df_corr,
    target_dic={'A': 0, 'B': 0, 'C': 4, 'D': 4},
)
print('Number of items calculated:', len(recomm))
print('Recommendation:')
print(recomm.head())

Number of items calculated: 3
Recommendation:
E    1.211548
J   -0.476453
G   -0.749742
dtype: float64
