## 電影推薦實作

In [131]:
import numpy as np
import numpy.ma as ma
import pandas as pd
import tensorflow as tf
import tabulate
import matplotlib

from tensorflow import keras
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split

pd.set_option("display.precision", 1)

### **\# 資料集來源: 自己**  

In [132]:
df = pd.read_excel("data.xlsx")
left_aligned_df = df.style.set_properties(**{'text-align': 'left'})
left_aligned_df = left_aligned_df.set_table_styles([dict(selector = 'th', props=[('text-align', 'left')])])
display(left_aligned_df)

Unnamed: 0,ID,電影名稱,類別,user1,user2,user3,user4,user5,x1 (comedy),x2 (Animation),x3 (Science fiction)
0,1,讓子彈飛,comedy,5,3,1,2,1,0.9,0.1,0.1
1,2,九品芝麻官,comedy,4,3,1,2,1,0.9,0.1,0.1
2,3,殭屍100,Animation,1,2,4,3,4,0.7,0.9,0.1
3,4,奧術,Animation,1,2,4,3,4,0.3,0.9,0.5
4,5,獵巫行動,Science fiction,1,5,2,1,3,0.1,0.1,0.9
5,6,侏儸紀公園,Science fiction,1,5,2,1,3,0.1,0.5,0.9
6,7,世界異戰,Science fiction,1,5,2,1,3,0.1,0.1,0.9
7,8,終末女武神,Animation,1,2,4,3,4,0.2,0.9,0.1
8,9,航海王,Animation,1,2,4,3,4,0.5,0.9,0.1


--------------------------------------------

### **\#根據上方資料，可得出以下資訊用於計算cost function:**
##### **觀眾數量 : num_users = 5**
##### **電影數量 : num_movies = 9**
##### **類別數量 : num_features = 3**
##### **$\mathbf{X}$ 代表每部電影的類別分數，例如【讓子彈飛】: $\mathbf{x}^{(1)}$=[0.90, 0.1, 0.1]**
##### **$\mathbf{Y}$ 代表每部電影對應於各使用者的評分，例如【奧術】: $\mathbf{y}^{(4)}$=[1, 2, 4, 3, 4]**
##### **$\mathbf{R}$ = $r$ (num_movies, num_users)，代表user是否評分過該電影，例如【讓子彈飛】: $r(1,)$=[1, 1, 1, 1, 1]**
##### **$\mathbf{W}$ = $w$ (num_users, num_features)，為亂數**
##### **$\mathbf{b}$ = $b$ (1, num_users)，為亂數**

In [133]:
import random

num_users=5
num_movies=9
num_features=3
X=[]
Y=[]
R=[]
W=[]
b=[]

df_numpy=df.to_numpy()
for i in range(num_movies):
    X.append([])
    Y.append([])
    R.append([])
    for j in range(11):
        if j >=3 and j <=7:
            Y[i].append(float(df_numpy[i,j]))
            if df_numpy[i,j]>0:
                R[i].append(1.0)
            else:
                R[i].append(0.0)
        if j>=8 and j<11:
            X[i].append(df_numpy[i,j])
b.append([])
for i in range(num_users):
    W.append([])
    b[0].append(random.uniform(-10,10))
    for j in range(num_features):
        W[i].append(random.uniform(-10,10))
        

In [134]:
sum=0
for j in range(num_users):
    if R[0][j]==1:
        sum+=Y[0][j]
tsmean = sum/num_users
print(f"對於第一部電影的平均評價:{tsmean:0.1f}顆星" )

對於第一部電影的平均評價:2.4顆星


-------------------------------------------------------------
<a name="4.1"></a>
#### \#collaborative filtering cost function 的定義
$$J({\mathbf{x}^{(0)},...,\mathbf{x}^{(n_m-1)},\mathbf{w}^{(0)},b^{(0)},...,\mathbf{w}^{(n_u-1)},b^{(n_u-1)}})= \left[ \frac{1}{2}\sum_{(i,j):r(i,j)=1}(\mathbf{w}^{(j)} \cdot \mathbf{x}^{(i)} + b^{(j)} - y^{(i,j)})^2 \right]
+ \underbrace{\left[
\frac{\lambda}{2}
\sum_{j=0}^{n_u-1}\sum_{k=0}^{n-1}(\mathbf{w}^{(j)}_k)^2
+ \frac{\lambda}{2}\sum_{i=0}^{n_m-1}\sum_{k=0}^{n-1}(\mathbf{x}_k^{(i)})^2
\right]}_{regularization}
\tag{1}$$
The first summation in (1) is "for all $i$, $j$ where $r(i,j)$ equals $1$" and could be written:

$$
= \left[ \frac{1}{2}\sum_{j=0}^{n_u-1} \sum_{i=0}^{n_m-1}r(i,j)*(\mathbf{w}^{(j)} \cdot \mathbf{x}^{(i)} + b^{(j)} - y^{(i,j)})^2 \right]
+\text{regularization}
$$

In [135]:
def cofi_cost_func(X, W, b, Y, R, lambda_):
    J = 0 
    for i in range(num_users):
        w=W[i][:]
        for k in range(num_movies):
            x=X[k][:]
            y=Y[k][i]
            r=R[k][i]
            J += np.square(r * (np.dot(w,x) + b[0][i] - y ) )
        J +=lambda_* (np.sum(np.square(W)) + np.sum(np.square(X)))
    J=J/2         
    return J

In [136]:
# Evaluate cost function with regularization 
J = cofi_cost_func(X, W, b, Y, R, 1.5);
print(f"Cost (with regularization): {J:0.2f}")

Cost (with regularization): 5670.21


In [137]:
#  Useful Values
num_movies, num_users = 300,100
num_features = 30

# Set Initial Parameters (W, X), use tf.Variable to track these variables
tf.random.set_seed(1234) # for consistent results
W = tf.Variable(tf.random.normal((num_users,  num_features),dtype=tf.float64),  name='W')
X = tf.Variable(tf.random.normal((num_movies, num_features),dtype=tf.float64),  name='X')
b = tf.Variable(tf.random.normal((1,num_users),dtype=tf.float64) ,  name='b')
Y =[]
R =[]

for i in range(num_movies):
    Y.append([])
    R.append([])
    for j in range(num_users):
        rate=round(random.uniform(0, 5), 0)
        Y[i].append(rate)
        R[i].append(rate)
        if Y[i][j]>=1:
            R[i][j]=1.0
        else:
            R[i][j]=0.0
            
# Instantiate an optimizer.
optimizer = keras.optimizers.Adam(learning_rate=1e-1)

In [141]:
print(b)

<tf.Variable 'b:0' shape=(1, 100) dtype=float64, numpy=
array([[-0.08194051,  0.7000041 , -0.98175261,  1.10141916, -0.49974704,
         0.39351135, -1.23614588,  0.99599781, -0.91513672,  0.31685958,
         0.47418537, -0.38766181, -0.66364006,  0.34178677, -0.30953542,
         2.30197844,  2.19730294, -0.58498967,  0.26631225,  1.63030408,
        -2.04239495,  0.39630212, -2.13983715,  3.33201036, -0.46764289,
        -0.15335556, -0.51456379, -0.58334794, -0.95389979, -0.41268613,
         0.84326297, -0.90365459, -1.11368457,  0.14639436,  1.17330207,
         1.26389855,  1.45581175, -0.45724971,  0.76289591, -0.17507672,
         0.42835664, -1.18268666, -0.40656716, -1.68187541,  3.96737893,
        -2.71427732,  0.31842615,  1.19225222,  1.16441078,  0.49310621,
         0.05172945,  0.85886458, -1.06175083,  0.0864987 ,  1.18688573,
         1.32847309, -0.63768585,  0.36481673,  1.01765262, -1.25631556,
        -0.14750861, -0.36233672, -0.81235216, -0.73540246, -0.33410

In [138]:
# Add new user ratings to Y 
for i in range(num_movies):
    rate=round(random.uniform(0, 5), 0)
    Y[i].append(rate)
    if rate >= 1:
        R[i].append(1.0)
    else:
        R[i].append(0.0)

In [139]:
#正規化Y
#minY=min(map(min, Y))
#maxY=max(map(max, Y))
#for i in range(num_movies):
    #for j in range(num_users):
        #Y[i][j]=(Y[i][j]-minY)/(maxY-minY)

In [140]:
iterations=100
lambda_=1.5
for iter in range(iterations):
    # Use TensorFlow’s GradientTape
    # to record the operations used to compute the cost 
    with tf.GradientTape() as tape:
        # Compute the cost (forward pass included in cost)
        cost_value = cofi_cost_func(X, W, b, Y, R, lambda_)
    
    cost_value=tf.convert_to_tensor(cost_value,dtype=tf.float64)
    # Use the gradient tape to automatically retrieve
    # the gradients of the trainable variables with respect to the loss
    grads = tape.gradient(cost_value, [X,W,b] ,unconnected_gradients=tf.UnconnectedGradients.ZERO)
    # Run one step of gradient descent by updating
    # the value of the variables to minimize the loss.
    optimizer.apply_gradients(zip(grads, [X,W,b]) )

    # Log periodically.
    #if iter % 20 == 0:
    print(f"Training loss at iteration {iter}: {cost_value:0.1f}")

KeyboardInterrupt: 