<a href="https://colab.research.google.com/github/rsemihkoca/YouDo-ds-bc/blob/main/YouDo_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np


class UserBased:
    """Neighbourhood model built on pairwise similarities between the rows of a rating matrix.

    Rows are treated as users and columns as items; fitting on the transposed
    matrix gives the item-based variant. Missing ratings are encoded as NaN.

    Parameters
    ----------
    zero_mean:
        If true, each row is centred on its own mean (ignoring NaN) before the
        similarity is computed; otherwise the row means are taken to be 0.
    beta:
        Discount threshold: every similarity is multiplied by
        ``min(number of co-rated columns, beta) / beta``.
    idf:
        If true, column ``c`` is weighted by ``log(1 + m / number of ratings in c)``,
        where ``m`` is the number of rows.
    verbosity:
        When greater than 0, ``fit`` prints the column weights.
    """

    mu: np.ndarray   # per-row mean used for centring (zeros when zero_mean is False)
    sim: np.ndarray  # (m, m) row-to-row similarity matrix filled by fit()

    def __init__(self, zero_mean: bool = True, beta: int = 1, idf: bool = False, verbosity: int = 0):
        self.zero_mean = zero_mean
        self.beta = beta
        self.idf = idf
        self.verbosity = verbosity

    def fit(self, r: np.ndarray):
        """Compute and return the row-to-row similarity matrix of ``r``.

        Each pair of rows is compared only on the columns both have rated,
        using a weighted cosine of the (optionally mean-centred) ratings.
        Pairs with no co-rated column, or whose centred ratings are all zero,
        get similarity 0.0 rather than NaN from a 0/0 division, so they cannot
        be ranked ahead of genuine neighbours later on.
        """
        m, n = r.shape
        if self.zero_mean:
            self.mu = np.nanmean(r, axis=1)
        else:
            self.mu = np.zeros(m)

        self.sim = np.zeros((m, m))

        if self.idf:
            idf = np.log(1 + m / (~np.isnan(r)).sum(axis=0))
        else:
            idf = np.ones(n)

        if self.verbosity > 0:
            print(idf)

        observed = ~np.isnan(r)

        for i in range(m):
            # The measure is symmetric, so only the upper triangle is computed.
            for j in range(i, m):
                mask = observed[i] & observed[j]

                si = r[i, mask] - self.mu[i]
                sj = r[j, mask] - self.mu[j]
                weights = idf[mask]

                denominator = np.sqrt((weights * (si ** 2)).sum()) * np.sqrt((weights * (sj ** 2)).sum())

                if denominator > 0:
                    value = (si * sj * weights).sum() / denominator
                    # Discount similarities that rest on fewer than beta co-rated columns.
                    value *= min(mask.sum(), self.beta) / self.beta
                else:
                    value = 0.0

                self.sim[i, j] = value
                self.sim[j, i] = value

        return self.sim

    def predict_single_rate(self, r: np.ndarray, u: int, j: int, top_k: int = 3) -> "tuple[np.ndarray, np.ndarray, float]":
        """Return the ingredients of a neighbourhood prediction for row ``u``, column ``j``.

        The neighbours are the (at most) ``top_k`` rows most similar to ``u``
        among those that have a rating in column ``j``. No prediction is
        computed here; the caller combines the returned pieces with its own
        weights.

        Returns
        -------
        Rvj : np.ndarray
            Ratings the selected neighbours gave to column ``j``.
        Mv : np.ndarray
            The neighbours' row means (``self.mu``).
        Mu : float
            Row mean of ``u``.
        """
        users_rated_j = np.nonzero(~np.isnan(r[:, j]))[0]

        # NOTE(review): row u itself is not excluded, so when r[u, j] is known
        # it will normally be selected as its own nearest neighbour - confirm
        # this is intended.
        topk_users = users_rated_j[self.sim[u, users_rated_j].argsort()[::-1][:top_k]]

        Rvj = r[topk_users, j]
        Mv = self.mu[topk_users]
        Mu = self.mu[u]

        return Rvj, Mv, Mu

   
import numpy as np


# Toy rating matrix: 6 users (rows) x 7 items (columns); NaN marks a missing rating.
r = np.array([[7, 6, 7, 4, 5, 4,6],
              [6, 7, np.nan, 4, 3, 4,7],
              [np.nan, 3, 3, 1, 1, np.nan,3],
              [1, 2, 3, 3, 3, 4,2],
              [1, np.nan, 1, 2, 3, 3,3],
              [1, 5, 1, 2, 3, 3,5]])

u=4 # 5th user (0-based index 4)
j=1 # 2nd item (0-based index 1); r[4, 1] is NaN
# No mean-centring here, so the model's means (mu) are all zeros.
UserBased_model = UserBased(zero_mean=False, beta=1, idf=False)

sim = UserBased_model.fit(r)

# With top_k=1 only the single most similar user who rated item j is returned.
r_userbased=UserBased_model.predict_single_rate(r,u,j, top_k=1)
r_userbased

(array([5.]), array([0.]), 0.0)

In [None]:
# Swap the two indices so the transposed (item x user) matrix can be queried
# with the same predict_single_rate(r, u, j) argument order.
j,u=u,j
u

1

In [None]:
# After the swap cell above: u=1 is the 2nd item and j=4 is the 5th user
# (the pair whose rating is NaN).
# Item-based variant: the same class fitted on the transposed matrix, so rows
# are items and columns are users.
ItemBased_model = UserBased(zero_mean=False, beta=1, idf=False)

sim = ItemBased_model.fit(r.T)

r_itembased=ItemBased_model.predict_single_rate(r.T,u,j, top_k=1)


r_itembased
        
        

(array([3.]), array([0.]), 0.0)

In [None]:
# Sanity check of the slicing idiom used in predict_single_rate:
# reversing an ascending array and keeping the first top_k entries yields the largest values.
top_k=1
a=np.array([0,1,2,3,4])
a_new=a[::-1][:top_k]
a_new

array([4])

In [None]:
import pandas as pd
# Same toy matrix, now as a labelled DataFrame. Note that this rebinds r from
# an ndarray to a DataFrame for every cell executed afterwards.
r = pd.DataFrame([[7, 6, 7, 4, 5, 4,6],
              [6, 7, np.nan, 4, 3, 4,7],
              [np.nan, 3, 3, 1, 1, np.nan,3],
              [1, 2, 3, 3, 3, 4,2],
              [1, np.nan, 1, 2, 3, 3,3],
              [1, 5, 1, 2, 3, 3,5]])
# The labels mark the last row and last column as copies ('_cpy') of User_4 and Item_1.
r.index =['User_0','User_1', 'User_2', 'User_3', 'User_4','User_4_cpy']
r.columns =['Item_0','Item_1', 'Item_2', 'Item_3', 'Item_4', 'Item_5','Item_1_cpy']
# Display the user x item view and its item x user transpose.
r,r.T
# r_pred=np.full((r.shape),np.nan)
# r_pred

array([[nan, nan, nan, nan, nan, nan, nan],
       [nan, nan, nan, nan, nan, nan, nan],
       [nan, nan, nan, nan, nan, nan, nan],
       [nan, nan, nan, nan, nan, nan, nan],
       [nan, nan, nan, nan, nan, nan, nan],
       [nan, nan, nan, nan, nan, nan, nan]])

In [None]:
# The previous cell rebinds r to a labelled DataFrame, but UserBased indexes its
# input positionally (r[i, mask], r[:, j]), which only works on an ndarray and
# is the source of the recorded TypeError. Strip the labels first;
# np.asarray is a no-op if r is already an array.
r_values = np.asarray(r, dtype=float)

users,items=r_values.shape
u=4 # 5th user (0-based index 4)
j=1 # 2nd item (0-based index 1); this rating is NaN

top_k=3
# One random weight per neighbour; shape (1, top_k) so the prediction is a length-1 array.
w_users=np.random.rand(1,top_k)

UserBased_model = UserBased(zero_mean=True, beta=1, idf=False)
sim = UserBased_model.fit(r_values)

Rvj,Mv,Mu=UserBased_model.predict_single_rate(r_values,u,j, top_k=3)

# Prediction = mean of user u + weighted sum of the neighbours' mean-centred ratings.
r_pred= Mu + np.dot(w_users,Rvj-Mv)
r_pred

TypeError: ignored

In [None]:
import numpy as np
r = np.array([[7, 6, 7, 4, 5, 4,6],
              [6, 7, np.nan, 4, 3, 4,7],
              [np.nan, 3, 3, 1, 1, np.nan,3],
              [1, 2, 3, 3, 3, 4,2],
              [1, np.nan, 1, 2, 3, 3,3],
              [1, 5, 1, 2, 3, 3,5]])

def Item_based_model(r,u,j,top_k,zero_mean=True,beta=1,idf=False):
  """Item-based counterpart of User_based_model for user ``u`` and item ``j``.

  A UserBased model is fitted on the transposed matrix, so its rows are items
  and its columns are users; the item index therefore takes the row slot and
  the user index the column slot of predict_single_rate.

  Returns the 3-tuple produced by predict_single_rate on the transposed
  matrix: the ratings in column ``u`` of the neighbouring items, those items'
  means, and the mean of item ``j``.
  """
  ratings_by_item = r.T
  model = UserBased(zero_mean=zero_mean, beta=beta, idf=idf)
  model.fit(ratings_by_item)
  return model.predict_single_rate(ratings_by_item, j, u, top_k)

def User_based_model(r,u,j,top_k,zero_mean=True,beta=1,idf=False):
  """Fit a fresh UserBased model on ``r`` and query it for user ``u``, item ``j``.

  Returns the 3-tuple produced by predict_single_rate: the neighbours' ratings
  of item ``j``, the neighbours' means, and the mean of user ``u``.
  """
  model = UserBased(zero_mean=zero_mean, beta=beta, idf=idf)
  model.fit(r)
  return model.predict_single_rate(r, u, j, top_k)


def Combined_SGD(r,top_k,beta,zero_mean,lamb,alpha,max_iter):
  """Learn per-user and per-item neighbour weights by stochastic gradient descent.

  For every known rating r[u, j] the prediction is

      baseline + w_users[u] . (Rvj - Mv) + w_items[:, j] . (Rut - Mj)

  where (Rvj, Mv) come from the user-based neighbourhood of (u, j) and
  (Rut, Mj) from the item-based one. The weights are updated after each rating
  with the gradient of the squared error.

  Parameters
  ----------
  r : ndarray, users x items, NaN for missing ratings.
  top_k : number of neighbours (and of weights per user / per item). Every
      known rating needs at least top_k neighbours, otherwise the dot
      products fail on a shape mismatch.
  beta, zero_mean : forwarded to both UserBased similarity models.
  lamb : accepted for interface compatibility; currently unused (no
      regularisation term is applied).
  alpha : learning rate.
  max_iter : maximum number of passes over the known ratings.

  Returns
  -------
  ndarray with the shape of r holding the latest prediction for every known
  rating and NaN where r is NaN.
  """
  users,items=r.shape
  w_users=np.random.rand(users,top_k)
  w_items=np.random.rand(top_k,items)
  grad_users=np.zeros(top_k)
  grad_items=np.zeros(top_k)

  r_pred=np.full((r.shape),np.nan)

  # The similarity models depend only on r, so fit them once instead of once
  # per rating per epoch.
  user_model = UserBased(zero_mean=zero_mean, beta=beta, idf=False)
  user_model.fit(r)
  item_model = UserBased(zero_mean=zero_mean, beta=beta, idf=False)
  item_model.fit(r.T)

  for epoch in range(max_iter):
    loss=0
    Cum_loss=0
    for u in range(users):
      for j in range(items):
        # Unknown ratings carry no training signal; skip them.
        if np.isnan(r[u][j]):
          continue

        Rvj,Mv,_=user_model.predict_single_rate(r,u,j,top_k)
        # On the transposed matrix the item is the row and the user the column.
        # NOTE(review): the baseline Mu is therefore the *item* mean; the user
        # mean from the call above is discarded, exactly as the original code
        # did by overwriting Mu. Confirm which baseline is intended.
        Rut,Mj,Mu=item_model.predict_single_rate(r.T,j,u,top_k)

        r_pred[u][j]= Mu + np.dot(w_users[u,:],Rvj-Mv) + np.dot(w_items[:,j],Rut-Mj)

        residual = r[u][j] - r_pred[u][j]
        loss+= residual**2 # TODO: evaluate on a held-out split instead of the training ratings

        grad_users = -2*residual*(Rvj-Mv)
        grad_items = -2*residual*(Rut-Mj)

        w_users[u,:] = w_users[u,:] - alpha * grad_users
        w_items[:,j] = w_items[:,j] - alpha * grad_items

        # Running sum, over the steps of this epoch, of the squared error on
        # all predictions made so far.
        Cum_loss+= np.nansum((r-r_pred)**2)

    if(epoch%10 == 0):
        print(f"iteration: ({epoch}) , gradient: {np.linalg.norm(grad_users)+np.linalg.norm(grad_items):.3f}, loss={loss:.3f}, Total error: {Cum_loss:.3f}")

    # Once the loss is NaN/inf the weights have diverged and can never recover,
    # and the gradient test below can never fire; stop instead of running on.
    if not np.isfinite(loss):
        print(f"Loss is not finite at iteration {epoch}; stopping (try a smaller alpha)")
        break

    # Convergence is judged on the gradient of the last rating visited.
    if np.linalg.norm(grad_users)+np.linalg.norm(grad_items) < 0.00000001:
        print(f"I do early stoping at iteration {epoch}")
        break
  return r_pred

#users,items=r.shape
# w_users=np.random.rand(users,top_k)
# w_items=np.random.rand(top_k,items)
# grad=np.zeros(top_k)
# Hyper-parameters for the combined user/item SGD run.
top_k=4           # neighbours (and weights) per user and per item
alpha=10**-2*(3)  # learning rate, 0.03
max_iter=200      # maximum number of epochs
beta=1
zero_mean=True
lamb=1
r_pred=Combined_SGD(r,top_k,beta,zero_mean,lamb,alpha,max_iter)
# Print rounded predictions without scientific notation.
np.set_printoptions(precision=0, suppress=True)
print(r_pred)

iteration: (0) , gradient: nan, loss=nan, Total error: 2231.402
iteration: (10) , gradient: nan, loss=nan, Total error: 0.000
iteration: (20) , gradient: nan, loss=nan, Total error: 0.000
iteration: (30) , gradient: nan, loss=nan, Total error: 0.000
iteration: (40) , gradient: nan, loss=nan, Total error: 0.000
iteration: (50) , gradient: nan, loss=nan, Total error: 0.000
iteration: (60) , gradient: nan, loss=nan, Total error: 0.000
iteration: (70) , gradient: nan, loss=nan, Total error: 0.000
iteration: (80) , gradient: nan, loss=nan, Total error: 0.000
iteration: (90) , gradient: nan, loss=nan, Total error: 0.000
iteration: (100) , gradient: nan, loss=nan, Total error: 0.000
iteration: (110) , gradient: nan, loss=nan, Total error: 0.000
iteration: (120) , gradient: nan, loss=nan, Total error: 0.000


KeyboardInterrupt: ignored

In [None]:
np.set_printoptions(precision=0, suppress=True)

r_pred

array([[ 7.,  6.,  7.,  4.,  5.,  4.,  6.],
       [ 6.,  7., nan,  4.,  3.,  4.,  7.],
       [nan,  3.,  3.,  1.,  1., nan,  3.],
       [ 1.,  2.,  3.,  3.,  3.,  4.,  2.],
       [ 1., nan,  1.,  2.,  3.,  3.,  3.],
       [ 1.,  5.,  1.,  2.,  3.,  3.,  5.]])

In [None]:
np.array([[6.99701826, 6.01365855, 6.99771103, 3.99912263, 5.00242315, 3.99538269, 5.9838271 ],
       [5.9989501 , 6.98556526,        np.nan, 4.00205566, 3.00083937, 3.99778344, 7.01523193],
       [       np.nan, 2.99561491, 3.00100138, 0.99857788, 1.00199939, np.nan, 3.00501033],
       [1.00246134, 1.9983931 , 3.00104609, 2.9972323 , 3.00156812, 4.00365659, 2.00315068],
       [0.99010626,        np.nan, 1.01786441, 1.97618874, 3.04394068, 2.97602526, 2.99476216],
       [1.00198988, 5.00946122, 0.97679499, 2.02849337, 2.96021514, 3.01236376, 4.98515699]])

array([[6.99701826, 6.01365855, 6.99771103, 3.99912263, 5.00242315,
        3.99538269, 5.9838271 ],
       [5.9989501 , 6.98556526,        nan, 4.00205566, 3.00083937,
        3.99778344, 7.01523193],
       [       nan, 2.99561491, 3.00100138, 0.99857788, 1.00199939,
               nan, 3.00501033],
       [1.00246134, 1.9983931 , 3.00104609, 2.9972323 , 3.00156812,
        4.00365659, 2.00315068],
       [0.99010626,        nan, 1.01786441, 1.97618874, 3.04394068,
        2.97602526, 2.99476216],
       [1.00198988, 5.00946122, 0.97679499, 2.02849337, 2.96021514,
        3.01236376, 4.98515699]])

In [None]:
r = np.array([[7, 6, 7, 4, 5, 4,6],
              [6, 7, np.nan, 4, 3, 4,7],
              [np.nan, 3, 3, 1, 1, np.nan,3],
              [1, 2, 3, 3, 3, 4,2],
              [1, np.nan, 1, 2, 3, 3,3],
              [1, 5, 1, 2, 3, 3,5]])

users,items=r.shape
top_k=5
w_users=np.random.rand(users,top_k)
loss=0
grad=np.zeros(top_k)

# Fit the similarity model on *this* r. Without it the loop would use whatever
# an earlier cell left in UserBased_model - possibly fitted on a different
# matrix, or left half-initialised by a fit that raised.
UserBased_model = UserBased(zero_mean=True, beta=1, idf=False)
UserBased_model.fit(r)

alpha=10**-2
max_iter=10000
# 'epoch' rather than 'iter' so the builtin iter() is not shadowed notebook-wide.
for epoch in range(max_iter):
    loss=0
    for u in range(users):
        for j in range(items):
            if np.isnan(r[u][j]): # unknown ratings carry no training signal
                continue
            Rvj,Mv,Mu=UserBased_model.predict_single_rate(r,u,j, top_k) # TODO: fold the item-based part in here too
            r_pred= Mu + np.dot(w_users[u,:],Rvj-Mv)

            loss+= (r[u][j] - r_pred)**2 # TODO: evaluate on a held-out split

            # Gradient of the squared error with respect to this user's weights.
            grad = -2*(r[u][j] - r_pred)*(Rvj-Mv)

            w_users[u,:] = w_users[u,:] - alpha * grad

    if(epoch%100 == 0):
        print(f"iteration: ({epoch}) , gradient: {np.linalg.norm(grad)}, loss={loss}")

    # Convergence is judged on the gradient of the last rating visited.
    if np.linalg.norm(grad) < 0.00000001:
        print(f"I do early stoping at iteration {epoch}")
        break





In [None]:
grad=np.zeros(top_k)
grad

array([0., 0., 0.])

In [None]:
# Squared-error loss of the user-based predictor with random, untrained weights.
users,items=r.shape
top_k=3
w_users=np.random.rand(1,top_k)  # shape (1, top_k), so the loss is a length-1 array
loss=0
grad=np.zeros(top_k)
for u in range(users):
  for j in range(items):
    if np.isnan(r[u][j]):
      continue
    Rvj,Mv,Mu=UserBased_model.predict_single_rate(r,u,j, top_k=3) # TODO: fold the item-based part in here too
    prediction = Mu + np.dot(w_users,Rvj-Mv)
    # The residual is rating minus the *whole* prediction; the unparenthesised
    # form r - Mu + w.x added the weighted term instead of subtracting it.
    loss+= (r[u][j] - prediction)**2
print(loss)

[334.73485311]


In [None]:
def User_Item_Combined(x, y, theta=5, alpha=1.2*10**-2, max_iter=1000000, tol=0.00000001) -> np.ndarray:
    """Fit ``y ~ beta[0] + beta[1] * x`` by gradient descent with damped gradients.

    Both gradient components of the mean squared residual are multiplied by
    ``exp(-(mean residual)**2 / theta)``, so steps shrink while the mean
    residual is large.

    Parameters
    ----------
    x, y : array-likes of equal length (converted with ``np.asarray``).
    theta : scale of the exponential damping factor.
    alpha : learning rate.
    max_iter : maximum number of update steps.
    tol : stop as soon as one update moves ``beta`` by less than this
        (Euclidean norm).

    Returns
    -------
    np.ndarray of shape (2,): intercept and slope, started from uniform
    random values in [0, 1).
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    beta = np.random.random(2)

    for i in range(max_iter):
        residual = y - (beta[0] + beta[1] * x)
        mean_residual = residual.mean()

        # Shared damping factor, computed once for both gradient components.
        damping = np.power(np.e, -1 * np.power(mean_residual, 2) / theta)

        g_b0 = -2 * mean_residual * damping
        g_b1 = -2 * ((x * residual).mean()) * damping

        beta_prev = np.copy(beta)

        beta[0] = beta[0] - alpha * g_b0
        beta[1] = beta[1] - alpha * g_b1

        if(i%1000 == 0):
            print(f"iteration: ({i}) beta: {beta}, gradient: {g_b0} {g_b1}")

        if np.linalg.norm(beta - beta_prev) < tol:
            print(f"I do early stoping at iteration {i}")
            break

    return beta

In [None]:
# NOTE(review): X and y are not defined in any visible cell - presumably left
# over from another notebook; confirm before running.
beta=User_Item_Combined(X, y)
beta

In [None]:
r.T

In [None]:
import pandas as pd
import numpy as np
# u.data is tab-separated. Pass a literal tab: the raw string r'\t' is two
# characters, which pandas treats as a regular expression and which forces the
# slower Python parsing engine (with a ParserWarning).
df = pd.read_csv('https://files.grouplens.org/datasets/movielens/ml-100k/u.data', delimiter='\t',
names=['user_id', 'item_id', 'rating', 'timestamp'])
# Reshape the long rating table into a user x item matrix; unrated pairs become NaN.
r = df.pivot(index='user_id', columns='item_id', values='rating').values

In [None]:
r

# Model 1

Part *1*:

In [None]:
# TODO: Should predictions be cast/rounded to valid ratings (e.g. 4.7 -> 5)?
# NOTE: Do not add mu twice; it must be added only once.

In [None]:
r = np.array([[7, 6, 7, 4, 5, 4],
              [6, 7, np.nan, 4, 3, 4],
              [np.nan, 3, 3, 1, 1, np.nan],
              [1, 2, 3, 3, 3, 4],
              [1, np.nan, 1, 2, 3, 3]])

In [None]:
r

In [None]:
# Step-by-step version of UserBased.fit: set up the inputs.
m, n = r.shape
mu = np.nanmean(r, axis=1)  # per-row mean, ignoring NaN
beta=3                      # discount threshold on the number of co-rated columns


sim = np.zeros((m, m))      # filled by the similarity loop in a later cell
sim

In [None]:


# Per-column weight: log(1 + number of rows / number of non-NaN ratings in the column).
idf = np.log(1 + m / (~np.isnan(r)).sum(axis=0))
idf

In [None]:



# Step-by-step version of the similarity loop in UserBased.fit, using the
# module-level r, mu, idf, beta and sim from the cells above.
for i in range(m):
    for j in range(m):
        # Columns rated by both row i and row j.
        mask = ~np.isnan(r[i, :]) & ~np.isnan(r[j, :])

        # Mean-centred ratings on the shared columns.
        si = r[i, mask] - mu[i]
        sj = r[j, mask] - mu[j]

        # idf-weighted cosine; this is 0/0 (NaN) when the mask is empty or a
        # centred vector is all zeros.
        sim[i][j] = (si * sj * idf[mask]).sum() / (
                np.sqrt((idf[mask] * (si ** 2)).sum()) * np.sqrt((idf[mask] * (sj ** 2)).sum()))

        total_intersection = mask.sum()

        # Discount similarities backed by fewer than beta shared columns.
        sim[i][j] *= min(total_intersection, beta) / beta

sim

In [None]:
r

In [None]:
# Scratch cell: inspect the co-rated mask for a single pair of rows.
j= 3
i= # NOTE(review): no value is assigned here, so this cell does not parse as written
mask = ~np.isnan(r[i, :]) & ~np.isnan(r[j, :])


In [None]:
# Scratch copy of the similarity-weighted prediction formula.
# NOTE(review): 'self' and 'users_rated_j' are not defined at cell level in the
# visible cells, so this only works inside a UserBased method.
topk_users = users_rated_j[self.sim[u, users_rated_j].argsort()[::-1][:top_k]]

# Neighbours' ratings of item j, centred on each neighbour's own mean.
mean_centered_topk_user_rate = r[topk_users, j] - self.mu[topk_users]

# Similarities between u and the selected neighbours act as the weights.
w = self.sim[u, topk_users]

# Weighted average of the centred ratings, shifted back by the mean of u.
result= np.dot(mean_centered_topk_user_rate, w) / np.abs(w).sum() + self.mu[u]

In [None]:
r_row,r_col=r.shape

In [None]:
for u in range(r_row): # user sayısı kadar don
  for j in range(r_col): # item sayısı kadar don
  