<a href="https://colab.research.google.com/github/sumeyyedemir5/nlp-preprocessing_and_textRepresentation/blob/main/Recommendation_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Recommendation system - Neural Network (embedding-based collaborative filtering)

In [None]:
# Recommendation system : kullanıcılara geçmiş davranışlarına, tercihlerine dayalı analizlerle
# ilgilerini çekebilecek hizmetler önermek için kullanılan sistemdir.
!pip install tensorflow
#import libraries
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dot, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Toy dataset: each triple is one (user id, item id, rating) interaction.
interactions = [
    (0, 0, 5), (1, 1, 4), (2, 2, 3), (3, 3, 2), (4, 4, 1),
    (0, 1, 4), (1, 2, 5), (2, 3, 2), (3, 4, 3), (4, 5, 4),
]
# Split the triples column-wise into three parallel arrays.
user_ids, item_ids, ratings = (np.array(column) for column in zip(*interactions))


In [None]:
# Hold out 20% of the interactions for testing; the three arrays are split
# together and random_state=42 fixes which rows end up in each part.
(user_id_train, user_id_test,
 item_id_train, item_id_test,
 rating_train, rating_test) = train_test_split(
    user_ids,
    item_ids,
    ratings,
    test_size=0.2,
    random_state=42,
)



In [None]:
# Sizes of the embedding tables used by the NN model.
# Ids are 0-based, so a table needs (largest id + 1) rows. Deriving the sizes
# from the data keeps them in sync if the dataset above is edited.
num_users = int(user_ids.max()) + 1
num_items = int(item_ids.max()) + 1

def create_model(num_users, num_items, embedding_dim):
  """Build and compile a dot-product recommender over user and item embeddings.

  Args:
    num_users: Row count of the user embedding table (used as ``input_dim``).
    num_items: Row count of the item embedding table (used as ``input_dim``).
    embedding_dim: Length of each embedding vector.

  Returns:
    A compiled Keras ``Model`` that takes ``[user, item]`` id inputs and emits
    one value per pair. It is compiled with Adam (learning rate 0.01) and
    mean squared error loss.
  """
  # Each sample carries a single id per input.
  user_in = Input(shape=(1,), name="user")
  item_in = Input(shape=(1,), name="item")

  # Lookup tables mapping an id to a vector of length embedding_dim.
  user_table = Embedding(input_dim=num_users, output_dim=embedding_dim, name="user_embedding")
  item_table = Embedding(input_dim=num_items, output_dim=embedding_dim, name="item_embedding")

  # Flatten the looked-up embeddings so they can be combined with a dot product.
  user_vec = Flatten()(user_table(user_in))
  item_vec = Flatten()(item_table(item_in))

  # Dot product of the two vectors, passed through a single-unit dense layer.
  similarity = Dot(axes=1)([user_vec, item_vec])
  score = Dense(1)(similarity)

  recommender = Model(inputs=[user_in, item_in], outputs=score)
  recommender.compile(loss="mean_squared_error", optimizer=Adam(learning_rate=0.01))
  return recommender

In [None]:
# Train the NN model.
# Seed Python, NumPy and TensorFlow first so that weight initialisation and
# batch shuffling - and therefore the losses and predictions - repeat across runs.
tf.keras.utils.set_random_seed(42)

embedding_dim = 8  # length of each user/item embedding vector
model = create_model(num_users, num_items, embedding_dim)

# Keep the object returned by fit() instead of echoing its repr as cell output;
# 10% of the training rows are used for validation.
history = model.fit(
    [user_id_train, item_id_train],
    rating_train,
    epochs=10,
    verbose=1,
    validation_split=0.1,
)

In [None]:
# Evaluate on the held-out interactions and report the resulting loss.
test_inputs = [user_id_test, item_id_test]
loss = model.evaluate(test_inputs, rating_test)
print("test loss:", loss)

In [None]:
# Score several candidate items for user 0; the items with the highest
# predicted values are the ones to recommend.
# Values recorded from an earlier run (they vary between runs):
#   item 2 -> [[0.10810207]]
#   item 3 -> [[0.11255766]]
#   item 4 -> [[0.11840666]]
#   item 5 -> [[0.11974955]]
user_id = np.array([0])
for candidate_item in (2, 3, 4, 5):
  item_id = np.array([candidate_item])
  prediction = model.predict([user_id, item_id])
  print("prediction:", prediction)

Recommendation system - Machine learning (user-based collaborative filtering with cosine similarity)

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Download the MovieLens 100k ratings: a tab-separated file read without a
# header row, so the column names are supplied explicitly.
url = 'https://files.grouplens.org/datasets/movielens/ml-100k/u.data'
columns = ['user_id', 'item_id', 'rating', 'timestamp']
df = pd.read_csv(url, sep='\t', header=None, names=columns)

# User-movie matrix (pivot table): one row per user, one column per movie.
# User/movie pairs that have no rating are NaN.
user_item_matrix = df.pivot(values='rating', index='user_id', columns='item_id')

# Same matrix with the missing entries (unwatched movies) replaced by 0.
user_item_matrix_filled = user_item_matrix.fillna(value=0)

In [None]:
# Pairwise cosine similarity between the users' zero-filled rating rows.
user_sim = cosine_similarity(user_item_matrix_filled)

# Label both axes with the user ids so similarities can be looked up by id.
user_labels = user_item_matrix.index
user_sim_df = pd.DataFrame(data=user_sim, index=user_labels, columns=user_labels)

In [None]:
def predict_rating(user_id, item_id, default_rating=2.5):
    """Predict a user's rating for an item from similar users' ratings.

    The prediction is the similarity-weighted average of the ratings that
    *other* users gave the item: sum(similarity * rating) / sum(similarity).
    The target user's own rating is never used, so calling this for an
    already-rated (user, item) pair does not simply echo the known answer.

    Reads the module-level ``user_item_matrix`` (ratings, users x items) and
    ``user_sim_df`` (user x user similarities).

    Args:
        user_id: Id of the user to predict for.
        item_id: Id of the item to predict.
        default_rating: Value returned when no prediction can be made.

    Returns:
        The predicted rating as a float, or ``default_rating`` when the user
        or item is unknown, nobody else rated the item, or the similarities
        of the users who rated it sum to zero.
    """
    # Unknown item or user: there is no rating column / similarity row to use.
    if item_id not in user_item_matrix.columns or user_id not in user_sim_df.index:
        return default_rating

    item_ratings = user_item_matrix[item_id]

    # Users who rated the item (NaN > 0 is False, so unrated entries drop out),
    # excluding the target user themselves.
    rated_by_others = (item_ratings > 0) & (item_ratings.index != user_id)
    neighbour_ratings = item_ratings[rated_by_others]
    if neighbour_ratings.empty:
        return default_rating

    # Similarity of the target user to each of those users, in the same order.
    weights = user_sim_df.loc[user_id, neighbour_ratings.index]
    total_weight = weights.sum()
    if total_weight == 0:
        return default_rating

    weighted_sum = np.dot(weights.to_numpy(), neighbour_ratings.to_numpy())
    return float(weighted_sum / total_weight)

In [None]:
# A single prediction: user 1, movie 40
single_prediction = predict_rating(1, 40)
print(single_prediction)

In [None]:
# Rank the best recommendations for one user
def get_recommendations(target_user, top_n =5):
  """Return the ``top_n`` unrated movies with the highest predicted rating.

  Args:
    target_user: Id of the user (a row label of ``user_item_matrix``).
    top_n: Maximum number of recommendations to return.

  Returns:
    A list of ``(movie_id, predicted_score)`` tuples sorted by score,
    highest first.
  """
  # Movies the user has not rated yet are the NaN entries of their row.
  row = user_item_matrix.loc[target_user]
  unseen = row.index[row.isna().to_numpy()]

  # Predict a score for every unseen movie.
  scored = [(movie_id, predict_rating(target_user, movie_id)) for movie_id in unseen]

  # Highest score first; equal scores keep their original column order.
  ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)
  return ranked[:top_n]

In [None]:
# Top 5 recommendations for user 1, printed best first
recs = get_recommendations(1, top_n=5)
for movie, score in recs:
  print(f"Film ID: {movie}, Tahmini Puan: {score}")

In [None]:
from sklearn.metrics import mean_squared_error

# Evaluate on 100 randomly chosen ratings; the fixed random_state makes the
# reported RMSE repeatable across runs.
# NOTE: the samples are drawn from the same `df` that was used to build
# user_item_matrix, so this is an in-sample (optimistic) estimate rather than
# a true held-out test.
test_samples = df.sample(100, random_state=42)
y_true = []
y_pred = []

for row in test_samples.itertuples(index=False):
    y_true.append(row.rating)
    y_pred.append(predict_rating(row.user_id, row.item_id))

# Compute the RMSE
rmse = np.sqrt(mean_squared_error(y_true, y_pred))
print(f"Modelin genel hata payı (RMSE): {rmse:.4f}")