In [25]:
import pandas as pd
df = pd.read_csv("netflix_content.csv")
df.head(3)

Unnamed: 0,Title,Available Globally?,Release Date,Hours Viewed,Language Indicator,Content Type
0,The Night Agent: Season 1,Yes,2023-03-23,812100000,English,Show
1,Ginny & Georgia: Season 2,Yes,2023-01-05,665100000,English,Show
2,The Glory: Season 1 // 더 글로리: 시즌 1,Yes,2022-12-30,622800000,Korean,Show


In [26]:
df.columns

Index(['Title', 'Available Globally?', 'Release Date', 'Hours Viewed',
       'Language Indicator', 'Content Type'],
      dtype='object')

In [27]:
df['Hours Viewed']=df['Hours Viewed'].str.replace(',','',regex=False).astype('int64')
df.dropna(subset=['Title'],inplace=True)
df.drop_duplicates(subset=['Title'],inplace= True)
df['Content_ID'] = df.reset_index().index.astype('int32')
df['Language_ID']= df['Language Indicator'].astype('category').cat.codes
df['ContentType_ID']= df['Content Type'].astype('category').cat.codes
df[['Content_ID','Title','Hours Viewed', 'Language_ID','ContentType_ID']].head()

Unnamed: 0,Content_ID,Title,Hours Viewed,Language_ID,ContentType_ID
0,0,The Night Agent: Season 1,812100000,0,1
1,1,Ginny & Georgia: Season 2,665100000,0,1
2,2,The Glory: Season 1 // 더 글로리: 시즌 1,622800000,3,1
3,3,Wednesday: Season 1,507700000,0,1
4,4,Queen Charlotte: A Bridgerton Story,503000000,0,0


In [28]:
import tensorflow as tf
from tensorflow.keras import layers, Model
num_contents = df['Content_ID'].nunique()
num_languages = df['Language_ID'].nunique()
num_types = df['ContentType_ID'].nunique()

content_input = layers.Input(shape=(1,), name='content_input',dtype=tf.int32)
language_input = layers.Input(shape=(1,),dtype=tf.int32, name='language_id')
type_input = layers.Input(shape=(1,),dtype=tf.int32, name='type_id')

content_embedding = layers.Embedding(input_dim = num_contents+1,output_dim=32)(content_input)
language_embedding = layers.Embedding(input_dim = num_languages+1,output_dim=32)(language_input)
type_embedding = layers.Embedding(input_dim = num_types+1,output_dim=32)(type_input)

content_vec = layers.Flatten()(content_embedding)
language_vec = layers.Flatten()(language_embedding)
type_vec = layers.Flatten()(type_embedding)

combined = layers.concatenate([content_vec,language_vec,type_vec])
x = layers.Dense(64, activation='relu')(combined)
x = layers.Dense(32, activation='relu')(x)
output = layers.Dense(num_contents, activation='softmax')(x)

model = Model(inputs=[content_input,language_input,type_input], outputs=output)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])


In [29]:
model.fit(
    x={
        'content_input': df['Content_ID'],
        'language_id': df['Language_ID'],
        'type_id': df['ContentType_ID']
    },
    y=df['Content_ID'],
    epochs=5,
    batch_size=64
)

Epoch 1/5
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 44ms/step - accuracy: 0.0000e+00 - loss: 9.8788
Epoch 2/5
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 44ms/step - accuracy: 0.0000e+00 - loss: 9.8566
Epoch 3/5
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 43ms/step - accuracy: 0.0000e+00 - loss: 9.5825
Epoch 4/5
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 46ms/step - accuracy: 1.4062e-04 - loss: 9.1101
Epoch 5/5
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 62ms/step - accuracy: 9.0187e-04 - loss: 8.5854


<keras.src.callbacks.history.History at 0x7ab610d748f0>

In [40]:
import numpy as np
def recommend_similar(content_title, top_k=4):
  content_row=df[df['Title'].str.contains(content_title,case= False)].iloc[0]
  content_id = content_row['Content_ID']
  language_id = content_row['Language_ID']
  type_id = content_row['ContentType_ID']

  predictions = model.predict({
      'content_input':np.array([content_id]),
      'language_id':np.array([language_id]),
      'type_id':np.array([type_id])
  })
  top_indices = predictions[0].argsort()[-top_k:][::-1]
  recommendations =df[df['Content_ID'].isin(top_indices)]
  return recommendations[['Title','Language Indicator','Content Type']]

In [41]:
recommend_similar('Wednesday')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step


Unnamed: 0,Title,Language Indicator,Content Type
3779,42 Days of Darkness: Season 1 // 42 días en la...,Non-English,Show
6254,Medal of Honor: Season 1,English,Show
7330,Bakugan Legends: Season 5,English,Show
7823,Yanxi Palace: Princess Adventures: Season 1 //...,Non-English,Show


In [42]:
recommend_similar('Money Heist')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step


Unnamed: 0,Title,Language Indicator,Content Type
1725,Mili // मिलि,Hindi,Movie
7156,Jaadugar // जादूगर,Hindi,Movie
14546,Africano // أفريكانو,Non-English,Movie
23765,"Banished from the Hero's Party, I Decided to L...",Japanese,Movie


In [43]:
recommend_similar('Breaking Bad')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step


Unnamed: 0,Title,Language Indicator,Content Type
3003,Rugrats (1991): Season 2,English,Show
5790,Line of Duty: Season 5,English,Show
6176,Badanamu Stories: Season 1,English,Show
10597,Aunty Donna's Big Ol' House of Fun: Season 1,English,Show
