In [55]:
import numpy as np
import pandas as pd
from keras.layers import Input, Embedding, Flatten, Concatenate, Dense
from keras.models import Model
from keras.optimizers import Adam
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Read the film review spreadsheet into the working DataFrame.
df = pd.read_excel(io='/content/recensioni_film_cast_tag.xlsx')

In [None]:
# Keep only the columns used below. The explicit .copy() makes this an
# independent frame, so the column assignments that follow write to df itself
# instead of triggering SettingWithCopyWarning on a derived selection.
df = df[['Regista', 'Titolo', 'Anno', 'Cast', 'TAG 1']].copy()

# One LabelEncoder per categorical column. They are kept as module-level names
# because consiglia_film reuses them to encode user-supplied values.
regista_encoder = LabelEncoder()
titolo_encoder = LabelEncoder()
cast_encoder = LabelEncoder()
tag_encoder = LabelEncoder()

# Fit each encoder and store the integer codes next to the original columns.
df['Regista_encoded'] = regista_encoder.fit_transform(df['Regista'])
df['Titolo_encoded'] = titolo_encoder.fit_transform(df['Titolo'])
df['Cast_encoded'] = cast_encoder.fit_transform(df['Cast'])
df['TAG1_encoded'] = tag_encoder.fit_transform(df['TAG 1'])

# Model inputs: one 1-D array of codes per feature, aligned with the rows of df.
X_regista = df['Regista_encoded'].values
X_titolo = df['Titolo_encoded'].values
X_cast = df['Cast_encoded'].values
X_tag1 = df['TAG1_encoded'].values
# Dummy regression target: simply the row position of each film.
y = np.arange(len(df))

# One scalar (integer code) input per categorical feature.
input_regista = Input(shape=(1,), name="regista_input")
input_titolo = Input(shape=(1,), name="titolo_input")
input_cast = Input(shape=(1,), name="cast_input")
input_tag1 = Input(shape=(1,), name="tag1_input")

# Each feature gets its own 4-dimensional embedding table sized to the number
# of classes its encoder has seen; Flatten turns (1, 4) into (4,).
regista_embedding = Embedding(input_dim=len(regista_encoder.classes_), output_dim=4)(input_regista)
regista_embedding = Flatten()(regista_embedding)

titolo_embedding = Embedding(input_dim=len(titolo_encoder.classes_), output_dim=4)(input_titolo)
titolo_embedding = Flatten()(titolo_embedding)

cast_embedding = Embedding(input_dim=len(cast_encoder.classes_), output_dim=4)(input_cast)
cast_embedding = Flatten()(cast_embedding)

tag1_embedding = Embedding(input_dim=len(tag_encoder.classes_), output_dim=4)(input_tag1)
tag1_embedding = Flatten()(tag1_embedding)

# Join the four embeddings into a single 16-dimensional vector.
concat = Concatenate()([regista_embedding, titolo_embedding, cast_embedding, tag1_embedding])

# Small fully connected head ending in one linear unit (dummy regression task).
dense_1 = Dense(8, activation='relu')(concat)
dense_2 = Dense(16, activation='relu')(dense_1)
dense_3 = Dense(4, activation='relu')(dense_2)
output = Dense(1, activation='linear')(dense_3)

# Build and compile the model.
model = Model(inputs=[input_regista, input_titolo, input_cast, input_tag1], outputs=output)
model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

# Train on the dummy target.
model.fit([X_regista, X_titolo, X_cast, X_tag1], y, epochs=1000, verbose=1)

# Integer codes per film, one row per row of df (kept for downstream use).
features_matrix = df[['Regista_encoded', 'Titolo_encoded', 'Cast_encoded', 'TAG1_encoded']].values

# Label codes are arbitrary ranks, not magnitudes, so cosine similarity on the
# raw integers would call two films "similar" merely because their codes are
# proportional. One-hot encoding first makes the similarity equal to the
# fraction of attributes two films actually share. OneHotEncoder returns a
# sparse matrix, which cosine_similarity accepts and turns into a dense (n, n)
# array.
# NOTE(review): the similarity is computed from the encoded columns only; the
# trained model above does not contribute to it.
onehot_features = OneHotEncoder().fit_transform(features_matrix)
similarity_matrix = cosine_similarity(onehot_features)

# Recommendation function
def consiglia_film(titolo=None, regista=None, cast=None, tag1=None):
    """Print up to five films similar to the requested one.

    The seed film is the first row of ``df`` whose encoded title matches
    ``titolo``; when ``titolo`` is missing or unknown to ``titolo_encoder`` a
    random row is used instead. Candidates are ranked by the seed's row of
    ``similarity_matrix`` and rows carrying the seed's own title are skipped,
    so a film is never recommended for itself.

    Args:
        titolo: Title of the film to start from (optional).
        regista: Director name (optional).
        cast: Cast value as stored in the ``Cast`` column (optional).
        tag1: Tag value as stored in the ``TAG 1`` column (optional).

    Returns:
        None. The recommendations are printed.

    Note:
        ``regista``, ``cast`` and ``tag1`` are only encoded and passed to
        ``model.predict``; the prediction is not used for the ranking.
    """
    def safe_transform(encoder, value):
        """Return the integer code of ``value``, or None if the encoder never saw it."""
        if value in encoder.classes_:
            return encoder.transform([value])[0]
        return None

    # (encoder, user value, codes seen in training), in the model's input order.
    fields = [
        (regista_encoder, regista, X_regista),
        (titolo_encoder, titolo, X_titolo),
        (cast_encoder, cast, X_cast),
        (tag_encoder, tag1, X_tag1),
    ]
    codes = [safe_transform(enc, value) if value else None for enc, value, _ in fields]
    titolo_id = codes[1]

    # Missing or unknown values are replaced by a random code from the data so
    # that the model always receives its four inputs.
    input_data = []
    for code, (_, _, known_codes) in zip(codes, fields):
        if code is None:
            code = np.random.choice(known_codes)
        input_data.append(np.array([code]))

    # NOTE(review): the predicted score is not used in the ranking below.
    punteggi = model.predict(input_data)

    # An encoded title is a class id, not a row number: map it to the first
    # row of df that carries that title before indexing similarity_matrix.
    if titolo_id is not None:
        input_index = int(np.argmax(X_titolo == titolo_id))
    else:
        input_index = np.random.choice(len(df))

    # Rank all rows by similarity to the seed, drop rows with the seed's own
    # title (the seed itself and any duplicates), and keep the best five.
    ranked = np.argsort(-similarity_matrix[input_index])
    same_title = X_titolo[ranked] == X_titolo[input_index]
    similar_indices = ranked[~same_title][:5]

    print("Film consigliati:")
    for idx in similar_indices:
        print(f"- {df['Titolo'].iloc[idx]} (Regista: {df['Regista'].iloc[idx]}, Cast: {df['Cast'].iloc[idx]}, Tag: {df['TAG 1'].iloc[idx]})")

In [None]:
# Demo: ask for films similar to 'Blade Runner', also passing a tag.
consiglia_film(tag1='Science', titolo='Blade Runner')