#### Gerar base de dados aleatória para o modelo

In [43]:
import pandas as pd
import random
import uuid

# Função para gerar IDs únicos
def generate_uuid():
    return str(uuid.uuid4())

# Gêneros disponíveis
genres = [
    "Action", "Adventure", "Animation", "Comedy", "Crime", "Drama", 
    "Fantasy", "Horror", "Sci-Fi", "Thriller"
]

# Gerar usuários
def generate_users(num_users=100):
    users = []
    for _ in range(num_users):
        user = {
            "id": generate_uuid(),
            "age": random.randint(18, 60),
            "gender": random.choice(["Male", "Female"]),
            "preferredGenres": random.sample(genres, k=3),
            "location": random.choice(["New York", "Los Angeles", "Chicago", "Houston", "Phoenix"]),
            "acceptsRecommendations": random.choice([True, False]),
        }
        users.append(user)
    return pd.DataFrame(users)

# Gerar filmes
def generate_movies(num_movies=50):
    movies = []
    for _ in range(num_movies):
        movie = {
            "movieId": generate_uuid(),
            "title": f"Movie {_ + 1}",
            "genre": random.choice(genres),
        }
        movies.append(movie)
    return pd.DataFrame(movies)

# Gerar interações
def generate_interactions(users, movies, num_interactions=500):
    interactions = []
    for _ in range(num_interactions):
        user = random.choice(users)
        movie = random.choice(movies)
        interaction = {
            "userId": user["id"],
            "movieId": movie["movieId"],
            "totalInteractions": random.randint(1, 5),
            "totalWatchTime": random.randint(30, 600),  # Tempo assistido em segundos
        }
        interactions.append(interaction)
    return pd.DataFrame(interactions)

# Gerar os dados
users_df = generate_users()
movies_df = generate_movies()
interactions_df = generate_interactions(users_df.to_dict('records'), movies_df.to_dict('records'))

# Salvar os dados em CSV
users_df.to_csv("users.csv", index=False)
movies_df.to_csv("movies.csv", index=False)
interactions_df.to_csv("interactions.csv", index=False)

print("Bases de dados geradas: users.csv, movies.csv, interactions.csv")

Bases de dados geradas: users.csv, movies.csv, interactions.csv


In [44]:
import pandas as pd

# Carregar os dados
users_df = pd.read_csv("users.csv")
movies_df = pd.read_csv("movies.csv")
interactions_df = pd.read_csv("interactions.csv")

# Visualizar os dados carregados
print(users_df.head())
print(movies_df.head())
print(interactions_df.head())


                                     id  age  gender  \
0  a9c69f21-c0d0-40be-9f4a-6f58f82a3268   34    Male   
1  d6cff46c-50e5-432f-9ee6-86b258bf18fe   23  Female   
2  2c312a2c-12d3-4a90-a2d8-90a75c78f935   49  Female   
3  26f6c530-3817-4073-86a4-7e6365ad3224   58  Female   
4  23271e2a-dc11-4130-8d65-404993b007af   38    Male   

                       preferredGenres     location  acceptsRecommendations  
0  ['Thriller', 'Action', 'Adventure']      Houston                    True  
1  ['Horror', 'Thriller', 'Animation']  Los Angeles                    True  
2    ['Horror', 'Thriller', 'Fantasy']     New York                    True  
3        ['Horror', 'Sci-Fi', 'Drama']  Los Angeles                   False  
4        ['Drama', 'Action', 'Horror']  Los Angeles                    True  
                                movieId    title      genre
0  f2a6a426-23b3-4681-add4-933144cd1fc4  Movie 1     Action
1  d5d45be8-2125-43f2-ab14-5d939b0f2031  Movie 2  Animation
2  02c3c56c-40a

In [23]:
# Unir interações com usuários
data = interactions_df.merge(users_df, how="left", left_on="userId", right_on="id")

# Unir com filmes
data = data.merge(movies_df, how="left", left_on="movieId", right_on="movieId")

# Visualizar o dataset combinado
print(data.head())


                                 userId                               movieId  \
0  8a6d20fe-b8e9-4f97-9be5-39994e4e38f2  09dc196b-053c-4c37-b739-7a676a4b854b   
1  66e80df6-a787-4a12-8d49-696bcf0c5237  fdad85ef-1035-4b28-8382-825fa10cf6e0   
2  24502dbf-8617-4434-8ea6-02bdbaddf78b  0795529b-0d54-4d3f-a211-804de8ad823e   
3  dccad89c-9fd5-4f73-bd63-ae430798c9b0  8a7dd3b7-78d3-4556-b928-6631b13945c1   
4  c15ac389-ec63-4c77-bea5-dd374ad7ed7f  fd1eb883-f5cb-4315-ab92-f9b23ba621f4   

   totalInteractions  totalWatchTime                                    id  \
0                  3             324  8a6d20fe-b8e9-4f97-9be5-39994e4e38f2   
1                  1             342  66e80df6-a787-4a12-8d49-696bcf0c5237   
2                  3             177  24502dbf-8617-4434-8ea6-02bdbaddf78b   
3                  5             416  dccad89c-9fd5-4f73-bd63-ae430798c9b0   
4                  2              36  c15ac389-ec63-4c77-bea5-dd374ad7ed7f   

   age  gender                        prefer

#### Treinamento do modelo NN

In [46]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
import tensorflow as tf

# Dados simulados
interactions = pd.read_csv("interactions.csv")  # Carrega as interações
movies = pd.read_csv("movies.csv")  # Carrega os filmes
users = pd.read_csv("users.csv")  # Carrega os usuários

# 1. Merge entre as tabelas
merged_data = interactions.merge(movies, on="movieId", how="inner")
merged_data = merged_data.merge(users, left_on="userId", right_on="id", how="inner")

# 2. Pré-processamento da coluna 'preferredGenres'
# Converter strings para listas reais
merged_data["preferredGenres"] = merged_data["preferredGenres"].apply(lambda x: eval(x) if isinstance(x, str) else [])

# Explodir a coluna 'preferredGenres' para gerar uma linha por gênero
genres_exploded = merged_data.explode("preferredGenres")

# Codificação de gêneros
encoder = OneHotEncoder(sparse_output=False, handle_unknown="ignore")
encoded_genres = encoder.fit_transform(genres_exploded[["preferredGenres"]])

# Criar DataFrame com os gêneros codificados
genres_df = pd.DataFrame(encoded_genres, columns=encoder.get_feature_names_out(["preferredGenres"]))

# Agrupar gêneros codificados por usuário e filme
genres_grouped = pd.concat([genres_exploded[["userId", "movieId"]].reset_index(drop=True), genres_df], axis=1)
genres_grouped = genres_grouped.groupby(["userId", "movieId"]).sum().reset_index()

# Combinar os gêneros codificados com o dataset original
merged_data = merged_data.merge(genres_grouped, on=["userId", "movieId"], how="left")

# 3. Codificação de gênero do usuário
encoded_gender = encoder.fit_transform(merged_data[["gender"]])
gender_df = pd.DataFrame(encoded_gender, columns=encoder.get_feature_names_out(["gender"]))
merged_data = pd.concat([merged_data, gender_df], axis=1)

# 4. Seleção de recursos (features)
X = merged_data[
    ["totalInteractions", "totalWatchTime", "age"] + list(genres_df.columns) + list(gender_df.columns)
]
y = merged_data["acceptsRecommendations"].astype(int)  # Converte para valores binários (0 ou 1)

# 5. Divisão Treino/Teste
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 6. Criação do modelo de rede neural
nn_model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, activation="relu", input_dim=X_train.shape[1], kernel_regularizer=tf.keras.regularizers.l2(0.01)),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(32, activation="relu"),
    tf.keras.layers.Dense(1, activation="sigmoid")  # Saída binária
])

# 7. Compilar o modelo
nn_model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# 8. Treinar o modelo
nn_model.fit(X_train, y_train, epochs=500, batch_size=32, validation_data=(X_test, y_test), shuffle=True)

# 9. Avaliação do modelo
loss, accuracy = nn_model.evaluate(X_test, y_test)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")


Epoch 1/500


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 34ms/step - accuracy: 0.5323 - loss: 14.5545 - val_accuracy: 0.3400 - val_loss: 2.1435
Epoch 2/500
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.4979 - loss: 8.7485 - val_accuracy: 0.6700 - val_loss: 1.8900
Epoch 3/500
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.5209 - loss: 8.2187 - val_accuracy: 0.6500 - val_loss: 1.5929
Epoch 4/500
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.4835 - loss: 6.8450 - val_accuracy: 0.6700 - val_loss: 2.5186
Epoch 5/500
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.4994 - loss: 4.9442 - val_accuracy: 0.6500 - val_loss: 1.3544
Epoch 6/500
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.4502 - loss: 5.5613 - val_accuracy: 0.6400 - val_loss: 2.2593
Epoch 7/500
[1m13/13[0m [32m━━━━━━━━