In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.datasets import imdb
from tensorflow.keras.layers import Embedding, SimpleRNN, LSTM, Bidirectional, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# Load the tag, movie, link and rating tables and turn them into padded
# token sequences (x_train) with binary sentiment labels (y_train).
data_dir = 'C:\\Users\\Сергій\\Documents\\Code\\Current_task\\Math-test'
tags_df = pd.read_csv(data_dir + '\\tags.csv')
movies_df = pd.read_csv(data_dir + '\\movies.csv')
links_df = pd.read_csv(data_dir + '\\links.csv')
ratings_df = pd.read_csv(data_dir + '\\ratings_filtered.csv')

# Assign a unique identifier to each movie only when the file does not already
# carry one. Overwriting an existing movieId with factorized title codes would
# break the key shared with the links/tags/ratings tables that are joined on
# it below.
# NOTE(review): assumes an existing 'movieId' column in movies.csv is the same
# key used by the other CSV files - confirm against the data.
if 'movieId' not in movies_df.columns:
    movies_df['movieId'] = pd.factorize(movies_df['title'])[0]

# Join the tables: movies -> links -> tags on movieId, then attach the rating
# that the tagging user gave to the same movie.
merged_df = movies_df.merge(links_df, on='movieId', how='inner')
merged_df = merged_df.merge(tags_df, on='movieId', how='inner')
merged_df = merged_df.merge(
    ratings_df[['userId', 'movieId', 'rating']],
    on=['userId', 'movieId'],
    how='inner',
)

# Label a rating as positive (1) when it reaches the threshold, else negative (0).
threshold = 3.5
merged_df['positive'] = (merged_df['rating'] >= threshold).astype(int)

# Build the model input text as "<tag> <title>". The frame is constructed
# directly (not by assigning into a slice of merged_df), which avoids pandas'
# SettingWithCopyWarning. Missing tags become empty strings so that every row
# is a real string the tokenizer can process.
selected_df = pd.DataFrame({
    'positive': merged_df['positive'],
    'text': merged_df['tag'].fillna('').astype(str) + ' ' + merged_df['title'].astype(str),
})

# Tokenize the text, keeping the max_words most frequent words; everything
# else maps to the out-of-vocabulary token.
max_words = 5000
tokenizer = Tokenizer(num_words=max_words, oov_token="<OOV>")
tokenizer.fit_on_texts(selected_df['text'])
x_train = tokenizer.texts_to_sequences(selected_df['text'])
# Pad/truncate every sequence to 300 tokens; this must match the
# input_length given to the Embedding layers of the models.
x_train = pad_sequences(x_train, maxlen=300)
y_train = selected_df['positive'].values

# Build four binary classifiers that differ only in their recurrent part:
# a simple RNN, an LSTM, a bidirectional LSTM and a three-layer (deep) LSTM.
def _build_classifier(*recurrent_layers):
    """Return an Embedding -> recurrent_layers -> Dense head Sequential model.

    The recurrent layers are inserted, in the order given, between the
    embedding and the Dense(64)/Dropout(0.5)/Dense(1, sigmoid) head.
    """
    return Sequential([
        Embedding(max_words, 128, input_length=300),
        *recurrent_layers,
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ])


# RNN
model_rnn = _build_classifier(SimpleRNN(128))

# LSTM
model_lstm = _build_classifier(LSTM(128))

# Bidirectional LSTM
model_bidirectional_lstm = _build_classifier(Bidirectional(LSTM(128)))

# Deep LSTM: the first two layers return full sequences so the next LSTM
# receives one vector per time step.
model_deep_lstm = _build_classifier(
    LSTM(128, return_sequences=True),
    LSTM(128, return_sequences=True),
    LSTM(128),
)

# (display name, model) pairs consumed by the training loop.
models = [
    ('RNN', model_rnn),
    ('LSTM', model_lstm),
    ('Bidirectional LSTM', model_bidirectional_lstm),
    ('Deep LSTM', model_deep_lstm),
]

# Train every model and plot its accuracy and loss curves.
for model_name, model in models:
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    # Stop once validation loss has not improved for 5 epochs and roll the
    # model back to its best weights. In the recorded run the validation loss
    # was lowest in the first epoch and kept rising while the training loss
    # fell, so running all 50 epochs only deepened the overfitting.
    # A fresh callback is created per model so no state is shared between runs.
    early_stopping = EarlyStopping(monitor='val_loss',
                                   patience=5,
                                   restore_best_weights=True)

    # epochs=50 is now an upper bound; validation_split holds out 20% of the
    # samples for the val_* metrics.
    history = model.fit(x_train, y_train,
                        epochs=50,
                        batch_size=64,
                        validation_split=0.2,
                        callbacks=[early_stopping],
                        verbose=2)

    # Plot training vs. validation accuracy (left) and loss (right).
    fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 4))

    acc_ax.plot(history.history['accuracy'], label='Accuracy')
    acc_ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
    acc_ax.set_xlabel('Epoch')
    acc_ax.set_ylabel('Accuracy')
    acc_ax.legend()
    acc_ax.set_title(f'{model_name} Training and Validation Accuracy')

    loss_ax.plot(history.history['loss'], label='Loss')
    loss_ax.plot(history.history['val_loss'], label='Validation Loss')
    loss_ax.set_xlabel('Epoch')
    loss_ax.set_ylabel('Loss')
    loss_ax.legend()
    loss_ax.set_title(f'{model_name} Training and Validation Loss')

    # Keep the two subplots' labels and titles from overlapping.
    fig.tight_layout()
    plt.show()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_df['text'] = selected_df['tag'] + ' ' + selected_df['title']


Epoch 1/50
26/26 - 24s - loss: 0.4885 - accuracy: 0.8072 - val_loss: 0.3795 - val_accuracy: 0.8726 - 24s/epoch - 933ms/step
Epoch 2/50
26/26 - 14s - loss: 0.4222 - accuracy: 0.8386 - val_loss: 0.3817 - val_accuracy: 0.8726 - 14s/epoch - 542ms/step
Epoch 3/50
26/26 - 14s - loss: 0.2854 - accuracy: 0.8819 - val_loss: 0.4373 - val_accuracy: 0.8486 - 14s/epoch - 524ms/step
Epoch 4/50
26/26 - 14s - loss: 0.1701 - accuracy: 0.9355 - val_loss: 0.4219 - val_accuracy: 0.8702 - 14s/epoch - 549ms/step
Epoch 5/50
26/26 - 14s - loss: 0.1428 - accuracy: 0.9416 - val_loss: 0.4817 - val_accuracy: 0.7740 - 14s/epoch - 527ms/step
Epoch 6/50
26/26 - 13s - loss: 0.1198 - accuracy: 0.9512 - val_loss: 0.5443 - val_accuracy: 0.7380 - 13s/epoch - 513ms/step
Epoch 7/50
26/26 - 14s - loss: 0.0911 - accuracy: 0.9584 - val_loss: 0.5421 - val_accuracy: 0.7812 - 14s/epoch - 525ms/step
Epoch 8/50
26/26 - 14s - loss: 0.0863 - accuracy: 0.9639 - val_loss: 0.6303 - val_accuracy: 0.6923 - 14s/epoch - 536ms/step
Epoch 9/