In [None]:
import sys
import os
sys.path.append('F:/text-autocomplete/src')

import torch
import pandas as pd
import numpy as np
from data_utils import prepare_dataset
from next_token_dataset import NextTokenDataset
from lstm_model import LSTMAutoComplete
from lstm_train import train_model
from eval_lstm import calculate_rouge_lstm
from eval_transformer_pipeline import evaluate_transformer
from torch.utils.data import DataLoader

# Stage 1: data preparation.
# prepare_dataset is given the raw tweets file and the data directory; the
# three values it returns are bound to train_df / val_df / test_df.
# NOTE(review): presumably it also writes train.csv / val.csv / test.csv into
# the data directory, since the later stages load those files — confirm in
# data_utils.
print("Этап 1: Подготовка данных")
raw_tweets_path = "F:/text-autocomplete/data/tweets.txt"
data_dir = "F:/text-autocomplete/data"
train_df, val_df, test_df = prepare_dataset(raw_tweets_path, data_dir)

# Stage 2: build the datasets, loaders and model, then train the LSTM.
print("Этап 2: Обучение модели LSTM")

# Run on the GPU when CUDA is available, otherwise on the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)

train_csv_path = 'F:/text-autocomplete/data/train.csv'
val_csv_path = 'F:/text-autocomplete/data/val.csv'
train_dataset = NextTokenDataset(train_csv_path)
val_dataset = NextTokenDataset(val_csv_path)

# Only the training loader shuffles; validation keeps the dataset order.
loader_batch_size = 256
train_loader = DataLoader(train_dataset, batch_size=loader_batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=loader_batch_size, shuffle=False)

# The vocabulary size comes from the training dataset; the remaining
# hyperparameters are fixed for this experiment.
lstm_hparams = {
    "vocab_size": train_dataset.vocab_size,
    "embedding_dim": 128,
    "hidden_dim": 128,
    "num_layers": 2,
}
model = LSTMAutoComplete(**lstm_hparams).to(device)

# train_model's three results are bound to the trained model, the training
# losses and the validation ROUGE scores.
training_result = train_model(
    model, train_loader, val_loader, train_dataset.vocab, device
)
trained_model, train_losses, val_rouge_scores = training_result

# Stage 3: evaluate the trained LSTM on the test split and report ROUGE.
# The banner previously read "LST"; it now says "LSTM", matching the stage 2
# banner and the results header below.
print("Этап 3: Оценка модели LSTM")
test_dataset = NextTokenDataset('F:/text-autocomplete/data/test.csv')
test_loader = DataLoader(test_dataset, batch_size=256, shuffle=False)

# The evaluator receives train_dataset.vocab, not a vocabulary taken from
# test_dataset.
# NOTE(review): test_dataset is constructed independently of train_dataset;
# if NextTokenDataset builds its vocabulary per file, the token ids in
# test_loader may not line up with train_dataset.vocab — confirm.
rouge1_lstm, rouge2_lstm, lstm_examples = calculate_rouge_lstm(
    trained_model, test_loader, train_dataset.vocab, device
)

# Plain string: there are no placeholders, so no f-prefix is needed.
print("LSTM Test Results:")
print(f"ROUGE-1: {rouge1_lstm:.4f}")
print(f"ROUGE-2: {rouge2_lstm:.4f}")

# Stage 4: evaluate the transformer baseline.
# evaluate_transformer is called with no arguments; its three results are
# bound to ROUGE-1, ROUGE-2 and the example list used in the comparison below.
print("Этап 4: Оценка модели Transformer")
transformer_result = evaluate_transformer()
rouge1_transformer, rouge2_transformer, transformer_examples = transformer_result

# Stage 5: print the ROUGE-1 / ROUGE-2 scores of both models side by side,
# formatted to four decimal places.
print("Этап 5: Сравнение и выводы")

print("\nСравнение результатов")
# The transformer header carries a leading newline so that a blank line
# separates the two score groups.
score_table = (
    ("LSTM Model:", rouge1_lstm, rouge2_lstm),
    ("\nTransformer Model (distilgpt2):", rouge1_transformer, rouge2_transformer),
)
for model_header, rouge1_score, rouge2_score in score_table:
    print(model_header)
    print(f"  ROUGE-1: {rouge1_score:.4f}")
    print(f"  ROUGE-2: {rouge2_score:.4f}")

# Show the first two examples from each model. Every example is an
# (input tokens, predicted tokens, target tokens) triple, printed as
# space-joined text.
print("\nПример сравнения")

print("LSTM Examples:")
first_lstm_examples = lstm_examples[:2]
for example_no, (input_tokens, pred_tokens, target_tokens) in enumerate(first_lstm_examples, start=1):
    print(f"Example {example_no}:")
    print(f"  Input: {' '.join(input_tokens)}")
    print(f"  Pred: {' '.join(pred_tokens)}")
    print(f"  Target: {' '.join(target_tokens)}")
    print()

print("Transformer Examples:")
first_transformer_examples = transformer_examples[:2]
for example_no, (input_tokens, pred_tokens, target_tokens) in enumerate(first_transformer_examples, start=1):
    print(f"Example {example_no}:")
    # Transformer examples are trimmed for display: only the last five input
    # tokens and the first ten target tokens are shown.
    print(f"  Input: {' '.join(input_tokens[-5:])}")
    print(f"  Pred: {' '.join(pred_tokens)}")
    print(f"  Target: {' '.join(target_tokens[:10])}")
    print()

# Final recommendation. The decision uses ROUGE-1 only: the transformer is
# recommended when its ROUGE-1 is strictly greater than the LSTM's; in every
# other case (including a tie) the LSTM message is printed.
print("\nРекомендации")
transformer_is_better = rouge1_transformer > rouge1_lstm
if transformer_is_better:
    recommendation_lines = (
        "Модель Transformer показывает лучшие результаты по показателям ROUGE.",
        "Рекомендация: Используйте модель Transformer, если позволяют ограничения памяти.",
    )
else:
    recommendation_lines = (
        "Модель LSTM демонстрирует конкурентные характеристики.",
        "Рекомендация: Используйте модель LSTM для повышения эффективности использования памяти на мобильных устройствах.",
    )
for recommendation_line in recommendation_lines:
    print(recommendation_line)

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\scheb\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
  from .autonotebook import tqdm as notebook_tqdm


Этап 1: Подготовка данных
