In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text  # Import this to ensure SentencepieceOp is registered
from sklearn.model_selection import train_test_split

# Install tensorflow-text if not already installed or if there are version issues
!pip install -U tensorflow-text

# ===== 1. Load the data =====
df = pd.read_csv("emocje_20000.csv")  # <-- your large corpus

# Rows missing either the text or its emotion label are dropped.
df = df.dropna(subset=["text", "emotion"])

emotions = sorted(df["emotion"].unique())
print("Emocje:", emotions)

# Two-way mapping between emotion names and integer class ids; the ids
# follow the sorted order of the emotion names.
id_to_emotion = dict(enumerate(emotions))
emotion_to_id = {name: idx for idx, name in id_to_emotion.items()}

df["label"] = df["emotion"].map(emotion_to_id)

# Stratified 80/20 split with a fixed seed.
train_texts, test_texts, y_train, y_test = train_test_split(
    df["text"].tolist(), df["label"].values,
    test_size=0.2, random_state=42, stratify=df["label"],
)

BATCH_SIZE = 32

# Reshuffle the training examples on every epoch: model.fit() does not
# shuffle a tf.data.Dataset by itself, so without this step every epoch would
# iterate the exact same batches in the exact same order.
train_ds = (
    tf.data.Dataset.from_tensor_slices((train_texts, y_train))
    .shuffle(len(train_texts), seed=42, reshuffle_each_iteration=True)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)
# The evaluation split keeps its order; it is only batched and prefetched.
test_ds = (
    tf.data.Dataset.from_tensor_slices((test_texts, y_test))
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

# ===== 2. Language encoder from TF-Hub (multilingual) =====
hub_url = "https://tfhub.dev/google/universal-sentence-encoder-multilingual/3"

# hub.KerasLayer is wrapped in a custom Keras Layer for Keras 3 compatibility.
class UniversalSentenceEncoder(tf.keras.layers.Layer):
    """Keras layer that delegates to a non-trainable TF-Hub layer.

    Args:
        hub_url: TF-Hub handle passed to ``hub.KerasLayer``.
        **kwargs: Forwarded unchanged to ``tf.keras.layers.Layer``.
    """

    def __init__(self, hub_url, **kwargs):
        super().__init__(**kwargs)
        # The hub layer is frozen and declared with a string dtype.
        self.encoder_layer = hub.KerasLayer(
            hub_url,
            trainable=False,
            dtype=tf.string,
        )

    def call(self, inputs):
        """Run ``inputs`` through the wrapped hub layer and return its output."""
        embeddings = self.encoder_layer(inputs)
        return embeddings

encoder = UniversalSentenceEncoder(hub_url, name="use_encoder")

# One output unit per distinct emotion label.
num_classes = len(emotions)

# ===== 3. Emotion model =====
inputs = tf.keras.Input(shape=(), dtype=tf.string, name="text")

# Encoder output followed by two Dense(relu) + Dropout(0.3) stages.
x = encoder(inputs)                            # [batch, 512]
for hidden_units in (256, 128):
    x = tf.keras.layers.Dense(hidden_units, activation="relu")(x)
    x = tf.keras.layers.Dropout(0.3)(x)
outputs = tf.keras.layers.Dense(num_classes, activation="softmax")(x)

model = tf.keras.Model(inputs, outputs)
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=tf.keras.optimizers.Adam(1e-4),
    metrics=["accuracy"],
)

model.summary()

# ===== 4. Training =====
# NOTE(review): validation_data is the same split that is evaluated below, so
# the validation metrics and the final test metrics come from the same data.
# On a large corpus 5-10 epochs can be used.
history = model.fit(train_ds, validation_data=test_ds, epochs=5, verbose=1)

# ===== 5. Evaluation =====
test_loss, test_acc = model.evaluate(test_ds)
print(f"\nTest accuracy: {test_acc:.3f}")

# ===== 6. Predict emotions for arbitrary text =====
def predict_emotion(texts, top_k=3):
    """Print the ``top_k`` most probable emotions for each input text.

    Args:
        texts: A single string or an iterable of strings.
        top_k: How many of the highest-probability emotions to print per text.

    Returns:
        None. Results are written to stdout.
    """
    # A bare string is a batch of one; any other iterable is materialised so
    # it can be both converted to a tensor and zipped with the predictions.
    texts = [texts] if isinstance(texts, str) else list(texts)
    if not texts:
        return  # nothing to predict

    # Pass an explicit string tensor: Keras documents arrays, tensors and
    # datasets as inputs, while a plain Python list of strings can be rejected
    # or interpreted as several separate model inputs.
    probs = model.predict(tf.constant(texts, dtype=tf.string))
    for text, p in zip(texts, probs):
        # Class indices ordered from most to least probable, cut to top_k.
        ranked = np.argsort(p)[::-1][:top_k]
        print(f"\nTekst: {text}")
        for idx in ranked:
            # int() so the dict lookup uses a plain Python key.
            print(f"  {id_to_emotion[int(idx)]}: {p[idx]:.3f}")

# Demo examples:
for demo_text in (
    "To był fascynujący i bardzo satysfakcjonujący eksperyment.",
    "Jestem wściekły, bo cały projekt został zmarnowany.",
    "Wyniki są zaskakujące i trochę niepokojące.",
):
    predict_emotion(demo_text)

Emocje: ['anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness', 'surprise']


Epoch 1/5
 28/500 ━━━━━━━━━━━━━━━━━━━━ 1:15 160ms/step - accuracy: 0.1504 - loss: 1.9446