In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder



# Load data from CSV

In [2]:
data_path = 'Emoji_Sentiment_Data_200.csv'
data = pd.read_csv(data_path)

# Map emojis to indices

In [3]:
label_encoder = LabelEncoder()
data['Emoji_index'] = label_encoder.fit_transform(data['Emoji'])
data['Sentiment'] = label_encoder.fit_transform(data['Sentiment'])

# Split data into features and target

In [4]:
X = data['Emoji_index'].values
y = data['Sentiment'].values
y = to_categorical(y)  # Convert labels to one-hot encoding

# Split data into training and testing sets

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model parameters

In [6]:
max_emojis = data['Emoji_index'].nunique()
embedding_size = 50

# Build the model

In [7]:
model = Sequential([
    Embedding(max_emojis, embedding_size, input_length=1),
    LSTM(50),
    Dropout(0.5),
    Dense(y.shape[1], activation='softmax')
])

2024-08-27 16:02:03.804885: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2
2024-08-27 16:02:03.804910: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2024-08-27 16:02:03.804917: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2024-08-27 16:02:03.805398: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-08-27 16:02:03.805422: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


# Compile the model

In [8]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model

In [9]:
model.fit(X_train, y_train, epochs=10, validation_split=0.2)

Epoch 1/10


ValueError: Input 0 of layer "lstm" is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: (None, 50)

# Save the model

In [None]:
model.save('emoji_sentiment_model.h5')

# Load and evaluate the model

In [None]:
loaded_model = tf.keras.models.load_model('emoji_sentiment_model.h5')
loss, accuracy = loaded_model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy:.2f}')

In [11]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical

# Load the dataset
data_path = 'Emoji_Sentiment_Data_200.csv'
data = pd.read_csv(data_path)

# Encode emojis to indices
emoji_encoder = LabelEncoder()
data['Emoji_index'] = emoji_encoder.fit_transform(data['Emoji'])

# Encode sentiments to categorical labels
sentiment_encoder = LabelEncoder()
data['Sentiment_label'] = sentiment_encoder.fit_transform(data['Sentiment'])
y = to_categorical(data['Sentiment_label'])  # Convert labels to one-hot encoding

# Split data into features and target
X = data['Emoji_index'].values
X = X.reshape(-1, 1)  # Reshape for LSTM input

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define maximum number of unique emojis and embedding size
max_emojis = data['Emoji_index'].nunique()
embedding_size = 50

# Build the LSTM model
model = Sequential([
    Embedding(input_dim=max_emojis, output_dim=embedding_size, input_length=1),
    LSTM(50),
    Dropout(0.5),
    Dense(y.shape[1], activation='softmax')
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=100, validation_split=0.2)

# Save the model
model.save('emoji_sentiment_model.h5')

Epoch 1/100




[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 66ms/step - accuracy: 0.2796 - loss: 1.0990 - val_accuracy: 0.5000 - val_loss: 1.0978
Epoch 2/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.4089 - loss: 1.0968 - val_accuracy: 0.4211 - val_loss: 1.0973
Epoch 3/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.4238 - loss: 1.0961 - val_accuracy: 0.4211 - val_loss: 1.0968
Epoch 4/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.6024 - loss: 1.0917 - val_accuracy: 0.4211 - val_loss: 1.0963
Epoch 5/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.6904 - loss: 1.0875 - val_accuracy: 0.4211 - val_loss: 1.0956
Epoch 6/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.6171 - loss: 1.0856 - val_accuracy: 0.4211 - val_loss: 1.0950
Epoch 7/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0



In [14]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import to_categorical

# Function to extract emojis from a text
def extract_emojis(text):
    return [char for char in text if char in emoji_encoder.classes_]

# Load the trained model
model = load_model('emoji_sentiment_model.h5')

# Load the encoders
# Assuming `emoji_encoder` and `sentiment_encoder` are saved and loaded correctly
# For simplicity, redefining them here. Make sure to use the actual objects used during training.
emoji_encoder = LabelEncoder()
emoji_encoder.classes_ = np.load('emoji_classes.npy')  # Load previously saved emoji classes

# Test data
test_sentences = [
    "I love this new song 😍!",
    "I am not sure about this 😐",
    "This is absolutely terrible 😞",
    "Party time! 🎉"
]

# Process each sentence
for sentence in test_sentences:
    emojis = extract_emojis(sentence)
    if emojis:
        # Convert emojis to indices
        emoji_indices = emoji_encoder.transform(emojis)
        emoji_indices = np.array(emoji_indices).reshape(-1, 1)

        # Predict sentiment
        predictions = model.predict(emoji_indices)
        predicted_sentiments = np.argmax(predictions, axis=1)
        predicted_sentiments = [sentiment_encoder.inverse_transform([pred])[0] for pred in predicted_sentiments]

        print(f"Sentence: '{sentence}'")
        for emoji, sentiment in zip(emojis, predicted_sentiments):
            print(f"  Emoji: '{emoji}' - Sentiment: '{sentiment}'")
    else:
        print(f"Sentence: '{sentence}' - No emojis found")



ValueError: Object arrays cannot be loaded when allow_pickle=False

In [13]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model

# Load the dataset
data_path = 'Emoji_Sentiment_Data_200.csv'
data = pd.read_csv(data_path)

# Encode emojis to indices
emoji_encoder = LabelEncoder()
data['Emoji_index'] = emoji_encoder.fit_transform(data['Emoji'])
# Save emoji classes for later use
np.save('emoji_classes.npy', emoji_encoder.classes_)

# Encode sentiments to categorical labels
sentiment_encoder = LabelEncoder()
data['Sentiment_label'] = sentiment_encoder.fit_transform(data['Sentiment'])
y = to_categorical(data['Sentiment_label'])  # Convert labels to one-hot encoding
# Save sentiment classes for later use
np.save('sentiment_classes.npy', sentiment_encoder.classes_)

# Split data into features and target
X = data['Emoji_index'].values
X = X.reshape(-1, 1)  # Reshape for LSTM input

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define maximum number of unique emojis and embedding size
max_emojis = data['Emoji_index'].nunique()
embedding_size = 50

# Build the LSTM model
model = Sequential([
    Embedding(input_dim=max_emojis, output_dim=embedding_size, input_length=1),
    LSTM(50),
    Dropout(0.5),
    Dense(y.shape[1], activation='softmax')
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=100, validation_split=0.2)

# Save the model
model.save('emoji_sentiment_model.h5')

# Load the model (optional, demonstration)
loaded_model = load_model('emoji_sentiment_model.h5')

# Evaluate the model on the test set
test_loss, test_acc = loaded_model.evaluate(X_test, y_test)
print(f'Test Accuracy: {test_acc:.2f}')

Epoch 1/100




[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 70ms/step - accuracy: 0.3331 - loss: 1.0981 - val_accuracy: 0.5000 - val_loss: 1.0975
Epoch 2/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.4177 - loss: 1.0966 - val_accuracy: 0.4737 - val_loss: 1.0969
Epoch 3/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - accuracy: 0.4649 - loss: 1.0950 - val_accuracy: 0.4737 - val_loss: 1.0966
Epoch 4/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.4758 - loss: 1.0939 - val_accuracy: 0.4737 - val_loss: 1.0962
Epoch 5/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.4898 - loss: 1.0919 - val_accuracy: 0.4474 - val_loss: 1.0960
Epoch 6/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.5132 - loss: 1.0885 - val_accuracy: 0.4474 - val_loss: 1.0958
Epoch 7/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0



[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - accuracy: 0.3993 - loss: 1.6193 
Test Accuracy: 0.40


In [15]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import to_categorical

# Function to extract emojis from text
def extract_emojis(text):
    return [char for char in text if char in emoji_encoder.classes_]

# Load the model
model = load_model('emoji_sentiment_model.h5')

# Load the encoders
emoji_encoder = LabelEncoder()
emoji_encoder.classes_ = np.load('emoji_classes.npy', allow_pickle=True)  # Load emoji classes

sentiment_encoder = LabelEncoder()
sentiment_encoder.classes_ = np.load('sentiment_classes.npy', allow_pickle=True)  # Load sentiment classes

# Example sentences with emojis
test_sentences = [
    "I love this new song 😍!",
    "I am not sure about this 😐",
    "This is absolutely terrible 😞",
    "Party time! 🎉"
]

# Process each sentence
for sentence in test_sentences:
    emojis = extract_emojis(sentence)
    if emojis:
        emoji_indices = emoji_encoder.transform(emojis)
        emoji_indices = np.array(emoji_indices).reshape(-1, 1)
        predictions = model.predict(emoji_indices)
        predicted_sentiments = np.argmax(predictions, axis=1)
        predicted_sentiments = [sentiment_encoder.inverse_transform([pred])[0] for pred in predicted_sentiments]

        print(f"Sentence: '{sentence}'")
        for emoji, sentiment in zip(emojis, predicted_sentiments):
            print(f"  Emoji: '{emoji}' - Sentiment: '{sentiment}'")
    else:
        print(f"Sentence: '{sentence}' - No emojis found")



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 972ms/step
Sentence: 'I love this new song 😍!'
  Emoji: '😍' - Sentiment: 'neutral'
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
Sentence: 'I am not sure about this 😐'
  Emoji: '😐' - Sentiment: 'neutral'
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
Sentence: 'This is absolutely terrible 😞'
  Emoji: '😞' - Sentiment: 'negative'
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
Sentence: 'Party time! 🎉'
  Emoji: '🎉' - Sentiment: 'positive'


In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from transformers import ErnieModel, ErnieTokenizer
import torch
from torch import nn
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from tensorflow.keras.utils import to_categorical

# Load the dataset
data_path = 'Emoji_Sentiment_Data_200.csv'
data = pd.read_csv(data_path)

# Encode emojis to indices
emoji_encoder = LabelEncoder()
data['Emoji_index'] = emoji_encoder.fit_transform(data['Emoji'])

# Encode sentiments to categorical labels
sentiment_encoder = LabelEncoder()
data['Sentiment_label'] = sentiment_encoder.fit_transform(data['Sentiment'])
y = data['Sentiment_label']

# Load ERINE tokenizer and model
tokenizer = ErnieTokenizer.from_pretrained('nghuyong/ernie-1.0')
ernie_model = ErnieModel.from_pretrained('nghuyong/ernie-1.0')

# Freeze ERINE layers (optional)
for param in ernie_model.parameters():
    param.requires_grad = False

# Tokenize emoji indices
X = data['Emoji_index'].apply(lambda x: tokenizer.encode(str(x), return_tensors='pt')[0])

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X.tolist(), y, test_size=0.2, random_state=42)

# Convert to tensor for PyTorch
X_train = torch.stack([torch.tensor(x) for x in X_train])
y_train = torch.tensor(y_train.values)
X_test = torch.stack([torch.tensor(x) for x in X_test])
y_test = torch.tensor(y_test.values)

# Define a custom model combining ERINE and LSTM
class ErineLSTMModel(nn.Module):
    def __init__(self, ernie_model, lstm_hidden_size, num_labels):
        super(ErineLSTMModel, self).__init__()
        self.ernie = ernie_model
        self.lstm = nn.LSTM(input_size=ernie_model.config.hidden_size, hidden_size=lstm_hidden_size, batch_first=True)
        self.dropout = nn.Dropout(0.5)
        self.fc = nn.Linear(lstm_hidden_size, num_labels)
        
    def forward(self, input_ids):
        # Get ERINE embeddings
        with torch.no_grad():  # Disable gradients for ERINE if frozen
            outputs = self.ernie(input_ids)
            ernie_output = outputs.last_hidden_state
        
        # Pass embeddings through LSTM
        lstm_output, _ = self.lstm(ernie_output)
        lstm_output = lstm_output[:, -1, :]  # Take output from the last time step
        
        # Apply dropout and fully connected layer
        lstm_output = self.dropout(lstm_output)
        logits = self.fc(lstm_output)
        
        return logits

# Create the model
num_labels = len(data['Sentiment'].unique())
lstm_hidden_size = 50
model = ErineLSTMModel(ernie_model, lstm_hidden_size, num_labels)

# Define optimizer and loss function
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)
loss_fn = nn.CrossEntropyLoss()

# Training loop
model.train()
for epoch in range(3):  # Train for 3 epochs
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = loss_fn(outputs, y_train)
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch+1} loss: {loss.item()}")

# Evaluation
model.eval()
with torch.no_grad():
    outputs = model(X_test)
    predictions = torch.argmax(outputs, dim=-1)
    accuracy = accuracy_score(y_test.numpy(), predictions.numpy())
    print(f"Test Accuracy: {accuracy}")


ImportError: cannot import name 'ErnieTokenizer' from 'transformers' (C:\Users\Micheal\AppData\Local\Programs\Python\Python312\Lib\site-packages\transformers\__init__.py)