In [43]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import numpy as np

# 1. Data Collection
# Example data (replace with your own dataset)
texts = ["I love pizza", "I'm feeling happy", "This movie is terrible", "I'm feeling sad", "That joke is hilarious", "I feel down", "I'm laughing so hard"]
emojis = ["❤️", "😊", "😡", "😔", "😂", "😔", "😂"]

# 2. Data Preprocessing
# Encode emojis into numerical labels.
# sorted() pins the emoji -> id assignment: iterating a bare set() of strings
# can yield a different order on each kernel start, which would silently
# renumber the classes between runs.
label_map = {emoji: i for i, emoji in enumerate(sorted(set(emojis)))}
# Reverse lookup (id -> emoji), built once instead of scanning label_map per prediction.
inverse_label_map = {i: emoji for emoji, i in label_map.items()}
labels = [label_map[emoji] for emoji in emojis]

# 3. Feature Extraction
# Convert text data into TF-IDF vectors
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(texts)

# 4. Model Selection
# Choose a machine learning model
model = RandomForestClassifier(n_estimators=100, random_state=42)

# 5. Training
# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=42)

# Train the model
model.fit(X_train, y_train)

# 6. Evaluation
# Evaluate model performance on the test set.
# NOTE: with only 7 samples the held-out split is a couple of rows, and three of
# the emojis occur exactly once above, so a held-out row of such a class can
# never be predicted correctly. Treat this score as a smoke test, not a metric.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


# 7. Inference
def predict_emoji_tfidf(text, vectorizer=vectorizer, model=model, inverse_label_map=inverse_label_map):
    """Return the emoji predicted for one sentence by the TF-IDF + random forest model.

    The fitted objects are bound as defaults at definition time, so the helper
    keeps working even if a later cell rebinds the global names.

    Args:
        text: A single input sentence.
        vectorizer: Fitted TF-IDF vectorizer used to featurize `text`.
        model: Fitted classifier that predicts a numeric label id.
        inverse_label_map: Mapping from label id back to its emoji.

    Returns:
        The emoji string for the predicted label id.
    """
    text_vectorized = vectorizer.transform([text])
    predicted_label = model.predict(text_vectorized)[0]
    return inverse_label_map[predicted_label]


# Use the trained model to predict emojis for new text inputs
for new_text in ["I'm feeling sad", "I'm feeling happy"]:
    print("Predicted Emoji:", predict_emoji_tfidf(new_text))


Accuracy: 0.0
Predicted Emoji: 😔
Predicted Emoji: 😔


In [44]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.metrics import accuracy_score
from gensim.models import Word2Vec
import numpy as np

# 1. Data Collection
# Example data (replace with your own dataset)
base_texts = ["I love pizza", "I'm feeling happy", "This movie is terrible", "I'm feeling sad", "That joke is hilarious", "I feel down", "I'm laughing so hard"]
base_emojis = ["❤️", "😊", "😡", "😔", "😂", "😔", "😂"]

# 2. Data Preprocessing
# Add more emojis and corresponding texts to the dataset
additional_texts = ["I'm bored", "This food is delicious", "I'm feeling excited", "I'm tired", "This book is boring"]
additional_emojis = ["😐", "😋", "😃", "😴", "😫"]
# Build new lists rather than extending in place, so the base data stays intact.
texts = base_texts + additional_texts
emojis = base_emojis + additional_emojis

# Encode emojis into numerical labels.
# sorted() pins the emoji -> id assignment: iterating a bare set() of strings
# can yield a different order on each kernel start.
label_map = {emoji: i for i, emoji in enumerate(sorted(set(emojis)))}
# Reverse lookup (id -> emoji), built once instead of scanning label_map per prediction.
inverse_label_map = {i: emoji for emoji, i in label_map.items()}
labels = [label_map[emoji] for emoji in emojis]

# 3. Feature Extraction
VECTOR_SIZE = 100  # dimensionality of the Word2Vec embeddings

# Train Word2Vec embeddings on the text data.
# workers=1: gensim's documentation lists single-threaded training as a
# requirement for reproducible vectors (multi-threaded scheduling adds jitter).
word2vec_model = Word2Vec([text.split() for text in texts], vector_size=VECTOR_SIZE, window=5, min_count=1, workers=1)


def embed_text(text, wv=word2vec_model.wv, vector_size=VECTOR_SIZE):
    """Return the mean Word2Vec vector of the in-vocabulary tokens of `text`.

    Tokens come from whitespace splitting, so matching is case- and
    punctuation-sensitive, exactly as during embedding training above.

    Args:
        text: A single input sentence.
        wv: Trained word vectors used for lookup.
        vector_size: Length of the zero vector returned when no token is known.

    Returns:
        A 1-D array of length `vector_size`.
    """
    vectors = [wv[word] for word in text.split() if word in wv]
    if not vectors:
        # No known token: fall back to the origin instead of averaging nothing.
        return np.zeros(vector_size)
    return np.mean(vectors, axis=0)


# Convert text data into Word2Vec embeddings
X = np.array([embed_text(text) for text in texts])

# 4. Model Selection
# Choose machine learning models.
# With a single member, the hard vote simply mirrors that member; the wrapper
# is kept so further estimators can be added to the list.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
voting_clf = VotingClassifier(estimators=[('rf', rf_model)], voting='hard')

# 5. Training
# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=42)

# Train the models.
# rf_model is fitted on its own as well because it is used directly below
# (per scikit-learn's docs, VotingClassifier fits clones of its estimators).
rf_model.fit(X_train, y_train)
voting_clf.fit(X_train, y_train)

# 6. Evaluation
# Evaluate model performance on the test set.
# NOTE: most emojis above occur exactly once, so any such row that lands in the
# test split belongs to a class the models never saw; the score is a smoke test.
rf_accuracy = accuracy_score(y_test, rf_model.predict(X_test))
voting_accuracy = accuracy_score(y_test, voting_clf.predict(X_test))
print("Random Forest Accuracy:", rf_accuracy)
print("Voting Classifier Accuracy:", voting_accuracy)

# 7. Inference
# Use the trained models to predict emojis for new text inputs
new_text = ["I'm feeling bored"]
# The classifiers expect a 2-D batch, hence the reshape to a single row.
new_text_embedding = embed_text(new_text[0]).reshape(1, -1)
predicted_label_rf = rf_model.predict(new_text_embedding)[0]
predicted_label_voting = voting_clf.predict(new_text_embedding)[0]
predicted_emoji_rf = inverse_label_map[predicted_label_rf]
predicted_emoji_voting = inverse_label_map[predicted_label_voting]
print("Predicted Emoji (Random Forest):", predicted_emoji_rf)
print("Predicted Emoji (Voting Classifier):", predicted_emoji_voting)

Random Forest Accuracy: 0.0
Voting Classifier Accuracy: 0.0
Predicted Emoji (Random Forest): 😔
Predicted Emoji (Voting Classifier): 😔


In [60]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Sample dataset (text, emoji)
data = [("love", "❤️"),
        ("sad", "😢"),
        ("happy","😀"),
        ("Dead","💀"),
        ("Side Eye", "👀"),
        ("laughing","😂"),
        ("sleepy", "😴")

        # Add more samples
       ]

# Preprocess data
texts, emoji_labels = zip(*data)
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
vocab_size = len(tokenizer.word_index) + 1  # +1 because index 0 is used for padding below
encoded_texts = tokenizer.texts_to_sequences(texts)
# Size the padding from the tokenizer's own output rather than from str.split():
# the two can disagree on token counts, which would truncate training samples.
max_length = max(len(sequence) for sequence in encoded_texts)
padded_texts = pad_sequences(encoded_texts, maxlen=max_length, padding='post')

# Convert emojis to integer class ids (the loss below is the sparse variant,
# so no one-hot encoding is needed).
# sorted() pins the emoji -> id assignment: iterating a bare set() of strings
# can yield a different order on each kernel start.
label_dict = {label: i for i, label in enumerate(sorted(set(emoji_labels)))}
num_classes = len(label_dict)
labels = [label_dict[label] for label in emoji_labels]

# Define model: token embedding -> LSTM -> softmax over the emoji classes
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=32, input_length=max_length),
    LSTM(64),
    Dense(num_classes, activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train model
# NOTE(review): no random seed is set in this cell, so the learned weights (and
# possibly the predictions) may differ between runs.
model.fit(padded_texts, np.array(labels), epochs=10, batch_size=1)

# Use the trained model to predict emoji for new text
new_text = "I'm happy"
encoded_new_text = tokenizer.texts_to_sequences([new_text])
padded_new_text = pad_sequences(encoded_new_text, maxlen=max_length, padding='post')
predicted_probabilities = model.predict(padded_new_text)
predicted_label = np.argmax(predicted_probabilities)
# label_dict maps emoji -> id; invert it to translate the winning id back.
predicted_emoji = {idx: emoji for emoji, idx in label_dict.items()}[int(predicted_label)]

print("Predicted Emoji:", predicted_emoji)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Predicted Emoji: 😀


In [62]:
import numpy as np

def predict_emoji(text):
    """Return the emoji the trained model assigns to a single sentence.

    Reads the notebook-level `tokenizer`, `max_length`, `model` and
    `label_dict` created by the training cell.

    Args:
        text: A single input sentence.

    Returns:
        The emoji whose class id has the highest predicted probability.
    """
    encoded = tokenizer.texts_to_sequences([text])
    # Pad to the same width the model was trained with.
    padded = pad_sequences(encoded, maxlen=max_length, padding='post')
    probabilities = model.predict(padded)
    label = np.argmax(probabilities)
    # label_dict maps emoji -> id, so pick the emoji carrying the winning id.
    return [emoji for emoji, idx in label_dict.items() if idx == label][0]


# Use the trained model to predict emoji for new text
sample_texts = [
    "I'm happy",
    "I'm dead",
    "I'm in love",
    "I'm sad",
    "I gave her the biggest Side Eye",
    "She was like she was in love but I looked at her like she was crazy because i know fs she was never in love",
    "Alveera is crazy",
    "Alveera is side eye",
    "My groups were so fucking useless, im so sad",
    "im sad, i didnt get enough sleep",
    "Laughing my ass off",
    "i just want to sleep, im sad",
]

for new_text in sample_texts:
    print("Predicted Emoji:", predict_emoji(new_text))

Predicted Emoji: 😀
Predicted Emoji: 💀
Predicted Emoji: ❤️
Predicted Emoji: 😢
Predicted Emoji: 👀
Predicted Emoji: ❤️
Predicted Emoji: ❤️
Predicted Emoji: 👀
Predicted Emoji: 😢
Predicted Emoji: 😢
Predicted Emoji: 😂
Predicted Emoji: 😢


In [63]:
import numpy as np

# Classify one sentence with the trained model and print its emoji.
new_text = "I'm happy"

# Encode with the fitted tokenizer, then pad to the training width.
encoded_new_text = tokenizer.texts_to_sequences([new_text])
padded_new_text = pad_sequences(
    encoded_new_text,
    maxlen=max_length,
    padding='post',
)

# Highest-probability class id, translated back through label_dict (emoji -> id).
predicted_probabilities = model.predict(padded_new_text)
predicted_label = np.argmax(predicted_probabilities)
matching_emojis = [emoji for emoji in label_dict if label_dict[emoji] == predicted_label]
predicted_emoji = matching_emojis[0]

print("Predicted Emoji:", predicted_emoji)

Predicted Emoji: 😀


In [64]:
import numpy as np

# Classify one sentence with the trained model and print its emoji.
new_text = "im so in love with them"

# Encode with the fitted tokenizer, then pad to the training width.
encoded_new_text = tokenizer.texts_to_sequences([new_text])
padding_options = {"maxlen": max_length, "padding": 'post'}
padded_new_text = pad_sequences(encoded_new_text, **padding_options)

# Highest-probability class id, translated back through label_dict (emoji -> id).
predicted_probabilities = model.predict(padded_new_text)
predicted_label = np.argmax(predicted_probabilities)
matching_emojis = [emoji for emoji in label_dict if label_dict[emoji] == predicted_label]
predicted_emoji = matching_emojis[0]

print("Predicted Emoji:", predicted_emoji)

Predicted Emoji: ❤️


In [65]:
import numpy as np

# Classify one sentence with the trained model and print its emoji.
new_text = "im gonna be DEAD if i show my parents my grades"

# Step 1: sentence -> token ids -> fixed-width row.
encoded_new_text = tokenizer.texts_to_sequences([new_text])
padded_new_text = pad_sequences(
    encoded_new_text,
    padding='post',
    maxlen=max_length,
)

# Step 2: class probabilities -> winning class id.
predicted_probabilities = model.predict(padded_new_text)
predicted_label = np.argmax(predicted_probabilities)

# Step 3: class id -> emoji (label_dict stores emoji -> id).
candidates = [emoji for emoji, idx in label_dict.items() if predicted_label == idx]
predicted_emoji = candidates[0]

print("Predicted Emoji:", predicted_emoji)

Predicted Emoji: 💀


In [69]:
import numpy as np

# Classify one (long) sentence with the trained model and print its emoji.
new_text = "he literally said that he would be with me forever but he gave up on me, and lowkey he was a jerk and I fell for it. It's lowkey rlly sad, like I did so much and I got nothing in return"

# Step 1: sentence -> token ids -> fixed-width row.
encoded_new_text = tokenizer.texts_to_sequences([new_text])
padded_new_text = pad_sequences(
    encoded_new_text,
    padding='post',
    maxlen=max_length,
)

# Step 2: class probabilities -> winning class id.
predicted_probabilities = model.predict(padded_new_text)
predicted_label = np.argmax(predicted_probabilities)

# Step 3: class id -> emoji (label_dict stores emoji -> id).
candidates = [emoji for emoji, idx in label_dict.items() if predicted_label == idx]
predicted_emoji = candidates[0]

print("Predicted Emoji:", predicted_emoji)

Predicted Emoji: 😢


In [70]:
import numpy as np

# Classify one (long) sentence with the trained model and print its emoji.
new_text = "I am lowkey so tired and sleepy, I just want to sleep in class and I dont know why i do it. Like i want to listen to the professor but I just cant stay awake."

# Encode with the fitted tokenizer, then pad to the training width.
encoded_new_text = tokenizer.texts_to_sequences([new_text])
padding_options = {"maxlen": max_length, "padding": 'post'}
padded_new_text = pad_sequences(encoded_new_text, **padding_options)

# Highest-probability class id, translated back through label_dict (emoji -> id).
predicted_probabilities = model.predict(padded_new_text)
predicted_label = np.argmax(predicted_probabilities)
matching_emojis = [emoji for emoji in label_dict if label_dict[emoji] == predicted_label]
predicted_emoji = matching_emojis[0]

print("Predicted Emoji:", predicted_emoji)

Predicted Emoji: 😴


In [71]:
import numpy as np

# Classify one (long) sentence with the trained model and print its emoji.
new_text = "I am in love with my guy best friend but I dont know how to say that I love him so much, hes so cute and sweet. Hes just so LOVEABLE"

# Encode with the fitted tokenizer, then pad to the training width.
encoded_new_text = tokenizer.texts_to_sequences([new_text])
padded_new_text = pad_sequences(
    encoded_new_text,
    maxlen=max_length,
    padding='post',
)

# Highest-probability class id, translated back through label_dict (emoji -> id).
predicted_probabilities = model.predict(padded_new_text)
predicted_label = np.argmax(predicted_probabilities)
matching_emojis = [emoji for emoji in label_dict if label_dict[emoji] == predicted_label]
predicted_emoji = matching_emojis[0]

print("Predicted Emoji:", predicted_emoji)

Predicted Emoji: ❤️


In [None]:
import numpy as np

# Classify one sentence with the trained model and print its emoji.
# NOTE(review): none of these words appear in the training samples, so the
# encoded sequence is presumably empty and the padded row all zeros - confirm.
new_text = "I am so hap"

# Step 1: sentence -> token ids -> fixed-width row.
encoded_new_text = tokenizer.texts_to_sequences([new_text])
padded_new_text = pad_sequences(
    encoded_new_text,
    padding='post',
    maxlen=max_length,
)

# Step 2: class probabilities -> winning class id.
predicted_probabilities = model.predict(padded_new_text)
predicted_label = np.argmax(predicted_probabilities)

# Step 3: class id -> emoji (label_dict stores emoji -> id).
candidates = [emoji for emoji, idx in label_dict.items() if predicted_label == idx]
predicted_emoji = candidates[0]

print("Predicted Emoji:", predicted_emoji)

In [72]:
import numpy as np

# Classify one sentence with the trained model and print its emoji.
new_text = "I am so happy that my team won the Hackathon"

# Encode with the fitted tokenizer, then pad to the training width.
encoded_new_text = tokenizer.texts_to_sequences([new_text])
padding_options = {"maxlen": max_length, "padding": 'post'}
padded_new_text = pad_sequences(encoded_new_text, **padding_options)

# Highest-probability class id, translated back through label_dict (emoji -> id).
predicted_probabilities = model.predict(padded_new_text)
predicted_label = np.argmax(predicted_probabilities)
matching_emojis = [emoji for emoji in label_dict if label_dict[emoji] == predicted_label]
predicted_emoji = matching_emojis[0]

print("Predicted Emoji:", predicted_emoji)

Predicted Emoji: 😀
