# Emoji based Sentiment Analysis


In [1]:
import pandas as pd
import numpy as np

# Preprocess Data

### Emoji Dataset Preprocessing

In [2]:
# prompt: mount google drice

from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [3]:
# Setup the data for emoji
df_emoji = pd.read_csv("/content/drive/MyDrive/Sentiment Analysis/dataset/Emoji_Sentiment_Data.csv",
                       usecols = ['Emoji', 'Negative', 'Neutral', 'Positive'])
df_emoji

Unnamed: 0,Emoji,Negative,Neutral,Positive
0,😂,3614,4163,6845
1,❤,355,1334,6361
2,♥,252,1942,4950
3,😍,329,1390,4640
4,😭,2412,1218,1896
...,...,...,...,...
964,➛,0,1,0
965,♝,0,1,0
966,❋,0,1,0
967,✆,0,1,0


In [4]:
df_emoji.Emoji.values

array(['😂', '❤', '♥', '😍', '😭', '😘', '😊', '👌', '💕', '👏', '😁', '☺', '♡',
       '👍', '😩', '🙏', '✌', '😏', '😉', '🙌', '🙈', '💪', '😄', '😒', '💃', '💖',
       '😃', '😔', '😱', '🎉', '😜', '☯', '🌸', '💜', '💙', '✨', '😳', '💗', '★',
       '█', '☀', '😡', '😎', '😢', '💋', '😋', '🙊', '😴', '🎶', '💞', '😌', '🔥',
       '💯', '🔫', '💛', '💁', '💚', '♫', '😞', '😆', '😝', '😪', '�', '😫', '😅',
       '👊', '💀', '😀', '😚', '😻', '©', '👀', '💘', '🐓', '☕', '👋', '✋', '🎊',
       '🍕', '❄', '😥', '😕', '💥', '💔', '😤', '😈', '►', '✈', '🔝', '😰', '⚽',
       '😑', '👑', '😹', '👉', '🍃', '🎁', '😠', '🐧', '☆', '🍀', '🎈', '🎅', '😓',
       '😣', '😐', '✊', '😨', '😖', '💤', '💓', '👎', '💦', '✔', '😷', '⚡', '🙋',
       '🎄', '💩', '🎵', '➡', '😛', '😬', '👯', '💎', '🌿', '🎂', '🌟', '🔮', '❗',
       '👫', '🏆', '✖', '☝', '😙', '⛄', '👅', '♪', '🍂', '💏', '🔪', '🌴', '👈',
       '🌹', '🙆', '➜', '👻', '💰', '🍻', '🙅', '🌞', '🍁', '⭐', '▪', '🎀', '━',
       '☷', '🐷', '🙉', '🌺', '💅', '🐶', '🌚', '👽', '🎤', '👭', '🎧', '👆', '🍸',
       '🍷', '®', '🍉', '😇', '☑', '🏃', '😿', '│', '💣', '🍺', '▶', '😲

### Set to Binary Polarity and Normalize to 0 and 1

In [5]:
# compare the polarity of the dataset and turn the polarity to binary
# 0 = negative, 1= positive
polarity_ls = []
for index, row in df_emoji.iterrows():

    # polarity == sentiment
    # initial polarity is negative
    polarity = 0

    # positive if positive value is greater than negative value
    arg_1 = row['Positive'] > row['Negative']

    # positive if neutral value is odd and positive and negative value are equal
    arg_2 = row['Positive'] == row['Negative'] and row['Neutral'] % 2 != 0

    # positive if either of the two arguments are true
    if arg_1 or arg_2:
        polarity = 1
    polarity_ls.append(polarity)

# create new emoji dataset
new_df_emoji = pd.DataFrame(polarity_ls, columns=['sentiment'])
new_df_emoji['emoji'] = df_emoji['Emoji'].values
new_df_emoji

Unnamed: 0,sentiment,emoji
0,1,😂
1,1,❤
2,1,♥
3,1,😍
4,0,😭
...,...,...
964,1,➛
965,1,♝
966,1,❋
967,1,✆


### Tweet Posts Dataset Prerprocessing

A <b>10k size dataset is provided</b> in the folder but you can download a 1.6m data online.

To download the 1.6m tweet dataset (optional)
https://www.kaggle.com/kazanova/sentiment140


In [6]:
df_posts = pd.read_csv("/content/drive/MyDrive/Sentiment Analysis/dataset/processed_tweet_dataset.csv")
df_posts = df_posts.drop([df_posts.columns[0]], axis=1)
df_posts

Unnamed: 0,sentiment,post
0,0,is upset that he can't update his Facebook by ...
1,0,I dived many times for the ball. Managed to s...
2,0,my whole body feels itchy and like its on fire
3,0,"no, it's not behaving at all. i'm mad. why am..."
4,0,not the whole crew
...,...,...
1558667,1,Just woke up. Having no school is the best fee...
1558668,1,TheWDB.com - Very cool to hear old Walt interv...
1558669,1,Are you ready for your MoJo Makeover? Ask me f...
1558670,1,Happy 38th Birthday to my boo of alll time!!! ...


# Classification using Naive Bayes

Naive Bayes is a simple technique for constructing classifiers: models that assign class labels to problem instances, represented as vectors of feature values, where the class labels are drawn from some finite set.

In [7]:
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn import naive_bayes
from sklearn.metrics import roc_auc_score

### tf–idf or TFIDF

short for <b>term frequency–inverse document frequency</b>, is a numerical statistic that is intended to reflect <b>how important a word is</b> to a document in a collection or corpus.

In [8]:
!pip install nltk
import nltk
nltk.download('stopwords')



[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [9]:
# TFIDF vectorizer
stopset = set(stopwords.words('english'))
vectorizer = TfidfVectorizer(use_idf=True, lowercase=True,
                            strip_accents='ascii', stop_words=stopset)

In [10]:
# print out the emoticons and sentiment values
e_c, p = 0, 0
for index, row in new_df_emoji.iterrows():
    print(f"{row['emoji']} = {row['sentiment']}")
    p += 1 if row['sentiment'] else 0
    e_c += 1

😂 = 1
❤ = 1
♥ = 1
😍 = 1
😭 = 0
😘 = 1
😊 = 1
👌 = 1
💕 = 1
👏 = 1
😁 = 1
☺ = 1
♡ = 1
👍 = 1
😩 = 0
🙏 = 1
✌ = 1
😏 = 1
😉 = 1
🙌 = 1
🙈 = 1
💪 = 1
😄 = 1
😒 = 0
💃 = 1
💖 = 1
😃 = 1
😔 = 0
😱 = 1
🎉 = 1
😜 = 1
☯ = 1
🌸 = 1
💜 = 1
💙 = 1
✨ = 1
😳 = 1
💗 = 1
★ = 1
█ = 0
☀ = 1
😡 = 0
😎 = 1
😢 = 1
💋 = 1
😋 = 1
🙊 = 1
😴 = 0
🎶 = 1
💞 = 1
😌 = 1
🔥 = 1
💯 = 1
🔫 = 0
💛 = 1
💁 = 1
💚 = 1
♫ = 1
😞 = 0
😆 = 1
😝 = 1
😪 = 0
� = 1
😫 = 0
😅 = 1
👊 = 1
💀 = 0
😀 = 1
😚 = 1
😻 = 1
© = 1
👀 = 1
💘 = 1
🐓 = 1
☕ = 1
👋 = 1
✋ = 1
🎊 = 1
🍕 = 1
❄ = 1
😥 = 1
😕 = 0
💥 = 1
💔 = 0
😤 = 0
😈 = 1
► = 1
✈ = 1
🔝 = 1
😰 = 0
⚽ = 1
😑 = 0
👑 = 1
😹 = 1
👉 = 1
🍃 = 1
🎁 = 1
😠 = 0
🐧 = 1
☆ = 1
🍀 = 1
🎈 = 1
🎅 = 1
😓 = 0
😣 = 0
😐 = 0
✊ = 1
😨 = 0
😖 = 0
💤 = 1
💓 = 1
👎 = 0
💦 = 1
✔ = 1
😷 = 0
⚡ = 1
🙋 = 1
🎄 = 1
💩 = 0
🎵 = 1
➡ = 1
😛 = 1
😬 = 1
👯 = 1
💎 = 1
🌿 = 1
🎂 = 1
🌟 = 1
🔮 = 1
❗ = 1
👫 = 1
🏆 = 1
✖ = 1
☝ = 1
😙 = 1
⛄ = 1
👅 = 1
♪ = 1
🍂 = 1
💏 = 1
🔪 = 1
🌴 = 1
👈 = 1
🌹 = 1
🙆 = 1
➜ = 1
👻 = 1
💰 = 1
🍻 = 1
🙅 = 0
🌞 = 1
🍁 = 1
⭐ = 1
▪ = 1
🎀 = 1
━ = 1
☷ = 1
🐷 = 1
🙉 = 1
🌺 = 1
💅 = 1
🐶 = 1
🌚 = 1
👽 = 1
🎤 = 1
👭 = 1
🎧 = 

In [11]:
print(f'Total Positive Emojis are ({p}:{e_c}) or {round(p / e_c * 100)}%')

Total Positive Emojis are (795:969) or 82%


In [12]:
new_df_post = df_posts

In [13]:
# dependent variable will be linked as:
# 0 = negative, 1 = positive
y = new_df_post.sentiment
vectorizer = TfidfVectorizer(stop_words='english')
# convert 'sentence' from text to features
X = vectorizer.fit_transform(new_df_post.post)

print(y.shape)
print(X.shape)
print(f'{X.shape[0]} observations X {X.shape[1]} unique words')


(1558672,)
(1558672, 287659)
1558672 observations X 287659 unique words


### Training


In [14]:
# Test Train Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25,random_state=None)

# we will train a naive bayes classifier
clf = naive_bayes.MultinomialNB()
# clf = naive_bayes.BernoulliNB()

clf.fit(X_train, y_train)

# test our models accuracy
roc_auc_score(y_test, clf.predict_proba(X_test)[:,1])


0.8388114775549679

### Processing the inputs - Extraction of emoji and texts

In [15]:
!pip install emoji

Collecting emoji
  Downloading emoji-2.12.1-py3-none-any.whl (431 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/431.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.4/431.4 kB[0m [31m4.2 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m431.4/431.4 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: emoji
Successfully installed emoji-2.12.1


In [16]:
import emoji

text = "#samplesenti @emojitweets i ❤❤❤ sentiment &quot; analysis &quot; http://senti.com/pic_01.jpg "
def extract_text_and_emoji(text=text):
    global allchars, emoji_list

    # remove all tagging and links, not needed for sentiments
    remove_keys = ('@', 'http://', '&', '#')
    clean_text = ' '.join(txt for txt in text.split() if not txt.startswith(remove_keys))

    # setup the input, get the characters and the emoji lists
    allchars = [c for c in clean_text]
    emoji_list = [c for c in allchars if emoji.is_emoji(c)]

    # extract text
    clean_text = ' '.join([str for str in clean_text.split() if not any(i in str for i in emoji_list)])

    # extract emoji
    clean_emoji = ''.join([str for str in text.split() if any(i in str for i in emoji_list)])
    return (clean_text, clean_emoji)

### Get the sentiments of the processed posts

In [17]:
def get_sentiment(s_input = 'i sentiment analysis'):
    # turn input into array
    input_array= np.array([s_input])
    # vectorize the input
    input_vector = vectorizer.transform(input_array)
    # predict the score of vector
    pred_senti = clf.predict(input_vector)

    return pred_senti[0]
print(get_sentiment())

1


In [18]:
def get_emoji_sentiment(emoji_ls = '❤❤❤', emoji_df = new_df_emoji):
    emoji_val_ls = []
    for e in emoji_ls:
        get_emo_senti = [row['sentiment'] for index, row in emoji_df.iterrows() if row['emoji'] == e]
        emoji_val_ls.append(get_emo_senti[0])
    return emoji_val_ls

ges = get_emoji_sentiment()
print('Sentiment value of each emoji:',ges)

Sentiment value of each emoji: [1, 1, 1]


#extra


In [19]:
def get_emoji_sentiment(emoji_ls, emoji_df):
    emoji_val_ls = []
    for e in emoji_ls:
        get_emo_senti = [row['sentiment'] for index, row in emoji_df.iterrows() if row['emoji'] == e]
        if get_emo_senti:
            emoji_val_ls.append(get_emo_senti[0])
        else:
            emoji_val_ls.append(None)  # Handle the case when sentiment is not found for an emoji
    return emoji_val_ls

### Building the sentiment analysis

In [20]:
def get_text_emoji_sentiment(input_test = 'i ❤❤❤ sentiment analysis'):
    # separate text and emoji
    (ext_text, ext_emoji) = extract_text_and_emoji(input_test)
    print(f'\tExtracted: "{ext_text}" , {ext_emoji}')

    # get text sentiment
    senti_text = get_sentiment(ext_text)
    print(f'\tText value: {senti_text}')

    # get emoji sentiment
    senti_emoji_value = sum(get_emoji_sentiment(ext_emoji, new_df_emoji))
    print_emo_val_avg = 0 if len(ext_emoji) == 0 else senti_emoji_value/len(ext_emoji)
    print(f'\tEmoji average value: {print_emo_val_avg}')

    # avg the sentiment of emojis and text
    senti_avg = (senti_emoji_value + senti_text) / (len(ext_emoji) + 1)
    print(f'\tAverage value: {senti_avg}')

    # set value of avg sentiment to either pos or neg
    senti_truth = "Positive" if senti_avg >= 0.5 else "Negative"

    return senti_truth

print(get_text_emoji_sentiment())

	Extracted: "i sentiment analysis" , ❤❤❤
	Text value: 1
	Emoji average value: 1.0
	Average value: 1.0
Positive


In [21]:
import re

def clean_comment(comment):
    # Remove commas
    comment = comment.replace(",", "")

    # Remove non-text characters
    comment = re.sub(r"[^a-zA-Z0-9\s]", "", comment)

    # Remove links
    comment = re.sub(r"http\S+|www\S+|https\S+", "", comment)

    return comment.strip()

def get_text_emoji_sentiment(input_test='i ❤❤❤ sentiment analysis'):
    # Clean the comment
    cleaned_comment = clean_comment(input_test)

    # separate text and emoji
    (ext_text, ext_emoji) = extract_text_and_emoji(cleaned_comment)
    print(f'\tExtracted: "{ext_text}", {ext_emoji}')

    # get text sentiment
    senti_text = get_sentiment(ext_text)
    print(f'\tText value: {senti_text}')

    # get emoji sentiment
    senti_emoji_value = sum(get_emoji_sentiment(ext_emoji, new_df_emoji))
    print_emo_val_avg = 0 if len(ext_emoji) == 0 else senti_emoji_value / len(ext_emoji)
    print(f'\tEmoji average value: {print_emo_val_avg}')

    # avg the sentiment of emojis and text
    senti_avg = (senti_emoji_value + senti_text) / (len(ext_emoji) + 1)
    print(f'\tAverage value: {senti_avg}')

    # set value of avg sentiment to either pos, neg, or neu
    if senti_avg >= 0.5:
        senti_truth = "Positive"
    elif senti_avg < 0.5:
        senti_truth = "Negative"
    else:
        senti_truth = "Neutral"

    return senti_truth

# Example usage
sentiment = get_text_emoji_sentiment()
print(f"Sentiment: {sentiment}")

# Calculate percentages
num_positive = 0
num_negative = 0
num_neutral = 0
num_total = 0

# Iterate over a list of comments and calculate sentiment percentages
comments = ["I love it!", "It's terrible.", "Not sure.", "Great job!", "This is a good, comment.", "Check out this link: https://example.com"]
for comment in comments:
    cleaned_comment = clean_comment(comment)
    if not cleaned_comment:
        continue

    sentiment = get_text_emoji_sentiment(cleaned_comment)
    if sentiment == "Positive":
        num_positive += 1
    elif sentiment == "Negative":
        num_negative += 1
    else:
        num_neutral += 1
    num_total += 1

percentage_positive = (num_positive / num_total) * 100
percentage_negative = (num_negative / num_total) * 100
percentage_neutral = (num_neutral / num_total) * 100

print(f"Percentage of positive comments: {percentage_positive}%")
print(f"Percentage of negative comments: {percentage_negative}%")
print(f"Percentage of neutral comments: {percentage_neutral}%")

	Extracted: "i sentiment analysis", 
	Text value: 1
	Emoji average value: 0
	Average value: 1.0
Sentiment: Positive
	Extracted: "I love it", 
	Text value: 1
	Emoji average value: 0
	Average value: 1.0
	Extracted: "Its terrible", 
	Text value: 0
	Emoji average value: 0
	Average value: 0.0
	Extracted: "Not sure", 
	Text value: 1
	Emoji average value: 0
	Average value: 1.0
	Extracted: "Great job", 
	Text value: 1
	Emoji average value: 0
	Average value: 1.0
	Extracted: "This is a good comment", 
	Text value: 1
	Emoji average value: 0
	Average value: 1.0
	Extracted: "Check out this link", 
	Text value: 1
	Emoji average value: 0
	Average value: 1.0
Percentage of positive comments: 83.33333333333334%
Percentage of negative comments: 16.666666666666664%
Percentage of neutral comments: 0.0%


### Print the tweets with emoji

In [22]:
def print_senti_status(test):
    print('========================================')
    print(f'Your input is "{test}" \n')
    sentiment = get_text_emoji_sentiment(test)
    print(f'\nYour input is of "{sentiment}" sentiment'.upper())
    print('========================================')

def calculate_sentiment_percentages(comments):
    num_positive = 0
    num_negative = 0
    num_neutral = 0
    num_total = 0

    for comment in comments:
        cleaned_comment = clean_comment(comment)
        if not cleaned_comment:
            continue

        sentiment = get_text_emoji_sentiment(cleaned_comment)
        if sentiment == "Positive":
            num_positive += 1
        elif sentiment == "Negative":
            num_negative += 1
        else:
            num_neutral += 1
        num_total += 1

    percentage_positive = (num_positive / num_total) * 100
    percentage_negative = (num_negative / num_total) * 100
    percentage_neutral = (num_neutral / num_total) * 100

    return percentage_positive, percentage_negative, percentage_neutral

# Take user input
user_input = input("Enter your comment: ")

# Check sentiment of user input
print_senti_status(user_input)

# Calculate sentiment percentages for user input
percentage_positive, percentage_negative, percentage_neutral = calculate_sentiment_percentages([user_input])

print(f"\nPercentage of positive comments: {percentage_positive}%")
print(f"Percentage of negative comments: {percentage_negative}%")
print(f"Percentage of neutral comments: {percentage_neutral}%")

Enter your comment: I am happy
Your input is "I am happy" 

	Extracted: "I am happy", 
	Text value: 1
	Emoji average value: 0
	Average value: 1.0

YOUR INPUT IS OF "POSITIVE" SENTIMENT
	Extracted: "I am happy", 
	Text value: 1
	Emoji average value: 0
	Average value: 1.0

Percentage of positive comments: 100.0%
Percentage of negative comments: 0.0%
Percentage of neutral comments: 0.0%


## Tweet Something

In [23]:
import ipywidgets as widgets
import warnings; warnings.simplefilter('ignore')

In [24]:
# for text area
l = widgets.Layout(flex='0 1 auto', height='50px',width='auto')
post_tweet = widgets.Textarea(value='🎶 Tweet 🐤 your feelings 😲 🎶', layout=l)
print(post_tweet.value)
# for button
button = widgets.Button(description="Say your Sentiments!")
output = widgets.Output()

def on_tweet_clicked(b):
    output.clear_output()
    with output:
        output.layout={'border': '1px solid black'}
        print_senti_status(post_tweet.value)


🎶 Tweet 🐤 your feelings 😲 🎶


In [25]:
display(post_tweet,button, output)
button.on_click(on_tweet_clicked)

Textarea(value='🎶 Tweet 🐤 your feelings 😲 🎶', layout=Layout(flex='0 1 auto', height='50px', width='auto'))

Button(description='Say your Sentiments!', style=ButtonStyle())

Output()

### Conclusion
This is method 1. Where the training of the tweets are sepparate from the emoticons. the emoticons are assigned with their own sentiment polarity. then to analyze the sentiment of the tweet, we combine and average the sentiment value of both the emoticons and texts. this method will have a strong influence emoticon with a non-changing polarity value.

In [26]:
emoticon_mapping = {
    ":)": 1,  # positive
    ":(": -1,  # negative
    ":|": 0,  # neutral
    # Add more emoticons and their sentiment scores as needed
}

In [27]:
def calculate_emoticon_sentiment(text):
    emoticon_sentiment = 0
    for emoticon, sentiment in emoticon_mapping.items():
        if emoticon in text:
            emoticon_sentiment += sentiment
    return emoticon_sentiment

In [28]:
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer

def analyze_sentiment(text):
    sia = SentimentIntensityAnalyzer()
    sentiment_score = sia.polarity_scores(text)
    return sentiment_score['pos'], sentiment_score['neg']

In [29]:
def get_overall_sentiment(emoticon_sentiment, sentiment_score):
    overall_sentiment = emoticon_sentiment + sentiment_score
    return overall_sentiment

In [30]:
def calculate_percentage(overall_sentiment):
    positive_percentage = max(0, overall_sentiment) * 100
    negative_percentage = abs(min(0, overall_sentiment)) * 100
    return positive_percentage, negative_percentage

In [38]:
from textblob import TextBlob

# Prompt the user for a comment
text = input("Enter your comment: ")

# Perform sentiment analysis
blob = TextBlob(text)
sentiment = blob.sentiment.polarity

# Calculate the percentage of positive and negative sentiment
positive_percentage = (sentiment + 1) * 50
negative_percentage = (1 - sentiment) * 50

# Print results
print("Positive percentage: {:.1f}%".format(positive_percentage))
print("Negative percentage: {:.1f}%".format(negative_percentage))

Enter your comment: 😟 🙁 ☹️
Positive percentage: 50.0%
Negative percentage: 50.0%


In [32]:
!pip install vaderSentiment

Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.0/126.0 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: vaderSentiment
Successfully installed vaderSentiment-3.3.2


In [39]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Prompt the user for a comment
text = input("Enter your comment: ")

# Perform sentiment analysis using VADER
analyzer = SentimentIntensityAnalyzer()
sentiment_scores = analyzer.polarity_scores(text)

# Get the positive, negative, and neutral scores
positive_score = sentiment_scores['pos']
negative_score = sentiment_scores['neg']
neutral_score = sentiment_scores['neu']

# Calculate the percentage of positive, negative, and neutral sentiment
total_score = positive_score + negative_score + neutral_score
positive_percentage = (positive_score / total_score) * 100
negative_percentage = (negative_score / total_score) * 100
neutral_percentage = (neutral_score / total_score) * 100

# Print results
print("Positive percentage: {:.1f}%".format(positive_percentage))
print("Negative percentage: {:.1f}%".format(negative_percentage))
print("Neutral percentage: {:.1f}%".format(neutral_percentage))

Enter your comment: 😟 🙁 ☹️
Positive percentage: 0.0%
Negative percentage: 61.7%
Neutral percentage: 38.3%


In [34]:
!pip install vaderSentiment



In [35]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Prompt the user for a comment
text = input("Enter your comment: ")

# Perform sentiment analysis using VADER
sid = SentimentIntensityAnalyzer()
sentiment_scores = sid.polarity_scores(text)

# Get the sentiment label and score
sentiment_label = "Positive" if sentiment_scores["compound"] >= 0 else "Negative"
sentiment_score = abs(sentiment_scores["compound"])

# Calculate the percentage of positive and negative sentiment
positive_percentage = sentiment_score * 100
negative_percentage = (1 - sentiment_score) * 100

# Print results
print("Sentiment: ", sentiment_label)
print("Positive percentage: {:.1f}%".format(positive_percentage))
print("Negative percentage: {:.1f}%".format(negative_percentage))

Enter your comment: 😟 🙁 ☹️ I am happy.
Sentiment:  Negative
Positive percentage: 18.9%
Negative percentage: 81.2%


In [36]:
!pip install flair

Collecting flair
  Downloading flair-0.13.1-py3-none-any.whl (388 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m388.3/388.3 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting boto3>=1.20.27 (from flair)
  Downloading boto3-1.34.111-py3-none-any.whl (139 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.3/139.3 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting bpemb>=0.3.2 (from flair)
  Downloading bpemb-0.3.5-py3-none-any.whl (19 kB)
Collecting conllu>=4.0 (from flair)
  Downloading conllu-4.5.3-py2.py3-none-any.whl (16 kB)
Collecting deprecated>=1.2.13 (from flair)
  Downloading Deprecated-1.2.14-py2.py3-none-any.whl (9.6 kB)
Collecting ftfy>=6.1.0 (from flair)
  Downloading ftfy-6.2.0-py3-none-any.whl (54 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.4/54.4 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
Collecting janome>=0.4.2 (from flair)
  Downloading Janome-0.5.0-py2.py3-none-any.whl (19

In [37]:
from flair.models import TextClassifier
from flair.data import Sentence

# Prompt the user for a comment
text = input("Enter your comment: ")

# Load the sentiment analysis model
model = TextClassifier.load("en-sentiment")

# Create a Sentence object
sentence = Sentence(text)

# Predict the sentiment
model.predict(sentence)

# Get the sentiment label and score
sentiment_label = sentence.labels[0].value
sentiment_score = sentence.labels[0].score

# Calculate the percentage of positive and negative sentiment
positive_percentage = sentiment_score * 100
negative_percentage = (1 - sentiment_score) * 100

# Print results
print("Sentiment: ", sentiment_label)
print("Positive percentage: {:.1f}%".format(positive_percentage))
print("Negative percentage: {:.1f}%".format(negative_percentage))

Enter your comment: 😟 🙁 ☹️I am happy
2024-05-23 09:31:20,258 https://nlp.informatik.hu-berlin.de/resources/models/sentiment-curated-distilbert/sentiment-en-mix-distillbert_4.pt not found in cache, downloading to /tmp/tmpzcc5y_mj


100%|██████████| 253M/253M [00:27<00:00, 9.64MB/s]

2024-05-23 09:31:48,823 copying /tmp/tmpzcc5y_mj to cache at /root/.flair/models/sentiment-en-mix-distillbert_4.pt





2024-05-23 09:31:49,347 removing temp file /tmp/tmpzcc5y_mj


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Sentiment:  POSITIVE
Positive percentage: 99.2%
Negative percentage: 0.8%
