#### This code sets up the NLTK library for text preprocessing by downloading essential resources like stopwords (common words to remove), WordNet (for lemmatization), and tokenization data. These tools are used for cleaning and preparing text in NLP tasks.

In [12]:
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import re

nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('punkt_tab')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

#### This code defines a function to preprocess text for natural language processing (NLP). It converts text to lowercase, removes URLs, mentions, and non-alphanumeric characters, tokenizes the text into words, and lemmatizes each word while filtering out stopwords. The processed words are then joined back into a cleaned text string.

In [2]:
def preprocess_text(text):
  text=text.lower()
  text=re.sub(r'http\S+','',text)
  text=re.sub(r'@[\w_]+','',text)
  text=re.sub(r'[^\W\s]','',text)
  words=nltk.word_tokenize(text)

  lemmatizer=WordNetLemmatizer()
  words=[lemmatizer.lemmatize(word) for word in words if word not in stopwords.words('english')]

  return ' '.join(words)

#### This code identifies emotions in a given text using a pretrained RoBERTa-based model for emotion classification. It preprocesses the input text, uses a Hugging Face pipeline to classify emotions, and then prints the predicted emotions and their associated confidence scores.

In [14]:
from transformers import pipeline

def identify_emotion(text):
  sample=preprocess_text(text)
  model=pipeline(task="text-classification", model="SamLowe/roberta-base-go_emotions", top_k=None)
  model_output = model(sample)
  output=model_output[0]
  print("------------------Emotions-------------------")
  for i in range(len(output)):
    print(output[i].values())


#### Necessary Import for subsequent tasks

In [4]:
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from scipy.special import softmax

In [5]:
tokenizer=AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment")
model=AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment")

config.json:   0%|          | 0.00/747 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

#### This code performs sentiment and emotion analysis on a given text:

1. Sentiment Analysis (identify_sentiments): The text is preprocessed, tokenized, passed through a sentiment analysis model, and the output logits are converted to probabilities using softmax. It prints the probabilities for negative, neutral, and positive sentiments.

2. Emotion Analysis (get_emotion_analysis): Calls identify_sentiments to analyze sentiment, then calls identify_emotion (from the earlier function) to analyze emotions, printing both results.

In [8]:
def identify_sentiments(text):
  sample=preprocess_text(text)
  encoded_text=tokenizer(example,return_tensors='pt')
  output=model(**encoded_text)
  scores=output[0][0].detach().numpy()
  scores=softmax(scores)
  scores_dict={
      'roberta_negative': scores[0],
      'roberta_neutral': scores[1],
      'roberta_positive': scores[2],
  }
  for k, v in scores_dict.items():
    print(k, " : ", v)

def get_emotion_analysis(text):
  print("---------------------Sentiments------------------------")
  print(identify_sentiments(text))
  identify_emotion(text)

#### Execution -

In [7]:
example="""Today was a rollercoaster of emotions. The morning started beautifully with a stunning sunrise and a warm cup of coffee, which filled me with gratitude. However, the traffic on the way to work was horrendous,
leaving me frustrated and irritable. At work, I received praise for completing a challenging project, which boosted my confidence. But later, a technical glitch caused a major delay,
adding stress to my already busy day. By evening, I was relieved to unwind with a good book, though a lingering headache made it hard to focus.
Overall, it was a day of highs and lows, leaving me reflective about its unpredictability.
"""

In [15]:
response=get_emotion_analysis(example)

---------------------Sentiments------------------------
roberta_negative  :  0.23044232
roberta_neutral  :  0.32945415
roberta_positive  :  0.4401035
None
------------------Emotions-------------------
dict_values(['neutral', 0.957525908946991])
dict_values(['approval', 0.009658534079790115])
dict_values(['realization', 0.008134313859045506])
dict_values(['annoyance', 0.007282745558768511])
dict_values(['confusion', 0.0047265090979635715])
dict_values(['amusement', 0.0045343018136918545])
dict_values(['anger', 0.004120558500289917])
dict_values(['excitement', 0.004073311109095812])
dict_values(['joy', 0.00301584554836154])
dict_values(['admiration', 0.0028949743136763573])
dict_values(['disapproval', 0.0028843169566243887])
dict_values(['surprise', 0.00280808936804533])
dict_values(['disappointment', 0.0027993875555694103])
dict_values(['curiosity', 0.0026464310940355062])
dict_values(['fear', 0.0025521430652588606])
dict_values(['disgust', 0.0025338868144899607])
dict_values(['sadness'