Everytime you run a program on colab you need to install the package because colab doesn't give you to install a custom kernel.

In [1]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.25.1-py3-none-any.whl (5.8 MB)
[K     |████████████████████████████████| 5.8 MB 6.4 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 51.1 MB/s 
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 46.4 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.11.1 tokenizers-0.13.2 transformers-4.25.1


In [2]:
import pandas as pd
from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from transformers import AutoTokenizer
import numpy as np
from scipy.special import softmax
import csv
import urllib.request

# Preprocess text (username and link placeholders)
def preprocess(text):
    new_text = []

    for t in text.split(" "):
        
        t = '@user' if t.startswith('@') and len(t) > 1 else t   # It will remove everything after @ and replace it with @user
        new_text.append(t)
        
    return " ".join(new_text)

# Tasks:
# emoji, emotion, hate, irony, offensive, sentiment
# stance/abortion, stance/atheism, stance/climate, stance/feminist, stance/hillary

# Using this pretrained model for sentiments
MODEL = f"cardiffnlp/twitter-roberta-base-sentiment"

tokenizer = AutoTokenizer.from_pretrained(MODEL)

# download label mapping
labels=[]
#this txt file contains 3 rows 0 negative, 1 neutral and 2 positive
mapping_link = f"https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/sentiment/mapping.txt"
with urllib.request.urlopen(mapping_link) as f:
    html = f.read().decode('utf-8').split("\n")
    csvreader = csv.reader(html, delimiter='\t')

# this variable "labels" contains negative, neutral and positive
labels = [row[1] for row in csvreader if len(row) > 1]
print(labels)
# PT
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
# model.save_pretrained(MODEL)

text = "Hailie, I know you miss your mom, and I know you miss your dad " + \
         "When I'm gone, but I'm trying to give you the life that I never had " + \
         "I can see you're sad, even when you smile, even when you laugh " + \
         "I can see it in your eyes, deep inside you want to cry " + \
         "'Cause you're scared, I ain't there, daddy's with you in your prayers " + \
         "No more crying, wipe them tears, daddy's here, no more nightmares " + \
         "We gon' pull together through it, we gon' do it " + \
         "Laney uncle's crazy, ain't he? Yeah, but he loves you girl and you better know it " + \
         "We're all we got in this world, when it spins, when it swirls " + \
         "When it whirls, when it twirls, two little beautiful girls"

text = preprocess(text)
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input)

# It gives output in negative, neutral & positive sequence
scores = softmax(output[0][0].detach().numpy())

# It is used to sort the index in ascending order based on their values 
ranking = np.argsort(scores)[::-1]
for i in range(scores.shape[0]):
    l = labels[ranking[i]]
    s = scores[ranking[i]]
    print(f"{i+1}) {l} {np.round(float(s), 4)}")

Downloading:   0%|          | 0.00/747 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/150 [00:00<?, ?B/s]

['negative', 'neutral', 'positive']


Downloading:   0%|          | 0.00/499M [00:00<?, ?B/s]

1) positive 0.6113
2) neutral 0.332
3) negative 0.0567


In [6]:
!pip install timer

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting timer
  Downloading timer-0.2.2-py3-none-any.whl (7.8 kB)
Installing collected packages: timer
Successfully installed timer-0.2.2


In [32]:
import pandas
from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from transformers import AutoTokenizer
import numpy as np
from scipy.special import softmax
import csv
import urllib.request
from timeit import default_timer as timer

class speech_to_text:

  def __init__(self):
      pass

  def preprocess(self, text):
      """
      This function uses a pre trained model from hugging face to convert text into sentiments.
      This model was trained on millions of tweets.

      """

      # Using this pretrained model for sentiments
      MODEL = f"cardiffnlp/twitter-roberta-base-sentiment"

      tokenizer = AutoTokenizer.from_pretrained(MODEL)
      # PT
      model = AutoModelForSequenceClassification.from_pretrained(MODEL)

      labels = ['negative', 'neutral', 'positive']

      new_text = []

      for t in text.split(" "):
          
          t = '@user' if t.startswith('@') and len(t) > 1 else t   # It will remove everything after @ and replace it with @user
          new_text.append(t)
      
      text = " ".join(new_text)
      encoded_input = tokenizer(text, return_tensors='pt')
      output = model(**encoded_input)

      # It gives output in negative, neutral & positive sequence
      scores = softmax(output[0][0].detach().numpy())

      # It is used to sort the index in ascending order based on their values 
      ranking = np.argsort(scores)[::-1]
      return labels, output, scores, ranking


In [33]:
a = timer()

text = [
        "Hailie, I know you miss your mom, and I know you miss your dad ",
         "When I'm gone, but I'm trying to give you the life that I never had ",
         "'Cause you're scared, I ain't there, daddy's with you in your prayers ",
         "No more crying, wipe them tears, daddy's here, no more nightmares ",
         "We gon' pull together through it, we gon' do it ",
         "Laney uncle's crazy, ain't he? Yeah, but he loves you girl and you better know it ",
         "We're all we got in this world, when it spins, when it swirls ",
         "When it whirls, when it twirls, two little beautiful girls",
         "What the fuck are you doing?"
      ]

df = []
for i in text:
    start = timer()
    s2t = speech_to_text()
    labels, output, scores, ranking = s2t.preprocess(i)
    end = timer()
    df.append(
        {
            'TEXT': i,
            'SENTIMENTS': labels[ranking[0]],
            'SCORES': scores[ranking[0]],
            'TIME_IN_MINUTES': round((end - start) / 60, 2)
        }
    )

b = timer()
print("Overall time taken:", round((b - a) / 60, 2))

Overall time taken: 0.45


In [34]:
df1 = pd.DataFrame(df)
df1

Unnamed: 0,TEXT,SENTIMENTS,SCORES,TIME_IN_MINUTES
0,"Hailie, I know you miss your mom, and I know y...",neutral,0.532371,0.05
1,"When I'm gone, but I'm trying to give you the ...",neutral,0.536321,0.05
2,"'Cause you're scared, I ain't there, daddy's w...",neutral,0.622295,0.05
3,"No more crying, wipe them tears, daddy's here,...",neutral,0.455002,0.05
4,"We gon' pull together through it, we gon' do it",positive,0.805857,0.05
5,"Laney uncle's crazy, ain't he? Yeah, but he lo...",positive,0.407606,0.05
6,"We're all we got in this world, when it spins,...",neutral,0.545459,0.05
7,"When it whirls, when it twirls, two little bea...",positive,0.855047,0.05
8,What the fuck are you doing?,negative,0.965532,0.05


In [9]:
df = []
for i in range(scores.shape[0]):
    #print(ranking[i])
    l = labels[ranking[i]]
    s = scores[ranking[i]]
    print(f"{i+1}) {l} {np.round(float(s), 4)}")
    break
    df.append(
        {
            l: s
        }
    )

1) positive 0.855


In [10]:
df1 = pd.DataFrame(df)
df1

In [11]:
df

[]

In [12]:
a = df[0]
a

IndexError: ignored

In [13]:
b = {**a, list(df[1].keys())[0]: list(df[1].values())[0]}
c = {**b, list(df[2].keys())[0]: list(df[2].values())[0]}

IndexError: ignored

In [14]:
c

NameError: ignored

In [None]:
d = pd.DataFrame(c, index = [0])
d