# Connection to Google Drive

In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


# Imports

In [2]:
import tensorflow as tf

# Get the GPU device name.
device_name = tf.test.gpu_device_name()

# The device name should look like the following:
if device_name == '/device:GPU:0':
    print('Found GPU at: {}'.format(device_name))
else:
    raise SystemError('GPU device not found')

Found GPU at: /device:GPU:0


In [3]:
import torch

# If there's a GPU available...
if torch.cuda.is_available():    

    # Tell PyTorch to use the GPU.    
    device = torch.device("cuda")

    print('There are %d GPU(s) available.' % torch.cuda.device_count())

    print('We will use the GPU:', torch.cuda.get_device_name(0))

# If not...
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

There are 1 GPU(s) available.
We will use the GPU: Tesla T4


# Install transformers

In [4]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [5]:
!pip install sentencepiece

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


# Import dataset

In [6]:
import pandas as pd

# Example: Import data from csv-file stored on Google Drive

df = pd.read_csv('/content/gdrive/My Drive/Colab Datasets/dataset_new_sa_values.csv')

In [7]:
df.head(5)

Unnamed: 0,lyrics,genre,num_syllables,pos,year,fog_index,flesch_index,num_lines,sentiment,title,...,number_of_tokens,number_of_types,decades,stemmed_text,POS_tags,POS,values,pred,label,score
0,here she comes mmm just like an angelseems ...,"['alternative rock', 'glam metal', 'hard rock']",405.0,27,1990,4.8,89.75,43,"{'neg': 0.156, 'neu': 0.714, 'pos': 0.131, 'co...",(Can't Live Without Your) Love And Affection,...,323.0,133.0,1990s,here she come mmm just like an angelseem like...,"['_SP', 'RB', 'PRP', 'VBZ', '_SP', 'FW', '_SP'...",missingcan't outside hard mindbut nothing chan...,0.037037,1,POSITIVE,0.988107
1,look into my eyes you will seewhat you mean t...,"['acoustic rock', 'adult contemporary', 'album...",258.3,1,1991,4.4,107.69,36,"{'neg': 0.14100000000000001, 'neu': 0.708, 'po...",(Everything I Do) I Do It For You,...,233.0,95.0,1990s,look into my eye you will seewhat you mean to...,"['VB', 'IN', 'PRP$', 'NNS', '_SP', 'PRP', 'MD'...",worth worth worth other worth worth heart soul...,1.0,1,POSITIVE,0.998717
2,i can't get no satisfactioni can't get no sati...,"['blues', 'blues rock', 'britannique', 'britis...",296.1,3,1965,4.8,106.67,37,"{'neg': 0.126, 'neu': 0.667, 'pos': 0.20800000...",(I Can't Get No) Satisfaction,...,260.0,83.0,1960s,i ca n't get no satisfactioni ca n't get no s...,"['PRP', 'MD', 'RB', 'VB', 'DT', 'NN', 'MD', 'R...",satisfaction'cause nowhen drivin useless satis...,0.333333,0,NEGATIVE,0.999485
3,oh i i just died in your arms tonightit must...,['classic pop and rock'],372.6,32,1987,4.4,99.23,45,"{'neg': 0.164, 'neu': 0.766, 'pos': 0.07, 'com...",(I Just) Died In Your Arms,...,310.0,130.0,1980s,oh i i just die in your arm tonightit must 'v...,"['UH', '_SP', 'PRP', '_SP', 'PRP', 'RB', 'VBD'...",easy thisher final many long hot easy gonei to...,0.03125,0,NEGATIVE,0.99831
4,your love is fadin' i feel it fadeah your lo...,"['american', 'psychedelic rock', 'psychedelic ...",251.1,55,1970,4.4,99.23,31,"{'neg': 0.148, 'neu': 0.795, 'pos': 0.057, 'co...",(I Know) I'm Losing You,...,203.0,104.0,1970s,your love is fadin ' i feel it fadeah your lo...,"['PRP$', 'NN', 'VBZ', 'VBG', ""''"", '_SP', 'PRP...",cancan eyesa hearted worried love love love wo...,0.018182,0,NEGATIVE,0.997664


# First Try with just a sentence

In [7]:
from transformers import AutoTokenizer, AutoModelWithLMHead

tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-emotion")

model = AutoModelWithLMHead.from_pretrained("mrm8488/t5-base-finetuned-emotion")



Downloading pytorch_model.bin:   0%|          | 0.00/892M [00:00<?, ?B/s]

In [8]:
def get_emotion(text):
  input_ids = tokenizer.encode(text + '</s>', return_tensors='pt')

  output = model.generate(input_ids=input_ids,
               max_length=2)
  
  dec = [tokenizer.decode(ids) for ids in output]
  label = dec[0]
  return label

In [10]:
get_emotion("i feel as if i havent blogged in ages are at least truly blogged i am doing an update cute") # Output: 'joy'


'<pad> joy'

In [11]:
get_emotion("i have a feeling i kinda lost my best friend") # Output: 'sadness'

'<pad> sadness'

# Apply on a dataset

In [8]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelWithLMHead

tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-emotion")
model = AutoModelWithLMHead.from_pretrained("mrm8488/t5-base-finetuned-emotion")



In [9]:
def get_emotion(text):
    max_length = 512
    if len(text) > max_length:
        text = text[:max_length]
    input_ids = tokenizer.encode(text + '</s>', return_tensors='pt')
    output = model.generate(input_ids=input_ids, max_length=2)
    dec = [tokenizer.decode(ids) for ids in output]
    label = dec[0]
    return label

In [10]:
dataset = df.lyrics

In [11]:
emotions_df = pd.DataFrame(columns=['text', 'emotion_label'])

for text in dataset:
    emotion_label = get_emotion(text)
    emotions_df = emotions_df.append({'text': text, 'emotion_label': emotion_label}, ignore_index=True)

[1;30;43mDie letzten 5000 Zeilen der Streamingausgabe wurden abgeschnitten.[0m
  emotions_df = emotions_df.append({'text': text, 'emotion_label': emotion_label}, ignore_index=True)
  emotions_df = emotions_df.append({'text': text, 'emotion_label': emotion_label}, ignore_index=True)
  emotions_df = emotions_df.append({'text': text, 'emotion_label': emotion_label}, ignore_index=True)
  emotions_df = emotions_df.append({'text': text, 'emotion_label': emotion_label}, ignore_index=True)
  emotions_df = emotions_df.append({'text': text, 'emotion_label': emotion_label}, ignore_index=True)
  emotions_df = emotions_df.append({'text': text, 'emotion_label': emotion_label}, ignore_index=True)
  emotions_df = emotions_df.append({'text': text, 'emotion_label': emotion_label}, ignore_index=True)
  emotions_df = emotions_df.append({'text': text, 'emotion_label': emotion_label}, ignore_index=True)
  emotions_df = emotions_df.append({'text': text, 'emotion_label': emotion_label}, ignore_index=True)
 

In [12]:
emotions_df.head(5)

Unnamed: 0,text,emotion_label
0,here she comes mmm just like an angelseems ...,<pad> sadness
1,look into my eyes you will seewhat you mean t...,<pad> joy
2,i can't get no satisfactioni can't get no sati...,<pad> joy
3,oh i i just died in your arms tonightit must...,<pad> sadness
4,your love is fadin' i feel it fadeah your lo...,<pad> sadness


In [13]:
emotions_df.shape

(3669, 2)

In [16]:
from collections import Counter

Counter(emotions_df.emotion_label)

Counter({'<pad> sadness': 928,
         '<pad> joy': 1765,
         '<pad> surprise': 33,
         '<pad> anger': 410,
         '<pad> fear': 252,
         '<pad> love': 281})

In [18]:
emotions_df.to_csv('/content/gdrive/My Drive/Colab Datasets/emotions_dataset.csv', index=False)