# NLP Speech Sentiment Analysis: Preprocess and combine data

#### Import libraries

In [1]:
import pandas as pd
import numpy as np
import re
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from collections import Counter
from collections import defaultdict


#### Import datasets

In [2]:
# GoEmotions: A Dataset of Fine-Grained Emotions
# Reference: Demszky, Dorottya and Movshovitz-Attias, Dana and Ko, Jeongwoo and Cowen, Alan and Nemade, Gaurav and Ravi, Sujith, https://github.com/google-research/google-research/tree/master/goemotions/data/full_dataset
goe1 = pd.read_csv('data/goemotions_1.csv')
goe2 = pd.read_csv('data/goemotions_2.csv')
goe3 = pd.read_csv('data/goemotions_3.csv')

# Emotion Classification dataset
# Reference: https://www.kaggle.com/datasets/abdallahwagih/emotion-dataset
ed = pd.read_csv('data/Emotion_classify_Data.csv')

# Emotions dataset
# Reference: https://www.kaggle.com/datasets/nelgiriyewithana/emotions
emo = pd.read_csv('data/text.csv')

# Sentiment140 dataset
# Reference: https://www.kaggle.com/datasets/kazanova/sentiment140
sent = pd.read_csv('data/training.1600000.processed.noemoticon.csv', encoding='latin1')

## Helper functions

### Preprocessing functions

In [3]:
# Preprocesses sentences of the 'text' column
def preprocess_text(df, column_name='text'):
    # Remove non-alphabetic characters
    df[column_name] = df[column_name].apply(lambda x: re.sub(r'[^a-zA-Z\s]', '', x))
    
    # Convert to lowercase
    df[column_name] = df[column_name].str.lower()
    
    # Remove numbers
    df[column_name] = df[column_name].str.replace(r'\d+', '', regex=True)
    
    # Replace multiple spaces with a single space
    df[column_name] = df[column_name].str.replace(r'\s+', ' ', regex=True)
    
    # Remove URLs
    df[column_name] = df[column_name].str.replace(r'http\S+', '', regex=True)
    df[column_name] = df[column_name].str.replace(r'href\S+', '', regex=True)

    # Return dataframe with preprocessed sentences
    return df

In [4]:
# Retrieves most frequent words for emotions: Used to define and expand keywords
def word_freq_top(df, emotion, N, emotion_col, text_col='text'):
    # Filter texts by the specified emotion
    emotion_texts = df[df[emotion_col] == emotion][text_col]

    # Define stopwords
    nltk_stop_words = set(stopwords.words('english')) # NLTK stopwords
    custom_stop_words = { # Custom stopwords 
        "im", 'days', 'well', 'quite', 'look', 'find', 'come', 'year','lot', 'part', 'take',
        'href', 'every', 'able', 'though','left', 'need', 'new', 'http', 'sure', 'around', 'say',
        'also', 'work', 'today', 'pretty', 'feels', 'going', 'feelings', 'back', 'way', 'always',
        'things', "feel", "thats", "one", 'actually', 'right', 'many', 'thing', 'seen', 'thought',
        'believe', 'didnt', 'want', 'time', 'makes', "even", "day", "go", "made", "yeah", "man", 
        'youre', 'ive', 'much', 'good', "know", 'see', 'cant', 'never', "got", 'think', "would",
        "still", "dont", "people", "like", "really", "get", "name", "i", "you", "really", "name",
        "make", 'could', "oh", 'something', 'little', 'bit', 'life', 'feeling', 'something', 'home',
        'enough', 'sometimes', 'important',
        
    }
    # Join NLTK and Custom stopwords
    stop_words = nltk_stop_words.union(custom_stop_words)

    # Tokenize text
    words = [
        word.lower()
        for text in emotion_texts
        for word in word_tokenize(text)
        if word.lower() not in stop_words and word.isalnum()
    ]
    # Count word frequency
    word_freq = Counter(words)

    # Return the top N words 
    return word_freq.most_common(N)

### Emotion classification functions

In [5]:
# Classifies tokenized text based on emotional keywords
def classify_emotion(text):
    # Tokenize text and convert to lowercase
    words = word_tokenize(text.lower())
    # Initialize dictionary for emotion scores (all set to zero initially)
    emotion_scores = {emotion: 0 for emotion in emotions_dict}

    # Iterate over tokenized words
    for word in words:
        # Iterate keywords dictionary, look for if current word and increment score if found
        for emotion, keywords in emotions_dict.items():
            if word in keywords:
                emotion_scores[emotion] += 1 # Increment corresponding emotion in the emotion_scores dictionary
    
    # Identify emotion with the highest score
    classified_emotion = max(emotion_scores, key=emotion_scores.get)
    
    # Return the identified emotion if its score is greater than 0. Otherwise, return "unknown"
    return classified_emotion if emotion_scores[classified_emotion] > 0 else "unknown"

In [6]:
# Appends identified emotion (i.e. used as column of input dataframe)
def label_text(sentences):
    # Initialize an empty list for identified emotions
    identified_emotions = []
    # Iterate over all texts
    for text in sentences:
        # Classify emotion and append
        emotion = classify_emotion(text)
        identified_emotions.append(emotion)
    
    # Return list with classified emotions for corresponding sentences
    return identified_emotions

In [7]:
# Function to count occurrences of abbreviations 
def count_abbr(df1, df2, text_col='text'):
    abbr = {
    "LMAO": "laugh my ass off",
    "LOL": "laughing out loud",
    "ROFL": "rolling on the floor laughing"
    }
    
    # Combine the text columns from both dfs
    combined_text = pd.concat([df1[text_col], df2[text_col]])

    # Initialize a count dictionary for each chat word
    abbr_word_counts = {word: 0 for word in abbr}

    # Iterate over the abbreviations and count occurrences
    for word, meaning in abbr.items():
        abbr_word_counts[word] = combined_text.str.count(r'\b' + re.escape(word) + r'\b', flags=re.IGNORECASE).sum()

    # Print results
    for word, count in abbr_word_counts.items():
        if count > 0:
            print(f"'{word}' found {count} times in the combined DataFrame.")

## Preprocessing

### GoEmotions dataset

In [8]:
# Concatenate GoEmotions dataset 1, 2 and 3
goe_merged = pd.concat([goe1, goe2, goe3], ignore_index=True)
goe_merged.head()

Unnamed: 0,text,id,author,subreddit,link_id,parent_id,created_utc,rater_id,example_very_unclear,admiration,...,love,nervousness,optimism,pride,realization,relief,remorse,sadness,surprise,neutral
0,That game hurt.,eew5j0j,Brdd9,nrl,t3_ajis4z,t1_eew18eq,1548381000.0,1,False,0,...,0,0,0,0,0,0,0,1,0,0
1,>sexuality shouldn’t be a grouping category I...,eemcysk,TheGreen888,unpopularopinion,t3_ai4q37,t3_ai4q37,1548084000.0,37,True,0,...,0,0,0,0,0,0,0,0,0,0
2,"You do right, if you don't care then fuck 'em!",ed2mah1,Labalool,confessions,t3_abru74,t1_ed2m7g7,1546428000.0,37,False,0,...,0,0,0,0,0,0,0,0,0,1
3,Man I love reddit.,eeibobj,MrsRobertshaw,facepalm,t3_ahulml,t3_ahulml,1547965000.0,18,False,0,...,1,0,0,0,0,0,0,0,0,0
4,"[NAME] was nowhere near them, he was by the Fa...",eda6yn6,American_Fascist713,starwarsspeculation,t3_ackt2f,t1_eda65q2,1546669000.0,2,False,0,...,0,0,0,0,0,0,0,0,0,1


In [9]:
# Filter the merged GoEmotions df 
# Only include rows where at least one of the specified emotions has a value of 1 (True)
goe_merged = goe_merged[(goe_merged[['anger', 'disgust', 'joy', 'sadness', 'surprise', 'fear']].eq(1)).any(axis=1)]

# Create the emotion column and append the correponding true emotions as list
goe_merged['emotion_with_1'] = goe_merged[['anger', 'disgust', 'joy', 'sadness', 'surprise', 'fear']].apply(
    lambda row: row.index[row == 1].tolist(), axis=1
)

# Handle entries where multiple emotions are defined as true
#goe_merged['emotion_with_1'] = goe_merged['emotion_with_1'].apply(lambda x: x[0] if x else None) # Selects first emotion from the list 
goe_merged['emotion_with_1'] = goe_merged[['anger', 'disgust', 'joy', 'sadness', 'surprise', 'fear']].apply(lambda row: row.index[row == 1].tolist(), axis=1).apply(lambda x: x[0] if len(x) == 1 else None) # Remove entries with multiple emotions and saves and string instead of list

# Drop all unrelated columns
goe_merged = goe_merged[[ 'text', 'emotion_with_1']]

# Return df head
goe_merged.head()

Unnamed: 0,text,emotion_with_1
0,That game hurt.,sadness
13,That is odd.,disgust
29,So happy for [NAME]. So sad he's not here. Ima...,
32,"Dark and funny, but not really nice guy. He ha...",disgust
43,By far the coolest thing I've seen on this thr...,joy


In [10]:
# Preprocess text using the 'preprocess_text' helper function
goe_merged = preprocess_text(goe_merged, column_name='text')
goe_merged.head()

Unnamed: 0,text,emotion_with_1
0,that game hurt,sadness
13,that is odd,disgust
29,so happy for name so sad hes not here imagine ...,
32,dark and funny but not really nice guy he has ...,disgust
43,by far the coolest thing ive seen on this thre...,joy


In [11]:
# Print most frequent words using the 'word_freq_top' helper function

# Print anger top word frequencies
print("anger:")
print(word_freq_top(goe_merged, 'anger', 10, 'emotion_with_1', 'text'))

# Print disgust top word frequencies
print("disgust:")
print(word_freq_top(goe_merged, 'disgust', 10, 'emotion_with_1', 'text'))

# Print joy top word frequencies
print("joy:")
print(word_freq_top(goe_merged, 'joy', 10, 'emotion_with_1', 'text'))

# Print Sadness top word frequencies
print("Sadness:")
print(word_freq_top(goe_merged, 'sadness', 10, 'emotion_with_1', 'text'))

# Print surprise top word frequencies
print("surprise:")
print(word_freq_top(goe_merged, 'surprise', 10, 'emotion_with_1', 'text'))

# Print fear top word frequencies
print("fear:")
print(word_freq_top(goe_merged, 'fear', 10, 'emotion_with_1', 'text'))

anger:
[('fuck', 746), ('hate', 581), ('fucking', 520), ('shit', 247), ('damn', 234), ('stupid', 224), ('hell', 218), ('stop', 187), ('bad', 154), ('angry', 149)]
disgust:
[('disgusting', 327), ('weird', 295), ('bad', 242), ('worst', 237), ('awful', 226), ('worse', 223), ('hate', 163), ('shit', 138), ('terrible', 122), ('fucking', 116)]
joy:
[('happy', 1144), ('glad', 844), ('love', 597), ('enjoy', 422), ('fun', 415), ('lol', 290), ('haha', 192), ('game', 177), ('great', 173), ('hope', 169)]
Sadness:
[('sorry', 810), ('sad', 742), ('bad', 392), ('sadly', 178), ('hard', 175), ('poor', 165), ('miss', 161), ('hope', 126), ('crying', 108), ('years', 105)]
surprise:
[('wow', 690), ('surprised', 433), ('wonder', 256), ('omg', 240), ('wondering', 117), ('shocked', 112), ('surprise', 104), ('hes', 94), ('years', 82), ('damn', 75)]
fear:
[('scared', 230), ('terrible', 226), ('afraid', 224), ('scary', 157), ('horrible', 145), ('terrifying', 95), ('fear', 94), ('worried', 86), ('cringe', 64), ('c

In [12]:
# Create dictionary with emotional keywords: obtained using the GoEmotions and Emotion datasets.
emotions_dict = {
    "anger": [
        "angry",'fuck', 'rude', 'jealous', 'bothered', 'selfish','hate',"mad",'stop', 'hell', 'shit', 'stupid', 'fucking',
        "pissed off", "upset", "enraged", "irritated", "fuming", "outraged", "infuriated", "bitter", "exasperated", 
        "frustrated", "livid", "annoyed", "agitated", "annoyance", "hostile", "cross", "vexed", "unbearable", "wrath",
        "heated", "outrageous", "displeased", "resentful", "scornful", 
    ],
    "disgust": [
        "disgusted", 'disgusting', 'awful', 'worst', 'worse', 'weird', 'terrible', "gross", "nauseous", "sickened", "repulsed", 
        "revolted", "put off", "offensive", "shudder", "uncomfortable", "unpalatable", "putrid", "icky", "unthinkable", 
        "nasty", "foul", "unpleasant", "horrible", "appalled", "detestable", "vile", "repugnant", "ugh", "abhorred", 
        "cringe", "disturbing", "loathsome"
    ],
    "joy": [
        "happy", 'enjoy', 'fun', 'lol', 'lmao', 'rofl', 'comfortable', 'confident', 'love', "excited", "thrilled", "ecstatic", 
        "joyful", "delighted", "cheerful", "elated", "overjoyed", "euphoric", "exuberant", "bubbly", "merry", "radiant", 
        "grateful", "beaming", "glad", "content", "satisfied", "jubilant", "lighthearted", "smiling", "laughing", "peaceful",
        "giggling", "blissful", "on cloud nine", "in high spirits", "happiness"
    ],
    "sadness": [
        "sad", 'sorry', 'poor', 'hard', 'alone', 'miss', 'sadly', "down", "heartbroken", "tearful", "grieving", "mournful", 
        "blue", "unhappy", "disappointed",  "depressed", "wretched", "lonely", "melancholy", "despondent", "forsaken", 
        "distraught", "painful", "anguished", "lost", "distressed", "regretful", "downcast", "defeated", "sorrowful",
        "lament", "shattered", "hurt", "hopeless", "inconsolable", "crushed", 
    ],
    "surprise": [
        "surprised", 'curious', 'wondering', 'overwhelmed', 'wtf', 'omg', 'strange', 'impressed', 'wonder', "shocked", "amazed", 
        "astonished", "startled", "unexpected", "incredible", "unbelievable", "taken aback", "jaw-dropping", "wow", 
        "dumbfounded", "flabbergasted", "stunned", "gobsmacked", "wide-eyed", "unexpectedly", "out of nowhere", "baffled", 
        "marveled", "astounded", "bewildered", "speechless", "in awe", "mind-blowing", "startling", "revealing"
    ],
    "fear": [
        "scared", 'worried', 'fear', 'scary', 'afraid', "terrified", "frightened", "alarmed", "anxious", "panicked", "nervous", 
        "dread", "horrified", "shaking", "spooked", "paranoid", "uneasy", "apprehensive", "paralyzed", "petrified", "worry",
         "disturbed", "intimidated", "fearful", "creepy", "haunted", "insecure", "vulnerable", "shuddering", "cold sweat", 
         "terrifying", "chilled", "tense"
    ]
}

# Ensure we dont have overlaps or duplicates
word_count = defaultdict(list)
for category, words in emotions_dict.items():
    for word in words:
        word_count[word].append(category)

# Find duplicate words (either same or other emotion label)
duplicates = {word: categories for word, categories in word_count.items() if len(categories) > 1}

# Display results
if duplicates:
    print("Duplicate words found:")
    for word, categories in duplicates.items():
        print(f"{word} found in categories: {', '.join(categories)}")
else:
    print("No duplicate words found.")

No duplicate words found.


In [13]:
# Classify emotions using the tokenized keywords with 'label_text' helper function
goe_merged['Emotion'] = label_text(goe_merged['text'])
goe_merged.head()

Unnamed: 0,text,emotion_with_1,Emotion
0,that game hurt,sadness,sadness
13,that is odd,disgust,unknown
29,so happy for name so sad hes not here imagine ...,,disgust
32,dark and funny but not really nice guy he has ...,disgust,unknown
43,by far the coolest thing ive seen on this thre...,joy,unknown


In [14]:
# Count no. of unclassified entries (unknown)
print(f"Number of 'unknown' values: {(goe_merged == 'unknown').sum().sum()}")

Number of 'unknown' values: 17117


In [15]:
# Drop rows where emotion is classified as unknown
goe_merged = goe_merged[~(goe_merged == "unknown").any(axis=1)].reset_index(drop=True)

# Drop emotion column 
goe_merged.drop(columns=['emotion_with_1'], inplace=True)

# Drop rows with identical text 
goe_merged = goe_merged.drop_duplicates(subset='text', keep='first')

goe_merged

Unnamed: 0,text,Emotion
0,that game hurt,sadness
1,so happy for name so sad hes not here imagine ...,disgust
2,sending love and strength vibes,joy
3,no were getting kinsler and be happy with it,joy
4,omg lala a housewife i cant even imagine,surprise
...,...,...
18273,she already had plans with some of her friends...,disgust
18275,hahaha ok i was worried there for a second,fear
18276,ok i get that like i said different morals for...,surprise
18277,well im glad youre out of all that now how awf...,anger


In [16]:
# Save as csv file.
goe_merged.to_csv('data/goe_merged.csv', index=False)

### Emotions dataset

In [17]:
# Emotions dataset preprocessing
# Rename the column 'label' to 'Emotion'
emo.rename(columns={'label': 'Emotion'}, inplace=True)

# Define the label mapping of numeric values to their corresponding emotion strings
emotion_mapping = {0: 'sadness', 1: 'joy', 3: 'anger', 4: 'fear', 5: 'surprise'}

# Map the numeric labels to string emotions
emo['Emotion'] = emo['Emotion'].map(emotion_mapping)

# Drop unrelevant columns
emo.drop(columns=['Unnamed: 0'], inplace=True)

# Drop rows where 'Emotion' is NaN (i.e. label does not correspond to any of the defined string emotions)
emo = emo.dropna(subset=['Emotion']).reset_index(drop=True)

# Return the head
emo.head()

Unnamed: 0,text,Emotion
0,i just feel really helpless and heavy hearted,fear
1,ive enjoyed being able to slouch about relax a...,sadness
2,i gave up my internship with the dmrg and am f...,fear
3,i dont know i feel so lost,sadness
4,i am a kindergarten teacher and i am thoroughl...,fear


In [18]:
# Preprocess text using the 'preprocess_text' helper function
emo = preprocess_text(emo, column_name='text')
emo.head()

Unnamed: 0,text,Emotion
0,i just feel really helpless and heavy hearted,fear
1,ive enjoyed being able to slouch about relax a...,sadness
2,i gave up my internship with the dmrg and am f...,fear
3,i dont know i feel so lost,sadness
4,i am a kindergarten teacher and i am thoroughl...,fear


In [19]:
# Print most frequent words using the 'word_freq_top' helper function

# Print anger top word frequencies
print("Anger:")
print(word_freq_top(emo, 'anger', 10, 'Emotion', 'text'))

# Print joy top word frequencies
print("Joy:")
print(word_freq_top(emo, 'joy', 10, 'Emotion', 'text'))

# Print Sadness top word frequencies
print("Sadness:")
print(word_freq_top(emo, 'sadness', 10, 'Emotion', 'text'))

# Print surprise top word frequencies
print("Surprise:")
print(word_freq_top(emo, 'surprise', 10, 'Emotion', 'text'))

# Print fear top word frequencies
print("Fear:")
print(word_freq_top(emo, 'fear', 10, 'Emotion', 'text'))

Anger:
[('angry', 2273), ('frustrated', 1720), ('annoyed', 1665), ('cold', 1602), ('selfish', 1572), ('stressed', 1552), ('irritated', 1494), ('rude', 1477), ('jealous', 1475), ('bothered', 1459)]
Joy:
[('love', 5350), ('happy', 3932), ('better', 2866), ('excited', 2288), ('help', 2119), ('comfortable', 1961), ('confident', 1917), ('amazing', 1912), ('less', 1911), ('strong', 1910)]
Sadness:
[('bad', 2605), ('love', 2598), ('alone', 2500), ('sad', 2356), ('help', 2218), ('lost', 2209), ('hurt', 2149), ('depressed', 1930), ('sorry', 1916), ('without', 1891)]
Surprise:
[('amazed', 1415), ('impressed', 1414), ('weird', 1413), ('strange', 1400), ('overwhelmed', 1391), ('surprised', 1389), ('amazing', 1370), ('shocked', 1348), ('funny', 1321), ('curious', 1302)]
Fear:
[('scared', 1811), ('afraid', 1780), ('anxious', 1742), ('nervous', 1670), ('uncomfortable', 1574), ('insecure', 1523), ('unsure', 1507), ('overwhelmed', 1497), ('weird', 1475), ('strange', 1465)]


In [20]:
# Print count of specified abbreviations using the 'count_abbr' helper function
count_abbr(emo, goe_merged)

'LMAO' found 110 times in the combined DataFrame.
'LOL' found 1035 times in the combined DataFrame.
'ROFL' found 8 times in the combined DataFrame.


In [21]:
# Save as csv file
emo.to_csv('data/emo.csv', index=False)

### Emotion Classification dataset

In [22]:
ed.head()

Unnamed: 0,Comment,Emotion
0,i seriously hate one subject to death but now ...,fear
1,im so full of life i feel appalled,anger
2,i sit here to write i start to dig out my feel...,fear
3,ive been really angry with r and i feel like a...,joy
4,i feel suspicious if there is no one outside l...,fear


In [23]:
# Rename 'Comment' column to 'text' to align with the other dataframes
ed.rename(columns={'Comment': 'text'}, inplace=True)

In [24]:
# Preprocess text using the 'preprocess_text' helper function
ed = preprocess_text(ed, column_name='text')

In [25]:
# Classify emotions using the tokenized keywords with 'label_text' helper function
ed['Emotion'] = label_text(ed['text'])
ed.head()

Unnamed: 0,text,Emotion
0,i seriously hate one subject to death but now ...,anger
1,im so full of life i feel appalled,disgust
2,i sit here to write i start to dig out my feel...,fear
3,ive been really angry with r and i feel like a...,anger
4,i feel suspicious if there is no one outside l...,unknown


In [26]:
# Count no. of unclassified entries (unknown)
print(f"Number of 'unknown' values: {(ed == 'unknown').sum().sum()}")

Number of 'unknown' values: 3249


In [27]:
# Drop rows where emotion is classified as unknown
ed = ed[~(ed == "unknown").any(axis=1)].reset_index(drop=True)
ed

Unnamed: 0,text,Emotion
0,i seriously hate one subject to death but now ...,anger
1,im so full of life i feel appalled,disgust
2,i sit here to write i start to dig out my feel...,fear
3,ive been really angry with r and i feel like a...,anger
4,i feel jealous becasue i wanted that kind of l...,anger
...,...,...
2683,i feel angry because i have led myself to lead...,anger
2684,i think we often feel this way about planting ...,sadness
2685,i have lost touch with the things that i feel ...,sadness
2686,i begun to feel distressed for you,sadness


In [28]:
# Save as csv file
ed.to_csv('data/ed.csv', index=False)

### Sentiment140 dataset 

In [29]:
# Only keep last column with sentences and rename to 'text' to align with the other dataframe structures
sent = sent.iloc[:, -1].to_frame(name="text")
sent

Unnamed: 0,text
0,is upset that he can't update his Facebook by ...
1,@Kenichan I dived many times for the ball. Man...
2,my whole body feels itchy and like its on fire
3,"@nationwideclass no, it's not behaving at all...."
4,@Kwesidei not the whole crew
...,...
1599994,Just woke up. Having no school is the best fee...
1599995,TheWDB.com - Very cool to hear old Walt interv...
1599996,Are you ready for your MoJo Makeover? Ask me f...
1599997,Happy 38th Birthday to my boo of alll time!!! ...


In [30]:
# Preprocess text using the 'preprocess_text' helper function
sent = preprocess_text(sent, column_name='text')

In [31]:
# Classify emotions using the tokenized keywords with 'label_text' helper function
sent['Emotion'] = label_text(sent['text'])
sent.head()

Unnamed: 0,text,Emotion
0,is upset that he cant update his facebook by t...,anger
1,kenichan i dived many times for the ball manag...,unknown
2,my whole body feels itchy and like its on fire,unknown
3,nationwideclass no its not behaving at all im ...,anger
4,kwesidei not the whole crew,unknown


In [32]:
# Count no. of unclassified entries (unknown)
print(f"Number of 'unknown' values: {(sent == 'unknown').sum().sum()}")

Number of 'unknown' values: 1166893


In [33]:
# Drop rows where emotion is classified as unknown
sent = sent[~(sent == "unknown").any(axis=1)].reset_index(drop=True)
sent

Unnamed: 0,text,Emotion
0,is upset that he cant update his facebook by t...,anger
1,nationwideclass no its not behaving at all im ...,anger
2,loltrish hey long time no see yes rains a bit ...,joy
3,iamjazzyfizzle i wish i got to watch it with y...,sadness
4,hollis death scene will hurt me severely to wa...,sadness
...,...,...
433101,mshiphop im glad ur doing well,joy
433102,rmedina latati mmmm that sounds absolutely per...,disgust
433103,cliffforster yeah that does work better than j...,surprise
433104,happy th birthday to my boo of alll time tupac...,joy


In [34]:
# Save as csv file
sent.to_csv('data/sent.csv', index=False)