In [1]:
import pandas as pd
import nltk
import matplotlib.pyplot as plt
import seaborn as sns
import re
from nltk.sentiment.vader import SentimentIntensityAnalyzer

%matplotlib inline


In [2]:
# Sample diary entries used as the corpus for this notebook: one dict per post,
# with a "date" string (YYYY-MM-DD) and the free-text "content" of the entry.
# Dates are not unique: 2023-07-18, 2023-07-20 and 2023-07-21 each appear twice.
posts = [
    {
        "date": "2023-07-16",
        "content": "Today is a beautiful day. I woke up early and went for a walk in the park. The sun was shining and the birds were singing. I felt so peaceful and happy. After my walk, I came home and made a delicious breakfast. Then, I spent some time reading and writing. In the afternoon, I went to the library to return some books and check out some new ones. I had dinner with my friends and then we went to see a movie. It was a great day!"
    },
    {
        "date": "2023-07-17",
        "content": "Today was a productive day. I worked on my project all morning and made some great progress. In the afternoon, I went to the gym and worked out. Then, I met up with some friends for dinner. We had a lot of fun and I'm feeling really good about myself right now."
    },
    {
        "date": "2023-07-18",
        "content": "Today was a bit of a down day. I didn't get much done and I'm feeling a bit stressed out. I think I need to take some time for myself and relax. I'm going to go for a walk in the park and then read a book. Hopefully, that will help me feel better."
    },
    {
        "date": "2023-07-19",
        "content": "Today was a much better day. I woke up feeling refreshed and motivated. I got a lot done at work and I even had some time to relax and enjoy myself. I went to the movies with my friends and we had a great time. I'm feeling really happy and positive right now."
    },
    {
        "date": "2023-07-20",
        "content": "Today was a day of self-care. I woke up early and went for a yoga class. Then, I took a long bath and read a book. I felt so relaxed and rejuvenated. In the afternoon, I went out to lunch with my mom. We had a great time catching up and I felt so loved and supported. It was a perfect day."
    },
    {
        "date": "2023-07-21",
        "content": "Today was a day of adventure. I went hiking in the mountains with my friends. We had a lot of fun exploring and we even saw some wildlife. We ended the day with a campfire and roasted marshmallows. It was a perfect way to end the week."
    },
    {
        "date": "2023-07-18",
        "content": "I had a really tough day today. I woke up feeling stressed and anxious, and it just got worse as the day went on. I had a meeting at work that didn't go well, and then I got into a fight with my partner. I'm feeling really down right now, and I don't know how to make myself feel better."
    },
    {
        "date": "2023-07-20",
        "content": "I'm feeling really lonely today. I haven't been able to connect with my friends or family lately, and I'm starting to feel isolated. I know I need to reach out to people, but I'm just not feeling motivated. I'm feeling really down and I don't know what to do."
    },
    {
        "date": "2023-07-21",
        "content": "I'm feeling really disappointed today. I had been working really hard on a project, and I thought it was going really well. But then I got feedback from my boss, and she said that I needed to make some major changes. I'm feeling really discouraged, and I don't know if I can do it."
    }
]


In [3]:
# One row per post; the dict keys ("date", "content") become the columns.
data = pd.DataFrame(data=posts)
data


Unnamed: 0,date,content
0,2023-07-16,Today is a beautiful day. I woke up early and ...
1,2023-07-17,Today was a productive day. I worked on my pro...
2,2023-07-18,Today was a bit of a down day. I didn't get mu...
3,2023-07-19,Today was a much better day. I woke up feeling...
4,2023-07-20,Today was a day of self-care. I woke up early ...
5,2023-07-21,Today was a day of adventure. I went hiking in...
6,2023-07-18,I had a really tough day today. I woke up feel...
7,2023-07-20,I'm feeling really lonely today. I haven't bee...
8,2023-07-21,I'm feeling really disappointed today. I had b...


In [4]:
def clean_text(text):
    """Lower-case ``text`` and replace every non-letter character with a space.

    Every character outside ``a-z``/``A-Z`` (digits, punctuation, apostrophes,
    and whitespace including newlines) is replaced one-for-one by a space.
    Contractions are therefore split: "didn't" becomes "didn t".

    Args:
        text: The raw string to normalise.

    Returns:
        The lower-cased string, in which every non-letter has become a space.
    """
    # The negated character class already matches "\n", so a separate newline
    # substitution afterwards would never find anything to replace.
    return re.sub(r"[^a-zA-Z]", " ", text).lower()


In [5]:
# Start the working frame with the post dates, exposed under the name "time".
df = pd.DataFrame({'time': data['date']})


In [6]:
# Normalise the raw text first, then split each cleaned post into word tokens.
cleaned_posts = data['content'].apply(clean_text)
df['content'] = cleaned_posts.apply(nltk.word_tokenize)
df


Unnamed: 0,time,content
0,2023-07-16,"[today, is, a, beautiful, day, i, woke, up, ea..."
1,2023-07-17,"[today, was, a, productive, day, i, worked, on..."
2,2023-07-18,"[today, was, a, bit, of, a, down, day, i, didn..."
3,2023-07-19,"[today, was, a, much, better, day, i, woke, up..."
4,2023-07-20,"[today, was, a, day, of, self, care, i, woke, ..."
5,2023-07-21,"[today, was, a, day, of, adventure, i, went, h..."
6,2023-07-18,"[i, had, a, really, tough, day, today, i, woke..."
7,2023-07-20,"[i, m, feeling, really, lonely, today, i, have..."
8,2023-07-21,"[i, m, feeling, really, disappointed, today, i..."


In [7]:
# NLTK's English stop-word list, extended with 'today' and 'day'.
stop_words = nltk.corpus.stopwords.words('english') + ['today', 'day']


In [8]:
# Build the lookup set once: membership tests against a set are constant-time,
# whereas testing against the `stop_words` list scans it for every token.
stop_word_set = set(stop_words)
df['content'] = df['content'].apply(
    lambda tokens: [token for token in tokens if token not in stop_word_set])
df


Unnamed: 0,time,content
0,2023-07-16,"[beautiful, woke, early, went, walk, park, sun..."
1,2023-07-17,"[productive, worked, project, morning, made, g..."
2,2023-07-18,"[bit, get, much, done, feeling, bit, stressed,..."
3,2023-07-19,"[much, better, woke, feeling, refreshed, motiv..."
4,2023-07-20,"[self, care, woke, early, went, yoga, class, t..."
5,2023-07-21,"[adventure, went, hiking, mountains, friends, ..."
6,2023-07-18,"[really, tough, woke, feeling, stressed, anxio..."
7,2023-07-20,"[feeling, really, lonely, able, connect, frien..."
8,2023-07-21,"[feeling, really, disappointed, working, reall..."


In [60]:
# Score the raw (uncleaned) post text. Each post is scored exactly once and the
# resulting score dict is reused for all four columns.
ss = SentimentIntensityAnalyzer()
sentiment_scores = [ss.polarity_scores(text) for text in data['content']]
df['positive'] = [scores['pos'] for scores in sentiment_scores]
df['negative'] = [scores['neg'] for scores in sentiment_scores]
df['neutral'] = [scores['neu'] for scores in sentiment_scores]
# The 'compound' column holds a label, not the numeric score: a strictly
# positive compound score maps to 'Happy', zero or negative maps to 'Sad'.
df['compound'] = ['Happy' if scores['compound'] > 0 else 'Sad' for scores in sentiment_scores]
df


Unnamed: 0,time,content,positive,negative,neutral,compound,pos_word_count,neg_word_count
0,2023-07-16,"[beautiful, woke, early, went, walk, park, sun...",0.246,0.0,0.754,Happy,5,0
1,2023-07-17,"[productive, worked, project, morning, made, g...",0.305,0.0,0.695,Happy,8,0
2,2023-07-18,"[bit, get, much, done, feeling, bit, stressed,...",0.23,0.044,0.726,Happy,1,0
3,2023-07-19,"[much, better, woke, feeling, refreshed, motiv...",0.455,0.0,0.545,Happy,9,0
4,2023-07-20,"[self, care, woke, early, went, yoga, class, t...",0.297,0.0,0.703,Happy,6,0
5,2023-07-21,"[adventure, went, hiking, mountains, friends, ...",0.246,0.0,0.754,Happy,2,0
6,2023-07-18,"[really, tough, woke, feeling, stressed, anxio...",0.094,0.219,0.687,Sad,5,2
7,2023-07-20,"[feeling, really, lonely, able, connect, frien...",0.152,0.131,0.717,Sad,1,2
8,2023-07-21,"[feeling, really, disappointed, working, reall...",0.088,0.142,0.77,Sad,1,2


In [12]:
# Load the positive and negative word lists, one entry per line of each file.
# Entries keep their trailing newline here.
# NOTE(review): if these are the Hu & Liu opinion-lexicon files, they begin
# with ';' comment lines and may need an explicit encoding — confirm.
pos_words = []
with open('./positive-words.txt', 'r') as pos_file:
    pos_words += pos_file.readlines()

neg_words = []
with open('./negative-words.txt') as neg_file:
    neg_words += neg_file.readlines()

In [42]:
def _without_newlines(lines):
    """Return a new list with every newline character removed from each entry."""
    return [line.replace('\n', '') for line in lines]


pos_words_cleaned = _without_newlines(pos_words)
neg_words_cleaned = _without_newlines(neg_words)

In [43]:
def _count_lexicon_hits(content, lexicon):
    """Count how many items of ``content`` are members of ``lexicon``.

    Repeated words are counted every time they occur.
    """
    return sum(1 for word in content if word in lexicon)


def PositiveWordCount(content):
    """Return the number of tokens in ``content`` found in ``pos_words_cleaned``.

    Args:
        content: An iterable of word tokens.

    Returns:
        The count of tokens (duplicates included) present in the positive
        word list; 0 for empty input.
    """
    return _count_lexicon_hits(content, pos_words_cleaned)


def NegativeWordCount(content):
    """Return the number of tokens in ``content`` found in ``neg_words_cleaned``.

    Args:
        content: An iterable of word tokens.

    Returns:
        The count of tokens (duplicates included) present in the negative
        word list; 0 for empty input.
    """
    return _count_lexicon_hits(content, neg_words_cleaned)

In [44]:
# Per-post counts of tokens that appear in the positive / negative word lists.
df['pos_word_count'] = df['content'].map(PositiveWordCount)
df['neg_word_count'] = df['content'].map(NegativeWordCount)

In [45]:
# Show the frame with the lexicon-based word counts added.
# NOTE(review): the saved output under this cell lists numeric `compound`
# values, while the sentiment cell stores 'Happy'/'Sad' labels in that column;
# the output appears to come from an earlier run — re-execute to refresh it.
df

Unnamed: 0,time,content,positive,negative,neutral,compound,pos_word_count,neg_word_count
0,2023-07-16,"[beautiful, woke, early, went, walk, park, sun...",0.246,0.0,0.754,0.9755,5,0
1,2023-07-17,"[productive, worked, project, morning, made, g...",0.305,0.0,0.695,0.9516,8,0
2,2023-07-18,"[bit, get, much, done, feeling, bit, stressed,...",0.23,0.044,0.726,0.8519,1,0
3,2023-07-19,"[much, better, woke, feeling, refreshed, motiv...",0.455,0.0,0.545,0.9819,9,0
4,2023-07-20,"[self, care, woke, early, went, yoga, class, t...",0.297,0.0,0.703,0.966,6,0
5,2023-07-21,"[adventure, went, hiking, mountains, friends, ...",0.246,0.0,0.754,0.9081,2,0
6,2023-07-18,"[really, tough, woke, feeling, stressed, anxio...",0.094,0.219,0.687,-0.7787,5,2
7,2023-07-20,"[feeling, really, lonely, able, connect, frien...",0.152,0.131,0.717,-0.2303,1,2
8,2023-07-21,"[feeling, really, disappointed, working, reall...",0.088,0.142,0.77,-0.5908,1,2


In [70]:
# Keep everything except the token lists for the final summary frame.
df_final = df.drop(columns=['content'])

In [71]:
def min_max_scale(column):
    """Linearly rescale a numeric Series so its minimum maps to 0 and its maximum to 1.

    Args:
        column: A numeric pandas Series.

    Returns:
        A Series on the same index. When every value in ``column`` is equal
        there is no range to scale by, so zeros are returned instead of the
        NaNs that a 0/0 division would produce.
    """
    lowest = column.min()
    value_range = column.max() - lowest
    if value_range == 0:
        # Constant column: `column - lowest` is all zeros and avoids dividing by zero.
        return column - lowest
    return (column - lowest) / value_range


# Normalise each sentiment proportion across posts; the source values are read
# from `df`, so re-running this cell always yields the same result.
for score_column in ('positive', 'negative', 'neutral'):
    df_final[score_column] = min_max_scale(df[score_column])

In [72]:
# Inspect the min-max normalised scores alongside the label and word counts.
df_final

Unnamed: 0,time,positive,negative,neutral,compound,pos_word_count,neg_word_count
0,2023-07-16,0.430518,0.0,0.928889,Happy,5,0
1,2023-07-17,0.591281,0.0,0.666667,Happy,8,0
2,2023-07-18,0.386921,0.200913,0.804444,Happy,1,0
3,2023-07-19,1.0,0.0,0.0,Happy,9,0
4,2023-07-20,0.569482,0.0,0.702222,Happy,6,0
5,2023-07-21,0.430518,0.0,0.928889,Happy,2,0
6,2023-07-18,0.016349,1.0,0.631111,Sad,5,2
7,2023-07-20,0.174387,0.598174,0.764444,Sad,1,2
8,2023-07-21,0.0,0.648402,1.0,Sad,1,2


In [69]:
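# NOTE: disabled experiment, kept for reference. It would relabel `compound`
# from the normalised scores: 'Neutral' when both positive and negative are
# below 0.5, 'Happy' when positive is above 0.5, and 'Sad' otherwise.
# If re-enabled, assign with df_final.loc[i, 'compound'] = ... because the
# chained form df_final['compound'][i] = ... may write to a temporary copy.
# for i in range(df_final.shape[0]):
#     if(df_final['positive'][i] < 0.5 and df_final['negative'][i] < 0.5):
#         df_final['compound'][i] = 'Neutral'
#     elif df_final['positive'][i]  > 0.5:
#         df_final['compound'][i] = 'Happy'
#     else:
#         df_final['compound'][i] = 'Sad'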
    