# Joey Cartwright 
### *Basic Sentiment Analysis Applied to All Replies and Posts On Reddit*

In [1]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns
import json

# Load the post data from a JSON file in the working directory into a DataFrame.
df = pd.read_json("LosAngelesData.json")
# Bare expression: the notebook renders the frame as this cell's output.
df

Unnamed: 0,title,location,score,date,url,num_comments,replies
0,Los Angeles Wildfires - The Solution:,LosAngeles,5177,2025-01-25,https://i.redd.it/xmnfadgii7fe1.jpeg,1358,[{'body': '“I’m signing a declaration that wat...
1,Khloé Kardashian Called Hypocrite After Critic...,LosAngeles,2719,2025-01-13,https://amp.tmz.com/2025/01/12/khloe-kardashia...,570,"[{'body': '[removed]', 'date': '01-13-2025'}, ..."
2,President Biden announces $770 payments for Ca...,LosAngeles,2568,2025-01-14,https://thehill.com/homenews/administration/50...,400,[{'body': 'before this comment section gets ou...
3,"""Customers Are Not Coming In"": LA Restaurants ...",LosAngeles,1080,2025-01-18,https://la.eater.com/2025/1/17/24346323/los-an...,431,[{'body': 'My New Year’s resolution after goin...
4,Edison wants to raise rates to pay for wildfir...,LosAngeles,1387,2025-01-30,https://www.latimes.com/environment/story/2025...,223,[{'body': 'This kind of bullshit should absolu...
...,...,...,...,...,...,...,...
150,We're Ready For Wildfire - Are You?,LosAngeles,21,2024-11-06,https://lafd.org/wildfire,4,"[{'body': 'Judging by election results, it loo..."
151,Wildfire smoke?,LosAngeles,17,2024-09-23,https://www.reddit.com/r/LosAngeles/comments/1...,5,[{'body': 'There’s a vacant building at Bundy/...
152,"Find recovery resources for LA, San Bernardino...",LosAngeles,9,2024-09-18,https://www.nbclosangeles.com/news/california-...,1,[{'body': 'Please keep comments and discussion...
153,Wildfires from above,LosAngeles,23,2024-09-11,https://i.redd.it/8r72vetht3od1.jpeg,0,[]


In [2]:
from datetime import datetime

def format_date(data):
    """Rewrite the ``date`` column of ``data`` as ``MM-DD-YYYY`` strings.

    The column is parsed with ``pd.to_datetime`` and rendered with
    ``strftime("%m-%d-%Y")``.

    Note: ``data`` is modified in place and the very same object is returned,
    so the caller's frame is changed as well.
    """
    # Parse first, then overwrite the column once with the formatted strings.
    parsed_dates = pd.to_datetime(data["date"])
    data["date"] = parsed_dates.dt.strftime("%m-%d-%Y")
    return data

In [3]:
# format_date rewrites df["date"] in place and returns the same object,
# so df_new and df refer to the same DataFrame from here on.
df_new = format_date(df)

In [4]:
# Want to create a dictionary to store my dates that point to post titles / replies

def create_dict(data):
    """Group post titles and reply bodies by date.

    Parameters
    ----------
    data : pandas.DataFrame
        Iterated with ``iterrows()``; every row must provide ``date``,
        ``title`` and ``replies``. Each element of ``replies`` is indexed
        with ``"date"`` and ``"body"``.

    Returns
    -------
    dict
        Maps a date key to the list of texts seen for that date, in
        encounter order. Post dates (``row["date"]``) and reply dates
        (``reply["date"]``) are used as keys exactly as given, so they only
        land in the same bucket when both use the same representation.
    """
    date_dict = {}

    for _, row in data.iterrows():
        # Store the title as text: the original computed str(title) but then
        # appended the raw value, so a non-string title would reach the
        # sentiment step un-coerced.
        date_dict.setdefault(row["date"], []).append(str(row["title"]))

        replies = row["replies"]
        # A missing replies cell (None / NaN) means "no replies" rather than
        # a TypeError when iterating.
        if replies is None or isinstance(replies, float):
            continue

        # Replies are filed under their own date, not the post's date.
        for reply in replies:
            date_dict.setdefault(reply["date"], []).append(reply["body"])

    return date_dict

# Uses df after format_date has rewritten its date column to MM-DD-YYYY strings,
# the same shape as the reply dates visible in the data (e.g. '01-13-2025').
date_dict = create_dict(df)

In [5]:
# Every date key present in date_dict, in insertion order.
unique_keys = list(date_dict.keys())
# Bare expression: displays the whole date -> texts mapping as the cell output.
date_dict

{'01-25-2025': ['Los Angeles Wildfires - The Solution:',
  '“I’m signing a declaration that water will be the wettest it’s ever been. And fire will be less hot in California.”',
  'He should sign an executive order banning natural disasters nationwide.',
  'ah yes. The north to south flow of water wile e coyote blocked with the giant acme valve.',
  'I long for the good old days when we naively thought Bush was embarrassing.',
  "This is on us. One of us should have remembered to just turn the water on! Thank god he reminded us. Problem solved, good job everyone, i'll write a note so we don't forget again.",
  'It’s that easy, huh?',
  '“I’m signing an executive order to not do anything :)”',
  'This is the onion right?',
  'This is truly the friggen dumbest MF God ever created.',
  'Where is he today?  I have time to go boo him.',
  'Can I still shower and so forth when they open the valves? Or am I going to lose pressure? \n\nAsking for a friend',
  'Just turn on the faucet. Mind blo

In [6]:
import nltk
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

class sentiment_analysis_la():
    """VADER compound-sentiment helpers for the posts table.

    ``json_data`` is stored as ``self.data`` and is read through
    ``self.data["replies"]`` and ``self.data["date"]``. ``self.reply_scores``
    holds one list of compound scores per entry of ``self.data["replies"]``
    once ``calc_replies`` has run.
    """

    def __init__(self,json_data):
        self.data = json_data
        self.reply_scores = []
        # Created on first use and then reused, instead of constructing a new
        # analyzer inside every method call.
        self._analyzer = None

    def _get_analyzer(self):
        """Return the shared SentimentIntensityAnalyzer, creating it on first use."""
        if getattr(self, "_analyzer", None) is None:
            self._analyzer = SentimentIntensityAnalyzer()
        return self._analyzer

    def _compound(self, text):
        """Return the ``'compound'`` entry of the analyzer's scores for ``text``."""
        return self._get_analyzer().polarity_scores(text)['compound']

    # Given a dictionary, replace string values of dict to float sentiment scores
    def replace_with_sentiment(self,data):
        """Map ``{key: [text, ...]}`` to ``{key: [compound_score, ...]}``.

        Scores keep the order of the input texts. A key whose list of texts
        is empty is left out of the result.
        """
        new_dict = {}

        for key, values in data.items():
            scores = [self._compound(text) for text in values]
            # Keys are only created when at least one text was scored.
            if scores:
                new_dict[key] = scores

        return new_dict

    # create sentiment score for all replies, returns a list of scores
    def reply_analysis(self,replies):
        """Score one post's replies and append the score list to ``self.reply_scores``.

        ``replies`` may hold plain strings or dicts carrying the text under
        ``"body"``; for dicts only the body is scored. A missing value
        (None / NaN) is treated as an empty list. Returns ``None``.
        """
        if replies is None or isinstance(replies, float):
            replies = []

        scores = [
            self._compound(reply["body"] if isinstance(reply, dict) else reply)
            for reply in replies
        ]

        self.reply_scores.append(scores)
        return

    # calculate all scores for all replies by using reply_analysis function
    def calc_replies(self):
        """Recompute ``self.reply_scores`` for every entry of ``self.data["replies"]``."""
        # Start from scratch so repeated calls do not pile up duplicate scores.
        self.reply_scores = []
        for replies in self.data["replies"]:
            self.reply_analysis(replies)

    # Builds a date -> sentiment scores mapping (no file is written here)
    def create_csv(self):
        """Return ``{"YYYY-MM-DD": [scores_of_post, ...]}`` for all posts.

        Despite the name, nothing is written to disk. Each post contributes
        its own list of reply scores to the bucket of its own date; posts
        sharing a date are collected in the same bucket. Dates may be
        datetime-like objects or strings that ``pd.to_datetime`` can parse.
        """
        self.calc_replies()

        sent_dict = {}

        # Pair each post's date with that post's scores only (the earlier
        # nested loop attached every post's scores to every date).
        for date, scores in zip(self.data["date"], self.reply_scores):
            key = pd.to_datetime(date).strftime('%Y-%m-%d')
            sent_dict.setdefault(key, []).append(scores)

        return sent_dict

    

In [7]:
# Score the date-keyed texts built by create_dict; df is only stored on the model here.
la_model = sentiment_analysis_la(df)
new_dict = la_model.replace_with_sentiment(date_dict)
# NOTE(review): the per-post scoring path below is disabled; only the
# date-keyed scores in new_dict are produced by this cell.
# la_model.calc_replies()
# la_data_list = la_model.create_csv()
# Prints the full date -> compound-score lists mapping as plain text.
print(new_dict)

{'01-25-2025': [0.3182, -0.34, -0.2732, 0.1531, 0.0772, 0.7976, 0.4404, 0.0, 0.0, 0.4019, 0.0, -0.264, 0.0, 0.6705, 0.1611, -0.0516, -0.9561, 0.0, 0.4588, -0.9674, -0.3382, -0.1779, -0.296, -0.25, -0.5106, -0.4404, -0.9765, 0.0, 0.7319, -0.5574, 0.6597, 0.5106, 0.0, 0.7941, -0.2878, 0.9312, -0.6249, -0.9231, 0.0, 0.0, -0.0516, -0.5187, 0.0, 0.4404, 0.0, 0.1769, 0.0857, 0.0, 0.0, 0.0, 0.3939, -0.6767, 0.4404, -0.3734, 0.0, -0.8591, -0.0772, -0.2023, 0.2462, 0.0, -0.9128, 0.0, 0.6249, 0.7845, -0.8176, -0.7399, -0.5688, -0.7783, 0.0, 0.0, 0.1027, -0.296, 0.5284, -0.659, -0.4767, -0.759, 0.7096, 0.0, 0.0772, 0.3612, -0.7269, 0.4753, 0.1779, 0.4019, -0.2263, 0.0, 0.0, -0.296, -0.8126, -0.2732, 0.0, 0.5229, 0.0, -0.0258, -0.7325, 0.0, 0.6705, 0.0, 0.0, 0.6981, -0.18, -0.743, 0.0, 0.2023, 0.4496, 0.0, 0.296, -0.9538, 0.0772, -0.8496, -0.4574, -0.4939, -0.3016, -0.6428, -0.5849, -0.659, 0.68, 0.296, 0.1779, 0.9392, 0.1531, 0.4215, 0.0, -0.0772, -0.7855, 0.0, 0.0, -0.5574, 0.0, -0.7553, 0.0, -0

In [8]:
import json

# Write the date -> compound-score lists mapping to disk as indented JSON
# (the file is created or overwritten in the working directory).
with open('sentiment_data_replies.json', 'w') as file:
    json.dump(new_dict, file, indent=2)