In [None]:
# Standard imports
import logging
import datetime
import pandas as pd

# Related 3rd party imports
import praw 

# Local imports
from api import APIConnection
from get_data import GetData
from database import DatabaseManager

# Configure root logging: INFO and above, each record prefixed with timestamp and level name
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

In [None]:
# Config
# Reddit API credentials are read from environment variables so that no secret
# is ever stored in the notebook itself. Set these before starting the kernel:
#   REDDIT_CLIENT_ID, REDDIT_CLIENT_SECRET, REDDIT_USER_AGENT
import os

_REQUIRED_ENV_VARS = ("REDDIT_CLIENT_ID", "REDDIT_CLIENT_SECRET", "REDDIT_USER_AGENT")
_missing_env_vars = [name for name in _REQUIRED_ENV_VARS if not os.environ.get(name)]
if _missing_env_vars:
    # Fail early with an actionable message instead of a late authentication error
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_env_vars)
    )

CLIENT_ID = os.environ["REDDIT_CLIENT_ID"]
CLIENT_SECRET = os.environ["REDDIT_CLIENT_SECRET"]
USER_AGENT = os.environ["REDDIT_USER_AGENT"]

# Wrap the credentials in the project's API helper
api_inst = APIConnection(client_id=CLIENT_ID,
                         client_secret=CLIENT_SECRET,
                         user_agent=USER_AGENT)

# Client object handed to the data-collection step
reddit_client = api_inst.initialise_client()

In [None]:
# Build the collector from the Reddit client, the firm list file and the subreddit name
data_inst = GetData(
    reddit_client=reddit_client,
    firm_list_path='firms.csv',
    subreddit='wallstreetbets',
)

# Timestamp passed to get_comments as `last_run_time` (27 March 2024, 10:00, naive datetime)
last_run_time = datetime.datetime(year=2024, month=3, day=27, hour=10, minute=0)

# NOTE(review): comment_target presumably bounds how many comments are collected and
# last_run_time presumably acts as a cut-off — confirm against GetData.get_comments
df = data_inst.get_comments(
    comment_target=10,
    last_run_time=last_run_time,
)

In [None]:
# Replace the fetched frame with the output of GetData.clean_comments.
# NOTE(review): `df` is rebound from its own previous value, so re-running this cell
# feeds already-cleaned data back in — confirm clean_comments is idempotent.
df = data_inst.clean_comments(df)


In [None]:
# Persist the current comments frame into the SQLite file via the project's DatabaseManager
database_manager = DatabaseManager(db_path="reddit-sqlite.db")
database_manager.connect()
try:
    database_manager.create_table()
    database_manager.insert_new_comments(df)
finally:
    # Always release the connection, even when table creation or the insert fails,
    # so a failed run does not leave the database handle open in the kernel.
    database_manager.close()

## Sentiment Analysis

In [None]:
# Load sentiment analyser
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Initialise VADER
# The analyser needs the `vader_lexicon` NLTK resource. On a fresh environment it is
# not installed and the constructor raises LookupError, so fetch it once and retry.
try:
    sia = SentimentIntensityAnalyzer()
except LookupError:
    nltk.download('vader_lexicon', quiet=True)
    sia = SentimentIntensityAnalyzer()

In [None]:
# Score every comment with VADER, keep the compound value, and bucket it into a label
def _label_from_compound(compound):
    """Map a compound score to 'POSITIVE' (>= 0.05), 'NEGATIVE' (<= -0.05) or 'NEUTRAL'."""
    if compound >= 0.05:
        return 'POSITIVE'
    if compound <= -0.05:
        return 'NEGATIVE'
    return 'NEUTRAL'


# Full polarity dict per comment, as returned by sia.polarity_scores
df['sentiment_scores'] = df['comment'].apply(sia.polarity_scores)
# Pull the 'compound' entry out of each dict
df['compound'] = df['sentiment_scores'].apply(lambda scores: scores['compound'])
# Categorical label derived from the compound score
df['sentiment'] = df['compound'].apply(_label_from_compound)
df.head(5)

In [None]:
# Export the current frame to an Excel workbook at a path relative to the working directory.
# NOTE(review): openpyxl is not referenced directly in this cell; presumably it is imported
# so that a missing Excel engine fails here with a clear ImportError — confirm.
import openpyxl
df.to_excel('reddit_sample.xlsx')

## Read in db file

In [None]:
import pandas as pd
import sqlite3
from contextlib import closing

# Connect to db
# `closing` guarantees the connection is closed even if the query raises
# (e.g. the `comments` table does not exist yet), so no handle is leaked.
with closing(sqlite3.connect('reddit-sqlite.db')) as conn:
    # Query
    query = "SELECT * FROM comments"
    # Load every row of the comments table into a DataFrame
    df = pd.read_sql_query(query, conn)


In [None]:
# Number of rows currently held in `df`
len(df)

In [None]:
# Dump the frame to a CSV file at a path relative to the working directory.
# NOTE(review): `index` is left at its default, so the DataFrame index is written as an
# extra first column — confirm downstream readers expect that.
df.to_csv("raw_data.csv")