### This notebook must be run locally, after unzipping the `stocktwits_data` folder into `data/`

In [183]:
import pandas as pd
from datetime import datetime, timedelta
import os

In [184]:
def process_folder(stock_ticker):
    """Load every StockTwits JSON file for one ticker into a single DataFrame.

    Each file found in ``data/stocktwits_data/<stock_ticker>`` is parsed with
    ``pd.read_json``; the per-file frames are stacked once (instead of
    re-concatenating the growing frame for every file) and the ``time``
    column is converted with ``datetime.fromtimestamp``, i.e. to naive
    datetimes in the machine's local timezone.

    Args:
        stock_ticker: Name of the sub-folder of ``data/stocktwits_data``
            to read (e.g. ``'AAPL'``).

    Returns:
        A DataFrame whose first columns are 'id', 'text', 'time' and
        'sentiment' (followed by any extra columns present in the files),
        with a fresh 0..n-1 index. An empty folder yields an empty frame
        with just those four columns.

    Raises:
        FileNotFoundError: If the ticker's folder does not exist.
    """
    base_columns = ['id', 'text', 'time', 'sentiment']
    directory = os.path.join('data', 'stocktwits_data', stock_ticker)

    # Sorted so the row order does not depend on the filesystem's listing order.
    frames = [
        pd.read_json(os.path.join(directory, name))
        for name in sorted(os.listdir(directory))
    ]

    if frames:
        df = pd.concat(frames, axis=0, ignore_index=True)
        # Keep the four expected columns first (creating any that are
        # missing), then whatever else the files contained.
        extra_columns = [col for col in df.columns if col not in base_columns]
        df = df.reindex(columns=base_columns + extra_columns)
    else:
        df = pd.DataFrame(columns=base_columns)

    # NOTE(review): 'time' is presumably a POSIX timestamp in seconds, since
    # that is what datetime.fromtimestamp expects -- confirm against the data.
    df['time'] = df['time'].apply(datetime.fromtimestamp)
    return df

In [142]:
def compute_value_counts(df, start_date, time_delta):
    """Count bullish and bearish messages inside a time window.

    The window is half-open: ``start_date <= time < start_date + time_delta
    days``. Including the lower bound means a message stamped exactly on a
    window boundary (e.g. midnight) is counted in exactly one window when
    consecutive windows are scanned, instead of being dropped from both.

    Args:
        df: DataFrame with a ``time`` column comparable to ``start_date``
            and a ``sentiment`` column.
        start_date: Inclusive start of the window.
        time_delta: Window length in days.

    Returns:
        A ``(bullish, bearish)`` tuple: the number of rows in the window
        whose sentiment is ``'Bullish'`` and ``'Bearish'`` respectively
        (0 when a label does not occur).
    """
    end_date = start_date + timedelta(days=time_delta)
    in_window = (df.time >= start_date) & (df.time < end_date)
    # Tally once and read both labels from the same result.
    counts = df.loc[in_window, 'sentiment'].value_counts()
    return counts.get('Bullish', 0), counts.get('Bearish', 0)

In [186]:
def add_sentiment_by_day(df, df_by_day, end, start=None):
    """Append one row of bullish/bearish counts per day to ``df_by_day``.

    Walks day by day from ``start`` up to (but not including) ``end``,
    asking ``compute_value_counts`` for each one-day window. The rows are
    collected first and concatenated once: ``DataFrame.append`` was removed
    in pandas 2.0, and appending inside the loop copied the whole frame on
    every iteration.

    Args:
        df: Message-level DataFrame passed through to
            ``compute_value_counts``.
        df_by_day: Existing per-day frame to extend (may be empty).
        end: Exclusive upper bound; the loop stops once the window start
            reaches it.
        start: First day to aggregate. Defaults to 1 January 2022, the
            value that was previously hard-coded.

    Returns:
        A DataFrame holding the rows of ``df_by_day`` followed by one row
        per day with 'date', 'bullish' and 'bearish' columns. If no day
        falls in the range, ``df_by_day`` is returned unchanged.
    """
    start_date = datetime(year=2022, day=1, month=1) if start is None else start
    time_delta_days = 1

    rows = []
    while start_date < end:
        pos, neg = compute_value_counts(df, start_date, time_delta_days)
        rows.append({'date': start_date, 'bullish': pos, 'bearish': neg})
        start_date += timedelta(days=time_delta_days)

    if not rows:
        return df_by_day

    new_rows = pd.DataFrame(rows)
    if df_by_day.empty:
        # Nothing to stack onto; just keep the incoming column layout
        # (avoids concatenating with an all-empty frame).
        extra_columns = [col for col in new_rows.columns if col not in df_by_day.columns]
        return new_rows.reindex(columns=list(df_by_day.columns) + extra_columns)
    return pd.concat([df_by_day, new_rows], ignore_index=True)

In [189]:
def gen_sentiment_csvs(stocks, end):
    """Write a ``<ticker>.csv`` of daily sentiment counts for each ticker.

    For every entry in ``stocks`` the ticker's messages are loaded with
    ``process_folder``, aggregated per day by ``add_sentiment_by_day``
    starting from an empty 'date'/'bullish'/'bearish' frame, and saved
    without the index column to ``<ticker>.csv`` in the working directory.

    Args:
        stocks: Iterable of ticker strings.
        end: Upper bound forwarded to ``add_sentiment_by_day``.
    """
    daily_columns = ['date', 'bullish', 'bearish']
    for ticker in stocks:
        messages = process_folder(ticker)
        # Start each ticker from a fresh, empty per-day table.
        empty_daily = pd.DataFrame(columns=daily_columns)
        daily = add_sentiment_by_day(messages, empty_daily, end)
        output_path = ticker + '.csv'
        daily.to_csv(output_path, index=False)

In [191]:
# Tickers to process; each is read from its own folder under data/stocktwits_data/.
stocks = ['AAPL', 'MSFT', 'NVDA']

In [192]:
# Exclusive upper bound for the daily aggregation (15 May 2022).
end = datetime(year=2022, day=15, month=5)

In [193]:
# Writes one <ticker>.csv of daily bullish/bearish counts per stock to the working directory.
gen_sentiment_csvs(stocks, end)