In [None]:
import sys
import os
import pprint
import pandas as pd
import torch
from FinABSA import ABSA
from tqdm.auto import tqdm # A good progress bar library

In [None]:
# Load the news CSV and parse the publication timestamps into datetimes.
news = pd.read_csv('News/guardian_financial_news_master.csv')
news['pub_date'] = pd.to_datetime(news['pub_date'])

# Analysis window; both bounds are inclusive.
# NOTE(review): a bare date string compares as midnight of that day, so if
# pub_date carries a time of day, rows later on end_date fall outside the
# window -- confirm this is intended.
start_date = '2020-06-30'
end_date = '2020-09-30'

in_window = news['pub_date'].between(start_date, end_date)

# .copy() so that columns added to filtered_df later never touch `news`.
filtered_df = news.loc[in_window].copy()
filtered_df.shape

In [None]:
def sentiment_model(text, absa):
    """Run aspect-based sentiment analysis on a single text.

    Args:
        text: The input passed to ``absa.run_absa`` as ``input_str``.
        absa: Any object exposing ``run_absa(input_str=...)``.

    Returns:
        Whatever ``absa.run_absa`` returns for ``text``, unchanged.
    """
    result = absa.run_absa(input_str=text)

    # Release cached GPU memory after each call; nothing to do without CUDA.
    cuda_present = torch.cuda.is_available()
    if cuda_present:
        torch.cuda.empty_cache()

    return result

# Instantiate the ABSA model once and reuse it for every article.
absa = ABSA()

# Number of articles processed between GPU cache flushes.
# run_absa is still called one text at a time, so this does not change what
# the model sees per call; it only controls how often cached GPU memory is
# released. Start small (e.g. 16 or 32) and raise it if memory allows.
batch_size = 16

# Per-article results, kept in the same row order as filtered_df.
all_results = []

# Walk the 'body' column in chunks; tqdm renders a progress bar over chunks.
for i in tqdm(range(0, len(filtered_df), batch_size)):

    # .iloc makes the slice explicitly positional, independent of the index
    # labels that filtered_df inherited from `news`.
    batch_texts = filtered_df['body'].iloc[i : i + batch_size].tolist()

    # One run_absa call per article in this chunk.
    batch_results = [absa.run_absa(text) for text in batch_texts]
    all_results.extend(batch_results)

    # Free cached GPU memory before the next chunk to reduce the risk of
    # out-of-memory errors. Guarded the same way as in sentiment_model so the
    # loop behaves identically on CPU-only machines.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

# Positional assignment: all_results[k] belongs to the k-th row of filtered_df.
filtered_df['sentiment_results'] = all_results

In [None]:
# Destination of the long-format table.
# NOTE(review): absolute, user-specific path -- consider a configurable
# output directory so the notebook runs on other machines.
output_csv = '/home/ktallam/signaltest.csv'

# Flatten the per-article results into one row per (article, entity) pair.
# Each 'sentiment_results' entry is expected to be a dict mapping an entity
# name to a dict holding 'classification_output' and 'logits'.
# Building plain records avoids expanding to a dense articles x entities
# frame, and does not rely on DataFrame.stack() dropping the NaN cells of
# that frame (the new stack implementation keeps them, which would make the
# per-row key lookups fail on NaN).
# Rows follow article order, then each article's own entity order.
long_records = [
    {
        'date': pub_date,
        'entity': entity,
        'classification': details['classification_output'],
        'softmax_scores': details['logits'],
    }
    for pub_date, article_result in zip(
        filtered_df['pub_date'], filtered_df['sentiment_results']
    )
    if isinstance(article_result, dict)  # articles without a dict result add no rows
    for entity, details in article_result.items()
]

# Explicit columns keep the schema stable even when there are no records.
long_df = pd.DataFrame(
    long_records,
    columns=['date', 'entity', 'classification', 'softmax_scores'],
)

print("\n--- Long DataFrame ---")
long_df.to_csv(output_csv, index=False)

# Show a preview under the header printed above.
long_df.head()