In [18]:
import os
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Shared VADER analyzer: built once at module level and reused by
# calculate_sentiment() for every text it scores.
analyzer = SentimentIntensityAnalyzer() 

def calculate_sentiment(text):
    """Return the VADER ``compound`` sentiment score for ``text``.

    Args:
        text: The text to score. Missing values (``None`` or a float NaN,
            e.g. what pandas yields for an empty CSV cell) are accepted.
            Any other non-string value is converted with ``str()`` first.

    Returns:
        The ``'compound'`` entry of ``analyzer.polarity_scores(text)``, or
        ``float('nan')`` when ``text`` is missing, so that NaN-skipping
        aggregations (such as pandas' default ``mean``) ignore the row
        instead of the whole run failing on it.
    """
    # NaN is the only float that is not equal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return float('nan')

    # The analyzer works on text; coerce stray numeric/other cells.
    if not isinstance(text, str):
        text = str(text)

    return analyzer.polarity_scores(text)['compound']

def _parse_filename_date(filename):
    """Split a ``<prefix>_<mm>_<dd>_<yyyy>.<ext>`` name into a sort key and label.

    Returns:
        A ``((year, month, day), 'mm-dd-yyyy')`` tuple. The first element
        sorts chronologically; the second is the text stored in the ``day``
        column (month and day zero-padded to two digits).

    Raises:
        ValueError: If the name has fewer than four ``_``-separated fields
            or the date fields are not integers.
    """
    parts = filename.split('_')
    try:
        month = int(parts[1])
        day = int(parts[2])
        # The last date field still carries the extension, e.g. '2024.csv'.
        year_text = parts[3].split('.')[0]
        year = int(year_text)
    except (IndexError, ValueError) as err:
        raise ValueError(
            f"Cannot parse a date from filename {filename!r}; "
            "expected '<prefix>_<mm>_<dd>_<yyyy>.<ext>'"
        ) from err
    return (year, month, day), f'{month:02d}-{day:02d}-{year_text}'


def process_data(apple_path, samsung_path):
    """Build a per-day table of mean tweet sentiment for Apple and Samsung.

    Each directory is scanned for files carrying its brand prefix (``a_`` in
    ``apple_path``, ``s_`` in ``samsung_path``); any other entry is ignored,
    including a file with the other brand's prefix. Every matching file is
    read as CSV, its ``tweet_text_element`` column is scored with
    ``calculate_sentiment`` and the scores are averaged into one value.

    NOTE(review): the filename date fields are taken to be month, day, year
    in that order, matching the 'mm-dd-yyyy' label this function has always
    produced -- confirm against the actual file naming.

    Args:
        apple_path: Directory containing the ``a_*`` CSV files.
        samsung_path: Directory containing the ``s_*`` CSV files.

    Returns:
        A ``pandas.DataFrame`` with one row per date, sorted chronologically.
        It has a ``day`` column ('mm-dd-yyyy' text) plus ``apple_sentiment``
        and/or ``samsung_sentiment`` for the brands that had files. The frame
        is empty (no columns) when no matching file is found.

    Raises:
        ValueError: If a file with a brand prefix has no parseable date.
    """
    # Keyed by day label so both brands' scores for a date share one row
    # without re-scanning the rows collected so far.
    rows_by_day = {}
    sort_keys = {}

    brand_sources = (
        ('apple', 'a_', apple_path),
        ('samsung', 's_', samsung_path),
    )
    for brand, prefix, directory in brand_sources:
        for filename in os.listdir(directory):
            # Only take files from the directory they were listed in; routing
            # by prefix alone would look for a misplaced file in the other folder.
            if not filename.startswith(prefix):
                continue

            sort_key, day_label = _parse_filename_date(filename)

            df = pd.read_csv(os.path.join(directory, filename))

            # Calculate daily sentiment
            daily_sentiment = df['tweet_text_element'].apply(calculate_sentiment).mean()

            row = rows_by_day.setdefault(day_label, {'day': day_label})
            row[f'{brand}_sentiment'] = daily_sentiment
            sort_keys[day_label] = sort_key

    # Sort on (year, month, day); sorting the 'mm-dd-yyyy' text itself would
    # place e.g. 01-05-2024 before 12-31-2023.
    results = sorted(rows_by_day.values(), key=lambda row: sort_keys[row['day']])

    return pd.DataFrame(results)

# Example usage: score every brand CSV under the two folders below (paths are
# relative to the current working directory) and save the per-day table.
apple_path = 'Apple'
samsung_path = 'Samsung'
df = process_data(apple_path, samsung_path)
# NOTE(review): 'day' is built as a plain string in process_data, and pandas
# applies date_format to datetime values, so it presumably has no effect here.
df.to_csv('sentiment_analysis_results.csv', index=False, date_format='%m-%d-%Y')  # index=False to avoid an extra index column

