In [None]:
import re
import pandas as pd
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
import os

# Fetch the VADER lexicon through NLTK's downloader. This runs every time
# the module is executed, before the analyzer below is constructed.
nltk.download('vader_lexicon')

# Module-level analyzer shared by analyze_and_categorize_sentiment(), which
# reads the 'compound' polarity score it produces for each message.
sid = SentimentIntensityAnalyzer()

# One complete chat entry: "DD/MM/YYYY, HH:MM - Sender: Message".
_CHAT_MESSAGE_RE = re.compile(r'(\d{2}/\d{2}/\d{4}), (\d{2}:\d{2}) - (.*?): (.*)')
# The timestamp prefix that opens every new entry, including entries that
# carry no "Sender: " part.
_CHAT_ENTRY_START_RE = re.compile(r'\d{2}/\d{2}/\d{4}, \d{2}:\d{2} - ')


# Function to load chat from text, CSV, or Excel
def load_chat(filename):
    """Load a chat log into a DataFrame.

    ``.txt`` files are parsed line by line as
    ``DD/MM/YYYY, HH:MM - Sender: Message`` entries and returned with the
    columns ``Date`` (datetime), ``Time``, ``Sender`` and ``Message``.
    Lines that do not start with a timestamp are treated as continuations
    of the preceding message and appended to it, separated by a newline,
    so multi-line messages keep their full text. Timestamped lines without
    a ``Sender: `` part are skipped, together with any continuation lines
    that follow them.

    ``.csv`` and ``.xlsx`` files are read with pandas and returned as-is.

    Args:
        filename: Path to the chat file; the extension selects the loader.

    Returns:
        A pandas DataFrame with the loaded chat.

    Raises:
        ValueError: If the file extension is not .txt, .csv or .xlsx.
    """
    file_extension = os.path.splitext(filename)[-1].lower()

    if file_extension == ".txt":
        dates, times, senders, messages = [], [], [], []
        # True while the latest timestamped entry was a parsed message, so
        # that following un-timestamped lines can be attached to it.
        in_message = False

        # "utf-8-sig" reads plain UTF-8 unchanged but also drops a leading
        # BOM, which would otherwise stop the first line from matching.
        with open(filename, "r", encoding="utf-8-sig") as file:
            for raw_line in file:
                text = raw_line.rstrip("\n")
                match = _CHAT_MESSAGE_RE.match(text)
                if match:
                    dates.append(match.group(1))
                    times.append(match.group(2))
                    senders.append(match.group(3))
                    messages.append(match.group(4))
                    in_message = True
                elif _CHAT_ENTRY_START_RE.match(text):
                    # Timestamped entry without a sender: not a message.
                    in_message = False
                elif in_message:
                    messages[-1] += "\n" + text

        # Blank lines trailing a message are not part of its text.
        messages = [message.rstrip("\n") for message in messages]

        df = pd.DataFrame({'Date': dates, 'Time': times, 'Sender': senders, 'Message': messages})
        df['Date'] = pd.to_datetime(df['Date'], format='%d/%m/%Y')

    elif file_extension == ".csv":
        df = pd.read_csv(filename)

    elif file_extension == ".xlsx":
        df = pd.read_excel(filename)

    else:
        raise ValueError("Unsupported file format. Please use .txt, .csv, or .xlsx files.")

    return df

# Perform sentiment analysis and categorize sentiments
def analyze_and_categorize_sentiment(df):
    """Score every message and label it Positive, Negative or Neutral.

    Adds two columns to ``df`` in place and returns the same DataFrame:

    * ``Sentiment Score`` - the analyzer's ``compound`` polarity score for
      the text in ``Message``. Missing values score 0.0; other non-string
      values are converted with ``str`` before scoring.
    * ``Sentiment Category`` - an ordered categorical
      (Negative < Neutral < Positive): ``Positive`` for scores above 0.05,
      ``Negative`` for scores below -0.05, ``Neutral`` otherwise
      (both boundaries included).

    Args:
        df: DataFrame with a ``Message`` column.

    Returns:
        ``df`` with the two sentiment columns added.

    Raises:
        KeyError: If ``df`` has no ``Message`` column.
    """
    if 'Message' not in df.columns:
        raise KeyError("'Message' column is required for sentiment analysis.")

    def _score(message):
        # CSV/Excel input can hold NaN or numeric cells; the analyzer
        # is only ever handed text.
        if not isinstance(message, str):
            if pd.isna(message):
                return 0.0
            message = str(message)
        return sid.polarity_scores(message)['compound']

    def _label(score):
        if score > 0.05:
            return 'Positive'
        if score < -0.05:
            return 'Negative'
        return 'Neutral'

    df['Sentiment Score'] = df['Message'].apply(_score)

    # Labels are derived from the thresholds directly. Passing them to
    # pd.cut requires ascending-bin order, which the previous
    # Positive/Negative/Neutral list did not follow, so every category
    # was mislabelled.
    labels = [_label(score) for score in df['Sentiment Score']]
    df['Sentiment Category'] = pd.Categorical(
        labels,
        categories=['Negative', 'Neutral', 'Positive'],
        ordered=True,
    )

    return df

# Save DataFrame to Excel or CSV
def save_to_file(df, filename, file_format="excel"):
    """Write ``df`` to ``<filename>.xlsx`` or ``<filename>.csv``.

    Args:
        df: DataFrame to write; the index is not written.
        filename: Output path without extension.
        file_format: ``"excel"`` (default) or ``"csv"``. For any other
            value a message is printed and nothing is written.
    """
    # Guard clause: reject unknown formats before touching the disk.
    if file_format not in ("excel", "csv"):
        print("Unsupported file format. Please choose 'excel' or 'csv'.")
        return

    if file_format == "csv":
        target = f"{filename}.csv"
        df.to_csv(target, index=False)
    else:
        target = f"{filename}.xlsx"
        df.to_excel(target, index=False)

# Main execution
if __name__ == "__main__":
    # Get the input file and output format from the user. Surrounding
    # whitespace and quotes (common when a path is pasted or dragged into
    # the terminal) are removed so the file can be opened.
    input_file = input("Enter the path to the chat file (.txt, .csv, or .xlsx): ").strip().strip('"\'')
    output_format = input("Enter the output format ('excel' or 'csv'): ").strip().lower()

    # Validate the output format before doing any work, so an invalid choice
    # is never followed by a misleading success message.
    if output_format not in ("excel", "csv"):
        print("Unsupported file format. Please choose 'excel' or 'csv'.")
    else:
        # Load chat data
        df = load_chat(input_file)

        # Perform sentiment analysis and categorize
        df = analyze_and_categorize_sentiment(df)

        # Save to chosen output format
        save_to_file(df, "WhatsApp_Chat_Sentiment_Analysis", file_format=output_format)

        print("Sentiment analysis completed and saved successfully!")
