In [15]:
import os
import re

import pandas as pd

#### 1. Load the Loughran & McDonald Dictionary

In [16]:
# Define MasterDictionary class
class MasterDictionary:
    """One parsed row of the Loughran-McDonald master dictionary CSV.

    Attributes:
        word: The dictionary word, upper-cased.
        sequence_number, word_count, doc_count, syllables: Integer fields.
        word_proportion, average_proportion, std_dev_prop: Float fields.
        negative, positive, uncertainty, litigious, strong_modal, weak_modal,
        constraining: Booleans, True when the corresponding field holds an
            integer greater than zero.
        source: Raw text of the source field.
        stopword: True when the upper-cased word is contained in the
            stopword collection passed to the constructor.

    NOTE(review): fields are read by fixed position (0-15). Releases of the
    dictionary that insert extra columns (e.g. a "Complexity" flag before the
    syllable count) would shift `syllables` and `source` — verify the indices
    against the header of the file actually being loaded.
    """

    def __init__(self, cols, _stopwords):
        """Parse one CSV row.

        Args:
            cols: Sequence of at least 16 string fields from one CSV row.
                It is not modified.
            _stopwords: Container of upper-case stopwords, used only for a
                membership test.

        Raises:
            IndexError: If `cols` has fewer than 16 fields.
            ValueError: If a numeric field cannot be converted.
        """
        # Blank fields are read as '0'. The substitution is done on a copy so
        # the caller's row is left untouched (it may still be used afterwards,
        # e.g. as a dictionary key).
        fields = ['0' if col == '' else col for col in cols]

        self.word = fields[0].upper()
        self.sequence_number = int(fields[1])
        self.word_count = int(fields[2])
        self.word_proportion = float(fields[3])
        self.average_proportion = float(fields[4])
        self.std_dev_prop = float(fields[5])
        self.doc_count = int(fields[6])
        # Sentiment flags: any value greater than zero marks the word as a
        # member of the category.
        self.negative = int(fields[7]) > 0
        self.positive = int(fields[8]) > 0
        self.uncertainty = int(fields[9]) > 0
        self.litigious = int(fields[10]) > 0
        self.strong_modal = int(fields[11]) > 0
        self.weak_modal = int(fields[12]) > 0
        self.constraining = int(fields[13]) > 0
        self.syllables = int(fields[14])
        self.source = fields[15]
        self.stopword = self.word in _stopwords

In [17]:
# Load MasterDictionary function
def load_masterdictionary(file_path, print_flag=False, _stopwords=None):
    """Load a master dictionary CSV into word and sentiment lookups.

    The first line of the file is treated as a header and skipped. Every
    following line is split on commas; lines whose first field is empty
    (including blank lines) are ignored.

    Args:
        file_path: Path of the UTF-8 encoded CSV file.
        print_flag: When True, print the number of words loaded.
        _stopwords: Optional container of upper-case stopwords forwarded to
            `MasterDictionary`; defaults to an empty set.

    Returns:
        A tuple `(master_dictionary, sentiment_dictionaries)` where
        `master_dictionary` maps each upper-cased word to its
        `MasterDictionary` entry and `sentiment_dictionaries` maps each
        sentiment category name to a dict `{UPPER-CASED WORD: True}` of the
        words flagged for that category.
    """
    # A fresh set per call instead of a shared mutable default argument.
    stopwords = set() if _stopwords is None else _stopwords

    _sentiment_categories = ['negative', 'positive', 'uncertainty', 'litigious', 'strong_modal', 'weak_modal', 'constraining']
    _master_dictionary = {}
    _sentiment_dictionaries = {sentiment: {} for sentiment in _sentiment_categories}

    with open(file_path, 'r', encoding='utf-8') as f:
        f.readline()  # Consume header line
        for line in f:
            cols = line.strip().split(',')
            if not cols[0]:  # No word in this row
                continue
            # Key every lookup by the upper-cased word so the master
            # dictionary agrees with `entry.word` and the sentiment lists.
            word = cols[0].upper()
            entry = MasterDictionary(cols, stopwords)
            _master_dictionary[word] = entry
            for sentiment in _sentiment_categories:
                if getattr(entry, sentiment, False):
                    _sentiment_dictionaries[sentiment][word] = True

    if print_flag:
        print(f"Master Dictionary loaded with {len(_master_dictionary)} words.")

    return _master_dictionary, _sentiment_dictionaries

The Loughran-McDonald master dictionary is updated periodically; the latest version can be downloaded from https://sraf.nd.edu/loughranmcdonald-master-dictionary/


In [18]:
# Read the Loughran-McDonald master dictionary from disk and build the
# per-category sentiment word lists.
# NOTE(review): the path is absolute and machine-specific — adjust it when
# running the notebook elsewhere.
md_path = 'C:/Users/domin/Documents/GitHub/xai_budgeting/001_data/003_experiment/002_justifications/Loughran-McDonald_MasterDictionary_1993-2023.csv'
master_dictionary, sentiment_dictionaries = load_masterdictionary(
    file_path=md_path,
    print_flag=True,
)

Master Dictionary loaded with 86553 words.


#### 2. Process Text File

In [19]:
def process_text_file(file_path, sentiment_dictionaries):
    """Count sentiment-word occurrences for every non-blank line of a text file.

    The first line of the file is skipped. Each remaining non-blank line is
    upper-cased and split on whitespace; punctuation surrounding a token
    (e.g. the comma in "loss," or the quotes in '"risk"') is removed before
    the token is looked up, so words adjacent to punctuation are counted.

    Args:
        file_path: Path of the UTF-8 encoded text file, one text per line.
        sentiment_dictionaries: Mapping of sentiment category name to a
            container of upper-case words belonging to that category.

    Returns:
        A pandas DataFrame with one row per non-blank line: a 'Text' column
        holding the first 50 characters of the line, followed by one count
        column per sentiment category. The columns are present even when the
        file yields no rows.
    """
    categories = list(sentiment_dictionaries)
    # Matches runs of non-word characters (and underscores) at either end of
    # a token; characters inside the token are left alone.
    edge_punctuation = re.compile(r'^[\W_]+|[\W_]+$')
    results = []

    with open(file_path, 'r', encoding='utf-8') as file:
        # Skip the first line; the default keeps an empty file from raising
        # StopIteration.
        next(file, None)
        for line in file:
            text = line.strip()

            # Skip empty lines
            if not text:
                continue

            counts = dict.fromkeys(categories, 0)

            for token in text.upper().split():
                word = edge_punctuation.sub('', token)
                if not word:  # Token consisted of punctuation only
                    continue
                for sentiment, dictionary in sentiment_dictionaries.items():
                    if word in dictionary:
                        counts[sentiment] += 1

            results.append({
                'Text': text[:50],  # First 50 characters of the text for reference
                **counts
            })

    # Fixing the columns keeps the output schema stable for empty inputs.
    return pd.DataFrame(results, columns=['Text', *categories])

#### 3. Save as .csv file

Score the initial submissions and save the results

In [20]:
# Input text (initial submissions) and the CSV that will receive its
# per-line sentiment counts.
# NOTE(review): both paths are absolute and machine-specific.
text_path = 'C:/Users/domin/Documents/GitHub/xai_budgeting/001_data/003_experiment/002_justifications/initial_submission.txt'
output_csv_path = 'C:/Users/domin/Documents/GitHub/xai_budgeting/001_data/003_experiment/002_justifications/loughran_initial_results.csv'

# Score the text against the sentiment word lists.
df_results = process_text_file(
    file_path=text_path,
    sentiment_dictionaries=sentiment_dictionaries,
)

# Write the counts to disk without the DataFrame index, then confirm.
df_results.to_csv(path_or_buf=output_csv_path, index=False)
print(f"Sentiment analysis results saved to {output_csv_path}")

Sentiment analysis results saved to C:/Users/domin/Documents/GitHub/xai_budgeting/001_data/003_experiment/002_justifications/loughran_initial_results.csv


Score the AI resubmissions and save the results

In [21]:
# Input text (AI resubmissions) and the CSV that will receive its
# per-line sentiment counts.
# NOTE(review): both paths are absolute and machine-specific.
text_path = 'C:/Users/domin/Documents/GitHub/xai_budgeting/001_data/003_experiment/002_justifications/ai_resubmissions.txt'
output_csv_path = 'C:/Users/domin/Documents/GitHub/xai_budgeting/001_data/003_experiment/002_justifications/loughran_airesubmissions_results.csv'

# Score the text against the sentiment word lists.
df_results = process_text_file(
    file_path=text_path,
    sentiment_dictionaries=sentiment_dictionaries,
)

# Write the counts to disk without the DataFrame index, then confirm.
df_results.to_csv(path_or_buf=output_csv_path, index=False)
print(f"Sentiment analysis results saved to {output_csv_path}")

Sentiment analysis results saved to C:/Users/domin/Documents/GitHub/xai_budgeting/001_data/003_experiment/002_justifications/loughran_airesubmissions_results.csv
