In [None]:

from google.colab import drive
import os
import json
# Mount Google Drive at /content/drive; every path used later in this notebook lives under it.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Download the 'vader_lexicon' NLTK package (skipped by NLTK when already up to date).
# Presumably required by SentimentIntensityAnalyzer, which is constructed later — confirm against the NLTK docs.
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [None]:
def process_json_file(filepath):
    """Wrap each top-level value of a JSON object file in a list, in place.

    The file at ``filepath`` is read, every ``key: value`` pair becomes
    ``key: [value]``, and the result is written back to the same path with
    4-space indentation.

    Values that are already lists are kept as they are. This makes the
    function idempotent: running it (or the notebook cell that calls it) more
    than once no longer produces nested lists such as ``[[value]]``.

    Args:
        filepath: Path to a JSON file whose top-level value is an object.

    Raises:
        OSError: If the file cannot be opened for reading or writing.
        json.JSONDecodeError: If the file does not contain valid JSON.
        AttributeError: If the top-level JSON value is not an object.
    """
    # JSON text is UTF-8 by specification; be explicit instead of relying on
    # the platform's default encoding.
    with open(filepath, 'r', encoding='utf-8') as file:
        data = json.load(file)

    transformed_data = {
        # Only wrap values that have not been wrapped by an earlier run.
        key: value if isinstance(value, list) else [value]
        for key, value in data.items()
    }

    with open(filepath, 'w', encoding='utf-8') as file:
        json.dump(transformed_data, file, indent=4)

def process_directory(root_directory):
    """Apply ``process_json_file`` to every ``.json`` file under a directory tree.

    The tree rooted at ``root_directory`` is traversed with ``os.walk``. A
    progress line is printed for each folder visited and for each file after
    it has been processed.

    Args:
        root_directory: Top of the directory tree to traverse.
    """
    for current_dir, _subdirs, entries in os.walk(root_directory):
        print(f"Processing folder: {current_dir}")
        json_names = (name for name in entries if name.endswith('.json'))
        for name in json_names:
            target = os.path.join(current_dir, name)
            process_json_file(target)
            print(f"Processed {target}")


In [None]:
# Root of the per-area folders on the mounted Drive.
root_directory = "/content/drive/MyDrive/PSEG Research Project/New Jersey Areas/Areas"
# Walks the whole tree; process_json_file rewrites each .json file on disk in place.
process_directory(root_directory)
print("Processing complete.")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Processed /content/drive/MyDrive/PSEG Research Project/New Jersey Areas/Areas/Bloomingdale borough/32.json
Processed /content/drive/MyDrive/PSEG Research Project/New Jersey Areas/Areas/Bloomingdale borough/33.json
Processed /content/drive/MyDrive/PSEG Research Project/New Jersey Areas/Areas/Bloomingdale borough/34.json
Processed /content/drive/MyDrive/PSEG Research Project/New Jersey Areas/Areas/Bloomingdale borough/35.json
Processed /content/drive/MyDrive/PSEG Research Project/New Jersey Areas/Areas/Bloomingdale borough/36.json
Processed /content/drive/MyDrive/PSEG Research Project/New Jersey Areas/Areas/Bloomingdale borough/37.json
Processed /content/drive/MyDrive/PSEG Research Project/New Jersey Areas/Areas/Bloomingdale borough/38.json
Processed /content/drive/MyDrive/PSEG Research Project/New Jersey Areas/Areas/Bloomingdale borough/39.json
Processed /content/drive/MyDrive/PSEG Research Project/New Jersey Areas/Areas/B

In [None]:
# Single shared analyzer instance; analyze_sentiment reads it as a module-level global.
sid = SentimentIntensityAnalyzer()

def analyze_sentiment(sentence):
    """Classify a sentence as 'positive', 'negative' or 'neutral'.

    The label is derived from the ``'compound'`` entry of
    ``sid.polarity_scores(sentence)``: at or above 0.05 is positive, at or
    below -0.05 is negative, anything in between is neutral.

    Args:
        sentence: Text passed to the module-level analyzer ``sid``.

    Returns:
        One of the strings ``'positive'``, ``'negative'``, ``'neutral'``.
    """
    compound = sid.polarity_scores(sentence)['compound']
    if compound >= 0.05:
        return 'positive'
    if compound <= -0.05:
        return 'negative'
    return 'neutral'

def process_json_file(filepath):
    """Store a sentiment label for each key of a JSON object file, in place.

    Each key is passed to ``analyze_sentiment``. The resulting label is
    appended when the key's value has exactly one element, or written to
    index 1 when it has more than one. Empty values are left unchanged. The
    updated object is written back to ``filepath`` with 4-space indentation.

    Args:
        filepath: Path to a JSON file whose top-level value is an object
            mapping text to lists.
    """
    with open(filepath, 'r') as source:
        records = json.load(source)

    for sentence, labels in records.items():
        sentiment = analyze_sentiment(sentence)
        # The container is mutated in place, so no reassignment into
        # ``records`` is needed.
        if len(labels) > 1:
            labels[1] = sentiment
        elif len(labels) == 1:
            labels.append(sentiment)

    with open(filepath, 'w') as sink:
        json.dump(records, sink, indent=4)

def process_directory(root_directory):
    """Run ``process_json_file`` on each ``.json`` file found below a root folder.

    Every folder reached by ``os.walk(root_directory)`` is announced with a
    printed line, and each processed file is reported after it has been
    handled.

    Args:
        root_directory: Directory whose tree is traversed.
    """
    for folder, _, names in os.walk(root_directory):
        print(f"Processing folder: {folder}")
        json_paths = [os.path.join(folder, n) for n in names if n.endswith('.json')]
        for json_path in json_paths:
            process_json_file(json_path)
            print(f"Processed {json_path}")


In [None]:
# Walk the same area folders again. In top-to-bottom execution this call uses the
# process_json_file defined in the preceding cell, which stores a sentiment label per key.
root_directory = "/content/drive/MyDrive/PSEG Research Project/New Jersey Areas/Areas"
process_directory(root_directory)
print("Processing complete.")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Processed /content/drive/MyDrive/PSEG Research Project/New Jersey Areas/Areas/Bloomingdale borough/32.json
Processed /content/drive/MyDrive/PSEG Research Project/New Jersey Areas/Areas/Bloomingdale borough/33.json
Processed /content/drive/MyDrive/PSEG Research Project/New Jersey Areas/Areas/Bloomingdale borough/34.json
Processed /content/drive/MyDrive/PSEG Research Project/New Jersey Areas/Areas/Bloomingdale borough/35.json
Processed /content/drive/MyDrive/PSEG Research Project/New Jersey Areas/Areas/Bloomingdale borough/36.json
Processed /content/drive/MyDrive/PSEG Research Project/New Jersey Areas/Areas/Bloomingdale borough/37.json
Processed /content/drive/MyDrive/PSEG Research Project/New Jersey Areas/Areas/Bloomingdale borough/38.json
Processed /content/drive/MyDrive/PSEG Research Project/New Jersey Areas/Areas/Bloomingdale borough/39.json
Processed /content/drive/MyDrive/PSEG Research Project/New Jersey Areas/Areas/B

In [None]:
from collections import defaultdict
import pandas as pd

def analyze_folder(folder_path):
    """Compute the percentage share of each label pair found in one folder.

    Every ``.json`` file directly inside ``folder_path`` is loaded. For each
    top-level entry whose value is a list with at least two elements, the
    pair ``(value[0], value[1])`` is counted. Entries with any other value
    are ignored.

    Files are visited in sorted name order. ``os.listdir`` returns names in
    arbitrary order, so without sorting the key order of the result (and the
    column order of any DataFrame built from it) could change between runs.

    Args:
        folder_path: Directory containing the JSON files to summarise.

    Returns:
        dict mapping ``"<value[0]>_<value[1]>"`` to the percentage (0-100) of
        counted entries with that pair. Empty when nothing was counted.

    Raises:
        OSError: If the folder or one of its files cannot be read.
        json.JSONDecodeError: If a ``.json`` file is not valid JSON.
    """
    combination_counts = defaultdict(int)

    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith('.json'):
            continue
        file_path = os.path.join(folder_path, filename)
        with open(file_path, 'r') as f:
            data = json.load(f)
        for value in data.values():
            if isinstance(value, list) and len(value) >= 2:
                combination_counts[(value[0], value[1])] += 1

    # When nothing was counted the comprehension below has no items, so the
    # division by ``total_count`` is never evaluated.
    total_count = sum(combination_counts.values())
    return {
        f"{label1}_{label2}": (count / total_count) * 100
        for (label1, label2), count in combination_counts.items()
    }

def analyze_root_path(root_path):
    """Build a table of label-pair percentages, one row per subfolder.

    Each immediate subdirectory of ``root_path`` is summarised with
    ``analyze_folder``. Non-directory entries are skipped. The subfolder name
    is stored in a ``'Folder'`` column, and combinations missing from a
    folder are filled with 0.

    Subfolders are visited in sorted name order. ``os.listdir`` returns names
    in arbitrary order, so without sorting the row order of the table (and of
    any CSV written from it) could change between runs.

    Args:
        root_path: Directory whose immediate subdirectories are analysed.

    Returns:
        pandas.DataFrame with one row per subfolder: one column per label
        combination, plus ``'Folder'``.
    """
    results = []

    for folder_name in sorted(os.listdir(root_path)):
        folder_path = os.path.join(root_path, folder_name)
        if not os.path.isdir(folder_path):
            continue
        # Build a new record rather than mutating analyze_folder's return
        # value; 'Folder' still comes after the percentage keys.
        results.append({**analyze_folder(folder_path), 'Folder': folder_name})

    return pd.DataFrame(results).fillna(0)

# Folder whose immediate subfolders are each summarised into one row of the table.
root_path = "/content/drive/MyDrive/PSEG Research Project/New Jersey Areas/Areas"

# One row per subfolder: a percentage column per label combination, plus a 'Folder' column.
results_df = analyze_root_path(root_path)

#results_df.to_csv('label_percentages.csv', index=False)


In [None]:
# Save the per-folder percentage table to Drive; index=False leaves the row index out of the CSV.
results_df.to_csv('/content/drive/MyDrive/PSEG Research Project/New Jersey Areas/label_percentages.csv', index=False)
