In [None]:
import os
import json
import pandas as pd
import matplotlib.pyplot as plt
import folium
import geopandas as gpd

from google.colab import drive
# Mount Google Drive at /content/drive so files stored there can be read by path.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
def process_json_file(file_path):
    """Count the sentiment labels of the 'social' entries in one JSON file.

    The file must contain a JSON object. Each value is expected to be a list
    whose first item is a category label and whose second item is a sentiment
    label. Only entries whose category equals 'social' (case-insensitive) are
    counted, bucketed by sentiment ('positive', 'negative' or 'neutral',
    also case-insensitive). Any other sentiment label is ignored.

    Entries that do not have that shape (not a list, fewer than two items,
    or non-string labels) are skipped instead of aborting the whole run.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        A tuple ``(positive, negative, neutral)`` of integer counts.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        AttributeError: If the top-level JSON value is not an object.
    """
    # JSON text is UTF-8 by specification; do not depend on the locale default.
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    counts = {'positive': 0, 'negative': 0, 'neutral': 0}

    for value in data.values():
        # Skip malformed entries rather than raising IndexError/TypeError.
        if not isinstance(value, (list, tuple)) or len(value) < 2:
            continue
        category, sentiment = value[0], value[1]
        if not (isinstance(category, str) and isinstance(sentiment, str)):
            continue

        if category.lower() != 'social':
            continue

        sentiment = sentiment.lower()
        if sentiment in counts:
            counts[sentiment] += 1

    return counts['positive'], counts['negative'], counts['neutral']

def process_folder(folder_path):
    """Sum the social sentiment counts of every JSON file under a folder.

    Walks ``folder_path`` recursively, passes each file whose name ends in
    '.json' to ``process_json_file`` and adds up the counts it returns.

    Args:
        folder_path: Directory to scan, including all of its subdirectories.

    Returns:
        A tuple ``(positive_total, negative_total, neutral_total)``.
    """
    positive_sum, negative_sum, neutral_sum = 0, 0, 0

    for current_dir, _subdirs, filenames in os.walk(folder_path):
        for filename in filenames:
            # Only JSON files carry sentiment data; ignore everything else.
            if not filename.endswith('.json'):
                continue

            positive, negative, neutral = process_json_file(
                os.path.join(current_dir, filename)
            )
            positive_sum = positive_sum + positive
            negative_sum = negative_sum + negative
            neutral_sum = neutral_sum + neutral

    return positive_sum, negative_sum, neutral_sum




In [None]:
# Directory on the mounted Drive; each immediate subdirectory is treated as one
# area and becomes one row of the summary table.
root_folder = '/content/drive/MyDrive/PSEG Research Project/New Jersey Areas/Areas'

folder_names = []
social_positive_counts = []
social_negative_counts = []
social_neutral_counts = []
averages = []

# os.listdir returns entries in arbitrary order; sorting keeps the row order of
# the table (and of anything exported from it) identical on every run.
for folder_name in sorted(os.listdir(root_folder)):
    folder_path = os.path.join(root_folder, folder_name)
    # Plain files sitting next to the area folders are not areas; skip them.
    if os.path.isdir(folder_path):
        print(f"Processing folder: {folder_name}")
        social_positive, social_negative, social_neutral = process_folder(folder_path)
        # Mean of the three sentiment counts for this area.
        average = (social_positive + social_negative + social_neutral) / 3

        folder_names.append(folder_name)
        social_positive_counts.append(social_positive)
        social_negative_counts.append(social_negative)
        social_neutral_counts.append(social_neutral)
        averages.append(average)

# One row per area folder, columns in the order listed here.
df = pd.DataFrame({
    'Folder Name': folder_names,
    'Social Positive Count': social_positive_counts,
    'Social Negative Count': social_negative_counts,
    'Social Neutral Count': social_neutral_counts,
    'Average': averages
})


Processing folder: Aberdeen township
Processing folder: Absecon city
Processing folder: Alexandria township
Processing folder: Allamuchy township
Processing folder: Alloway township
Processing folder: Andover borough
Processing folder: Andover township
Processing folder: Atlantic City city
Processing folder: Avon-by-the-Sea borough
Processing folder: Barnegat Light borough
Processing folder: Barnegat township
Processing folder: Bass River township
Processing folder: Bayonne city
Processing folder: Beach Haven borough
Processing folder: Bedminster township
Processing folder: Berkeley township
Processing folder: Berlin borough
Processing folder: Berlin township
Processing folder: Bernards township
Processing folder: Bethlehem township
Processing folder: Beverly city
Processing folder: Blairstown township
Processing folder: Bloomfield township
Processing folder: Bloomingdale borough
Processing folder: Bogota borough
Processing folder: Boonton township
Processing folder: Bordentown city
Pr

In [None]:
# Net social sentiment per row: positive count minus negative count.
net_sentiment = df['Social Positive Count'].sub(df['Social Negative Count'])

# 'Average' is the net count divided by 3 (this replaces any existing
# 'Average' column); 'Average_P_N' is the same net count divided by 2.
df['Average'] = net_sentiment.div(3)
df['Average_P_N'] = net_sentiment.div(2)

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Split the rows by the sign of 'Average_P_N'. Each mask is computed once and
# reused for both selecting the values and writing the scaled result back.
non_negative_mask = df['Average_P_N'] >= 0
negative_mask = df['Average_P_N'] < 0

positive_values = df.loc[non_negative_mask, 'Average_P_N']
negative_values = df.loc[negative_mask, 'Average_P_N']

# Non-negative scores are min-max scaled into [0, 1], negative scores into
# [-1, 0]; each group is fitted independently of the other.
scaler_pos = MinMaxScaler(feature_range=(0, 1))
scaler_neg = MinMaxScaler(feature_range=(-1, 0))

positive_column = positive_values.values.reshape(-1, 1)
negative_column = negative_values.values.reshape(-1, 1)

# MinMaxScaler raises ValueError when given zero samples, so a group with no
# rows is passed through unchanged (as an empty array) instead of being fitted.
scaled_pos = scaler_pos.fit_transform(positive_column) if len(positive_column) else positive_column
scaled_neg = scaler_neg.fit_transform(negative_column) if len(negative_column) else negative_column

# Create the column up front so it exists even when a group is empty; rows
# matched by neither mask (NaN scores) stay NaN.
df['Scaled_Average_P_N'] = float('nan')

# The scaler returns an (n, 1) array; flatten it to 1-D so it lines up with the
# single target column.
if len(scaled_pos):
    df.loc[non_negative_mask, 'Scaled_Average_P_N'] = scaled_pos.ravel()
if len(scaled_neg):
    df.loc[negative_mask, 'Scaled_Average_P_N'] = scaled_neg.ravel()


In [None]:
# Display the summary table (the bare last expression is rendered as cell output).
df

Unnamed: 0,Folder Name,Social Positive Count,Social Negative Count,Social Neutral Count,Average,Average_P_N,Scaled_Average_P_N
0,Aberdeen township,104,31,212,24.333333,36.5,0.208571
1,Absecon city,213,48,275,55.000000,82.5,0.471429
2,Alexandria township,71,32,177,13.000000,19.5,0.111429
3,Allamuchy township,182,25,238,52.333333,78.5,0.448571
4,Alloway township,34,3,10,10.333333,15.5,0.088571
...,...,...,...,...,...,...,...
330,Woodbury city,45,6,37,13.000000,19.5,0.111429
331,Woodland township,23,6,42,5.666667,8.5,0.048571
332,Woodstown borough,8,1,9,2.333333,3.5,0.020000
333,Woolwich township,35,11,33,8.000000,12.0,0.068571


In [None]:
# Save the summary table as CSV without the index column. The path is relative,
# so the file is written to the current working directory.
# NOTE(review): on Colab that is presumably not the mounted Drive — confirm the
# file ends up where it is needed.
df.to_csv('social_sentiments_summary.csv', index=False)