In [None]:
import os
import json
import pandas as pd
import matplotlib.pyplot as plt
import folium
import geopandas as gpd

# Colab-only: mount Google Drive so the project files referenced below under
# /content/drive/MyDrive/... can be read and written.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Weighted Social Negative Index (WSNI)

In [None]:
# Per-municipality counts of positive / negative / neutral social articles.
df_score = pd.read_csv(
    '/content/drive/MyDrive/PSEG Research Project/Social/social_sentiments_summary.csv'
)

In [None]:
# Weights applied to each sentiment share; negative sentiment is emphasised.
w_pos = 0.2
w_neg = 0.6
w_neu = 0.2


def calculate_wn_sns(row):
    """Return the weighted, normalised social negative score for one row.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide 'Social Positive Count', 'Social Negative Count' and
        'Social Neutral Count'.

    Returns
    -------
    number
        ``w_neg * neg_share - (w_pos * pos_share + w_neu * neu_share)`` where
        each share is the count divided by the sum of the three counts.
        Returns 0 when the three counts sum to zero (no articles).

    Notes
    -----
    The weights are read from the module-level ``w_pos`` / ``w_neg`` /
    ``w_neu`` at call time. Because the three shares sum to 1, the weights
    0.2 / 0.6 / 0.2 bound the result to [-0.2, 0.6].
    """
    positives = row['Social Positive Count']
    negatives = row['Social Negative Count']
    neutrals = row['Social Neutral Count']

    total = positives + negatives + neutrals
    if total == 0:
        # No articles at all: shares are undefined, so report a neutral 0.
        return 0

    weighted_negative = (negatives / total) * w_neg
    weighted_other = (positives / total) * w_pos + (neutrals / total) * w_neu
    return weighted_negative - weighted_other

# Score every municipality row-by-row (unscaled; the next cell scales it).
df_score['WN-SNS'] = df_score.apply(calculate_wn_sns, axis='columns')


In [None]:
# Store WN-SNS scaled by 100.
# The value is recomputed from the raw counts instead of multiplying the
# stored column in place, so re-running this cell cannot compound the factor.
df_score['WN-SNS'] = df_score.apply(calculate_wn_sns, axis=1) * 100

### Ranges and Scales of the WN-SNS

The Weighted and Normalized Social Negative Score (WN-SNS) is designed to provide a nuanced measure of social sentiment, particularly the negative sentiment, relative to the total interactions. Understanding the ranges and scales of this score helps in interpreting the results effectively.

#### 1. **Normalized Values Range**

The normalized values (\(N_{pos}\), \(N_{neg}\), \(N_{neu}\)) are ratios, and their range is from 0 to 1.

- If \(S_{pos} = 0\), \(N_{pos} = 0\).
- If \(S_{neg} = 0\), \(N_{neg} = 0\).
- If \(S_{neu} = 0\), \(N_{neu} = 0\).

Conversely, if \(S_{pos}\), \(S_{neg}\), or \(S_{neu}\) are the only counts and the others are zero, their normalized value would be 1.

#### 2. **Weighted Normalized Values Range**

The weighted normalized values (\(WN_{pos}\), \(WN_{neg}\), \(WN_{neu}\)) also range from 0 to the weight assigned to them:

- \(WN_{pos}\) ranges from 0 to \(w_{pos}\) (0.2 in this case).
- \(WN_{neg}\) ranges from 0 to \(w_{neg}\) (0.6 in this case).
- \(WN_{neu}\) ranges from 0 to \(w_{neu}\) (0.2 in this case).

#### 3. **WN-SNS Score Range**

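The WN-SNS score can theoretically range from -0.2 to 0.6, given the weight configuration used (the notebook then multiplies the score by 100, so the stored `WN-SNS` column ranges from -20 to 60):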

\[ \text{WN-SNS} = WN_{neg} - (WN_{pos} + WN_{neu}) \]

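- **Minimum Value**: When \(S_{neg} = 0\), the positive and neutral shares together make up everything (\(N_{pos} + N_{neu} = 1\)). Because \(w_{pos} = w_{neu} = 0.2\), the subtracted term is \(0.2 \cdot N_{pos} + 0.2 \cdot N_{neu} = 0.2\) regardless of how the total splits between positive and neutral, so the minimum WN-SNS is:
\[ \text{WN-SNS}_{\min} = 0 - 0.2 = -0.2 \]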

- **Maximum Value**: When \(S_{pos} = 0\) and \(S_{neu} = 0\), and \(S_{neg} = S_{total}\), the maximum WN-SNS would be:
\[ \text{WN-SNS}_{\max} = 0.6 - (0 + 0) = 0.6 \]

#### 4. **Interpretation of the Scale**

- **Positive WN-SNS**: With the weights used here the score simplifies to \(0.8 \cdot N_{neg} - 0.2\), so it is positive exactly when negative interactions make up more than 25% of all interactions. The closer the score is to 0.6, the more dominant the negative interactions are.
- **Negative WN-SNS**: Indicates that negative interactions make up less than 25% of all interactions, i.e. positive and neutral interactions outweigh them after weighting. The closer the score is to -0.2, the more dominant the positive and neutral interactions are.
- **WN-SNS near Zero**: Indicates that the weighted negative share roughly offsets the weighted positive and neutral shares, which with these weights corresponds to about one quarter of the interactions being negative.

### Practical Considerations

1. **Weight Adjustments**: The choice of weights (\(w_{pos}\), \(w_{neg}\), \(w_{neu}\)) significantly affects the WN-SNS. These should be chosen based on the specific context or desired emphasis in the analysis.
2. **Handling Zero Totals**: As previously noted, if \(S_{total} = 0\), the score should be set to a neutral value (e.g., 0) to avoid undefined behavior.
3. **Context-Specific Interpretation**: The interpretation of the WN-SNS should consider the context in which it is applied. For example, in a highly positive environment, even a slightly negative WN-SNS might be significant.

By understanding these ranges and scales, the WN-SNS can be effectively used to measure and interpret social sentiment in a detailed and nuanced manner.

# Crime Index

In [None]:
# Per-city offense counts and population.
df_crime = pd.read_excel(
    '/content/drive/MyDrive/PSEG Research Project/Social/Crime.xls'
)

In [None]:
#df_score['Folder Name'] = df_score['Folder Name'].str.lower()
#df_crime['City'] = df_crime['City'].str.lower()

In [None]:
# Total reported offenses per city, divided by population.
#
# Column layout of df_crime (positional, as loaded from Crime.xls):
#   0 City, 1 Population, 2 Violent crime, 3-6 violent-crime components,
#   7 Property crime, 8-10 property-crime components, 11 Arson.
# "Violent crime" and "Property crime" are already the totals of their
# component columns, so summing every column in 2:13 counted each offense
# twice. That slice also picked up the Crime_Index column (position 12) when
# the cell was re-run. Only the aggregate columns are summed here.
# NOTE(review): Arson is assumed not to be included in the "Property crime"
# total (as in the FBI UCR city tables) - confirm against the data source.
TOTAL_CRIME_COLUMN_POSITIONS = [2, 7, 11]  # Violent crime, Property crime, Arson
sum_columns = df_crime.iloc[:, TOTAL_CRIME_COLUMN_POSITIONS].sum(axis=1)

# A zero (or negative) population would produce inf; treat it as missing instead.
population = df_crime['Population'].where(df_crime['Population'] > 0)
result = sum_columns / population
df_crime['Crime_Index'] = result

In [None]:
# Express the index as crimes per 10,000 residents.
# Derived from `result` (the unscaled crimes / population ratio computed in
# the previous cell) rather than from the stored column, so re-running this
# cell does not multiply by 10,000 a second time.
df_crime['Crime_Index'] = result * 10000

In [None]:
# Summary statistics of the index; use them to spot extreme outliers.
df_crime['Crime_Index'].describe()

Unnamed: 0,Crime_Index
count,488.0
mean,374.233031
std,1851.312891
min,0.0
25%,119.506142
50%,214.009083
75%,359.397622
max,40588.235294


In [None]:
# Persist the crime table with its Crime_Index column (row index omitted).
df_crime.to_csv(
    '/content/drive/MyDrive/PSEG Research Project/Social/crime_index.csv',
    index=False,
)

In [None]:
# Preview the crime table, including the derived Crime_Index column.
df_crime.head()

Unnamed: 0,City,Population,Violent\ncrime,Murder and\nnonnegligent\nmanslaughter,Rape1,Robbery,Aggravated\nassault,Property\ncrime,Burglary,Larceny-\ntheft,Motor\nvehicle\ntheft,Arson,Crime_Index
0,Aberdeen Township,18378.0,11.0,0.0,2.0,2.0,7.0,163.0,18.0,137.0,8.0,0.0,189.35684
1,Absecon,8252.0,17.0,1.0,1.0,5.0,10.0,181.0,16.0,159.0,6.0,0.0,479.883665
2,Allendale,6865.0,3.0,0.0,1.0,0.0,2.0,30.0,4.0,23.0,3.0,0.0,96.13984
3,Allenhurst,488.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,4.0,3.0,0.0,286.885246
4,Allentown,1809.0,0.0,0.0,0.0,0.0,0.0,5.0,1.0,4.0,0.0,0.0,55.27916


In [None]:
# Preview the sentiment table, including the derived WN-SNS column.
df_score.head()

Unnamed: 0,Folder Name,Social Positive Count,Social Negative Count,Social Neutral Count,Average,Average_P_N,Scaled_Average_P_N,WN-SNS
0,Aberdeen township,104,31,212,24.333333,36.5,0.208571,-12.853026
1,Absecon city,213,48,275,55.0,82.5,0.471429,-12.835821
2,Alexandria township,71,32,177,13.0,19.5,0.111429,-10.857143
3,Allamuchy township,182,25,238,52.333333,78.5,0.448571,-15.505618
4,Alloway township,34,3,10,10.333333,15.5,0.088571,-14.893617


# Map

In [None]:
# Municipality reference table: name, county, MRI score and coordinates.
df_map=pd.read_csv("/content/drive/MyDrive/PSEG Research Project/New Jersey Areas/NJ_with_lat_long.csv")
#df_map['Municipality'] = df_map['Municipality'].str.lower()
df_map

Unnamed: 0,Municipality,County,MRI Score,Latitude,Longitude,Bing Search Link
0,Aberdeen township,Monmouth,2.33,40.407114,-74.221629,https://www.bing.com/search?q=Aberdeen townshi...
1,Absecon city,Atlantic,-2.94,39.436241,-74.517871,https://www.bing.com/search?q=Absecon city+New...
2,Alexandria township,Hunterdon,3.95,40.591417,-75.012824,https://www.bing.com/search?q=Alexandria towns...
3,Allamuchy township,Warren,4.03,40.921765,-74.810166,https://www.bing.com/search?q=Allamuchy townsh...
4,Alloway township,Salem,0.83,39.560857,-75.362397,https://www.bing.com/search?q=Alloway township...
...,...,...,...,...,...,...
348,Woodbury city,Gloucester,-6.38,39.845233,-75.147237,https://www.bing.com/search?q=Woodbury city+Ne...
349,Woodland township,Burlington,-3.83,39.842075,-74.523058,https://www.bing.com/search?q=Woodland townshi...
350,Woodstown borough,Salem,-1.92,39.651287,-75.329138,https://www.bing.com/search?q=Woodstown boroug...
351,Woolwich township,Gloucester,2.89,39.741482,-75.348055,https://www.bing.com/search?q=Woolwich townshi...


In [None]:
# Attach county, MRI score and coordinates to each sentiment row.
# The join is an exact, case-sensitive match on the municipality name; rows
# without a counterpart on either side are dropped (inner join).
df_x = df_score.merge(
    df_map,
    how='inner',
    left_on='Folder Name',
    right_on='Municipality',
)
df_x

Unnamed: 0,Folder Name,Social Positive Count,Social Negative Count,Social Neutral Count,Average,Average_P_N,Scaled_Average_P_N,WN-SNS,Municipality,County,MRI Score,Latitude,Longitude,Bing Search Link
0,Aberdeen township,104,31,212,24.333333,36.5,0.208571,-12.853026,Aberdeen township,Monmouth,2.33,40.407114,-74.221629,https://www.bing.com/search?q=Aberdeen townshi...
1,Absecon city,213,48,275,55.000000,82.5,0.471429,-12.835821,Absecon city,Atlantic,-2.94,39.436241,-74.517871,https://www.bing.com/search?q=Absecon city+New...
2,Alexandria township,71,32,177,13.000000,19.5,0.111429,-10.857143,Alexandria township,Hunterdon,3.95,40.591417,-75.012824,https://www.bing.com/search?q=Alexandria towns...
3,Allamuchy township,182,25,238,52.333333,78.5,0.448571,-15.505618,Allamuchy township,Warren,4.03,40.921765,-74.810166,https://www.bing.com/search?q=Allamuchy townsh...
4,Alloway township,34,3,10,10.333333,15.5,0.088571,-14.893617,Alloway township,Salem,0.83,39.560857,-75.362397,https://www.bing.com/search?q=Alloway township...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
348,Woodbury city,45,6,37,13.000000,19.5,0.111429,-14.545455,Woodbury city,Gloucester,-6.38,39.845233,-75.147237,https://www.bing.com/search?q=Woodbury city+Ne...
349,Woodland township,23,6,42,5.666667,8.5,0.048571,-13.239437,Woodland township,Burlington,-3.83,39.842075,-74.523058,https://www.bing.com/search?q=Woodland townshi...
350,Woodstown borough,8,1,9,2.333333,3.5,0.020000,-15.555556,Woodstown borough,Salem,-1.92,39.651287,-75.329138,https://www.bing.com/search?q=Woodstown boroug...
351,Woolwich township,35,11,33,8.000000,12.0,0.068571,-8.860759,Woolwich township,Gloucester,2.89,39.741482,-75.348055,https://www.bing.com/search?q=Woolwich townshi...


In [None]:
# List the merged columns before selecting the subset to keep.
df_x.columns

Index(['Folder Name', 'Social Positive Count', 'Social Negative Count',
       'Social Neutral Count', 'Average', 'Average_P_N', 'Scaled_Average_P_N',
       'WN-SNS', 'Municipality', 'County', 'MRI Score', 'Latitude',
       'Longitude', 'Bing Search Link'],
      dtype='object')

In [None]:
# Keep only the columns needed for mapping.
map_columns = [
    'Social Negative Count',
    'WN-SNS',
    'Municipality',
    'County',
    'MRI Score',
    'Latitude',
    'Longitude',
]
df_x = df_x[map_columns]

In [None]:
# Export to the notebook's working directory. The DataFrame index is written
# as an unnamed first column because index=False is not passed.
# NOTE(review): presumably this export was joined with the crime index outside
# the notebook to produce SN_geo_cri.csv (loaded below) - confirm and document.
df_x.to_csv('sample.csv')

In [None]:
# Pre-joined table: sentiment score, geography and crime index per municipality.
SN_geo_cri = pd.read_csv(
    '/content/drive/MyDrive/PSEG Research Project/Social/SN_geo_cri.csv'
)

In [None]:
# Display the joined table (pandas truncates long frames in the output).
SN_geo_cri

Unnamed: 0.1,Unnamed: 0,Social Negative Count,WN-SNS,Municipality,County,MRI Score,Latitude,Longitude,Crime_Index
0,0,31,-12.853026,aberdeen township,Monmouth,2.33,40.407114,-74.221629,189.356840
1,1,48,-12.835821,absecon city,Atlantic,-2.94,39.436241,-74.517871,479.883665
2,5,37,-13.051643,andover borough,Sussex,-1.39,40.988875,-74.741991,53.908356
3,6,7,-12.911392,andover township,Sussex,0.94,41.015609,-74.729750,55.279160
4,7,24,-13.663366,atlantic city,Atlantic,-20.13,39.364285,-74.422935,761.673330
...,...,...,...,...,...,...,...,...,...
285,348,6,-14.545455,woodbury city,Gloucester,-6.38,39.845233,-75.147237,976.810999
286,349,6,-13.239437,woodland township,Burlington,-3.83,39.842075,-74.523058,248.456911
287,350,1,-15.555556,woodstown borough,Salem,-1.92,39.651287,-75.329138,278.503046
288,351,11,-8.860759,woolwich township,Gloucester,2.89,39.741482,-75.348056,79.138968


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import folium
import geopandas as gpd
from matplotlib import cm
from matplotlib.colors import Normalize
import matplotlib.colors as colors
from branca.element import Template, MacroElement

# Map of municipalities coloured by WN-SNS, with a popup per marker and a
# fixed legend box. Saved to WN_SNS_map.html and displayed inline.

# NOTE: df_x is an alias of SN_geo_cri, not a copy; the column assignments
# below therefore also modify SN_geo_cri.
df_x = SN_geo_cri
df_x['Municipality'] = df_x['Municipality'].str.title()

# Each municipality's share of all social-negative articles.
total_sum = df_x['Social Negative Count'].sum()

df_x['SN_ratio'] = df_x['Social Negative Count'] / total_sum

# Anchor the colour scale at the 10th / 90th percentiles so a few extreme
# values do not wash out the rest; values outside that range take the end colours.
vmin = np.percentile(df_x['WN-SNS'], 10)
vmax = np.percentile(df_x['WN-SNS'], 90)

norm = colors.Normalize(vmin=vmin, vmax=vmax)

color_map = cm.ScalarMappable(norm=norm, cmap='coolwarm')

# Centred on New Jersey.
m = folium.Map(location=[40.0583, -74.4057], zoom_start=8)

for idx, row in df_x.iterrows():
    color = color_map.to_rgba(row['WN-SNS'])
    color_hex = colors.to_hex(color)

    popup = folium.Popup(
        f"<b>Area:</b> {row['Municipality']}<br><b>Weighted Social Negative Index (WSNI):</b> {row['WN-SNS']}<br><b>Crime index:</b> {row['Crime_Index']}<br><b>Social Negative Articles Ratio:</b> {row['SN_ratio']}",
        max_width=300
    )

    folium.Marker(
        location=[row['Latitude'], row['Longitude']],
        popup=popup,
        # The pin itself stays gray; the glyph inside it is tinted with the
        # colormap colour for this municipality's WN-SNS.
        icon=folium.Icon(color="gray", icon_color=color_hex)
    ).add_to(m)


# branca's MacroElement only renders Jinja macros named `header`, `html` or
# `script`. The markup must therefore be wrapped in an `html` macro; a bare
# HTML template is silently ignored and the legend never appears on the map.
legend_html = '''
{% macro html(this, kwargs) %}
<div style="
    position: fixed;
    bottom: 50px; left: 50px; width: 300px; height: auto;
    border:2px solid grey; z-index:9999; font-size:14px;
    background-color:white; padding: 10px;
">
    <b>Legend</b><br>
    <b>Weighted Social Negative Index (WSNI):</b> Measure of social sentiment, particularly the negative sentiment, relative to the total interactions. Larger WSNI indicates a higher proportion of negative interactions compared to positive and neutral interactions.<br>
    <b>Crime index:</b> Total number of reported crimes per 10,000 residents.<br>
    <b>Social Negative Articles Ratio:</b> Ratio of the total number of social negative articles in this area to the total number of social negative articles.
</div>
{% endmacro %}
'''

legend = MacroElement()
legend._template = Template(legend_html)
m.get_root().add_child(legend)
m.save('WN_SNS_map.html')
m


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import folium
import geopandas as gpd
from matplotlib import cm
from matplotlib.colors import Normalize
import matplotlib.colors as colors
from branca.element import Template, MacroElement

# Map of municipalities coloured by Crime_Index, with a popup per marker.

# NOTE: df_x is an alias of SN_geo_cri, not a copy; the column assignments
# below therefore also modify SN_geo_cri.
df_x = SN_geo_cri
df_x['Municipality'] = df_x['Municipality'].str.title()

# Each municipality's share of all social-negative articles.
total_sum = df_x['Social Negative Count'].sum()

df_x['SN_ratio'] = df_x['Social Negative Count'] / total_sum

# Anchor the colour scale at the 10th / 90th percentiles so a few extreme
# values do not wash out the rest; values outside that range take the end colours.
vmin = np.percentile(df_x['Crime_Index'], 10)
vmax = np.percentile(df_x['Crime_Index'], 90)

norm = colors.Normalize(vmin=vmin, vmax=vmax)

color_map = cm.ScalarMappable(norm=norm, cmap='PRGn')

# Centred on New Jersey.
m = folium.Map(location=[40.0583, -74.4057], zoom_start=8)

for idx, row in df_x.iterrows():
    color = color_map.to_rgba(row['Crime_Index'])
    color_hex = colors.to_hex(color)

    # The WSNI line must show the WN-SNS column; it previously repeated
    # Crime_Index under the WSNI label.
    popup = folium.Popup(
        f"<b>Area:</b> {row['Municipality']}<br><b>Crime index:</b> {row['Crime_Index']}<br><b>Weighted Social Negative Index (WSNI):</b> {row['WN-SNS']}<br><b>Social Negative Articles Ratio:</b> {row['SN_ratio']}",
        max_width=300  # Adjust the max width as needed
    )

    folium.Marker(
        location=[row['Latitude'], row['Longitude']],
        popup=popup,
        # Gray pin; the glyph inside is tinted with the colormap colour.
        icon=folium.Icon(color="gray", icon_color=color_hex)
    ).add_to(m)


m
#m.save('Crime_map.html')
