In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the CSV and lexicon files read below live under it.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import spacy

In [None]:
# Read the example dataset from the mounted Drive folder into a DataFrame.
df = pd.read_csv("/content/drive/MyDrive/NLP/example.csv")

In [None]:
# Keep only rows whose 'entity_context' is a non-empty string other than the
# literal "[]" (an empty list serialised as text).
# The isinstance check is deliberate: a missing CSV cell is loaded as float NaN,
# which is truthy and not equal to '[]', so a plain truthiness test lets it
# through and the string-only cleaning step further down then fails on it.
has_context = df['entity_context'].apply(
    lambda value: isinstance(value, str) and value not in ('', '[]')
)
# Renumber the surviving rows 0..n-1.
filtered_df = df[has_context].reset_index(drop=True)

In [None]:
# Work with just the 'entity_context' column of the filtered rows.
entity_context_data = filtered_df['entity_context']

In [None]:
# Bare last expression: the notebook renders the column as the cell output.
entity_context_data

Unnamed: 0,entity_context
0,['case germany right author claim jewish year ...
1,['melachim ii ii king chapter ahaziah twentytw...
2,['hello interest know jewish israeli think ara...
3,['may move israel near future take job offer j...
4,['expect mail israeli university outside israe...
...,...
17762,['see uptick amount comment accuse people supp...
17763,['israeli authority decline release ahmad mana...
17764,['important new palestinian topic understand p...
17765,['past two month leave frustrated upset native...


In [None]:
# Load the lexicon as a tab-separated, header-less table with columns "word" and "score".
# NOTE(review): the recorded valence.info() output further down reports 19970 rows
# with index labels up to 19970, i.e. the later dropna() removes exactly one row.
# pandas treats tokens such as "null", "NA" or "nan" as missing by default, so the
# dropped row may be a genuine lexicon word rather than bad data — TODO confirm.
valence = pd.read_csv("/content/drive/MyDrive/NLP/valence-NRC-VAD-Lexicon.txt", sep="\t", header=None, names=["word", "score"])

In [None]:
# Preview the loaded lexicon table.
valence

Unnamed: 0,word,score
0,generous,1.000
1,love,1.000
2,very positive,1.000
3,magnificent,1.000
4,happily,1.000
...,...,...
19966,disheartening,0.010
19967,mistreated,0.010
19968,toxic,0.008
19969,nightmare,0.005


In [None]:
# Drop every row that has a missing value in any column.
valence = valence.dropna()

In [None]:
# Check row count, column dtypes and non-null counts after dropping missing rows.
valence.info()

<class 'pandas.core.frame.DataFrame'>
Index: 19970 entries, 0 to 19970
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   word    19970 non-null  object 
 1   score   19970 non-null  float64
dtypes: float64(1), object(1)
memory usage: 468.0+ KB


In [None]:
# Renumber rows 0..n-1, discarding the old index labels (dropna() left a gap).
valence = valence.reset_index(drop=True)

In [None]:
# Word -> score lookup table built by pairing the two lexicon columns row by row.
lexicon_dict = {
    word: score
    for word, score in zip(valence['word'], valence['score'])
}

In [None]:
# Load the spaCy pipeline "en_core_web_sm"; the analysis function below calls it
# and reads each token's dependency label (dep_) and part-of-speech tag (pos_).
nlp = spacy.load("en_core_web_sm")

In [None]:
def clean_text(text):
    """Turn a stringified list such as "['a b', 'c d']" into plain text.

    Steps, in order:
      1. strip any leading/trailing '[' and ']' characters;
      2. remove every single-quote character anywhere in the string;
      3. strip surrounding whitespace.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string.
    """
    without_brackets = text.strip("[]")
    without_quotes = "".join(ch for ch in without_brackets if ch != "'")
    return without_quotes.strip()

In [None]:
# Clean every entry of the context column, giving a plain list of strings.
entity_context = list(map(clean_text, entity_context_data))

In [None]:
# Entities to compare, plus one (initially empty) list of scores per entity.
entities_to_analyze = ["hamas", "israel"]
results = {entity: [] for entity in entities_to_analyze}

In [None]:
def analyze_entity_sentiment(text, entity, lexicon):
    """Score the words attached to ``entity`` wherever it acts as a subject.

    ``text`` is parsed with the module-level spaCy pipeline ``nlp``. For every
    token whose text equals ``entity`` and whose dependency label is ``nsubj``,
    these words are collected, in this order:

    * the governing verb, i.e. the token's head when it is tagged ``VERB``;
    * the token's children tagged ``VERB``;
    * the token's children tagged ``ADJ`` or ``ADV``.

    Args:
        text: Text to parse.
        entity: Token text to look for (exact, case-sensitive comparison).
        lexicon: Mapping from word to numeric score. Collected words that are
            not keys of this mapping are ignored when averaging.

    Returns:
        A dict with keys:
            "entity": the ``entity`` argument, unchanged.
            "words": every collected word, whether or not it is in ``lexicon``.
            "average_sentiment": mean lexicon score of the collected words
                found in ``lexicon``, or ``None`` when none were found.
    """
    doc = nlp(text)
    entity_verbs_modifiers = []

    for token in doc:
        if token.text != entity or token.dep_ != "nsubj":
            continue

        # In a dependency parse the predicate governs its subject, so the verb
        # the entity performs is the subject token's head, not one of its
        # children. Looking only at children misses it.
        governing_verbs = [token.head.text] if token.head.pos_ == "VERB" else []
        # Verbs below the subject and adjectives/adverbs that modify the
        # entity directly.
        verbs = [child.text for child in token.children if child.pos_ == "VERB"]
        modifiers = [child.text for child in token.children if child.pos_ in ["ADJ", "ADV"]]
        entity_verbs_modifiers.extend(governing_verbs + verbs + modifiers)

    sentiment_scores = [lexicon[word] for word in entity_verbs_modifiers if word in lexicon]
    if sentiment_scores:
        avg_sentiment = sum(sentiment_scores) / len(sentiment_scores)
    else:
        # No collected word has a lexicon entry: report "no score", not 0.
        avg_sentiment = None

    return {
        "entity": entity,
        "words": entity_verbs_modifiers,
        "average_sentiment": avg_sentiment
    }

In [None]:
# Score every (context, entity) pair and keep only the pairs that produced a
# score; contexts with no lexicon-covered words return None and are skipped.
for text in entity_context:
    for entity in entities_to_analyze:
        result = analyze_entity_sentiment(text, entity, lexicon_dict)
        if result['average_sentiment'] is None:
            continue
        results[entity].append(result['average_sentiment'])

In [None]:
# Mean of the per-context scores for each entity. An entity with no scored
# contexts yields None instead of raising ZeroDivisionError, mirroring the
# None that analyze_entity_sentiment returns when nothing could be scored.
hamas_scores = results['hamas']
israel_scores = results['israel']
hamas_avg_sentiment = sum(hamas_scores) / len(hamas_scores) if hamas_scores else None
israel_avg_sentiment = sum(israel_scores) / len(israel_scores) if israel_scores else None

In [None]:
# Report the overall average score computed for each entity.
print(f"Average sentiment for 'hamas': {hamas_avg_sentiment}")
print(f"Average sentiment for 'israel': {israel_avg_sentiment}")

Average sentiment for 'hamas': 0.5412050473186111
Average sentiment for 'israel': 0.5556480280929228
