# Sentiment Analysis and Visualization
Note: The sentiment annotation program has to be run first, because this notebook reads the sentence-level and speech-level annotation files it produces.

In [None]:
import csv
import pandas as pd
import seaborn as sns
from unsc_sentiment.corpus_utils import get_debates_by_theme
from config import META_PATH, SPEAKER_PATH, SENTENCE_ANNOTATIONS, SPEECH_ANNOTATIONS

## Analyse speeches with specific debate theme

### Extract debate ids
The topic can be adjusted.\
Note: A theme might have multiple spellings/wordings, so synonyms are used as function input.

In [None]:
def get_debates_by_topic(topic, unsc_meta_path):
    """Get the ids of UNSC debates whose topic contains the given string.

    The match is a case-insensitive substring test against the fourth column
    of each row; the id is taken from the first column of the same row.
    Rows with fewer than four columns (e.g. blank lines) are skipped.

    Args
    ----
    topic (str): String that should occur in the topic of interest.
    unsc_meta_path (str): Path to tsv file containing UNSC meta data.

    Return
    ------
    list of strings: List containing the debate ids.
    """
    topic = topic.lower()
    debates = []
    # newline='' is the csv module's recommended mode for files given to a reader.
    # The with-statement closes the file, so no explicit close() is needed.
    with open(unsc_meta_path, 'r', encoding='utf-8', newline='') as meta_file:
        for row in csv.reader(meta_file, delimiter='\t'):
            # Blank or truncated lines have no topic column; skip them
            # instead of failing with an IndexError.
            if len(row) > 3 and topic in row[3].lower():
                debates.append(row[0])
    return debates

Uncomment and run this if you have the exact topic name(s):

In [None]:
# theme_synonyms = ["Women and peace and security", "Women, peace, and security"]
# topic_debate_ids = get_debates_by_theme(theme_synonyms, META_PATH)
# print(len(topic_debate_ids))

Uncomment and run this if you want to extract topics containing a certain string:

In [None]:
# topic = "Yugoslavia"
# topic_debate_ids = get_debates_by_topic(topic, META_PATH)
# print(len(topic_debate_ids))

### Get sentiment of the debate speeches and save as csv

In [None]:
# Paths to the output sentiment csvs:
# one file for the sentence-level scores and one for the speech-level scores
# of the speeches selected by topic.
topic_sentiment_per_sentence_csv = "data_v3/beispielanalyse/unsc_wps_sentiment_per_sentence.csv"
topic_sentiment_per_speech_csv = "data_v3/beispielanalyse/unsc_wps_sentiment_per_speech.csv"

In [None]:
def is_topic(speech_id, topic_ids):
    """Check whether a speech belongs to one of the selected debates.

    Returns True as soon as any entry of ``topic_ids`` occurs as a substring
    of ``speech_id``, and False if none does (including for an empty list).
    """
    return any(debate_id in speech_id for debate_id in topic_ids)

#### Create the sentence level output csv

In [None]:
# Column names of the sentence-level output csv
topic_sentiment_ps_header = ["speech_id", "paragraphIndex", "paragraphSentenceIndex", "totalSentenceIndex", "sentimentScore"]
# newline="" lets the csv module control line endings itself (otherwise blank
# rows appear between records on Windows). The with-statements close both
# files, so no explicit close() calls are needed.
with open(topic_sentiment_per_sentence_csv, "w", encoding="utf-8", newline="") as topic_sentiment_ps_csv:
    topic_writer = csv.writer(topic_sentiment_ps_csv)
    topic_writer.writerow(topic_sentiment_ps_header)
    # Iterate the sentence-level annotations of the UNSC speeches
    # If an entry belongs to the topic debates copy it to the output file
    with open(SENTENCE_ANNOTATIONS, "r", encoding="utf-8", newline="") as unsc_sentiment_csv:
        unsc_reader = csv.reader(unsc_sentiment_csv, delimiter="\t")
        # skip header
        next(unsc_reader, None)
        for entry in unsc_reader:
            if is_topic(entry[0], topic_debate_ids):
                # Keep only the last path component and drop its final four
                # characters (presumably the file extension -- TODO confirm).
                speech_id = entry[0].split("/")[-1][:-4]
                p_index = entry[1]
                ps_index = entry[2]
                # Previously this re-read column 2, so totalSentenceIndex was
                # always a copy of paragraphSentenceIndex.
                # NOTE(review): assumes column 3 holds the total sentence
                # index -- confirm against the annotation file header.
                ts_index = entry[3]
                score = entry[5]
                topic_writer.writerow([speech_id, p_index, ps_index, ts_index, score])

#### Create the speech level output csv

In [None]:
# Column names of the speech-level output csv
topic_sentiment_header = ["speech_id", "speechPath", "sentimentScore", "speech_annotation_path"]
# newline="" lets the csv module control line endings itself (otherwise blank
# rows appear between records on Windows). The with-statements close both
# files, so no explicit close() calls are needed.
with open(topic_sentiment_per_speech_csv, "w", encoding="utf-8", newline="") as topic_sentiment_csv:
    topic_writer = csv.writer(topic_sentiment_csv)
    topic_writer.writerow(topic_sentiment_header)
    # Iterate the annotated UNSC speeches
    # If a speech belongs to the topic debates copy entry to output file
    with open(SPEECH_ANNOTATIONS, "r", encoding="utf-8", newline="") as unsc_sentiment_csv:
        unsc_reader = csv.reader(unsc_sentiment_csv, delimiter="\t")
        # skip header
        next(unsc_reader, None)
        for entry in unsc_reader:
            if is_topic(entry[0], topic_debate_ids):
                # The first four columns are copied unchanged, in order
                speech_id = entry[0]
                speech_path = entry[1]
                score = entry[2]
                annotation_path = entry[3]
                topic_writer.writerow([speech_id, speech_path, score, annotation_path])

### Get meta data of the extracted speeches

In [None]:
# Path to the output meta data csv (speech id, date parts and country
# of the speeches selected by topic)
topic_meta_csv = "data_v3/beispielanalyse/topic_meta_for_analysis.csv"

In [None]:
# Create the output csv
topic_meta_header = ["speech_id", "year", "month", "day", "country"]
# newline="" lets the csv module control line endings itself (otherwise blank
# rows appear between records on Windows). The with-statements close both
# files, so no explicit close() calls are needed.
with open(topic_meta_csv, "w", encoding="utf-8", newline="") as topic_metacsv:
    topic_writer = csv.writer(topic_metacsv)
    topic_writer.writerow(topic_meta_header)
    # Iterate the speaker meta data of the UNSC speeches
    # If a speech belongs to the topic debates copy entry to output file
    # (the header row is not skipped explicitly; a row is only copied when
    # its column 12 contains one of the topic debate ids)
    with open(SPEAKER_PATH, "r", encoding="utf-8", newline="") as speakerfile:
        speaker_reader = csv.reader(speakerfile, delimiter="\t")
        for entry in speaker_reader:
            if is_topic(entry[12], topic_debate_ids):
                # Drop the final four characters of column 12
                # (presumably the file extension -- TODO confirm)
                speech_id = entry[12][:-4]
                year = entry[9]
                month = entry[10]
                day = entry[11]
                country = entry[1]
                topic_writer.writerow([speech_id, year, month, day, country])

### Merge meta and sentiment data

#### Sentence level data

In [None]:
# Load the topic meta data and the sentence-level sentiment scores,
# then join them on the shared speech_id column
topic_meta_df = pd.read_csv(topic_meta_csv)
topic_only_sentiment_ps_df = pd.read_csv(topic_sentiment_per_sentence_csv)
topic_sentiment_ps_df = topic_only_sentiment_ps_df.merge(topic_meta_df, on="speech_id")

#### Speech level data

In [None]:
# Load the topic meta data and the speech-level sentiment scores,
# then join them on the shared speech_id column
topic_meta_df = pd.read_csv(topic_meta_csv)
topic_only_sentiment_df = pd.read_csv(topic_sentiment_per_speech_csv)
topic_sentiment_df = topic_only_sentiment_df.merge(topic_meta_df, on="speech_id")

### Visualize sentence level topic specific sentiment over the years

In [None]:
# Use seaborn's 'paper' plotting context for the following figures
sns.set_context('paper')

In [None]:
# Line plot of the sentence-level sentiment scores per year for the selected topic
topic_total_fig = sns.relplot(
    data=topic_sentiment_ps_df,
    x="year",
    y="sentimentScore",
    kind="line",
    height=4,
    aspect=1.5,
)
topic_total_fig.fig.suptitle("Sentence-level sentiment of speeches concerning Yugoslavia", x=0.5, y=1)
# Uncomment to save the figure:
#topic_total_fig.savefig("data_v3/beispielanalyse/paper_yugoslavia_all_countries_sentence_sentiment.png")

### Visualize speech level topic specific sentiment over the years


In [None]:
# Line plot of the speech-level sentiment scores per year for the selected topic
topic_total_speech_fig = sns.relplot(
    data=topic_sentiment_df,
    x="year",
    y="sentimentScore",
    kind="line",
    height=4,
    aspect=1.5,
)
topic_total_speech_fig.fig.suptitle("Speech-level sentiment of speeches concerning Yugoslavia", x=0.5, y=1)
# Uncomment to save the figure:
#topic_total_speech_fig.savefig("data_v3/beispielanalyse/paper_yugoslavia_all_countries_speech_sentiment.png")

## Analyse P5 speeches

### Get meta data of all speeches

In [None]:
# Path to the output meta data csv (speech id, date parts and country
# for all speeches, not only those of a single topic)
meta_csv = "data/unsc_meta_for_analysis.csv"

In [None]:
# Create the output csv
meta_header = ["speech_id", "year", "month", "day", "country"]
# newline="" lets the csv module control line endings itself (otherwise blank
# rows appear between records on Windows). The with-statements close both
# files, so no explicit close() calls are needed.
with open(meta_csv, "w", encoding="utf-8", newline="") as metafile:
    meta_writer = csv.writer(metafile)
    meta_writer.writerow(meta_header)
    # Copy the selected columns of every speaker meta data row
    with open(SPEAKER_PATH, "r", encoding="utf-8", newline="") as speakerfile:
        speaker_reader = csv.reader(speakerfile, delimiter="\t")
        # skip header
        next(speaker_reader, None)
        for entry in speaker_reader:
            # Drop the final four characters of column 12
            # (presumably the file extension -- TODO confirm)
            speech_id = entry[12][:-4]
            year = entry[9]
            month = entry[10]
            day = entry[11]
            country = entry[1]
            meta_writer.writerow([speech_id, year, month, day, country])

### Merge meta and sentiment data


In [None]:
# Load the speech-level sentiment annotations (tab separated) and the meta
# data csv, then join them on the shared speech_id column
only_sentiment_df = pd.read_csv(SPEECH_ANNOTATIONS, sep="\t")
meta_df = pd.read_csv(meta_csv)
sentiment_df = only_sentiment_df.merge(meta_df, on="speech_id")

In [None]:
# Sanity check of the merged frame: (rows, columns)
sentiment_df.shape # Should be (82165, 8) for all speeches

### Analyse sentiment of P5 speeches


In [None]:
# Country names of the five permanent Security Council members,
# used to filter the merged sentiment data
p5 = [
    "China",
    "France",
    "Russian Federation",
    "United Kingdom Of Great Britain And Northern Ireland",
    "United States Of America",
]
# Keep only the rows whose country is one of the P5 names
p5_df = sentiment_df[sentiment_df["country"].isin(p5)]

In [None]:
# 'paper' context again, but with doubled font scale, an explicit grid line
# width and a fixed legend font size for the P5 figure
sns.set_context('paper', font_scale =2, rc={"grid.linewidth": 1, "legend.fontsize": 15})

In [None]:
# One line per P5 country (distinguished by both colour and line style)
# showing the speech-level sentiment score per year
# NOTE(review): newer seaborn releases replace `ci` with `errorbar` --
# confirm against the installed version before changing it.
sns_plot = sns.relplot(
    data=p5_df,
    x="year",
    y="sentimentScore",
    hue="country",
    style="country",
    kind="line",
    ci=None,
    height=8,
    aspect=1.5,
    linewidth=2,
    legend="brief",
)
sns_plot.fig.suptitle("Speech-level sentiment of all P5 speeches", x=0.5, y=1)
# Uncomment to save the figure:
#sns_plot.savefig("data_v3/beispielanalyse/poster_p5_speech_sentiment.png")