In [None]:
!pip install variationist

In [None]:
!pip install stopwordsiso

In [14]:
import pandas as pd
from variationist import Inspector, InspectorArgs, Visualizer, VisualizerArgs

In [15]:
import stopwordsiso
from wordcloud import WordCloud
import matplotlib.pyplot as plt

In [16]:
# Stopword list later handed to the Inspector as `custom_stopwords`:
# the English list from stopwordsiso followed by extra tokens to exclude.
english_stopwords = [
    *stopwordsiso.stopwords("en"),
    "super", "wien", "pause", "germany", "austria",
    "mega", "fein", "pre", "obwohl", "beste",
    "franzen", "lugar", "österreich", "imgur", "buffet",
]

In [17]:
# Location identifiers; each one is interpolated into the per-location
# dataset file names read further down.
locations = [
    "Allianz_Stadion",
    "Donauinsel",
    "Ernst_Happel_Stadion",
    "Heldenplatz",
    "Rathausplatz",
    "Schottenring",
    "Waehring",
]

In [18]:
# from google.colab import drive
# drive.mount('/content/drive')

### Show distribution of emotions in Vienna locations.
Here we take the average emotion scores for anger, anticipation, disgust, fear, joy, sadness, surprise, and trust, and show how their distribution changes across different areas of the city.


In [19]:
# Map each location to its list of emotion scores, ordered like `emotions`.
# The scores are taken from the first row of the location's
# *_EmotionsNorm_mean.tsv file.
emotions = ["anger", "anticipation", "disgust", "fear", "joy", "sadness", "surprise", "trust"]
emotion_plot_data = {}

for location in locations:
    dataframe = pd.read_csv(
        f"./content3/precrisis-text-analysis/{location}_dataset_EmotionsNorm_mean.tsv",
        sep="\t",
    )
    emotion_plot_data[location] = [
        float(dataframe[column].values[0]) for column in emotions
    ]

In [None]:
# Draw one donut chart per location showing how its emotion scores are distributed.
cmap = plt.cm.gist_heat

# One small wedge offset and one colour per emotion (eight of each).
explode = [0.005] * 8
outer_colors = [cmap(shade) for shade in (.5, .55, .6, .65, .7, .75, .8, .85)]

for location in locations:
    # Blank lines followed by the location name serve as a heading for the chart.
    print("\n\n", location, sep="\n")
    fig1, ax1 = plt.subplots()
    # Base font size for the text drawn below.
    plt.rcParams['font.size'] = 10.5
    ax1.axis('equal')
    _, _, autopcts = ax1.pie(
        emotion_plot_data[location],
        explode=explode,
        labels=emotions,
        autopct='%1.f%%',
        shadow=False,
        pctdistance=0.8,
        radius=1,
        colors=outer_colors,
        startangle=270,
    )
    plt.setp(autopcts, color='black', fontsize=10)
    # A white disc over the centre turns the pie into a donut.
    ax1.add_artist(plt.Circle((0, 0), 0.40, fc='white'))
    plt.tight_layout()
    plt.show()

### Create wordclouds for each emotion
Here, for each location we are analyzing, we create a word cloud for negative emotions (disgust, fear, and anger), showing the most relevant words for each emotion in that specific area. To calculate relevance, we use a modified version of PMI (pointwise mutual information).


In [None]:
# For every location, find the tokens most associated with highly negative
# texts and turn them into a word cloud.
#   wordcloud_dict[location][emotion] -> WordCloud (only set when enough tokens exist)
#   res_dict[location][emotion]       -> full Inspector result
wordcloud_dict = {}
res_dict = {}

for location in locations:
    dataframe = pd.read_csv(f"./content3/precrisis-text-analysis/{location}_dataset_EmotionsNorm.tsv", sep="\t")
    print(location.upper())
    wordcloud_dict[location] = {}
    res_dict[location] = {}
    for emotion in ["nrcPosNegDict_Negative"]:
        # Replace the numeric score by a two-level label: pd.cut with bins=2
        # splits the observed score range into two equal-width intervals.
        dataframe[emotion] = pd.cut(dataframe[emotion], bins=2, retbins=False, labels=["low", "high"])

        ins_args = InspectorArgs(text_names=["text_prep"],
                            var_names=[emotion],
                            var_types=["nominal"],
                            metrics=["npw_relevance"],
                            n_tokens=1,
                            language="de",
                            stopwords=True,
                            custom_stopwords=english_stopwords,
                            lowercase=False
                            )

        # Run the inspector (on rows without missing values) and keep the results
        res = Inspector(dataframe.dropna(), args=ins_args).inspect()
        res_dict[location][emotion] = res

        # Only the "high" level is visualised. It is looked up directly instead
        # of looping with a variable named `bin`, which would rebind the
        # builtin bin() for every later cell of the notebook.
        relevance_by_level = res["metrics"]["npw_relevance"][emotion]
        high_relevance = relevance_by_level["high"] if "high" in relevance_by_level else {}

        # A cloud is only built when more than two tokens are available.
        if len(high_relevance) > 2:
            print(high_relevance)
            # random_state is fixed so the layout is the same on every run.
            wc = WordCloud(font_path="./content3/precrisis-text-analysis/Symbola.otf", width=800, height=400, max_words=20, background_color='white', colormap='gist_heat_r', random_state=4).generate_from_frequencies(high_relevance)
            wordcloud_dict[location][emotion] = wc


In [None]:
# Inspect the entry stored for one location: a dict mapping the emotion column
# name to its WordCloud (empty if no cloud was generated for this location).
wordcloud_dict['Donauinsel']

In [None]:
# Display every generated word cloud, preceded by a "<LOCATION> - <label> emotions" heading.
for location, clouds_by_emotion in wordcloud_dict.items():
    for emotion, wc in clouds_by_emotion.items():
        print(wc)
        print("\n\n")
        # The heading uses the part of the column name after the last underscore.
        label = emotion.split('_')[-1]
        print(f"{location.upper()} - {label} emotions")
        plt.imshow(wc, interpolation='bilinear')
        plt.axis('off')
        plt.show()

In [None]:
# Build one "emotions" record per (location, emotion) pair for the JSON export.
# NOTE: from here on `emotion_plot_data` is a list of records, no longer the
# location -> scores dict built earlier in the notebook.
locations = [
    "Allianz_Stadion",
    "Donauinsel",
    "Ernst_Happel_Stadion",
    "Heldenplatz",
    "Rathausplatz",
    "Schottenring",
    "Waehring",
]
emotions = ["anger", "anticipation", "disgust", "fear", "joy", "sadness", "surprise", "trust"]

emotion_plot_data = []

for location in locations:
    dataframe = pd.read_csv(
        f"./content3/precrisis-text-analysis/{location}_dataset_EmotionsNorm_mean.tsv",
        sep="\t",
    )
    for emotion in emotions:
        # The same dict is used for both the tags and the fields of the record.
        payload = {
            "city": "Vienna",
            "location": location,
            "emotion": emotion,
            "score": float(dataframe[emotion].values[0]),
        }
        emotion_plot_data.append(
            {"measurement": "emotions", "tags": payload, "fields": payload}
        )

emotion_plot_data
    



In [None]:
import base64

# Render every word cloud to a PNG and store it, base64-encoded, in a
# "wordclouds" record (measurement / tags / fields) for the JSON export.
plots = []
for location in wordcloud_dict:
    for emotion in wordcloud_dict[location]:
        wc = wordcloud_dict[location][emotion]
        print("\n\n")
        print(f"{location.upper()} - {emotion.split('_')[-1]} emotions")
        fig = plt.figure(figsize=(20, 20))
        plt.imshow(wc, interpolation='bilinear')
        plt.axis('off')
        # The image is written to disk and read back below to obtain its bytes;
        # the file is overwritten on every iteration.
        plt.savefig("myimage.png", format='png')
        # Show the figure right under its heading, then release it so that
        # large figures do not accumulate while looping.
        plt.show()
        plt.close(fig)
        with open("myimage.png", "rb") as image_file:
            encoded_string = base64.b64encode(image_file.read())
        record = {
            "measurement": "wordclouds",
            "tags": {"city": "Vienna", "location": location},
            "fields": {"location": location, "image": encoded_string.decode("utf-8")},
        }
        plots.append(record)

# Print a short summary instead of the records themselves: each record embeds
# a complete base64-encoded PNG, which would flood the cell output.
print(f"Encoded {len(plots)} word cloud image(s)")

In [26]:
import json

# Concatenate the emotion score records with the word cloud image records and
# write them to a single JSON file.
# The combined list is deliberately not named `all`: that would shadow the
# builtin all() for every cell executed afterwards.
all_records = emotion_plot_data + plots

with open("text_analysis_vienna.json", "w") as json_file:
    json.dump(all_records, json_file)