# Visualisation of tweets and their sentiment on EU map

## Dataframe should contain:
    - Full text
    - Coordinates 
    - Sentiment score
Optionally:
    - Username 
    -Timestamp for a timeline filter

In [1]:
import folium
import pandas as pd
import webbrowser
import json

In [42]:
# load for ger data
tweets = []
with open('stayathome_ger_march_42037_tweets.json') as f:
    for line in f:
        tweets.append(json.loads(line))
print(len(tweets))

tweets


42037


[{'created_at': 'Tue Mar 31 21:29:00 +0000 2020',
  'id': 1245100809286815745,
  'id_str': '1245100809286815745',
  'full_text': 'Leider kein Aprilscherz. Stadtverwaltung #Blumberg verhängt #Ausgangssperre für #Riedböhringen ab dem 1. April 0Uhr.\n22 der 1000 Einwohner sind mit #COVID19 infiziert.\n#StayAtHome\n#vs\nhttps://t.co/OUGu37qQFB',
  'truncated': False,
  'display_text_range': [0, 207],
  'entities': {'hashtags': [{'text': 'Blumberg', 'indices': [41, 50]},
    {'text': 'Ausgangssperre', 'indices': [60, 75]},
    {'text': 'Riedböhringen', 'indices': [80, 94]},
    {'text': 'COVID19', 'indices': [148, 156]},
    {'text': 'StayAtHome', 'indices': [168, 179]},
    {'text': 'vs', 'indices': [180, 183]}],
   'symbols': [],
   'user_mentions': [],
   'urls': [{'url': 'https://t.co/OUGu37qQFB',
     'expanded_url': 'https://www.schwarzwaelder-bote.de/inhalt.blumberg-coronavirus-ausgangssperre-in-riedboehringen.f14d7407-a4e7-42dd-aa9f-01f3ed3b9d7f.html',
     'display_url': 'schwarzwa

In [43]:
coords = [[tweet['created_at'], tweet['user']['name'], tweet['coordinates']['coordinates'][0], tweet['coordinates']['coordinates'][1],\
           tweet['full_text']] for tweet in tweets if tweet['coordinates']]
print(len(coords))

461


In [44]:
# use center point of bounding box
place = [[tweet['created_at'], tweet['user']['name'], \
          (float(tweet['place']['bounding_box']['coordinates'][0][0][0])\
          + float(tweet['place']['bounding_box']['coordinates'][0][1][0])) / 2, \
           (float(tweet['place']['bounding_box']['coordinates'][0][0][1])\
          + float(tweet['place']['bounding_box']['coordinates'][0][3][1])) / 2, \
           tweet['full_text']] for tweet in tweets if tweet['place'] and not tweet['coordinates']]
print(len(place))

2036


In [45]:
coords= pd.DataFrame(coords)
coords.columns = ['time', 'username', 'longitude', 'latitude', 'text']


In [46]:
place = pd.DataFrame(place)
place.columns = ['time', 'username', 'longitude', 'latitude', 'text']


#### Adding sentiments and colour bins for mapping

In [47]:
from textblob_de import TextBlobDE

In [48]:
def sentiment_analyzer_scores(sentence):
    text = TextBlobDE(sentence)
    score = text.sentiment.polarity
    return score

In [49]:
coords["sentiment"] = coords.apply(lambda x: sentiment_analyzer_scores(x["text"]), axis=1)




In [50]:
coords[coords["sentiment"]==-1.00]["text"]

72     Schlimm, was diese Corona-Ausgangssperre mit e...
233    So traurig ohne Euch!#tanzwerkzade #stayathome...
445    Ist nicht alles schlecht die Tage #corona #sta...
Name: text, dtype: object

In [51]:
place["sentiment"] = place.apply(lambda x: sentiment_analyzer_scores(x["text"]), axis=1)


In [52]:
place['marker_color'] = pd.cut(coords['sentiment'], bins=5, 
                              labels=['red', 'arpicot', 'white', 'palegreen', 'lime'])

In [53]:
coords['marker_color'] = pd.cut(coords['sentiment'], bins=5, 
                              labels=['red', 'arpicot', 'white', 'palegreen', 'lime'])


In [54]:
coords.head()

Unnamed: 0,time,username,longitude,latitude,text,sentiment,marker_color
0,Tue Mar 31 20:57:26 +0000 2020,Stephan L.,6.9667,50.95,Prinz von Klopapier 3/3\n\nMe on the #stayatho...,0.0,white
1,Tue Mar 31 18:16:53 +0000 2020,MFS Fussballtraining,11.5833,48.15,#stayathome #beatthecoach \nFerdi hat alles ra...,0.0,white
2,Tue Mar 31 18:27:45 +0000 2020,Sue,13.7333,51.0416,Oohhkay...nach #Corona muss ich wahrscheinlich...,-0.35,arpicot
3,Tue Mar 31 16:50:51 +0000 2020,Sportclub_Alstertal_Langenhorn,10.0014,53.55,Neue Trainingsvideos für Euch online.\nViel Sp...,0.0,white
4,Tue Mar 31 16:54:21 +0000 2020,Dirk Schoemakers,6.348404,51.722235,==== HIS &amp; HERS ====\nAuch bei der Arbeit ...,0.0,white


In [55]:
df = place.append(coords)
df.head()
len(df)

2497

In [31]:
df['marker_color'] = pd.cut(coords['sentiment'], bins=5, 
                              labels=['red', 'orange', 'yellow', 'lime', 'green'])


In [16]:
#df = pd.DataFrame(coords, columns = ["username", "longitude", "latitude", "text"])

def plotDots(dataframe):
    # reading geodata into folium map
    folium.CircleMarker(location=[float(dataframe.latitude), float(dataframe.longitude)],
                        radius=6,
                        popup = str(dataframe.sentiment)+": "+dataframe.text,
                        color=[str(dataframe.marker_color)],
                        fill=True,
                        fill_color=[str(dataframe.marker_color)]).add_to(twitter_map)

def plotDots2(dataframe):
    # reading geodata into folium map
    folium.CircleMarker(location=[float(dataframe.latitude), float(dataframe.longitude)],
                        radius=6,
                        popup = str(dataframe.sentiment)+": "+dataframe.text,
                        color = [str(dataframe.marker_color)],
                        fill=True,
                        fill_color=str(dataframe.marker_color)).add_to(twitter_map)
    
# create folium map 
twitter_map = folium.Map(prefer_canvas=True)

# Apply plotDot to dataframe
df.apply(plotDots, axis = 1)
#place.apply(plotDots2, axis = 1)

# zoom in
twitter_map.fit_bounds(twitter_map.get_bounds())

# save map
twitter_map.save('visualisations/testall.html')
# open html file in new tab
webbrowser.open_new_tab('visualisations/testall.html')

True

In [32]:
from datetime import datetime

# dtime = tweet['created_at']
dtime = 'Fri Oct 09 10:01:41 +0000 2015'
new_datetime = datetime.strftime(datetime.strptime(dtime,'%a %b %d %H:%M:%S +0000 %Y'), '%Y-%m-%d %H:%M:%S')
print((new_datetime))

2015-10-09 10:01:41


ValueError: time data '2020-05-31' does not match format '%Y-%m-%d %H:%M:%S'

In [56]:
#fix timestamps for usability 

def converttime(timestamp):
    return datetime.strftime(datetime.strptime(timestamp, '%a %b %d %H:%M:%S +0000 %Y'), '%Y-%m-%d')

df["time"]= [converttime(i) for i in df["time"]]

print(df.head())

         time           username  longitude   latitude  \
0  2020-03-31               Lisa   7.310243  49.954493   
1  2020-03-31  Anne Lützelberger  13.424606  52.506701   
2  2020-03-31       Haus am Berg   8.488862  51.179394   
3  2020-03-31               llit   7.562621  51.484870   
4  2020-03-31   Madame Charlotte   9.761433  52.379798   

                                                text  sentiment marker_color  
0  Was würdet ihr sagen #Maskenpflicht ....😷\n#CO...       0.00        white  
1  Danke für diesen sachlichen Beitrag! 🦠💡#corona...       0.50        white  
2  Liebe treue Freunde &amp; Gäste,\nIhr fehlt un...       0.46      arpicot  
3  Dito, kannst #Kind1 schlecht vermitteln. #Coro...      -0.50        white  
4  Ich habe heute meinen weiblichen Familienmitgl...       0.35        white  


In [57]:
df.to_csv("visualisation_data_de.csv", index=True)