In [4]:
# import networkx as nx
import json
import pandas as pd
import matplotlib.pyplot as plt

In [5]:
import mgclient

In [6]:
# Make a connection to the database
conn = mgclient.connect(host='127.0.0.1', port=7687)

# Create a cursor for query execution
cursor = conn.cursor()

def query(query):
    cursor.execute(query)
    return cursor.fetchall()

## community detection

In [31]:
# community detection, louvain algo

q = '''CALL community_detection.get()
YIELD node, community_id
WITH node, community_id
WHERE 'User' IN labels(node)
SET node.community = community_id
RETURN node, community_id;'''

res = query(q)

In [43]:
def extract(node):
    data = {
        'id': node.id,
        'label': node.labels,
        **node.properties
    }
    return data

users = pd.DataFrame({i[0].id:extract(i[0]) for i in res}).T

In [66]:
users

Unnamed: 0,id,label,community,created_at,description,favourites_count,followers_count,friends_count,ident,is_blue_verified,listed_count,location,media_count,name,normal_followers_count,possibly_sensitive,screen_name,statuses_count,verified
805384,805384,{User},0,2014-03-15T10:39:00+00:00,"apotheker, schepen van financi√´n Evergem, fede...",807,4390,605,2390807984,True,49,,521,kathleen depoorter,4390,False,KDepoorterMP,2328,False
805385,805385,{User},6,2010-05-15T08:47:16+00:00,"D√©put√© Bruxellois , Pr√©sident du Groupe PS - @...",14789,4264,1898,144099318,False,86,Evere....,2070,Ridouane Chahid,4264,False,RidouaneChahid,15501,False
805386,805386,{User},1,2014-08-21T13:58:31+00:00,"Welcome to Dominion! Collect heroes, create te...",16,35541,0,2752235233,True,111,,2,Hero Wars,35541,False,HeroWarsWeb,2,False
805387,805387,{User},3,2021-03-19T12:27:36+00:00,The platform engineering guy. üëæ I ask the best...,1046,31265,189,1372887446669647875,True,245,,933,Luca,31265,False,luca_cloud,2776,False
805388,805388,{User},4,2011-09-09T14:05:20+00:00,Parlementslid | West-Vlaams Lijsttrekker @de_N...,8548,18505,1851,370709666,True,213,Koksijde - Oostduinkerke,4249,Sander Loones,18505,False,SanderLoones,19280,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
896001,896001,{User},8,2024-04-30T23:47:59+00:00,√âducation. Enseignant. Voyage. vid√©o. √âcriture,0,0,0,1785456092090171392,False,0,France,0,Anne-Claire Sylvieü§µüçì,0,False,captainpinprick,3,False
896002,896002,{User},8,2023-12-10T08:16:32+00:00,Enraptured by #limitless üî•NFT Marketplaceüî• üî• C...,0,19,0,1733762597025972224,False,0,Naples,1,Nigel Calen,19,False,calen13662,1,False
896003,896003,{User},6,2015-01-13T17:14:06+00:00,,760,6,80,2976761255,False,0,,7,Vicduquenoy,6,False,vicduquenoy,364,False
896004,896004,{User},6,2022-12-13T10:03:37+00:00,,609,13,41,1602605066627727360,False,0,,0,Martine Demillequand,13,False,MartineDemille1,10,False


In [82]:
# a large number of identified communities are super small (1-4 members), while the largest on has over 20k member
users.groupby('community')['id'].count().value_counts().sort_index()

1        1717
2         199
3          19
4           8
5           1
7           1
11          1
13          1
22          1
25          2
26          1
38          1
54          1
70          2
77          1
92          1
96          1
97          1
106         1
128         1
131         1
133         1
155         1
185         1
200         1
203         1
212         1
248         1
261         1
387         1
393         1
464         1
484         1
485         1
575         1
625         1
670         1
842         1
861         1
887         1
1003        1
1023        1
1182        1
1321        1
1523        1
1551        1
1876        1
2344        1
2361        1
2701        1
2733        1
3013        1
4298        1
4655        1
6336        1
8522        1
11189       1
21424       1
Name: id, dtype: int64

## engagement by politican

In [87]:
# engagements by politician 

q = '''MATCH (u:User)-[:MEMBER_OF]->(p:Party), (u)-[:POSTED]->(t:Tweet)
WITH u.name AS Politician, p.ident AS Party, AVG(t.reply_count) AS AvgReply, AVG(t.retweet_count) AS AvgRetweet, AVG(t.favorite_count) AS AvgLikes, count(t) AS NumTweets
ORDER BY AvgReply DESC
RETURN Politician, Party, AvgReply, AvgRetweet, AvgLikes, NumTweets'''

pol_eng = query(q)

In [90]:
df_pol_eng = pd.DataFrame(pol_eng, columns='Politician, Party, AvgReply, AvgRetweet, AvgLikes, NumTweets'.split(',')).round(1)
df_pol_eng

Unnamed: 0,Politician,Party,AvgReply,AvgRetweet,AvgLikes,NumTweets
0,Emir KIR,ONAFH,233.7,24.8,106.4,31
1,Melissa Depraetere,Vooruit,144.8,48.9,98.0,36
2,Darya Safai MP,N-VA,66.7,525.6,718.5,245
3,Wouter De Vriendt,Ecolo-Groen,66.3,457.9,46.0,50
4,Kristof Calvo,Ecolo-Groen,45.1,7.0,41.0,42
...,...,...,...,...,...,...
132,Olivier Vajda,Ecolo-Groen,0.0,14.3,0.1,35
133,Philippe TISON,PS,0.0,37.2,0.1,20
134,Stefaan Vanhecke,Ecolo-Groen,0.0,11.0,0.0,2
135,Erik Gilissen,VB,0.0,46.3,1.5,28


maybe let's see who's spewing nonsense, and whether people buy it?

In [91]:
# which politician got community-noted? does the engagement increase?

q = '''MATCH (u:User)-[:MEMBER_OF]->(p:Party), (u)-[:POSTED]->(t:Tweet), (u)-[:POSTED]->(tc:Tweet {has_birdwatch_notes: True})
WITH u.name AS Politician, p.ident AS Party,
AVG(t.reply_count) AS AvgReply, AVG(t.retweet_count) AS AvgRetweet, AVG(t.favorite_count) AS AvgLikes, count(t) AS NumTweets,
AVG(tc.reply_count) AS AvgReplyCN, AVG(tc.retweet_count) AS AvgRetweetCN, AVG(tc.favorite_count) AS AvgLikesCN, count(tc) AS NumTweetsCN
ORDER BY AvgReply DESC
RETURN Politician, Party, AvgReply, AvgRetweet, AvgLikes, NumTweets, AvgReplyCN, AvgRetweetCN, AvgLikesCN, NumTweetsCN'''

cn_eng = query(q)

In [92]:
df_cn_eng = pd.DataFrame(cn_eng, columns='Politician, Party, AvgReply, AvgRetweet, AvgLikes, NumTweets, AvgReplyCN, AvgRetweetCN, AvgLikesCN, NumTweetsCN'.split(',')).round(1)
df_cn_eng

Unnamed: 0,Politician,Party,AvgReply,AvgRetweet,AvgLikes,NumTweets,AvgReplyCN,AvgRetweetCN,AvgLikesCN,NumTweetsCN
0,Emir KIR,ONAFH,81.6,10.5,45.0,30,4798.0,455.0,1949.0,30
1,Darya Safai MP,N-VA,66.3,525.9,716.5,732,162.3,439.3,1213.7,732
2,Theo Francken MP,N-VA,22.8,171.7,107.1,8888,169.4,56.5,435.0,8888
3,Kim Buyst,Ecolo-Groen,20.8,10.0,6.6,42,283.0,7.0,27.0,42
4,Michael Freilich,N-VA,19.6,43.2,76.8,924,115.7,49.3,213.3,924
5,Peter De Roover,N-VA,17.2,33.8,160.5,119,189.0,9.0,146.0,119
6,Raoul Hedebouw,PVDA-PTB,16.3,20.7,41.7,325,132.0,19.0,53.0,325
7,Joris Vandenbroucke,Vooruit,15.9,10.2,13.5,419,135.0,6.0,42.0,419
8,Peter Mertens MP,PVDA-PTB,12.6,53.0,28.4,1143,69.3,19.3,69.3,1143
9,Denis Ducarme,MR,11.3,12.2,44.6,948,75.8,48.5,222.8,948


In [97]:
q = '''MATCH (u:User)-[:MEMBER_OF]->(p:Party), (u)-[:POSTED]->(tc:Tweet {has_birdwatch_notes: True})
RETURN u.name, p.ident, tc.birdwatch_classification, tc.full_text'''

cn_tweets = query(q)

In [98]:
# unfortunately, very few of the tweets are classified, en ik spreek geen nederlands (?)
df_cn_tweets = pd.DataFrame(cn_tweets)
df_cn_tweets

Unnamed: 0,0,1,2,3
0,Darya Safai MP,N-VA,,Ongelofelijk!In het Brusselse Parlement: een b...
1,Darya Safai MP,N-VA,,Which surah from the Quran did the Imam actual...
2,Simon Moutquin,Ecolo-Groen,,Palestine: 4 manifs organis√©es par la soci√©t√© ...
3,Peter Mertens MP,PVDA-PTB,MisinformedOrPotentiallyMisleading,Alle fiscale constructies van miljardairs en m...
4,Peter Mertens MP,PVDA-PTB,,Voor @PieterVBO is eerlijke fiscaliteit alleen...
5,Theo Francken MP,N-VA,,"24.620 martelaren.‚ÄòMartelaren‚Äô, ‚Äòshahids‚Äô, gee..."
6,Simon Moutquin,Ecolo-Groen,,(1/2) Les propos de l'ambassadrice d'Israel so...
7,Theo Francken MP,N-VA,,Zwak leiderschap zorgt voor een zwak Westen. W...
8,Denis Ducarme,MR,,On a retrouv√© de la coca√Øne dans le cabinet de...
9,Theo Francken MP,N-VA,,Majeure juridische overwinning voor Isra√´l. St...


In [104]:
# how do you do dutch sentiment analysis?

In [None]:
q = '''MATCH (u:User)-[:MEMBER_OF]->(p:Party)
CALL degree_centrality.get()
YIELD node, degree
RETURN node, degree'''

cent_deg = query(q)

: 

# sentiment analysis

In [7]:
q = '''
match (t:Tweet)
return *
'''

temp = query(q)

In [8]:
tweets = pd.DataFrame([i[0].properties for i in temp])

In [9]:
q = '''
match (p:Party) <-[:MEMBER_OF]- (u:User) -[:POSTED]-> (t:Tweet)
return p, u, t
'''

pol_tweets = query(q)

In [10]:
pol_tweets[0]

(<mgclient.Node(id=929337, labels={'Party'}, properties={'ident': 'N-VA'}) at 0x0000020E72BE11D0>,
 <mgclient.Node(id=805384, labels={'User'}, properties={'community': 0, 'created_at': '2014-03-15T10:39:00+00:00', 'description': 'apotheker, schepen van financi√´n Evergem, federaal volksvertegenwoordiger N-VA , commissie volksgezondheid ,commissie financi√´n, ontwikkelingssamenwerking.', 'favourites_count': 807, 'followers_count': 4390, 'friends_count': 605, 'ident': '2390807984', 'is_blue_verified': True, 'listed_count': 49, 'location': '', 'media_count': 521, 'name': 'kathleen depoorter', 'normal_followers_count': 4390, 'possibly_sensitive': False, 'screen_name': 'KDepoorterMP', 'statuses_count': 2328, 'verified': False}) at 0x0000020E72BE0ED0>,
 <mgclient.Node(id=464674, labels={'Tweet'}, properties={'created_at': '2024-02-18T17:37:46+00:00', 'favorite_count': 93, 'full_text': 'Schokkende beelden: ‚ÄúUNRWA-medewerker neemt lichaam van vermoorde Isra√´li√´r mee in voertuig tijdens Ham

In [11]:
pol_tweets = pd.DataFrame([{'party':i[0], 'user':i[1], **i[2].properties} for i in pol_tweets]) 

In [12]:
pol_tweets

Unnamed: 0,party,user,created_at,favorite_count,full_text,has_birdwatch_notes,ident,lang,quote_count,reply_count,retweet_count,views_count,ranked_classification,birdwatch_classification,birdwatch_tags
0,(:Party {'ident': 'N-VA'}),"(:User {'community': 0, 'created_at': '2014-03...",2024-02-18T17:37:46+00:00,93,Schokkende beelden: ‚ÄúUNRWA-medewerker neemt li...,False,1759271000850501861,nl,3,19,30,8025.0,,,
1,(:Party {'ident': 'N-VA'}),"(:User {'community': 0, 'created_at': '2014-03...",2024-02-18T12:13:18+00:00,445,"Vierdaagse werkweek van 32 uur, jongeren meer ...",False,1759189343275466980,nl,8,109,80,34771.0,,,
2,(:Party {'ident': 'N-VA'}),"(:User {'community': 0, 'created_at': '2014-03...",2024-02-17T12:16:23+00:00,11,@de_NVA #voorVlaamsewelvaert https://t.co/74d2...,False,1758827733210071107,qme,0,0,0,272.0,,,
3,(:Party {'ident': 'N-VA'}),"(:User {'community': 0, 'created_at': '2014-03...",2024-02-17T06:29:11+00:00,251,UNWRA:Minstens 30 medewerkers zouden volgens I...,False,1758740358400274623,nl,8,63,88,14767.0,,,
4,(:Party {'ident': 'N-VA'}),"(:User {'community': 0, 'created_at': '2014-03...",2024-02-16T12:33:31+00:00,31,1048 Jongeren tussen 18 en 24 jaar werden er i...,False,1758469657986564350,nl,2,10,5,1541.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16833,(:Party {'ident': 'VB'}),"(:User {'community': 5, 'created_at': '2010-02...",2024-05-02T13:46:52+00:00,0,"RT @vlbelang: ""Wij zijn zeker bereid om compro...",False,1786029594715324608,nl,0,0,7,,,,
16834,(:Party {'ident': 'VB'}),"(:User {'community': 5, 'created_at': '2010-02...",2024-05-02T13:46:24+00:00,0,"RT @vlbelang: ""Dit zijn de typische linkse rec...",False,1786029474951115134,nl,0,0,3,,,,
16835,(:Party {'ident': 'VB'}),"(:User {'community': 5, 'created_at': '2010-02...",2024-05-02T11:06:30+00:00,0,"RT @PBoonefaes: ""On sociaal te zijn hebben we ...",False,1785989235729695007,nl,0,0,2,,,,
16836,(:Party {'ident': 'VB'}),"(:User {'community': 5, 'created_at': '2010-02...",2024-05-02T10:55:57+00:00,0,"RT @PALNWS: ""Als vroeger onze competitiviteit ...",False,1785986579661476153,nl,0,0,1,,,,


In [13]:
dutch_tweets = pol_tweets[pol_tweets['lang'] == 'nl']

In [14]:
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from transformers import pipeline
import torch

In [15]:
model_name = "DTAI-KULeuven/robbert-v2-dutch-sentiment"
model = RobertaForSequenceClassification.from_pretrained(model_name)
tokenizer = RobertaTokenizer.from_pretrained(model_name)

classifier = pipeline('sentiment-analysis', model=model, tokenizer = tokenizer, device=0)

In [16]:
device = torch.device('cuda')
model.to(device)

RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(40000, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
             

In [17]:
sentiment = classifier(dutch_tweets['full_text'].to_list())

In [18]:
sentiment

[{'label': 'Negative', 'score': 0.9339827299118042},
 {'label': 'Positive', 'score': 0.9970736503601074},
 {'label': 'Positive', 'score': 0.9728376865386963},
 {'label': 'Positive', 'score': 0.9990109205245972},
 {'label': 'Positive', 'score': 0.9985503554344177},
 {'label': 'Positive', 'score': 0.997927188873291},
 {'label': 'Positive', 'score': 0.8989135026931763},
 {'label': 'Positive', 'score': 0.7390094995498657},
 {'label': 'Positive', 'score': 0.9935051202774048},
 {'label': 'Negative', 'score': 0.8849559426307678},
 {'label': 'Positive', 'score': 0.999193012714386},
 {'label': 'Positive', 'score': 0.9236530661582947},
 {'label': 'Positive', 'score': 0.9991877675056458},
 {'label': 'Positive', 'score': 0.99257892370224},
 {'label': 'Positive', 'score': 0.910633385181427},
 {'label': 'Positive', 'score': 0.9252086877822876},
 {'label': 'Positive', 'score': 0.9961661100387573},
 {'label': 'Positive', 'score': 0.9989672899246216},
 {'label': 'Positive', 'score': 0.9713600277900696}

In [43]:
dutch_tweets = dutch_tweets.reset_index(drop=True)

In [45]:
dutch_tweets[['label', 'score']] = pd.DataFrame(sentiment)

In [47]:
dutch_tweets.to_csv('dutch_tweets_pol.csv')

In [19]:
# model_name = "DTAI-KULeuven/robbert-v2-dutch-sentiment"
# model = RobertaForSequenceClassification.from_pretrained(model_name)
# tokenizer = RobertaTokenizer.from_pretrained(model_name)

# classifier = pipeline('sentiment-analysis', model=model, tokenizer = tokenizer, device=0)

# Load the sentiment analysis pipeline
model_name = "nlptown/bert-base-multilingual-uncased-sentiment"
# model2 = 
sentiment_pipeline = pipeline("sentiment-analysis", model=model_name, device=0)

# Example tweet
# tweet = "UNWRA:Minstens 30 medewerkers zouden volgens Isra√´l betrokken zijn bij terroristische aanval op 7/10/23.185 Zouden linken met gewapende tak Hamas hebben, 51 met Islamitische Jihad.Wanneer handelt @carogennez ?https://t.co/AsycYv2pyW"

# Analyze sentiment
result_all = sentiment_pipeline(tweets[tweets['lang'] == 'nl']['full_text'].to_list())
# print(result)





  attn_output = torch.nn.functional.scaled_dot_product_attention(


In [36]:
dutch_tweets_all = tweets[tweets['lang'] == 'nl'].reset_index(drop=True)

In [37]:
dutch_tweets_all[['label', 'score']] = pd.DataFrame(result_all)

In [38]:
dutch_tweets_all

Unnamed: 0,created_at,favorite_count,full_text,has_birdwatch_notes,ident,lang,quote_count,reply_count,retweet_count,views_count,birdwatch_classification,birdwatch_tags,ranked_classification,label,score
0,2024-02-18T17:37:46+00:00,93,Schokkende beelden: ‚ÄúUNRWA-medewerker neemt li...,False,1759271000850501861,nl,3,19,30,8025.0,,,,1 star,0.400208
1,2024-02-18T12:13:18+00:00,445,"Vierdaagse werkweek van 32 uur, jongeren meer ...",False,1759189343275466980,nl,8,109,80,34771.0,,,,5 stars,0.428922
2,2024-02-17T06:29:11+00:00,251,UNWRA:Minstens 30 medewerkers zouden volgens I...,False,1758740358400274623,nl,8,63,88,14767.0,,,,1 star,0.656130
3,2024-02-16T12:33:31+00:00,31,1048 Jongeren tussen 18 en 24 jaar werden er i...,False,1758469657986564350,nl,2,10,5,1541.0,,,,1 star,0.491863
4,2024-02-14T06:45:41+00:00,0,RT @MichaelFreilich: Dringend strenger optrede...,False,1757657344710787269,nl,0,0,163,,,,,1 star,0.703010
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
169232,2024-05-02T13:46:24+00:00,0,"RT @vlbelang: ""Dit zijn de typische linkse rec...",False,1786029474951115134,nl,0,0,3,,,,,1 star,0.543140
169233,2024-05-02T11:06:30+00:00,0,"RT @PBoonefaes: ""On sociaal te zijn hebben we ...",False,1785989235729695007,nl,0,0,2,,,,,1 star,0.674253
169234,2024-05-02T08:56:55+00:00,782,"Hij kwam ""demonstreren"" voor Hamas. Moeder pik...",True,1785956623896228210,nl,37,206,130,79662.0,,,,1 star,0.413162
169235,2024-05-02T10:55:57+00:00,0,"RT @PALNWS: ""Als vroeger onze competitiviteit ...",False,1785986579661476153,nl,0,0,1,,,,,1 star,0.279635


In [41]:
dutch_tweets_all.to_csv('dutch_tweets_all.csv')

In [30]:
pd.read_csv('dutch_tweets_all.csv')

  pd.read_csv('dutch_tweets_all.csv')


Unnamed: 0.1,Unnamed: 0,created_at,favorite_count,full_text,has_birdwatch_notes,ident,lang,quote_count,reply_count,retweet_count,views_count,birdwatch_classification,birdwatch_tags,ranked_classification,label,score
0,0,2024-02-18T17:37:46+00:00,93,Schokkende beelden: ‚ÄúUNRWA-medewerker neemt li...,False,1759271000850501861,nl,3,19,30,8025.0,,,,1 star,0.400208
1,1,2024-02-18T12:13:18+00:00,445,"Vierdaagse werkweek van 32 uur, jongeren meer ...",False,1759189343275466980,nl,8,109,80,34771.0,,,,5 stars,0.428922
2,5,2024-02-17T06:29:11+00:00,251,UNWRA:Minstens 30 medewerkers zouden volgens I...,False,1758740358400274623,nl,8,63,88,14767.0,,,,1 star,0.302064
3,6,2024-02-16T12:33:31+00:00,31,1048 Jongeren tussen 18 en 24 jaar werden er i...,False,1758469657986564350,nl,2,10,5,1541.0,,,,1 star,0.549122
4,7,2024-02-14T06:45:41+00:00,0,RT @MichaelFreilich: Dringend strenger optrede...,False,1757657344710787269,nl,0,0,163,,,,,1 star,0.601674
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
169232,340704,2024-05-02T13:46:24+00:00,0,"RT @vlbelang: ""Dit zijn de typische linkse rec...",False,1786029474951115134,nl,0,0,3,,,,,,
169233,340705,2024-05-02T11:06:30+00:00,0,"RT @PBoonefaes: ""On sociaal te zijn hebben we ...",False,1785989235729695007,nl,0,0,2,,,,,,
169234,340707,2024-05-02T08:56:55+00:00,782,"Hij kwam ""demonstreren"" voor Hamas. Moeder pik...",True,1785956623896228210,nl,37,206,130,79662.0,,,,,
169235,340708,2024-05-02T10:55:57+00:00,0,"RT @PALNWS: ""Als vroeger onze competitiviteit ...",False,1785986579661476153,nl,0,0,1,,,,,,
