# Between Human Dignity and Security: Identifying Citizen and Elite Preferences and Concerns over Refugee Reception

---



In [1]:
# =====================================================
# Required Package Installation (Uncomment if needed)
# =====================================================
# Uncomment the lines below if you need to install these packages.
# install.packages("quanteda")
# install.packages("quanteda.textstats")
# install.packages("quanteda.textplots")
# install.packages("lubridate")
# install.packages("readtext")
# install.packages("corpus")
# install.packages("pastecs")
# install.packages("RColorBrewer")
# install.packages("tm")
# install.packages("ggwordcloud")
# install.packages("extrafont")
# install.packages("ggpubr")

# =====================================================
# Load Required Libraries
# =====================================================

library(readxl)            # For reading Excel files
library(tidyverse)         # Core tidyverse packages: dplyr, ggplot2, tidyr, etc.
library(writexl)           # For writing Excel files
library(RColorBrewer)      # Provides color palettes for visualizations
library(tm)                # Text mining package
library(gridExtra)         # For arranging grid-based plots
library(pastecs)           # Descriptive statistics
library(quanteda)          # Text analysis package
library(quanteda.textstats)# Text statistics for quanteda objects
library(quanteda.textplots)# Visualization tools for text data
library(readtext)          # Efficient reading of text files (e.g., PDFs, .txt)
library(SnowballC)         # Snowball stemmer for text preprocessing
library(ggwordcloud)       # Word cloud visualizations using ggplot2
library(extrafont)         # Additional fonts for publication-quality plots
library(ggpubr)            # 'ggplot2'-based publication-ready plots
library(readxl)
library(stringr)
library(dplyr)

"package 'stringr' was built under R version 4.3.3"
"package 'lubridate' was built under R version 4.3.3"
── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.2     [32m✔[39m [34mreadr    [39m 2.1.4
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.1
[32m✔[39m [34mggplot2  [39m 3.4.2     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mlubridate[39m 1.9.4     [32m✔[39m [34mtidyr    [39m 1.3.0
[32m✔[39m [34mpurrr    [39m 1.0.1     
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors
"package 'RColorBrewer' was built under R version 4.3.1"
"packa

In [2]:
# Read the citizen and councilor response tables from their Excel workbooks
citizens <- read_excel(path = "data/citizens_keyness_analysis.xlsx")
councilors <- read_excel(path = "data/councilors_keyness_analysis.xlsx")

# Define substitutions in a named vector
# Each name is the text to look for and each value is what replaces it; the
# vector is handed to stringr::str_replace_all() by normalize_text().
# str_replace_all() treats the names as regular expressions and applies the
# pairs one after another in the order listed, so later rules see the output
# of earlier ones. Do not reorder entries without re-checking the results.
# NOTE(review): several pairs are mutual inverses (e.g. "νομος" -> "νομο"
# followed by "νομο" -> "νομος"); applied in sequence, both spellings end up
# as the value of the later rule.
# NOTE(review): patterns are not anchored to word boundaries, so they also
# match inside longer words (e.g. "τηρει", "νομο", "βοηθα") - confirm this is
# intended.
replacements <- c(
  "1%" = "ενα_τις_εκατο",
  "_πολυ" = "πολυ",
  "α πολυ τως" = "απολυτως",
  "διαβιωσεις" = "διαβιωσης",
  "διαβιωσης" = "διαβιωση",
  "διαβιωση" = "διαβιωσης",
  "ελεγχεται" = "ελεγχομενη",
  "παιδια" = "παιδι",
  "ελεγχομενα" = "ελεγχομενη",
  "μορφωσει" = "μορφωση",
  "διασφαλιζει" = "διασφαλιζε",
  "διασφαλιζε" = "διασφαλιζει",
  "εισβολεας" = "εισβολεα",
  "εισβολεα" = "εισβολεας",
  "νησια" = "νησι",
  "νησι" = "νησια",
  "ανθρωπινα" = "ανθρωπινες",
  "μουσουλμανοι" = "μουσουλμανο",
  "μουσουλμανο" = "μουσουλμανοι",
  "νομος" = "νομο",
  "νομο" = "νομος",
  "μορφωσε" = "μορφωση",
  "περιθαλψει" = "περιθαλψη",
  "κανονας" = "κανονα",
  "κανονα" = "κανονας",
  "τηρηθουν" = "τηρηση",
  "α κλειστες" = "κλειστες",
  "εξοδα" = "εξοδο",
  "προσωρινες" = "προσωρινα",
  # The replacement is padded with spaces, so "%" becomes a separate token
  "ποσοστο" = " % ",
  "γικλειστες" = "κλειστες",
  "σηκωσει" = "σηκωνει",
  "μικλειστες" = "κλειστες",
  "απελασει" = "απελαση",
  "εντασσει" = "ενταξη",
  "τηρει" = "τηρηση",
  "ενσωματωθει" = "ενσωματωση",
  "εγκληματιας" = "εγκληματιες",
  "βοηθα" = "βοηθεια",
  "επιβαλλει" = "επιβαλλουν",
  "αναγνωρισμενες" = "αναγνωρισμενοι"
)

# Normalise text by running every substitution rule over it with stringr.
#   text         - character vector to clean
#   replacements - named character vector; names are the patterns to find and
#                  values are the text substituted for each match
# Returns the cleaned character vector.
normalize_text <- function(text, replacements) {
  cleaned_text <- str_replace_all(string = text, pattern = replacements)
  cleaned_text
}

# Overwrite the 'cleaned' column of each dataset with its normalised version
citizens <- mutate(citizens, cleaned = normalize_text(cleaned, replacements))
councilors <- mutate(councilors, cleaned = normalize_text(cleaned, replacements))


[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`


In [3]:
# One group label per row of each dataset
citizen_labels <- rep("citizen", nrow(citizens))
councilors_labels <- rep("councilor", nrow(councilors))

# Citizens: keep the shared analysis columns and append the group label
citizens_for_combined <- citizens %>%
  select(Anonymous_id, cleaned, Q26_edu, pol_orient, Q29_income, Q10_c_post, treat1) %>%
  mutate(citizen_or_councilor = citizen_labels)

# Councilors: keep the equivalent columns, rename them to the citizen column
# names, coerce Q10_c_post to numeric (values that cannot be parsed become NA
# with a warning) and append the group label
councilors_for_combined <- councilors %>%
  select(id_anonymous, cleaned, Q26_edu, pol_orient_x, Q29_income, Q10_c, treat1) %>%
  rename(
    Anonymous_id = id_anonymous,
    pol_orient = pol_orient_x,
    Q10_c_post = Q10_c
  ) %>%
  mutate(
    Q10_c_post = as.numeric(as.character(Q10_c_post)),
    citizen_or_councilor = councilors_labels
  )

# Stack both groups into a single data frame (citizens first)
combined <- bind_rows(citizens_for_combined, councilors_for_combined)

# quanteda corpora built from the cleaned text of each dataset
corp_councilors <- corpus(councilors$cleaned)
corp_citizens <- corpus(citizens$cleaned)
corp_combined <- corpus(combined$cleaned)

"NAs introduced by coercion"
"NA is replaced by empty string"
"NA is replaced by empty string"
"NA is replaced by empty string"


In [4]:
# Base Greek stop-word list: the 'word' column of the 'stopwords' sheet
stop_words <- read_excel("data/stopwords_GR.xlsx", sheet = "stopwords")$word

# Append project-specific stop words (function words, truncated fragments and
# other terms excluded from the analysis). Repeated entries are harmless
# because the vector is only used as a set of removal patterns below.
stop_words <- c(stop_words,
                     "μου","των","στις","ως","τετοια","διοτι","λογο","ειχα","μπορει","μας","μεσα","μεσου","και","να","τα","με","τον","τους",
                     "σε","θα","οι","ειναι","καθως","στην","στο","γιατι","επειδη","αφου","στα","στον","θεωρουμε","δεν","καμια","δομ","του",
                     "ομως","πληθυσμο","της","απο","ισχυρα","εχει","ζησει","οχι","για","αποτελει","κυριως","εχει","εν","κι","την","το","κυριως",
                     "εκτος","επιπεδα","οτι","εδω","υποστηριζα","φυσικα","σχετικα","συμβαν","εδω","ειτε","μια","δε","ας","καθε","μαζι","ον","στ","τοπ",
                     "κραταει","θελαμε","γινανε","μονα","θελει","μεριας","δεχεται","κατοικει","ερχεται","ενταξει","θεωρει","καν","χρειαζεστε","προσωπα","δινει",
                     "ερθει","υποστηριξει","καταστασεις","υπολοιπα","βρισκανε","φιλοξενουμενο","πιστευει","λογης","γινει","μονη","υποστηριξα","προσωπικα","πιστευοντας",
                     "χρειαζεται","καταστασεων","κανενα","μερα","λα","θες","κοσμικου","αφορα","κλπ","αν","στοιχεια","πρεπει","περιπτωσεις","πολλα","δικα","ηρθα","ουτε",
                     "πρωτα","λιγα","τη","επιπλεον","διαφορετικα","οπως","οταν","δευτερη","κοσμο","δημος","πληρως","δινεται","ειμαστε","της","κατα","καποια","πραγμα","τρ",
                     "οποια","φιλοξενει","μεχρι","τις","εφοσον","προκειμενου","συστημα","κεντρα","αδυνατο","κυριαρχει","αιτησεις","αυτην","εως","μονον","λαβει",
                     "ζηταγαν","δυνατη","ζηταγα","ουσιαστικα","τωρα","συμβαινει","προσπαθεια","αυτα","αλλες","μεγαλες","πληρη","λυσει","τελος","ετσι","δηλωση","μην","σημαντικα","λαθος","εννοια",
                     "δυναμει","ζω","υπαρξει","ετη","θεωρειται","ηταν","εναντι","ειδη","εγκατασταθει","οσο","χρονια","καταλληλα","που","τεκνα","κερκυρα","βασει","ατομα","εμπειρια","εκει","δηλαδη",
                     "απολυτα","ακομα","φυγει","νομιζω","τουλαχιστον","δεδομενα","αλλα","επιθυμει","σχολη","θετικα","συνολικα","μακρα","εντελως","καθολου","πολιτες","φθανει","φευγουν","υποδεχθει","υπαρχει",
                        "τυπου","συναφη","υ","παει","τροπο","ελεγα","ερωτηματολογιο","πολεις","δυο","γενικα","φιλοξενιας","ς","υς","πηγαινα","επιλογες","ωστε","διαδ","ομορα","μεν","πανε","ζει","ορι",
                        "θεμα","φορα","σωστα","αριθμο","ζητα","ποσοστα","μπαιναν","υπηρχαν","προσπαθει","τυπικη","σπιτι","προσπαθει","μες","ξεραμε","βραν","μιλαει","σχεσει","μεινανε","ενω","χωρα","προσφυγας",
                         "μεταναστες","προυποθεσεις","διαμονη","λειτουργει","φερανε","παρεχει","αντιμετωπισει","προκειται","σκοπια","νεα","εγω","εκτη","ελευθερα","δοθει",
                        "στηριξει","μαζα","ιδια","εισερχεται","επρπε","γνωριζα","δυστυχως","αιτουντα","φυγανε","υποδοχη","διαφωνεις","οντως","τελει","αντιθετρα","παραμονες","ιστο","σιγουρα","ερευνα","παιρναμε",
                         "πλεον","μαθαμε","αντιμετωπιζαν","προκαλει","συγκεκριμενα","ασυλα","ελαχιστα","ειδα","βλεπει","απολυτως","επιστρεψουμε","ερωτησεις","δρασεις","ζητημα","διαμενουν","ροες","προσφεραν",
                         "πληρωναν","φιλοξενουμενοι","διαμενουν","ελλαδα","ελληνας","περιοχες","βασικα","επρεπε","στηριζα","προερχεται","ξενα","αρχας","δεχτει","υποχρεωσεις","βοηθεια","περισσοτερα","διαφορα",
                        "ζωνες","ελεγχαν","χωρανε","αρκετα","κατοικια","αμεσα","ορο","διαφωνω","γνωριζει","μεγαλα","παρολα","κτλ","α","'","αρα","τυχη","πω","φτιαχτε","πραξει","τυχει","τριτα","σειρα","συνεχιζει",
                        "ουτως","ουσια","οποιοδηποτε","ομαδα","λεγεται","επιτρεπει","λεξεις","απαντες","αντιστοιχα","στειλει","στοχο","χαρακτηρα","ντρεπεστε","κομματι","κυνηγαει","ανθρωπο","ετοιμο","αποκτησει",
                        "πλαισια","αυξημενες","αντιθετα","κυριο","καταρχας","εξυπηρετησουν","απεναντι","πτυχιο","συμπληρωσα","σουβλιου","επομενως","ιδιαιτερα","μεινει","λοιπον","μπει","απαντησεις","προτασεις",
                        "ερωτημα","ουδεν","ακολουθει","πολλου","αλλαξει","σημασια","βιωνουμε","αποτελειται","αιτημα","ωρα","αφηνει","παταει","αποψεις","απονομη","αποδοση","χρηζουν","αυξαναν","νιωθει","οσ","ος",
                        "μον","νε","εχ","λογ","τες","δι","ις","παν","βλεπε","πρεπε","ξερα","φεραν","μιλα","μερη","λτ","γτ","σχεδον","φευγανε","βαζει","τχα","ες","δνα","δν","απ","δειχνει","τελειως","δει","δομη",
                        "δομες","φτιαχτει","γεματα","κατασκευες","περνα","ακολουθανε","οδηγει","υποψη","διαθετει","δηθεν","επικρατει","ιαπωνια","ευρυτερη","παραπανω")

# Stop-word list used when filtering tokens before n-gram construction.
# It is the same set as `stop_words`: the custom terms this list needs have
# already been appended to `stop_words`, so they are not repeated here
# (keeping a second literal copy only invites the two lists to drift apart).
# The vector is used purely as a set of removal patterns, so dropping the
# duplicated entries does not change which tokens are removed.
stop_words_bigrams <- stop_words

# Custom bigrams to discard after n-gram construction. Each entry is written
# as "word1_word2"; the cells below pass this vector to tokens_remove() once
# the bigrams exist. A few entries are listed twice, which has no effect on
# removal.
st <- c("μουσουλμανοι_μουσουλμανοι","αναλογα_αναλογα","απεναντι_παιδια","ανθρωπια_τοπικη","ανθρωπια_τουρκια","παιδια_ενταξη",
        "κλειστες_ανθρωπια","διαβιωση_σεβασμο","ανθρωπια_ανθρωπια","καθετα_αντιθετα","ανθρωπια_κοινωνια","εξετασει_ασυλο","αιτημα_ασυλο","χορηγηση_ασυλο",
        "πραγματικος_λαθρο","ομαλα_κοινωνια","παροχες_ασυλο","πραγματικος_εμπολεμη","βοηθα_ανθρωπια","πυλες_εισοδο","5_%","ανοικτη_κλειστες",
        "λυνεται_προβλημα","ανθρωπια_παρανομα","προβλημα_προβλημα","ασυλο_επιστρεφανε","ανθρωπια_πραγματικος","νομιμα_πυλες","κοινωνια_οικονομικη",
        "διαβιωση_ανθρωπια","βοηθα_οικονομικοι","κλειστες_ακατοικητα","ανδρες_γυναικα","στρατος_εκκλησια","ανθρωπινες_αξιοπρεπεια","πολεμος_οικονομικοι",
        "μουσουλμανοι_μουσουλμανοι","αναλογα_αναλογα","λαθρο_λαθρο","απεναντι_παιδια","ανθρωπια_τοπικη","ανθρωπια_τουρκια")

In [5]:
# Citizens: bigram frequencies, filtering with every removal list.
# The stop words, the bigram stop-word list and the custom bigram phrases are
# gathered once and reused in each removal step.
removal_terms <- c(stop_words, st, stop_words_bigrams)

# Tokenize the cleaned citizen text and drop tokens that match a removal term
# exactly ("fixed" matching)
toks <- tokens_remove(
  tokens(citizens$cleaned),
  pattern = phrase(removal_terms),
  valuetype = "fixed"
)

# Second removal pass, this time with tokens_select()'s default matching
toks_ngram <- tokens_select(toks, pattern = phrase(removal_terms), selection = "remove")

# Join the surviving tokens into bigrams (n = 2)
toks_ngram <- tokens_ngrams(toks_ngram, n = 2)

# Drop any bigram that matches a removal term, then tabulate the rest into a
# document-feature matrix
dfmat <- dfm(tokens_remove(toks_ngram, removal_terms))

# Frequency table of the remaining bigrams
fr_1 <- textstat_frequency(dfmat)

"NA is replaced by empty string"


In [6]:
# Councilors: bigram frequencies, filtering with every removal list.
# The stop words, the bigram stop-word list and the custom bigram phrases are
# gathered once and reused in each removal step.
removal_terms <- c(stop_words, st, stop_words_bigrams)

# Tokenize the cleaned councilor text and drop tokens that match a removal
# term exactly ("fixed" matching)
toks <- tokens_remove(
  tokens(councilors$cleaned),
  pattern = phrase(removal_terms),
  valuetype = "fixed"
)

# Second removal pass, this time with tokens_select()'s default matching
toks_ngram <- tokens_select(toks, pattern = phrase(removal_terms), selection = "remove")

# Join the surviving tokens into bigrams (n = 2)
toks_ngram <- tokens_ngrams(toks_ngram, n = 2)

# Drop any bigram that matches a removal term, then tabulate the rest into a
# document-feature matrix
dfmat <- dfm(tokens_remove(toks_ngram, removal_terms))

# Frequency table of the remaining bigrams
fr_2 <- textstat_frequency(dfmat)

"NA is replaced by empty string"


In [7]:
# Citizens: bigram frequencies, filtering tokens with the bigram stop-word
# list only. This overwrites the `fr_1` computed earlier.

# Tokenize the cleaned citizen text and drop exact ("fixed") stop-word matches
toks <- tokens_remove(
  tokens(citizens$cleaned),
  pattern = phrase(stop_words_bigrams),
  valuetype = "fixed"
)

# Second removal pass, this time with tokens_select()'s default matching
toks_ngram <- tokens_select(
  toks,
  pattern = phrase(stop_words_bigrams),
  selection = "remove"
)

# Join the surviving tokens into bigrams (n = 2)
toks_ngram <- tokens_ngrams(toks_ngram, n = 2)

# Drop stop words and the custom bigram phrases from the bigrams, then
# tabulate the rest into a document-feature matrix
dfmat <- dfm(tokens_remove(toks_ngram, c(stop_words, st)))

# Frequency table of the remaining bigrams
fr_1 <- textstat_frequency(dfmat)

"NA is replaced by empty string"


In [8]:
# Councilors: bigram frequencies, filtering tokens with the bigram stop-word
# list only. This overwrites the `fr_2` computed earlier.

# Tokenize the cleaned councilor text and drop exact ("fixed") stop-word
# matches
toks <- tokens_remove(
  tokens(councilors$cleaned),
  pattern = phrase(stop_words_bigrams),
  valuetype = "fixed"
)

# Second removal pass, this time with tokens_select()'s default matching
toks_ngram <- tokens_select(
  toks,
  pattern = phrase(stop_words_bigrams),
  selection = "remove"
)

# Join the surviving tokens into bigrams (n = 2)
toks_ngram <- tokens_ngrams(toks_ngram, n = 2)

# Drop stop words and the custom bigram phrases from the bigrams, then
# tabulate the rest into a document-feature matrix
dfmat <- dfm(tokens_remove(toks_ngram, c(stop_words, st)))

# Frequency table of the remaining bigrams
fr_2 <- textstat_frequency(dfmat)

"NA is replaced by empty string"


## Political orientation

In [9]:
# Political-orientation label: a score of 5 or more is coded as right-wing,
# anything below 5 as left-wing. Rows with a missing score get an NA label.
citizens$left_or_right <- with(citizens, ifelse(pol_orient >= 5, "Right-wing citizens", "Left-wing citizens"))
councilors$left_or_right <- with(councilors, ifelse(pol_orient_x >= 5, "Right-wing councilors", "Left-wing councilors"))
combined$left_or_right <- with(combined, ifelse(pol_orient >= 5, "Δεξιοί", "Αριστεροί"))

# Education label: university degree or postgraduate/doctoral diploma counts
# as high education, every other answer as low education. A missing answer
# yields an NA label (the `==` comparisons propagate NA).
citizens$edu_level <- with(citizens, ifelse(Q26_edu == "Πτυχίο ΑΕΙ" | Q26_edu == "Μεταπτυχιακό ή/ και Διδακτορικό Δίπλωμα", "Πολίτες - Υψηλή εκπαίδευση", "Πολίτες - Χαμηλή εκπαίδευση"))
councilors$edu_level <- with(councilors, ifelse(Q26_edu == "Πτυχίο ΑΕΙ" | Q26_edu == "Μεταπτυχιακό ή/ και Διδακτορικό Δίπλωμα", "Δημοτικοί Σύμβουλοι - Υψηλή εκπαίδευση", "Δημοτικοί Σύμβουλοι - Χαμηλή εκπαίδευση"))
combined$edu_level <- with(combined, ifelse(Q26_edu == "Πτυχίο ΑΕΙ" | Q26_edu == "Μεταπτυχιακό ή/ και Διδακτορικό Δίπλωμα", "Υψηλή εκπαίδευση", "Χαμηλή εκπαίδευση"))

# Split the combined data by orientation and relabel the group column.
# which() keeps only rows where the comparison is TRUE; indexing with the raw
# logical vector would insert an all-NA row for every respondent whose
# pol_orient is missing.
right_citizen_or_councilor <- combined[which(combined$pol_orient >= 5), ]
right_citizen_or_councilor$citizen_or_councilor <- with(
  right_citizen_or_councilor,
  ifelse(citizen_or_councilor == "citizen", "Right-wing citizens", "Right-wing councilors")
)

left_citizen_or_councilor <- combined[which(combined$pol_orient < 5), ]
left_citizen_or_councilor$citizen_or_councilor <- with(
  left_citizen_or_councilor,
  ifelse(citizen_or_councilor == "citizen", "Left-wing citizens", "Left-wing councilors")
)

# Display label for the group in the combined data. The column keeps its
# historical "_greek" suffix although the labels themselves are in English.
combined$citizen_or_councilor_greek <- with(combined, ifelse(citizen_or_councilor == "citizen", "Citizens", "Councilors"))

# Figure C. 2: Word clouds of right-wing citizens and right-wing councilors 

In [10]:
set.seed(1234) # for reproducibility

# Right-wing citizens: build a corpus from the 'cleaned' text.
# which() keeps only rows where the comparison is TRUE; indexing with the raw
# logical vector would return an all-NA row for every NA comparison.
right_citizens <- right_citizen_or_councilor[
  which(right_citizen_or_councilor$citizen_or_councilor == "Right-wing citizens"),
]
corp_inaug_1 <- corpus(right_citizens, text_field = "cleaned")
docid_1 <- paste(right_citizens$citizen_or_councilor)
docnames(corp_inaug_1) <- docid_1

# Tokenize, remove stopwords, and create ngrams (unigrams and bigrams)
toks_1 <- corp_inaug_1 %>%
  tokens() %>%
  tokens_remove(pattern = phrase(stop_words), valuetype = "fixed")
toks_ngram_1 <- tokens_select(toks_1, pattern = phrase(stop_words_bigrams), selection = "remove")
toks_ngram_1 <- tokens_ngrams(toks_ngram_1, n = 1:2)

# Remove additional stopwords and custom bigram phrases, then create a document-feature matrix
dfmat_1 <-
  toks_ngram_1 %>%
  tokens_remove(c(stop_words, st)) %>%
  dfm()

# Calculate frequency statistics for right-wing citizens
fr_1 <- textstat_frequency(dfmat_1)

# English labels for plotting. They are assigned BY POSITION to the first
# length(x) rows of fr_1, so the vector is only valid for the exact feature
# ordering it was written against; re-check it whenever the data, the
# replacements or the stop-word lists change.
x <- c('greek','pseudo','closed','asylum','society','real','child','war','economic','problem','europe','illegal','family','deportation','non_governmental_organisations','war','turkey','controlled',
      'conditions','%','homeland','education','respected','jobs','law','integration','islands','right','language','money','legal','unfair','culture','customs','small','allowance','muslims','pay','syria',
      'governments','morals','humane','european_union','schools','strict','needs','women','security','religion','income','dignity','culture','moral_customs','infrastructure','livelihood','army','one_of_a_hundred',
      'job','joining','border','places','entitled','closed_controlled','national','greek_society','crime','papers','health','temporary','benefits','moment','origin','criminal','norm','conduct','christian',
      'international','minors','safe','police','respect','heavy','management','afghanistan','proportions','space','capability','invader','organizations','criteria','greek_language','compliance',
      'living_conditions','identity','mandatory','majorities','men')

# Fail with a clear message if there are more labels than features
stopifnot(length(x) <= nrow(fr_1))
fr_1$feature[seq_along(x)] <- x

# Right-wing councilors: build a corpus from the 'cleaned' text.
# which() keeps only rows where the comparison is TRUE; indexing with the raw
# logical vector would return an all-NA row for every NA comparison.
right_councilors <- right_citizen_or_councilor[
  which(right_citizen_or_councilor$citizen_or_councilor == "Right-wing councilors"),
]
corp_inaug_2 <- corpus(right_councilors, text_field = "cleaned")
docid_2 <- paste(right_councilors$citizen_or_councilor)
docnames(corp_inaug_2) <- docid_2

# Tokenize, remove stopwords, and create ngrams (unigrams and bigrams)
toks_2 <- corp_inaug_2 %>%
  tokens() %>%
  tokens_remove(pattern = phrase(stop_words), valuetype = "fixed")
toks_ngram_2 <- tokens_select(toks_2, pattern = phrase(stop_words_bigrams), selection = "remove")
toks_ngram_2 <- tokens_ngrams(toks_ngram_2, n = 1:2)

# Remove additional stopwords and custom bigram phrases, then create a document-feature matrix
dfmat_2 <-
  toks_ngram_2 %>%
  tokens_remove(c(stop_words, st)) %>%
  dfm()

# Calculate frequency statistics for right-wing councilors
fr_2 <- textstat_frequency(dfmat_2)

# English labels for plotting. They are assigned BY POSITION to the first
# length(x) rows of fr_2, so the vector is only valid for the exact feature
# ordering it was written against; re-check it whenever the data, the
# replacements or the stop-word lists change.
x <- c('society','local','closed','local_society','integration','Greek','asylum','problem','security','integration','economic','infrastructure','conditions','proportionate','one_hundredth','small',
  'real','jobs','benefits','education','children','health','controls','family','governments','livelihood','muslims','schools','normal','suitable','needs','culture','border','europe','strict',
  '%','plans','turkey','army','language','law','administration','unfair','criteria','exit','entrance','facilities','tourist','council','temporary','locations','permanent','pseudo','state','controlled',
  'entry_exit','european_union','humane','reciprocating','money','compensatory','customs','syria','compliance','policing','islands','religion','integration_local','indigenous','territory','space',
  'program','allocation','tourism','services','police','war','quotas','compensatory_benefits','unemployment','non_governmental_organisations','care','services','resources','remuneration',
  'culture','measures','living_conditions','severe','legal','enormity','greek_society','ghettoization','production','self-governance','local_self-governance','open','lesson','dignity','reinforcements',
  'assimilation','fair','relationships','shares','ethics','ethics_customs','organizations','community','Greek_language','raises','hirings','gradual','business','recruitment','necessary',
  'construction','easy','coherence','responsibility','assimilated','strict_criteria','difficult','%_one_of_a_hundred','humanitarian','treatment','religious','work','psychological','paradigm')

# Fail with a clear message if there are more labels than features
stopifnot(length(x) <= nrow(fr_2))
fr_2$feature[seq_along(x)] <- x

# Set plot size for the notebook display
options(repr.plot.width = 10, repr.plot.height = 15)

# Word cloud for right-wing citizens: up to 100 terms, largest first, no
# rotation, all black
plot1 <- ggwordcloud(fr_1$feature, fr_1$frequency, scale = c(4, 1),
  max.words = 100, random.order = FALSE, random.color = FALSE,
  rot.per = 0, colors = "black", ordered.colors = FALSE, shape = "circle") +
  theme(plot.title = element_text(hjust = 0.5, size = 40, vjust = -2)) +
  ggtitle("Right-wing citizens")

# Word cloud for right-wing councilors, same settings
plot2 <- ggwordcloud(fr_2$feature, fr_2$frequency, scale = c(4, 1),
  max.words = 100, random.order = FALSE, random.color = FALSE,
  rot.per = 0, colors = "black", ordered.colors = FALSE, shape = "circle") +
  theme(plot.title = element_text(hjust = 0.5, size = 40, vjust = -4)) +
  ggtitle("Right-wing councilors")

# Write both clouds, one above the other, to the figure file.
# dev.off() sits in `finally` so the png device is closed even if drawing
# fails; otherwise later plots would keep going to this file.
png("../figures/Figure C2.png", width = 2400, height = 3600, res = 200)
invisible(tryCatch(
  grid.arrange(plot1, plot2, nrow = 2),
  finally = dev.off()
))

"NA is replaced by empty string"
"NA is replaced by empty string"


# Figure C. 3: Word clouds of left-wing citizens and left-wing councilors

In [11]:
set.seed(1234) # for reproducibility

# Left-wing citizens: build a corpus from the 'cleaned' text.
# which() keeps only rows where the comparison is TRUE; indexing with the raw
# logical vector would return an all-NA row for every NA comparison.
left_citizens <- left_citizen_or_councilor[
  which(left_citizen_or_councilor$citizen_or_councilor == "Left-wing citizens"),
]
corp_inaug_1 <- corpus(left_citizens, text_field = "cleaned")
docid_1 <- paste(left_citizens$citizen_or_councilor)
docnames(corp_inaug_1) <- docid_1

# Tokenize, remove stopwords, and create ngrams (unigrams and bigrams)
toks_1 <- corp_inaug_1 %>%
  tokens() %>%
  tokens_remove(pattern = phrase(stop_words), valuetype = "fixed")

toks_ngram_1 <- tokens_select(toks_1, pattern = phrase(stop_words_bigrams), selection = "remove")

toks_ngram_1 <- tokens_ngrams(toks_ngram_1, n = 1:2)

# Remove additional stopwords and custom bigram phrases, then create a document-feature matrix
dfmat_1 <-
  toks_ngram_1 %>%
  tokens_remove(c(stop_words, st)) %>%
  dfm()

# Calculate frequency statistics for left-wing citizens
fr_1 <- textstat_frequency(dfmat_1)

# English labels for plotting. They are assigned BY POSITION to the first
# length(x) rows of fr_1, so the vector is only valid for the exact feature
# ordering it was written against; re-check it whenever the data, the
# replacements or the stop-word lists change.
# NOTE(review): the third label is written 'living conditions' (with a space)
# while 'living_conditions' appears later - confirm both are intended.
x <- c('society','humane','living conditions','dignity','Greek','education','child','asylum','living_conditions','economic','problem','right','integration','work','needs','school','open','infrastructure',
      'closed','health','family','europe','human_conditions','money','places','possibility','culture','real','small','language','unfair','security','european_union','care','program','governments','respect',
      'necessities','pay','proper','management','join','war','dignity_conditions','churches','international','input','education','rule','greek_society','open','proportionate','community','controlled','law',
      'access','organizations','non_governmental_organizations','human_rights','benefits','war','women','dignity_of_living','web','solidarity','integration_society','exit','normal','religion','difficult','kids_school','papers','police',
      'organized','medical','resources','facilities','culture','benefits','reinforcements','work','update','plans','fear','medical','stealth','apartment','moment','permanent','racists','minors','transparency',
      'forms','camps','organization','responsible','integration_society','observance','entry_exit','customs','homeland','responsibilities','services','medical_care','illegal','education','prisons','assimilated',
      'funds','psychological','ought','secured','opportunities','care','learning','%','state','adults','healthy','urban','respect','morals','abide','participated','gave','covered','secured','villages','islands')

# Fail with a clear message if there are more labels than features
stopifnot(length(x) <= nrow(fr_1))
fr_1$feature[seq_along(x)] <- x



# Left-wing councilors: build a corpus from the 'cleaned' text.
# which() keeps only rows where the comparison is TRUE; indexing with the raw
# logical vector would return an all-NA row for every NA comparison.
left_councilors <- left_citizen_or_councilor[
  which(left_citizen_or_councilor$citizen_or_councilor == "Left-wing councilors"),
]
corp_inaug_2 <- corpus(left_councilors, text_field = "cleaned")
docid_2 <- paste(left_councilors$citizen_or_councilor)
docnames(corp_inaug_2) <- docid_2

# Tokenize, remove stopwords, and create ngrams (unigrams and bigrams)
toks_2 <- corp_inaug_2 %>%
  tokens() %>%
  tokens_remove(pattern = phrase(stop_words), valuetype = "fixed")

toks_ngram_2 <- tokens_select(toks_2, pattern = phrase(stop_words_bigrams), selection = "remove")

toks_ngram_2 <- tokens_ngrams(toks_ngram_2, n = 1:2)

# Remove additional stopwords and custom bigram phrases, then create a document-feature matrix
dfmat_2 <-
  toks_ngram_2 %>%
  tokens_remove(c(stop_words, st)) %>%
  dfm()

# Calculate frequency statistics for left-wing councilors
fr_2 <- textstat_frequency(dfmat_2)

# English labels for plotting. They are assigned BY POSITION to the first
# length(x) rows of fr_2, so the vector is only valid for the exact feature
# ordering it was written against; re-check it whenever the data, the
# replacements or the stop-word lists change.
# NOTE(review): 'jobs designs' is a single entry; if a comma is missing there,
# every later label is shifted by one - verify against the Greek features.
# NOTE(review): the run starting at 'overcome' appears twice with slightly
# different wording ('adequate'/'sufficient', 'enormous'/'enormity', ...),
# which looks like a duplicated translation line - verify.
x <- c('society','local','conditions','integration','humane','local_society','security','economics','livelihood','open','state','greek','schools','education','governments','dignity','respective','child',
      'jobs designs','benefits','human_conditions','suitable','sanitary','health','european_union','seriously','recruitment','facilities','management','acceptance','needs','europe','compensatory',
      'maintenance','organized','smooth','reliable','services','employment','dignity_conditions','responsibilities','professional','capacity','essentials','records','stay','reinforcements','rule','care',
      'integration_local','unit','integration_society','criteria','positions','measures','custody','easy','military','permanent','urban','gradual','assimilation','law','houses','negative','ghettoization',
      'difficult','basic','residents','compensating_benefits','culture','controlled','sanitary','program','care','respect_humanity','regular_integration','safe','budget','real','dignity_of_living','benefits',
      'learning','language','ethics','customs','locales','ethics','customs','ghetto','reactions','villages','surveillance','churches','hospitals','organizations','inform','non_governmental_organizations',
      'mandated','temporary','legal','implemented','overcome','adequate','poverty','state','doctors','enormous','distances','rewarding','rewarding_benefits','secures','barter','burden','war','unaccompanied',
      'overcome','sufficient','poverty','state','doctors','enormity','distances','rewarding','rewarding_benefits','ensure','exchanges','burden','war','unaccompanied','unaccompanied_child','national','crisis',
      'proportionality','region','profile','plans_join','logic','find','scientists','host','group','construction','construction_infrastructure','rural','symbol','suitable_infrastructure','health_conditions',
      'council','piece')

# Fail with a clear message if there are more labels than features
stopifnot(length(x) <= nrow(fr_2))
fr_2$feature[seq_along(x)] <- x


# Set plot size for the notebook display
options(repr.plot.width = 10, repr.plot.height = 15)

# Word cloud for left-wing citizens: up to 100 terms, largest first, no
# rotation, all black
plot1 <- ggwordcloud(fr_1$feature, fr_1$frequency, scale = c(4, 1),
  max.words = 100, random.order = FALSE, random.color = FALSE,
  rot.per = 0, colors = "black", ordered.colors = FALSE, shape = "circle") +
  theme(plot.title = element_text(hjust = 0.5, size = 40, vjust = -4)) +
  ggtitle("Left-wing citizens")

# Word cloud for left-wing councilors, same settings
plot2 <- ggwordcloud(fr_2$feature, fr_2$frequency, scale = c(4, 1),
  max.words = 100, random.order = FALSE, random.color = FALSE,
  rot.per = 0, colors = "black", ordered.colors = FALSE, shape = "circle") +
  theme(plot.title = element_text(hjust = 0.5, size = 40, vjust = -3)) +
  ggtitle("Left-wing councilors")

# Write both clouds, one above the other, to the figure file.
# dev.off() sits in `finally` so the png device is closed even if drawing
# fails; otherwise later plots would keep going to this file.
png("../figures/Figure C3.png", width = 2400, height = 3600, res = 200)
invisible(tryCatch(
  grid.arrange(plot1, plot2, nrow = 2),
  finally = dev.off()
))

"NA is replaced by empty string"
"NA is replaced by empty string"


# Figure 6: Keyness analysis on citizens, by political ideology

In [12]:
# Fix the random seed for reproducibility
set.seed(1234)

# Corpus of citizen responses; the text comes from the 'cleaned' column and
# the remaining columns travel along as document variables
corp_inaug <- corpus(citizens, text_field = "cleaned")

# Name every document after its left/right label (paste() turns a missing
# label into the string "NA")
docid <- paste(citizens$left_or_right)
docnames(corp_inaug) <- docid

# Tokenize and drop exact ("fixed") stop-word matches
toks <- tokens_remove(
  tokens(corp_inaug),
  pattern = phrase(stop_words),
  valuetype = "fixed"
)

# Second stop-word pass with tokens_select()'s default matching, followed by
# unigram + bigram construction
toks_ngram <- toks %>%
  tokens_select(pattern = phrase(stop_words), selection = "remove") %>%
  tokens_ngrams(n = 1:2)

# Unfiltered document-feature matrix of all n-grams
dfmat <- dfm(toks_ngram)

# Filtered matrix: drop stop words and the custom bigram phrases in `st`,
# then keep only features that occur in at least 3 documents
dfmat1 <- toks_ngram %>%
  tokens_remove(c(stop_words, st)) %>%
  dfm() %>%
  dfm_trim(min_docfreq = 3, docfreq_type = "count")

# Collapse the documents into one row per political orientation
dfmat1 <- dfm_group(dfmat1, groups = docvars(dfmat1, "left_or_right"))

# Likelihood-ratio keyness, with "Right-wing citizens" as the target group
tstat1 <- quanteda.textstats::textstat_keyness(
  dfmat1,
  measure = "lr",
  target = "Right-wing citizens"
)

# English labels for the first length(x) rows of the keyness table (the
# target-group end). They are assigned BY POSITION, so the vector is only
# valid for the exact feature ordering it was written against.
x <- c('pseudo','deport','closed','illegal','subsidy','turkey','real','syria','legal','%','closed_controlled','respected','homeland','non_governmental_organizations','greek','muslim','islands',
      'invader','war','strict','entitled','embassy','controlled','national','afghanistan','uninhabited_islands','europe','uninhabited','islam','ethics','origin','customs','customs','interests',
      'entitle_asylum','returns','border','questions','alteration','men','east','indigenous','enforce','unchecked','law','distances','cost','real_war','asylum','asylum_practical','say','irregularities',
      'christians','indifference','armenia','incoming','examine','one_of_a_hundred','army','deportation_sneak','deterrence','left','intentions','clear','steps','classes','crime','safe','primary','identity',
      'athens','demands','proved','destroy','china','attitudes','family','otherwise','africa','inflows','impose','orthodox','fanatics','nowhere','majorities','cares','pakistan','tolerated','persecuted',
      'indoors','colonization','islamists','pseudo_invader','pseudo_pious','legally_wrong','believes','standards','somalia','pious','religion','turned','bothers','re-promotions','says','overcome',
      'traditions','endangered','attitudes','pakistan','delinquent_behavior','points','mandatory','mandatory','violent','germany','respected','fact','declared','mercy','wealth','replacement','distant',
      'rape','france','custom_religion','greek_greek','exceptions')

n_top_labels <- length(x)
stopifnot(n_top_labels <= nrow(tstat1))
tstat1$feature[seq_along(x)] <- x

# English labels for the other end of the table (the reference group).
x <- c('assimilated','medical_care','psychological','education','human_rights','schools','medical','needs','open','community','apartment','health','care','transparency','information','possibility',
      'churches','appropriate','necessities','program','dignity_conditions','education','human_conditions','society','open','conditions','living_conditions','dignity','humane','livelihood','destroyed')

# Relabel the LAST length(x) rows. The row range used to be hard-coded as
# 2559:2589, which is the same range whenever the table has 2589 rows;
# deriving it from nrow() keeps the labels on the bottom rows if the feature
# count changes.
# NOTE(review): assumes the original range was meant to be the tail of the
# table - confirm nrow(tstat1) is 2589 on the reference data.
n_features <- nrow(tstat1)
# The two labelled ranges must fit in the table without overlapping
stopifnot(n_top_labels + length(x) <= n_features)
bottom_rows <- seq(to = n_features, length.out = length(x))
tstat1$feature[bottom_rows] <- x

# Set plot dimensions for the notebook display
options(repr.plot.width = 12, repr.plot.height = 14)

# Keyness plot: the 25 most distinctive terms at each end of the table, with
# enlarged legend keys and a bare background
keyness_plot <- textplot_keyness(tstat1, labelsize = 6, color = c("blue", "red"), margin = 0.17, n = 25) +
  theme(
    legend.key.height = unit(1, "cm"),
    legend.key.width = unit(1, "cm"),
    legend.text = element_text(size = 14),

    # Customize the plot background and axis appearance
    axis.line = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_line(),
    panel.border = element_blank(),
    panel.background = element_blank()
  )

# Write the figure to file. print() is explicit so the plot is drawn onto the
# png device even when the code is not run at a notebook/console top level,
# and dev.off() sits in `finally` so the device is closed if drawing fails.
png("../figures/Figure 6.png", width = 2400, height = 3600, res = 200)
invisible(tryCatch(
  print(keyness_plot),
  finally = dev.off()
))


"NA is replaced by empty string"
