In [3]:
import pandas as pd
import numpy as np
import torch
from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from transformers import AutoTokenizer
from scipy.special import softmax

In [17]:
# Read the topic-classified pre-1999 manifesto sentences from disk
df = pd.read_csv(filepath_or_buffer="./data/classified_manifestos_pre99.csv")
# Preview the first five rows
df.head(n=5)

Unnamed: 0,party,date,text,topic
0,43320,196310,ARBEITSPROGRAMM 1963 Vor 75 Jahren erfolgte d...,305 - Political Authority
1,43320,196310,In Würdigung dieses historischen und für die w...,202 - Democracy
2,43320,196310,Recht auf Bildung — Chancen für die Jugend De...,506 - Education Expansion
3,43320,196310,Jedermann soll Anteil an den kulturellen Werte...,502 - Culture: Positive
4,43320,196310,Was dem ganzen Volke und vornehmlich der Jugen...,506 - Education Expansion


In [18]:
# Keep only the part of each topic label that precedes the first " - "
# (e.g. "305 - Political Authority" -> "305") as the cmp_code
topic_parts = df["topic"].str.split(" - ", n=1)
df["cmp_code"] = topic_parts.str[0]
df 

Unnamed: 0,party,date,text,topic,cmp_code
0,43320,196310,ARBEITSPROGRAMM 1963 Vor 75 Jahren erfolgte d...,305 - Political Authority,305
1,43320,196310,In Würdigung dieses historischen und für die w...,202 - Democracy,202
2,43320,196310,Recht auf Bildung — Chancen für die Jugend De...,506 - Education Expansion,506
3,43320,196310,Jedermann soll Anteil an den kulturellen Werte...,502 - Culture: Positive,502
4,43320,196310,Was dem ganzen Volke und vornehmlich der Jugen...,506 - Education Expansion,506
...,...,...,...,...,...
14722,43810,199510,• dass der körperliche Drogenentzug weiterhin ...,504 - Welfare State Expansion,504
14723,43810,199510,• dass sowohl der Entzug mit medikamentöser Be...,504 - Welfare State Expansion,504
14724,43810,199510,Der Entzug muss auch gegen den Willen der Betr...,201 - Freedom and Human Rights,201
14725,43810,199510,Es sind die geeigneten Massnahmen dafür bereit...,605 - Law and Order: Positive,605


In [19]:
# Lookup table from numeric party ID to party abbreviation
party_name_mapping = dict([
    (43110, "GPS"),
    (43120, "GLP"),
    (43320, "SPS"),
    (43420, "FDP"),
    (43520, "CVP"),
    (43810, "SVP"),
    (43811, "BDP"),
])

# Translate each party ID into the new 'party_name' column;
# IDs missing from the table become NaN
party_names = df['party'].map(party_name_mapping)
df['party_name'] = party_names

df

Unnamed: 0,party,date,text,topic,cmp_code,party_name
0,43320,196310,ARBEITSPROGRAMM 1963 Vor 75 Jahren erfolgte d...,305 - Political Authority,305,SPS
1,43320,196310,In Würdigung dieses historischen und für die w...,202 - Democracy,202,SPS
2,43320,196310,Recht auf Bildung — Chancen für die Jugend De...,506 - Education Expansion,506,SPS
3,43320,196310,Jedermann soll Anteil an den kulturellen Werte...,502 - Culture: Positive,502,SPS
4,43320,196310,Was dem ganzen Volke und vornehmlich der Jugen...,506 - Education Expansion,506,SPS
...,...,...,...,...,...,...
14722,43810,199510,• dass der körperliche Drogenentzug weiterhin ...,504 - Welfare State Expansion,504,SVP
14723,43810,199510,• dass sowohl der Entzug mit medikamentöser Be...,504 - Welfare State Expansion,504,SVP
14724,43810,199510,Der Entzug muss auch gegen den Willen der Betr...,201 - Freedom and Human Rights,201,SVP
14725,43810,199510,Es sind die geeigneten Massnahmen dafür bereit...,605 - Law and Order: Positive,605,SVP


In [20]:
# Map a cmp_code to its main topic using the code's leading digit
def map_to_main_topic(cmp_code):
    """Return the main-topic name for a cmp_code.

    The code is converted to a string and only its first character is
    inspected: '1'..'7' select one of the seven main topics, anything else
    (including missing values and empty strings) yields "other".

    Args:
        cmp_code: the code to classify; any value accepted by ``str()``.

    Returns:
        The main-topic name as a string.
    """
    topics_by_leading_digit = {
        '1': "external relations",
        '2': "freedom and democracy",
        '3': "political system",
        '4': "economy",
        '5': "welfare and quality of life",
        '6': "fabric of society",
        '7': "social groups",
    }
    # Slicing (rather than indexing) keeps an empty string from raising
    leading_char = str(cmp_code)[:1]
    return topics_by_leading_digit.get(leading_char, "other")

# Derive the 'main_topic' column by classifying every 'cmp_code' value
main_topics = df['cmp_code'].map(map_to_main_topic)
df['main_topic'] = main_topics

df.head(n=5)

Unnamed: 0,party,date,text,topic,cmp_code,party_name,main_topic
0,43320,196310,ARBEITSPROGRAMM 1963 Vor 75 Jahren erfolgte d...,305 - Political Authority,305,SPS,political system
1,43320,196310,In Würdigung dieses historischen und für die w...,202 - Democracy,202,SPS,freedom and democracy
2,43320,196310,Recht auf Bildung — Chancen für die Jugend De...,506 - Education Expansion,506,SPS,welfare and quality of life
3,43320,196310,Jedermann soll Anteil an den kulturellen Werte...,502 - Culture: Positive,502,SPS,welfare and quality of life
4,43320,196310,Was dem ganzen Volke und vornehmlich der Jugen...,506 - Education Expansion,506,SPS,welfare and quality of life


In [21]:
# Persist the enriched pre-1999 manifesto frame as a pickle
df.to_pickle(path="./data/df_pre1999.pkl")

In [14]:
# Read the manifesto frame covering 1999 and later
# NOTE(review): unpickling can execute arbitrary code; only load files from a trusted source
df_post1999 = pd.read_pickle(filepath_or_buffer="./data/df_all.pkl")
df_post1999

Unnamed: 0,party,date,text,cmp_code,eu_code,sentiment,score,year,party_name,main_topic
0,43110,1999-10-01,For a sustainable Switzerland,,,POSITIVE,0.998674,1999,GPS,other
1,43110,1999-10-01,Election platform 1999,,,POSITIVE,0.987673,1999,GPS,other
2,43110,1999-10-01,The Greens are pleased to present an election ...,501,,POSITIVE,0.998852,1999,GPS,welfare and quality of life
3,43110,1999-10-01,- to briefly present a sustainable Switzerland.,601,,POSITIVE,0.998062,1999,GPS,fabric of society
4,43110,1999-10-01,"In responsibility towards future generations, ...",201,,POSITIVE,0.998758,1999,GPS,freedom and democracy
...,...,...,...,...,...,...,...,...,...,...
18169,43811,2019-10-01,The same is true for other projects that lead ...,414,,POSITIVE,0.998668,2019,BDP,economy
18170,43811,2019-10-01,"Various reforms in the financial center, for e...",414,,POSITIVE,0.998879,2019,BDP,economy
18171,43811,2019-10-01,They must be seen as an opportunity.,414,,POSITIVE,0.994448,2019,BDP,economy
18172,43811,2019-10-01,There is no way around the recognition and imp...,107,,POSITIVE,0.998304,2019,BDP,external relations


In [53]:
# Reload the pre-1999 frame, parse its YYYYMM dates into datetimes,
# and remove the full topic label column
df_convert = pd.read_pickle("./data/df_pre1999.pkl")
parsed_dates = pd.to_datetime(df_convert["date"], format="%Y%m")
df_convert["date"] = parsed_dates
drop_col = ["topic"]
df_pre1999 = df_convert.drop(columns=drop_col)
df_pre1999

Unnamed: 0,party,date,text,cmp_code,party_name,main_topic
0,43320,1963-10-01,ARBEITSPROGRAMM 1963 Vor 75 Jahren erfolgte d...,305,SPS,political system
1,43320,1963-10-01,In Würdigung dieses historischen und für die w...,202,SPS,freedom and democracy
2,43320,1963-10-01,Recht auf Bildung — Chancen für die Jugend De...,506,SPS,welfare and quality of life
3,43320,1963-10-01,Jedermann soll Anteil an den kulturellen Werte...,502,SPS,welfare and quality of life
4,43320,1963-10-01,Was dem ganzen Volke und vornehmlich der Jugen...,506,SPS,welfare and quality of life
...,...,...,...,...,...,...
14722,43810,1995-10-01,• dass der körperliche Drogenentzug weiterhin ...,504,SVP,welfare and quality of life
14723,43810,1995-10-01,• dass sowohl der Entzug mit medikamentöser Be...,504,SVP,welfare and quality of life
14724,43810,1995-10-01,Der Entzug muss auch gegen den Willen der Betr...,201,SVP,freedom and democracy
14725,43810,1995-10-01,Es sind die geeigneten Massnahmen dafür bereit...,605,SVP,fabric of society


In [51]:
# Drop unnecessary columns in post 1999 dataset
drop_col = ["eu_code", "sentiment", "score", "year"]
# This cell rebinds df_post1999 to its own result, so a second run would
# raise KeyError because the columns are already gone. errors="ignore"
# makes the cell safe to re-run; the trade-off is that a misspelled
# column name in drop_col is skipped silently instead of raising.
df_post1999 = df_post1999.drop(columns=drop_col, errors="ignore")
df_post1999

Unnamed: 0,party,date,text,cmp_code,party_name,main_topic
0,43110,1999-10-01,For a sustainable Switzerland,,GPS,other
1,43110,1999-10-01,Election platform 1999,,GPS,other
2,43110,1999-10-01,The Greens are pleased to present an election ...,501,GPS,welfare and quality of life
3,43110,1999-10-01,- to briefly present a sustainable Switzerland.,601,GPS,fabric of society
4,43110,1999-10-01,"In responsibility towards future generations, ...",201,GPS,freedom and democracy
...,...,...,...,...,...,...
18169,43811,2019-10-01,The same is true for other projects that lead ...,414,BDP,economy
18170,43811,2019-10-01,"Various reforms in the financial center, for e...",414,BDP,economy
18171,43811,2019-10-01,They must be seen as an opportunity.,414,BDP,economy
18172,43811,2019-10-01,There is no way around the recognition and imp...,107,BDP,external relations


In [56]:
# Combine the two datasets
# ignore_index=True renumbers the rows 0..N-1. Without it both frames keep
# their own row labels, so the combined index contains duplicates and
# label-based lookups (and the index column of any CSV written from this
# frame) become ambiguous.
dfs = pd.concat([df_pre1999, df_post1999], ignore_index=True)
dfs.shape

(32901, 6)

In [57]:
# Write the combined frame to CSV as input for the sentiment analysis
# (the row index is written as the first, unnamed column)
dfs.to_csv(path_or_buf="./data/manifesto_texts_topics_1963_2019.csv")

In [None]:
# Read the combined manifesto texts for the sentiment analysis
# NOTE(review): this absolute path differs from the relative ./data/ path the
# combined CSV was written to — confirm both point at the same file
df_orig = pd.read_csv(
    filepath_or_buffer="/content/drive/MyDrive/dlss24/dlss24/final/manifesto_texts_topics_1963_2019.csv"
)
df_orig

In [None]:
# Load model
MODEL = "cardiffnlp/xlm-twitter-politics-sentiment"
# Choose the device inside this cell so it is defined before model.to(device)
# runs; on a fresh kernel run top to bottom, `device` would otherwise not
# exist yet and this cell would raise NameError.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
tokenizer = AutoTokenizer.from_pretrained(MODEL)
# Move the model weights onto the selected device
model = model.to(device)

In [None]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
  device = "cuda:0"
else:
  device = "cpu"
device

In [None]:
# Function for sentiment
def sentiment_analyzer_sentences (text):
  """Classify the sentiment of a single text with the loaded model.

  Uses the notebook-level `tokenizer`, `model` and `device`.

  Args:
    text: the text to classify; input longer than 512 tokens is truncated.

  Returns:
    One of "negative", "neutral" or "positive": the label at the index of
    the highest-scoring class.
  """
  # One sequence per call, so no padding is needed: padding every sentence
  # out to 512 tokens only makes the forward pass more expensive.
  encoded_input = tokenizer(text, return_tensors='pt', truncation=True, max_length=512)
  # Move to GPU
  encoded_input = encoded_input.to(device)

  # Inference only: without no_grad() every call builds an autograd graph,
  # costing memory and time for gradients that are never used.
  with torch.no_grad():
    output = model(**encoded_input)
  # First element of the output holds the scores; [0] picks the single batch item
  scores = output[0][0].cpu().numpy()
  scores = softmax(scores)
  # assigning sentiment label based on highest score index
  sentiment_index = scores.argmax()
  # NOTE(review): assumes the model's class order is negative/neutral/positive —
  # confirm against model.config.id2label
  sentiments = ["negative", "neutral", "positive"]
  sentiment = sentiments[sentiment_index]
  return sentiment

In [None]:
# Classify every text on a copy of the loaded frame so df_orig stays untouched
df = df_orig.copy()
sentiment_labels = df["text"].map(sentiment_analyzer_sentences)
df["sentiment"] = sentiment_labels

In [None]:
# Write the frame with the added sentiment column to CSV
# (the row index is written as the first, unnamed column)
df.to_csv(path_or_buf="./data/manifesto-texts-sentiment-1963-2019.csv")