# Quantitative Analysis

In [2]:
import warnings

# Filter out all warnings
warnings.filterwarnings("ignore")
import pandas as pd
import analysis
from sklearn.metrics import mean_absolute_error

## Load dataset

#### Party Sentiments

In [None]:
party_df = pd.read_csv('/content/party_sentiment.csv')

#### Headline sentiments

required when loading the dataset from the sentiment analysis downloaded version (instead of using the one provided)

In [None]:
# headline_df = pd.read_csv('/content/headline_sentiments.csv')
# #Fix for bad data reading (if headlines contain commata then the csv split is incorrect)
# headline_df = analysis.fix_headline_df(headline_df)

In [162]:
import pandas as pd
headline_df = pd.read_csv('all_headlines_sentiment.csv')

### Clustering load and preparation

In [165]:
import pandas as pd

# Load each CSV file into a separate DataFrame
file_paths = ['/content/clustering.csv']
dataframes = [pd.read_csv(file) for file in file_paths]

# Merge all DataFrames into one
clustering_df = pd.concat(dataframes, ignore_index=True)


In [166]:
clustering_and_sentiment_df = pd.merge(headline_df, clustering_df, on=['date','headline','paper'])
clustering_and_sentiment_df= analysis.rename_columns(clustering_and_sentiment_df)

In [169]:
clustering_and_sentiment_df= analysis.sentiment_reassignment(clustering_and_sentiment_df)

Unnamed: 0,date,headline,paper,sentiment_SPD,sentiment_Bundeswehr,sentiment_Ukraine,sentiment_climate,sentiment_CDU,sentiment_migration,sentiment_education,...,sentiment_tax,sentiment_housing,sentiment_medication,sentiment_whole,headline_en_prep,headline_prep,cluster_label,tsne_x,tsne_y,cluster_name
0,2023-09-07 00:00:00,Freie Wähler BaWü: Imageschaden durch Aiwanger?,tagesschau,,,,,,,,...,,,,0.0258,free elector buw image damage aiwanger,Freie Wähler BaWü Imageschaden Aiwanger,26,-20.493458,9.113449,Leaflet Affair
1,2023-09-07 00:00:00,Bundesverwaltungsgericht : Anlasslose Vorratsd...,tagesschau,,,,,,,,...,,,,-0.7184,federal administrative court uncertainty stora...,Bundesverwaltungsgericht Anlasslose Vorratsdat...,19,-23.375679,-1.855597,Legal matters
2,2023-09-07 00:00:00,Innenausschuss zu Schönbohm-Absetzung: Faeser ...,tagesschau,-0.961756,,,,,,,...,,,,-0.6249,internal committee schnbohm submission faeser ...,Innenausschuss Schönbohm Absetzung Faeser fehl...,45,-5.392294,17.742073,miscellaneous
3,2023-09-07 00:00:00,"Sparmaßnahmen im Bildungsbereich: ""Eine absolu...",tagesschau,,,,,,,-0.927296,...,,,,-0.5256,economic measure field education absolutely wr...,Sparmaßnahmen Bildungsbereich absolut falsche ...,44,31.473452,-19.938667,Tax
4,2023-09-07 00:00:00,Zentralrat der Juden: Aiwanger und Schuster wo...,tagesschau,,,,,,,,...,,,,0.0772,central council jew aiwanger schuster want meet,Zentralrat Juden Aiwanger Schuster treffen,26,-14.826737,15.355260,Leaflet Affair
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2919,2023-09-07 00:00:00,Was Aiwanger auf Söders 25 Fragen geantwortet hat,sz,,,,,,,,...,,,,0.0000,aiwanger answered sders question,Aiwanger Söders Fragen geantwortet,26,-22.602112,14.436152,Leaflet Affair
2920,2023-09-07 00:00:00,Eine verhängnisvolle Affäre,sz,,,,,,,,...,,,,-0.5994,violent affair,verhängnisvolle Affäre,37,10.901472,-47.715050,Dissatisfaction with current state
2921,2023-09-07 00:00:00,Warum das Flugblatt keine Jugendsünde ist,sz,,,,,,,,...,,,,-0.5574,flight sheet youth sin,Warum Flugblatt Jugendsünde,37,27.730083,-18.153920,Dissatisfaction with current state
2922,2023-09-07 00:00:00,Aiwanger hat Fragen der CSU zu Flugblatt beant...,sz,,,,,0.0,,,...,,,,0.0000,aiwanger answered csus question airbags,Aiwanger Fragen CSU Flugblatt beantwortet,26,-49.886360,19.511583,Leaflet Affair


## Number of collected headlines per newspaper

In [170]:
paper_counts_with_totals = analysis.get_paper_counts(clustering_and_sentiment_df)
print(paper_counts_with_totals)


                         Count
bild                       398
faz politik deutschland    411
focus politik               31
n tv politics              174
spiegel                    355
sz                         211
t online                   286
tagesschau                 234
welt politik dtl           793
zeit online                 31
Total                     2924


In [173]:
nonzero_counts = clustering_and_sentiment_df.count()

print("Number of rows with non-zero values for each column:")
nonzero_counts

Number of rows with non-zero values for each column:


date                              2924
headline                          2924
paper                             2924
sentiment_SPD                      458
sentiment_Bundeswehr                48
sentiment_Ukraine                   42
sentiment_climate                  112
sentiment_CDU                      256
sentiment_migration                201
sentiment_education                 94
sentiment_AfD                      236
sentiment_right_wing_extremism      48
sentiment_leaflet_affair             5
sentiment_heating                   25
sentiment_Linke                     16
sentiment_Freie_Wähler              65
sentiment_Die_Grünen               197
sentiment_border_control            40
sentiment_foreign_policy            47
sentiment_EU                       103
sentiment_FDP                      150
sentiment_tax                       36
sentiment_housing                   25
sentiment_medication                 4
sentiment_whole                   2924
headline_en_prep         

In [174]:
def count_nonzero(series):
    return series.count()

nonzero_counts_grouped = clustering_and_sentiment_df.groupby('paper').apply(count_nonzero)
column_totals = nonzero_counts_grouped.sum()

totals_df = pd.DataFrame(column_totals).T
totals_df.index = ['Total'] 


nonzero_counts_grouped = pd.concat([nonzero_counts_grouped, totals_df])
print("Number of nonzero values for each column grouped by 'paper':")
nonzero_counts_grouped

Number of nonzero values for each column grouped by 'paper':


Unnamed: 0,date,headline,paper,sentiment_SPD,sentiment_Bundeswehr,sentiment_Ukraine,sentiment_climate,sentiment_CDU,sentiment_migration,sentiment_education,...,sentiment_tax,sentiment_housing,sentiment_medication,sentiment_whole,headline_en_prep,headline_prep,cluster_label,tsne_x,tsne_y,cluster_name
bild,398,398,398,86,4,3,52,27,16,16,...,11,5,0,398,398,398,398,398,398,398
faz politik deutschland,411,411,411,52,3,5,8,19,20,12,...,2,2,0,411,410,410,411,411,411,411
focus politik,31,31,31,3,1,0,2,1,3,0,...,0,0,0,31,31,31,31,31,31,31
n tv politics,174,174,174,22,3,0,3,13,5,6,...,2,5,1,174,172,174,174,174,174,174
spiegel,355,355,355,69,11,10,5,41,13,6,...,1,1,1,355,355,355,355,355,355,355
sz,211,211,211,15,1,1,7,17,6,3,...,0,3,0,211,211,211,211,211,211,211
t online,286,286,286,50,4,0,2,37,15,7,...,2,2,0,286,286,286,286,286,286,286
tagesschau,234,234,234,46,5,3,8,30,23,14,...,7,5,0,234,234,234,234,234,234,234
welt politik dtl,793,793,793,113,15,19,23,69,99,30,...,11,2,2,793,793,793,793,793,793,793
zeit online,31,31,31,2,1,1,2,2,1,0,...,0,0,0,31,31,31,31,31,31,31


## Number of sentiments per newspaper and topic

positive

In [176]:
pos_counts = analysis.get_analysis_per_paper_and_topic(clustering_and_sentiment_df, analysis.count_pos)
pos_counts

Unnamed: 0,sentiment_SPD,sentiment_Bundeswehr,sentiment_Ukraine,sentiment_climate,sentiment_CDU,sentiment_migration,sentiment_education,sentiment_AfD,sentiment_right_wing_extremism,sentiment_leaflet_affair,...,sentiment_Die_Grünen,sentiment_border_control,sentiment_foreign_policy,sentiment_EU,sentiment_FDP,sentiment_tax,sentiment_housing,sentiment_medication,sentiment_whole,total (topic only)
bild,7,0,0,2,2,2,2,3,0,0,...,3,1,0,0,3,1,1,0,77,30
faz politik deutschland,4,0,2,2,2,6,3,1,0,0,...,3,1,2,0,3,0,1,0,94,32
focus politik,0,0,0,0,0,1,0,0,0,0,...,1,0,0,0,0,0,0,0,6,2
n tv politics,0,0,0,1,2,3,1,0,0,0,...,1,0,1,0,0,0,0,0,50,9
spiegel,10,1,3,2,4,0,1,1,1,0,...,8,1,4,0,8,0,0,0,97,45
sz,3,0,0,2,4,1,1,0,1,1,...,5,0,0,0,3,0,1,0,52,22
t online,10,0,0,0,4,2,1,2,1,0,...,6,0,3,0,5,0,0,0,62,35
tagesschau,5,0,0,0,2,5,4,1,0,0,...,3,0,1,0,0,0,2,0,63,23
welt politik dtl,13,3,4,6,4,28,11,6,4,0,...,10,3,1,2,3,1,0,0,222,102
zeit online,0,0,0,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,10,2


negative

In [177]:
analysis.get_analysis_per_paper_and_topic(clustering_and_sentiment_df, analysis.count_neg)

Unnamed: 0,sentiment_SPD,sentiment_Bundeswehr,sentiment_Ukraine,sentiment_climate,sentiment_CDU,sentiment_migration,sentiment_education,sentiment_AfD,sentiment_right_wing_extremism,sentiment_leaflet_affair,...,sentiment_Die_Grünen,sentiment_border_control,sentiment_foreign_policy,sentiment_EU,sentiment_FDP,sentiment_tax,sentiment_housing,sentiment_medication,sentiment_whole,total (topic only)
bild,11,1,0,10,2,5,1,3,2,0,...,1,1,1,0,0,2,0,0,122,43
faz politik deutschland,5,0,0,1,2,3,1,2,3,0,...,5,0,3,0,5,0,0,0,98,31
focus politik,2,0,0,0,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,7,4
n tv politics,3,1,0,0,1,0,2,2,0,0,...,0,0,0,0,0,0,0,1,46,11
spiegel,6,2,0,1,8,4,0,9,2,1,...,3,0,3,0,3,0,0,0,116,43
sz,2,0,0,1,3,1,0,1,1,0,...,1,0,0,0,1,0,0,0,49,14
t online,10,0,0,0,8,5,0,6,2,1,...,4,0,2,2,5,1,1,0,103,49
tagesschau,6,1,1,0,5,4,1,3,2,1,...,0,1,0,0,0,3,0,0,70,30
welt politik dtl,20,2,2,2,10,29,4,10,6,0,...,7,2,5,0,8,6,2,0,293,118
zeit online,0,0,0,1,0,0,0,1,0,0,...,0,0,0,1,0,0,0,0,9,3


## Average sentiment

In [178]:
result = analysis.get_analysis_per_paper_and_topic(clustering_and_sentiment_df, analysis.avg_pos)
result


Unnamed: 0,sentiment_SPD,sentiment_Bundeswehr,sentiment_Ukraine,sentiment_climate,sentiment_CDU,sentiment_migration,sentiment_education,sentiment_AfD,sentiment_right_wing_extremism,sentiment_leaflet_affair,...,sentiment_Die_Grünen,sentiment_border_control,sentiment_foreign_policy,sentiment_EU,sentiment_FDP,sentiment_tax,sentiment_housing,sentiment_medication,sentiment_whole,total (topic only)
bild,0.597166,,,0.464822,0.42358,0.3793,0.795739,0.685171,,,...,0.395111,0.455838,,,0.460902,0.498333,0.1531,,0.328636,0.494578
faz politik deutschland,0.415285,,0.595271,0.710308,0.4939,0.277167,0.517429,0.558807,,,...,0.320967,0.0772,0.33575,,0.320967,,0.5859,,0.373703,0.461185
focus politik,,,,,,0.3182,,,,,...,0.546103,,,,,,,,0.2885,0.432151
n tv politics,,,,0.589977,0.645467,0.652941,0.502836,,,,...,0.729301,,0.3818,,,,,,0.370852,0.58372
spiegel,0.428053,0.744438,0.498801,0.697424,0.731997,,0.733882,0.779197,0.4404,,...,0.41756,0.547887,0.394,,0.385243,,,,0.34318,0.569965
sz,0.585233,,,0.655913,0.451982,0.604483,0.892885,,0.1027,0.4215,...,0.639201,,,,0.585233,,0.814959,,0.333144,0.575409
t online,0.481809,,,,0.591913,0.3397,0.70998,0.530507,0.3818,,...,0.475748,,0.329267,,0.536757,,,,0.355434,0.491473
tagesschau,0.696712,,,,0.766837,0.30872,0.694247,0.510948,,,...,0.766588,,0.1779,,,,0.59755,,0.338695,0.564938
welt politik dtl,0.633227,0.590624,0.465331,0.577876,0.673396,0.372819,0.772571,0.612673,0.3959,,...,0.571827,0.357133,0.0258,0.765214,0.305565,0.446962,,,0.367396,0.52261
zeit online,,,,0.4449,,0.4215,,,,,...,,,,,,,,,0.44772,0.4332


In [179]:
result = analysis.get_analysis_per_paper_and_topic(clustering_and_sentiment_df, analysis.avg_sentiment)
result

Unnamed: 0,sentiment_SPD,sentiment_Bundeswehr,sentiment_Ukraine,sentiment_climate,sentiment_CDU,sentiment_migration,sentiment_education,sentiment_AfD,sentiment_right_wing_extremism,sentiment_leaflet_affair,...,sentiment_Die_Grünen,sentiment_border_control,sentiment_foreign_policy,sentiment_EU,sentiment_FDP,sentiment_tax,sentiment_housing,sentiment_medication,sentiment_whole,total (topic only)
bild,-0.042593,-0.222322,0.0,-0.109819,-0.013512,-0.079369,0.058116,-0.007962,-0.19538,,...,0.011727,-0.065615,-0.09534,0.0,0.086419,-0.04344,0.03062,,-0.076816,-0.050167
faz politik deutschland,-0.006867,0.0,0.238108,0.164739,0.000163,0.008134,0.083446,-0.045802,-0.212471,,...,-0.032978,0.025733,-0.19744,0.0,-0.034042,0.0,0.29295,,-0.018984,0.024239
focus politik,-0.563963,0.0,,0.0,-0.6908,-0.113833,,,0.0,,...,0.273051,,0.0,0.0,0.0,,,,-0.073216,-0.109555
n tv politics,-0.093483,-0.312412,,0.196659,0.045171,0.391764,-0.131143,-0.126217,0.0,,...,0.14586,0.0,0.3818,0.0,0.0,0.0,0.0,-0.501175,-0.012159,-0.008674
spiegel,0.006524,-0.041823,0.14964,0.18019,-0.075906,-0.139015,0.122314,-0.187022,-0.128807,-0.933735,...,0.03958,0.109577,0.043142,0.0,0.036277,0.0,0.0,0.0,-0.052381,-0.042409
sz,0.082647,0.0,0.0,0.107332,0.002347,0.047714,0.297628,-0.092562,-0.0309,0.21075,...,0.202747,,0.0,0.0,0.145536,,0.271653,,-0.020583,0.069064
t online,-0.039785,0.0,,0.0,-0.092745,-0.077082,0.101426,-0.083513,-0.149025,-0.961628,...,0.067067,0.0,0.00344,-0.075017,0.020897,-0.395767,-0.17,,-0.074423,-0.099953
tagesschau,-0.026705,-0.150981,-0.091067,0.0,-0.052295,-0.024626,0.132121,-0.057829,-0.36315,-0.59715,...,0.176905,-0.240094,0.08895,0.0,0.0,-0.28477,0.23902,,-0.029531,-0.076693
welt politik dtl,-0.049279,0.02632,0.015275,0.096426,-0.06665,-0.033897,0.192599,-0.034654,-0.062887,,...,0.036378,0.006801,-0.2811,0.049369,-0.120201,-0.317867,-0.296,0.0,-0.050493,-0.043576
zeit online,0.0,0.0,0.0,-0.021489,0.0,0.4215,,-0.176198,,,...,0.0,,,-0.386986,,,,,0.002906,-0.016317


In [181]:
result = analysis.get_analysis_per_paper_and_topic(clustering_and_sentiment_df, analysis.avg_neg)
result


Unnamed: 0,sentiment_SPD,sentiment_Bundeswehr,sentiment_Ukraine,sentiment_climate,sentiment_CDU,sentiment_migration,sentiment_education,sentiment_AfD,sentiment_right_wing_extremism,sentiment_leaflet_affair,...,sentiment_Die_Grünen,sentiment_border_control,sentiment_foreign_policy,sentiment_EU,sentiment_FDP,sentiment_tax,sentiment_housing,sentiment_medication,sentiment_whole,total (topic only)
bild,-0.713012,-0.889289,,-0.664021,-0.605995,-0.4057,-0.661629,-0.772751,-0.48845,,...,-0.810055,-0.783912,-0.4767,,,-0.488084,,,-0.458014,-0.640878
faz politik deutschland,-0.40364,,,-0.1027,-0.49235,-0.500107,-0.550937,-0.668722,-0.495767,,...,-0.40364,,-0.5529,,-0.40364,,,,-0.438067,-0.475596
focus politik,-0.845945,,,,-0.6908,-0.6597,,,,,...,,,,,,,,,-0.571529,-0.732148
n tv politics,-0.68554,-0.937236,,,-0.703712,,-0.644846,-0.694195,,,...,,,,,,,,-0.501175,-0.449093,-0.660797
spiegel,-0.638398,-0.602247,,-0.4939,-0.755016,-0.4518,,-0.647643,-0.671025,-0.933735,...,-0.67811,,-0.352767,,-0.664547,,,,-0.447273,-0.619945
sz,-0.258,,,-0.560504,-0.589344,-0.3182,,-0.83306,-0.2263,,...,-0.1548,,,,-0.1548,,,,-0.442176,-0.421594
t online,-0.680733,,,,-0.724901,-0.367126,,-0.71967,-0.48895,-0.961628,...,-0.411823,,-0.4767,-0.600138,-0.457349,-0.791533,-0.34,,-0.420602,-0.581969
tagesschau,-0.785329,-0.754906,-0.2732,,-0.620503,-0.5275,-0.927296,-0.787154,-0.7263,-0.59715,...,,-0.960376,,,,-0.664463,,,-0.403543,-0.695286
welt politik dtl,-0.690022,-0.688535,-0.78555,-0.624729,-0.729242,-0.475681,-0.680076,-0.592855,-0.42115,,...,-0.58304,-0.474492,-0.51114,,-0.610416,-0.65725,-0.296,,-0.415027,-0.59245
zeit online,,,,-0.487878,,,,-0.528594,,,...,,,,-0.773973,,,,,-0.487456,-0.596815


## Temporal analysis

In [182]:
result = analysis.get_analysis_per_topic_and_time(clustering_and_sentiment_df)


Average sentiment over time for each topic:


In [183]:
result = analysis.get_analysis_per_topic_and_time(clustering_and_sentiment_df)

In [185]:
# Plot average sentiment over time for each topic
analysis.plot_sentiment_over_time(result)


In [186]:
# Plot each column separately
analysis.plot_individual_sentiment(result)


## Voting

In [188]:
voting_results = analysis.get_voting_prediction(clustering_and_sentiment_df, party_df)
voting_results

Unnamed: 0_level_0,sentiment_Ukraine,sentiment_EU,sentiment_medication,sentiment_climate,sentiment_Bundeswehr,sentiment_migration,sentiment_education,sentiment_housing,sentiment_tax,sentiment_foreign_policy,sentiment_leaflet_affair,sentiment_heating,sentiment_Border_control,sentiment_right_wing_extremism,number of topic votes,party_counts,total votes,percentage
programme,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
CSU,9,2,0,16,0,49,24,5,12,0,3,3,0,0,123,0.0,123.0,13.65
AfD,3,2,0,16,4,52,24,5,12,0,0,3,0,0,121,1.4,122.4,13.58
Grüne,9,2,0,16,7,49,24,5,2,0,3,3,0,18,138,0.0,138.0,15.31
FDP,9,2,0,16,4,49,24,5,12,0,3,3,0,0,127,2.5,129.5,14.37
FW,0,2,0,16,4,49,24,5,12,0,0,3,0,0,115,0.0,115.0,12.76
Linke,0,2,0,16,7,52,24,5,2,12,3,3,0,0,126,0.1,126.1,13.99
SPD,3,2,0,16,7,49,24,5,12,0,3,3,0,18,142,5.2,147.2,16.33


In [189]:
papers = clustering_and_sentiment_df['paper'].unique()
outcome_per_paper_df = pd.DataFrame(columns = papers, index = party_df['programme'])
for i in range(len(papers)):

  voting_prediction_paper = analysis.get_voting_prediction(clustering_and_sentiment_df, party_df, paper = papers[i]).sort_index()
  outcome_per_paper_df[papers[i]] = voting_prediction_paper['percentage']
outcome_per_paper_df['total'] = voting_results['percentage']

In [190]:
outcome_per_paper_df

Unnamed: 0_level_0,tagesschau,faz politik deutschland,bild,focus politik,n tv politics,zeit online,welt politik dtl,spiegel,t online,sz,total
programme,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
CSU,13.51,14.14,10.09,14.29,15.62,16.67,13.35,8.97,8.96,11.9,13.65
AfD,13.51,9.09,22.94,14.29,3.12,8.33,14.52,10.26,13.43,9.52,13.58
Grüne,13.51,17.17,11.93,14.29,18.75,16.67,14.05,14.1,10.45,14.29,15.31
FDP,13.51,17.17,12.84,14.29,15.62,16.67,14.75,20.51,16.42,19.05,14.37
FW,12.61,12.12,10.09,14.29,15.62,16.67,13.11,5.13,7.46,11.9,12.76
Linke,11.71,11.11,12.84,14.29,12.5,8.33,12.41,17.95,16.42,11.9,13.99
SPD,21.62,19.19,19.27,14.29,18.75,16.67,17.8,23.08,26.87,21.43,16.33


## MSE

In [191]:
d = {'CDU': [37.0], 'Grüne': [14.4], "FW": [15.8], "AfD": [14.6], "SPD": [8.4], "FDP":[3.0], "Linke": [1.5]}
actual_voting_outcome = pd.DataFrame(data=d).T
actual_voting_outcome

Unnamed: 0,0
CDU,37.0
Grüne,14.4
FW,15.8
AfD,14.6
SPD,8.4
FDP,3.0
Linke,1.5


In [192]:
df1 = actual_voting_outcome.sort_index()
df2 = analysis.get_voting_prediction(clustering_and_sentiment_df, party_df).sort_index()


percentage1 = df1[0]
percentage2 = df2['percentage']

# Compute the mean squared error between the two percentage columns
mse = mean_absolute_error(percentage1, percentage2)

print("Mean Absolute Error:", mse)


Mean Absolute Error: 8.587142857142856


In [193]:
papers = clustering_and_sentiment_df['paper'].unique()
mae_per_paper_df = pd.DataFrame(columns = ['papers', 'MAE'])
mae_per_paper_df['papers'] = papers
for i in range(len(papers)):
  df1 = actual_voting_outcome.sort_index()
  df2 = analysis.get_voting_prediction(clustering_and_sentiment_df, party_df, paper = papers[i]).sort_index()

  percentage1 = df1[0]
  percentage2 = df2['percentage']

  # Compute the mean squared error between the two percentage columns
  mae = mean_absolute_error(percentage1, percentage2)

  mae_per_paper_df["MAE"][i] = mae

mae_per_paper_df

Unnamed: 0,papers,MAE
0,tagesschau,8.942857
1,faz politik deutschland,9.912857
2,bild,10.782857
3,focus politik,7.801429
4,n tv politics,10.194286
5,zeit online,8.358571
6,welt politik dtl,8.404286
7,spiegel,13.14
8,t online,12.615714
9,sz,10.524286


In [194]:
analysis.get_voting_prediction(clustering_and_sentiment_df, party_df, paper = "welt politik dtl")

Unnamed: 0_level_0,sentiment_Ukraine,sentiment_EU,sentiment_medication,sentiment_climate,sentiment_Bundeswehr,sentiment_migration,sentiment_education,sentiment_housing,sentiment_tax,sentiment_foreign_policy,sentiment_leaflet_affair,sentiment_heating,sentiment_Border_control,sentiment_right_wing_extremism,number of topic votes,party_counts,total votes,percentage
programme,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
CSU,4,2,0,6,0,28,11,0,6,0,0,0,0,0,57,0,57,13.35
AfD,2,2,0,2,3,29,11,0,6,0,0,1,0,0,56,6,62,14.52
Grüne,4,2,0,6,2,28,11,0,1,0,0,0,0,6,60,0,60,14.05
FDP,4,2,0,6,3,28,11,0,6,0,0,0,0,0,60,3,63,14.75
FW,0,2,0,6,3,28,11,0,6,0,0,0,0,0,56,0,56,13.11
Linke,0,2,0,6,2,29,11,0,1,1,0,0,0,0,52,1,53,12.41
SPD,2,2,0,6,2,28,11,0,6,0,0,0,0,6,63,13,76,17.8


In [None]:
data = {
    'Party': ['SPD', 'CDU', 'AfD', 'Linke', 'FW', 'Grüne', 'FDP'],
    'Score': [-0.074, -0.09, -0.09, -0.07, -0.03, 0.09, 0.01],
    'Count': [458, 256, 236, 16, 65, 197, 150]
}

df = pd.DataFrame(data)

# Calculate percentage
total_count = df['Count'].sum()
df['percentage'] = (df['Count'] / total_count) * 100

print(df)



   Party  Score  Count  percentage
0    SPD -0.074    458   33.236575
1    CDU -0.090    256   18.577649
2    AfD -0.090    236   17.126270
3  Linke -0.070     16    1.161103
4     FW -0.030     65    4.716981
5  Grüne  0.090    197   14.296081
6    FDP  0.010    150   10.885341


In [None]:
df1 = actual_voting_outcome.sort_index()
df2 = df.sort_index()

percentage1 = df1[0]
percentage2 = df2['percentage']

# Compute the mean squared error between the two percentage columns
mse = mean_absolute_error(percentage1, percentage2)

print("Mean Absolute Error:", mse)

Mean Absolute Error: 12.969790586771719
