In [6]:
import os
import sys
import argparse
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# NOTE(review): with no category/module arguments this filter suppresses
# *every* warning for the rest of the kernel session, not just noisy
# deprecation messages. Presumably added to keep cell output tidy; consider
# narrowing it (e.g. by category) so that diagnostics from the models trained
# below are not hidden -- TODO confirm intent.
warnings.filterwarnings("ignore")

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report

In [7]:

# Load the feature table from the working directory.
df = pd.read_csv('final-features-rgb.csv')

# Model inputs are the columns feature_0 .. feature_11; the target is the
# 'party' column.
feature_columns = [f'feature_{idx}' for idx in range(12)]
X = df[feature_columns]
y = df['party']

# Hold out 20 % of the rows for testing. The split is stratified on the
# target and uses a fixed seed so it is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

FileNotFoundError: [Errno 2] No such file or directory: 'final-features-rgb.csv'

In [None]:
# extract model metrics
def extract_metrics(report):
    """Parse the text produced by ``classification_report`` into a dict.

    Each per-class row of the report has the layout
    ``<label> <precision> <recall> <f1-score> <support>``. The four numeric
    columns are read from the *right-hand* side of the row and everything in
    front of them is treated as the label. This keeps the parser correct for

    * labels that contain whitespace (several tokens), and
    * labels wide enough that the row has no leading padding

    both of which were silently dropped when the label was assumed to be
    exactly one token on an indented line.

    Parameters
    ----------
    report : str
        Plain-text classification report (not the ``output_dict`` form).

    Returns
    -------
    dict
        ``{label: {'Precision': float, 'Recall': float,
        'F1-Score': float, 'Support': int}}`` with one entry per class row.
        The header, the ``accuracy`` row and the averaged summary rows
        (``micro avg``, ``macro avg``, ``weighted avg``, ``samples avg``)
        are not included. Rows whose numeric columns cannot be parsed are
        skipped. Runs of whitespace inside a label are collapsed to a single
        space.
    """
    # Aggregate rows share the five-column layout of class rows, so they have
    # to be excluded by name rather than by shape.
    summary_rows = {'micro avg', 'macro avg', 'weighted avg', 'samples avg'}

    metrics = {}
    for line in report.splitlines():
        parts = line.split()
        # Header (4 tokens), blank lines and the 'accuracy' row (3 tokens)
        # are too short to be a class row.
        if len(parts) < 5:
            continue

        label = ' '.join(parts[:-4])
        if label in summary_rows:
            continue

        try:
            precision = float(parts[-4])
            recall = float(parts[-3])
            f1_score = float(parts[-2])
            support = int(parts[-1])
        except ValueError:
            # Not a metrics row after all; ignore it.
            continue

        metrics[label] = {
            'Precision': precision,
            'Recall': recall,
            'F1-Score': f1_score,
            'Support': support
        }
    return metrics

In [None]:
# 1. Random Forest
# Train on the unscaled training features with balanced class weights and a
# fixed seed, then score the held-out test split.
rf = RandomForestClassifier(random_state=42, class_weight='balanced').fit(
    X_train, y_train
)
y_pred_rf = rf.predict(X_test)

# Keep both the printable report and its parsed per-class metrics.
report_rf = classification_report(y_test, y_pred_rf)
metrics_rf = extract_metrics(report_rf)

print("=== Random Forest ===", report_rf, sep="\n")

=== Random Forest ===
              precision    recall  f1-score   support

         afd       0.72      0.35      0.47       411
         cdu       0.48      0.62      0.54      1138
         csu       0.48      0.70      0.57      1355
         fdp       0.84      0.76      0.80       841
     gruenen       0.84      0.48      0.61       532
       linke       0.54      0.43      0.48       845
         spd       0.64      0.36      0.46       563

    accuracy                           0.57      5685
   macro avg       0.65      0.53      0.56      5685
weighted avg       0.61      0.57      0.57      5685



In [None]:
# 2. Support Vector Machine (SVM)
# Standardise the features first. The scaler is fitted on the training split
# only and then applied unchanged to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# RBF-kernel SVM with balanced class weights, trained on the scaled features.
svm = SVC(
    kernel='rbf',
    class_weight='balanced',
    random_state=42,
).fit(X_train_scaled, y_train)
y_pred_svm = svm.predict(X_test_scaled)

# Keep both the printable report and its parsed per-class metrics.
report_svm = classification_report(y_test, y_pred_svm)
metrics_svm = extract_metrics(report_svm)

print("=== Support Vector Machine (SVM) ===", report_svm, sep="\n")

=== Support Vector Machine (SVM) ===
              precision    recall  f1-score   support

         afd       0.37      0.54      0.44       411
         cdu       0.43      0.51      0.47      1138
         csu       0.52      0.44      0.48      1355
         fdp       0.72      0.71      0.71       841
     gruenen       0.48      0.52      0.50       532
       linke       0.46      0.31      0.37       845
         spd       0.41      0.47      0.44       563

    accuracy                           0.49      5685
   macro avg       0.48      0.50      0.49      5685
weighted avg       0.50      0.49      0.49      5685



In [None]:
# 3. MLP Classifier
# Two hidden layers (128 and 64 units) with ReLU activations, trained on the
# standardised features produced for the SVM.
mlp = MLPClassifier(
    hidden_layer_sizes=(128, 64),
    activation='relu',
    alpha=0.0001,
    learning_rate_init=0.001,
    max_iter=1000,
    random_state=42,
).fit(X_train_scaled, y_train)
y_pred_mlp = mlp.predict(X_test_scaled)

# Keep both the printable report and its parsed per-class metrics.
report_mlp = classification_report(y_test, y_pred_mlp)
metrics_mlp = extract_metrics(report_mlp)

print("=== MLP Classifier ===", report_mlp, sep="\n")

=== MLP Classifier ===
              precision    recall  f1-score   support

         afd       0.49      0.37      0.42       411
         cdu       0.47      0.46      0.47      1138
         csu       0.49      0.52      0.51      1355
         fdp       0.72      0.70      0.71       841
     gruenen       0.46      0.53      0.49       532
       linke       0.39      0.35      0.37       845
         spd       0.41      0.44      0.43       563

    accuracy                           0.49      5685
   macro avg       0.49      0.48      0.48      5685
weighted avg       0.49      0.49      0.49      5685



In [None]:
# Gather the per-class F1 scores of the three models into one table.
# Rows are the class labels found in the Random Forest report; columns are
# the models, in the order listed here.
parsed_reports = {
    'Random Forest': metrics_rf,
    'SVM': metrics_svm,
    'MLP': metrics_mlp,
}
metrics_all = {
    label: {
        model_name: per_class[label]['F1-Score']
        for model_name, per_class in parsed_reports.items()
    }
    for label in metrics_rf
}

# The outer keys are the labels, so transpose to get one row per class.
metrics_df = pd.DataFrame(metrics_all).T

# Extra column: mean F1 across the three models for each class.
metrics_df["Average"] = metrics_df.mean(axis=1)

# Extra row: mean of every column (including the new "Average" column)
# across all classes.
average_row = metrics_df.mean(numeric_only=True)
average_row.name = "Average"
metrics_df = pd.concat([metrics_df, average_row.to_frame().T])

# Output
print("=== Modellvergleich ===")
print(metrics_df.round(2))

=== Modellvergleich ===
         Random Forest   SVM   MLP  Average
afd               0.47  0.44  0.42     0.44
cdu               0.54  0.47  0.47     0.49
csu               0.57  0.48  0.51     0.52
fdp               0.80  0.71  0.71     0.74
gruenen           0.61  0.50  0.49     0.53
linke             0.48  0.37  0.37     0.41
spd               0.46  0.44  0.43     0.44
Average           0.56  0.49  0.49     0.51
