In [6]:
import pandas as pd
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from xgboost import XGBClassifier

In [7]:
# Read the feature table straight from the course repository on GitHub.
url = 'https://raw.githubusercontent.com/IvaroEkel/Probabilistic-Machine-Learning_lecture-PROJECTS/refs/heads/main/projects/05-1PLXXXX_political_color_posts/data/final-features-hsv.csv'
df = pd.read_csv(url)

# Inputs are the columns feature_0 ... feature_12; the target is the party label.
feature_columns = [f'feature_{i}' for i in range(13)]
X = df[feature_columns]
y = df['party']

# 80/20 split, stratified on the target so both parts keep the class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [8]:
# extract model metrics
def extract_metrics(report):
    """Parse the plain-text output of ``classification_report`` into a dict.

    Only per-class rows are returned. The column header and the summary rows
    (``accuracy``, ``micro avg``, ``macro avg``, ``weighted avg``,
    ``samples avg``) are skipped, as are rows whose numeric columns cannot be
    parsed.

    The four numeric columns are read from the right-hand end of each row and
    everything before them is taken as the class label (words re-joined with a
    single space). This keeps labels that contain spaces, and labels wide
    enough that their row starts at column 0, instead of silently dropping
    them.

    Parameters
    ----------
    report : str
        Text report with one row per class in the order
        ``label precision recall f1-score support``.

    Returns
    -------
    dict
        ``{label: {'Precision': float, 'Recall': float,
        'F1-Score': float, 'Support': int}}``; empty if no class row is found.
    """
    # Aggregate rows share the numeric layout of class rows, so they have to be
    # excluded by name rather than by shape.
    summary_rows = {'accuracy', 'micro avg', 'macro avg', 'weighted avg', 'samples avg'}

    metrics = {}
    for line in report.splitlines():
        parts = line.split()
        # A class row needs at least one label token plus four numbers; this
        # also rejects blank lines, the header and the short "accuracy" row.
        if len(parts) < 5:
            continue

        label = ' '.join(parts[:-4])
        if label in summary_rows:
            continue

        try:
            precision, recall, f1_score = (float(value) for value in parts[-4:-1])
            support = int(parts[-1])
        except ValueError:
            # Not a metrics row after all; ignore it.
            continue

        metrics[label] = {
            'Precision': precision,
            'Recall': recall,
            'F1-Score': f1_score,
            'Support': support
        }
    return metrics

In [9]:
# 1. Random Forest
# Fit on the unscaled training features and predict the held-out split.
rf = RandomForestClassifier(random_state=42, class_weight='balanced').fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

# Keep both the printable report and its parsed per-class metrics.
report_rf = classification_report(y_test, y_pred_rf)
metrics_rf = extract_metrics(report_rf)

print("=== Random Forest ===", report_rf, sep="\n")

=== Random Forest ===
              precision    recall  f1-score   support

         afd       0.72      0.44      0.55       411
         cdu       0.49      0.64      0.56      1138
         csu       0.54      0.69      0.61      1355
         fdp       0.83      0.74      0.78       841
      gruene       0.90      0.54      0.67       532
       linke       0.56      0.49      0.52       845
         spd       0.61      0.45      0.52       563

    accuracy                           0.60      5685
   macro avg       0.66      0.57      0.60      5685
weighted avg       0.63      0.60      0.60      5685



In [10]:
# 2. XGBoost
# The model is trained on integer-encoded party labels; the encoder is fitted
# on the training labels and reused for the test labels and for decoding.
le = LabelEncoder()
y_train_encoded = le.fit_transform(y_train)
y_test_encoded = le.transform(y_test)

# `use_label_encoder` is intentionally not passed: the installed xgboost does
# not recognise it and only answered with
# 'Parameters: { "use_label_encoder" } are not used.'
xgb = XGBClassifier(eval_metric='mlogloss', random_state=42)
xgb.fit(X_train, y_train_encoded)
y_pred_xgb_encoded = xgb.predict(X_test)

# Decode the predictions back to the original labels so the report uses the
# same class names as the other models.
y_pred_xgb = le.inverse_transform(y_pred_xgb_encoded)

report_xgb = classification_report(y_test, y_pred_xgb)
metrics_xgb = extract_metrics(report_xgb)
print("=== XGBoost ===")
print(report_xgb)

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


=== XGBoost ===
              precision    recall  f1-score   support

         afd       0.74      0.52      0.61       411
         cdu       0.55      0.65      0.59      1138
         csu       0.60      0.68      0.64      1355
         fdp       0.83      0.81      0.82       841
      gruene       0.82      0.61      0.70       532
       linke       0.60      0.55      0.57       845
         spd       0.56      0.52      0.54       563

    accuracy                           0.64      5685
   macro avg       0.67      0.62      0.64      5685
weighted avg       0.65      0.64      0.64      5685



In [11]:
# 3. Support Vector Machine (SVM)
# Standardise the features with statistics learned from the training split only;
# the scaled arrays are reused by the later cells that need scaled input.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# RBF-kernel SVM with class weights balanced against the class frequencies.
svm = SVC(kernel='rbf', random_state=42, class_weight='balanced')
svm.fit(X_train_scaled, y_train)
y_pred_svm = svm.predict(X_test_scaled)

report_svm = classification_report(y_test, y_pred_svm)
metrics_svm = extract_metrics(report_svm)

print("=== Support Vector Machine (SVM) ===", report_svm, sep="\n")

=== Support Vector Machine (SVM) ===
              precision    recall  f1-score   support

         afd       0.42      0.61      0.50       411
         cdu       0.45      0.61      0.52      1138
         csu       0.69      0.46      0.55      1355
         fdp       0.80      0.70      0.75       841
      gruene       0.68      0.60      0.64       532
       linke       0.49      0.40      0.44       845
         spd       0.42      0.58      0.49       563

    accuracy                           0.55      5685
   macro avg       0.56      0.56      0.55      5685
weighted avg       0.58      0.55      0.55      5685



In [12]:
# 4. MLP Classifier
# Two hidden layers (128 and 64 units) trained on the standardised features.
mlp_params = dict(
    hidden_layer_sizes=(128, 64),
    activation='relu',
    alpha=0.0001,
    learning_rate_init=0.001,
    max_iter=1000,
    random_state=42,
)
mlp = MLPClassifier(**mlp_params)
mlp.fit(X_train_scaled, y_train)

# Predict the held-out split and keep the report in text and parsed form.
y_pred_mlp = mlp.predict(X_test_scaled)
report_mlp = classification_report(y_test, y_pred_mlp)
metrics_mlp = extract_metrics(report_mlp)

print("=== MLP Classifier ===", report_mlp, sep="\n")

=== MLP Classifier ===
              precision    recall  f1-score   support

         afd       0.58      0.49      0.53       411
         cdu       0.49      0.58      0.53      1138
         csu       0.58      0.57      0.58      1355
         fdp       0.74      0.75      0.74       841
      gruene       0.60      0.61      0.61       532
       linke       0.50      0.44      0.47       845
         spd       0.50      0.46      0.48       563

    accuracy                           0.57      5685
   macro avg       0.57      0.56      0.56      5685
weighted avg       0.57      0.57      0.57      5685



In [13]:
# 5. Linear Discriminant Analysis (LDA)
# Default LDA fitted on the standardised training features.
lda = LinearDiscriminantAnalysis().fit(X_train_scaled, y_train)
y_pred_lda = lda.predict(X_test_scaled)

# Text report for display, parsed report for the model comparison.
report_lda = classification_report(y_test, y_pred_lda)
metrics_lda = extract_metrics(report_lda)

print("=== Linear Discriminant Analysis (LDA) ===", report_lda, sep="\n")

=== Linear Discriminant Analysis (LDA) ===
              precision    recall  f1-score   support

         afd       0.44      0.05      0.09       411
         cdu       0.33      0.33      0.33      1138
         csu       0.39      0.64      0.48      1355
         fdp       0.58      0.50      0.54       841
      gruene       0.60      0.38      0.46       532
       linke       0.38      0.33      0.35       845
         spd       0.48      0.39      0.43       563

    accuracy                           0.42      5685
   macro avg       0.46      0.37      0.38      5685
weighted avg       0.44      0.42      0.41      5685



In [14]:
# Parsed per-class metrics of every model, keyed by the column title it gets
# in the comparison table (insertion order = column order).
f1_sources = {
    'Random Forest': metrics_rf,
    'SVM': metrics_svm,
    'MLP': metrics_mlp,
    'XGBoost': metrics_xgb,
    'LDA': metrics_lda,
}

# The class labels are taken from the Random Forest report; each other model
# is looked up under the same label.
metrics_all = {
    label: {model: parsed[label]['F1-Score'] for model, parsed in f1_sources.items()}
    for label in metrics_rf
}

# Rows are classes, columns are models.
metrics_df = pd.DataFrame(metrics_all).T

# Mean F1 per class across all models.
metrics_df["Average"] = metrics_df.mean(axis=1)

# Mean of every column (including the new "Average" column), appended as the last row.
average_row = metrics_df.mean(numeric_only=True).rename("Average")
metrics_df = pd.concat([metrics_df, average_row.to_frame().T])

# Output
print("=== Modellvergleich ===", metrics_df.round(2), sep="\n")

=== Modellvergleich ===
         Random Forest   SVM   MLP  XGBoost   LDA  Average
afd               0.55  0.50  0.53     0.61  0.09     0.46
cdu               0.56  0.52  0.53     0.59  0.33     0.51
csu               0.61  0.55  0.58     0.64  0.48     0.57
fdp               0.78  0.75  0.74     0.82  0.54     0.73
gruene            0.67  0.64  0.61     0.70  0.46     0.62
linke             0.52  0.44  0.47     0.57  0.35     0.47
spd               0.52  0.49  0.48     0.54  0.43     0.49
Average           0.60  0.56  0.56     0.64  0.38     0.55
