<a href="https://colab.research.google.com/github/sebastianiu/Prognosemodell_Online_Kreditzahlungsverkehr/blob/main/models/test/model_tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1. Daten Laden

In [1]:
# Bibliotheken laden
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

url = "https://github.com/sebastianiu/Prognosemodell_Online_Kreditzahlungsverkehr/raw/main/data/raw/PSP_Jan_Feb_2019.xlsx"
Datensatz = pd.read_excel(url)
Datensatz = Datensatz.rename(columns = {"Unnamed: 0":"id"})

# 2. Datenaufbereitung

In [2]:
# Bilbiotheken laden
from sklearn.preprocessing import LabelEncoder
from sklearn import datasets
from sklearn.datasets import make_classification
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from scipy.stats import randint
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from sklearn.model_selection import cross_val_score

# Datenaufbereitung
label_encoder_PSP = LabelEncoder()
label_encoder_country = LabelEncoder()
label_encoder_card = LabelEncoder()
label_encoder_weekday = LabelEncoder()

ML_Daten = Datensatz.filter(['amount','success','PSP','country','card','tmsp','3D_secured'], axis=1)

ML_Daten['country'] = label_encoder_country.fit_transform(ML_Daten['country'])
ML_Daten['PSP'] = label_encoder_PSP.fit_transform(ML_Daten['PSP'])
ML_Daten['card'] = label_encoder_card.fit_transform(ML_Daten['card'])

# Datumswerte in Tag/ Wochentag/ Stunde aufteilen
ML_Daten['weekday'] = ML_Daten['tmsp'].dt.day_name()
ML_Daten['weekday'] = label_encoder_weekday.fit_transform(ML_Daten['weekday'])
ML_Daten['day'] = ML_Daten['tmsp'].dt.strftime('%d').astype(int)
ML_Daten['hour'] = ML_Daten['tmsp'].dt.strftime('%H').astype(int)

# Separation in X Merkmale and Zielvariable Y
Y = ML_Daten['success']
X = ML_Daten.filter(['amount','PSP','3D_secured','card','country','weekday','day','hour'], axis=1)

# Aufteilung in Trainings- und Validierungsdatensatz
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)

# 3. Bewertungsfunktionen

In [3]:
from sklearn.metrics import  roc_curve,auc,accuracy_score, f1_score

def Visualisierung_AUC(model,x_test,y_test):
  # Berechnung der Falsch-Positiv-Rate und der Wahr-Positiv-Rate für alle Schwellenwerte der Klassifizierung
  probs = model.predict_proba(x_test)
  preds = probs[:,1]
  fpr, tpr, threshold = roc_curve(y_test, preds)
  auc_value = round(auc(fpr,tpr),2)
  df = pd.DataFrame(np.column_stack([fpr, tpr, threshold]), columns=['FPR', 'TPR', 'Threshold'])
  # Visualisierung
  fig = px.line(df, x='FPR', y="TPR",title = f'AUC-Wert = {auc_value}')
  fig.update_layout(shapes = [{'type': 'line', 'yref': 'paper', 'xref': 'paper', 'y0': 0, 'y1': 1, 'x0': 0, 'x1': 1,'line_color':'red','line_dash':'dot'}])
  fig.show()

def Visualisierung_Class_Errors(model,x_test,y_test):
  #Vorhersage
  y_pred = model.predict(x_test)
  #Zippe Daten in Liste
  list(zip(y_pred,y_test))
  #Entzippe Liste
  unzip_file = [{'y_pred':y_pred,'y_test':y_test}for y_pred,y_test in zip(y_pred,y_test)]
  #Estelle DataFrame
  data = pd.DataFrame(unzip_file)
  #Ermittel Classification Error
  data['class_error'] = abs(data.y_pred-data.y_test)
  data = pd.concat([data,x_test], axis=1, join="inner")
  data['zaehler'] = 1

  # Decode Features
  data['PSP'] = label_encoder_PSP.inverse_transform(data['PSP'])
  data['country'] = label_encoder_country.inverse_transform(data['country'])
  data['card'] = label_encoder_card.inverse_transform(data['card'])
  data['weekday'] = label_encoder_weekday.inverse_transform(data['weekday'])

  max = data.amount.max()

  def create_amount_quantiles(row):
    if row['amount'] > 0 and row['amount'] <= max/4:
      result = '0 - '+str(max/4)
    else:
      if row['amount'] > max/4 and row['amount'] <= max/4*2:
        result = str(max/4+1)+' - '+str(max/4*2)
      else:
        if row['amount'] > max/4*2 and row['amount'] <= max/4*3:
          result = str(max/4*2+1)+' - '+str(max/4*3)
        else:
          result = str(max/4*3+1)+' - '+str(max)
    return result

  data['amount_quantiles'] = data.apply(create_amount_quantiles, axis=1)


  field_list = ['PSP','card','country','weekday','3D_secured','amount_quantiles']
  error_rates = pd.DataFrame(columns=['Merkmal','Merkmalswert','class_error','zaehler'])

  for field in field_list:
    errors = data.groupby(data[field])['class_error'].sum()
    total = data.groupby(data[field])['zaehler'].sum()
    error_rates_tmp = pd.concat([errors, total], axis=1)
    error_rates_tmp['Merkmalswert'] = error_rates_tmp.index.values
    error_rates_tmp['Merkmal']=field
    error_rates_tmp.reset_index()
    error_rates = pd.concat([error_rates,error_rates_tmp])

  error_rates['class_error_rate'] = error_rates.zaehler/error_rates.class_error
  max_error_rate = error_rates['class_error_rate'].max()

  # Visualisiere Verteilungen
  fig = px.bar(error_rates, x='Merkmalswert', y='class_error_rate', color= 'Merkmal',labels={'class_error_rate':'Fehlerrate in %'},title="Verteilung der Klassifizierungsfehler")
  fig.add_hline(y=max_error_rate,line_dash="dot",annotation_text=str(round(max_error_rate,2))+' %',annotation_position="top left")
  fig.show()

#Funktionen zur Modellbewertung
def Model_Bewertung(model,x_train, y_train,x_test, y_test,X,Y):
  y_pred_proba = model.predict_proba(x_test)
  cross_validation_tmp = cross_val_score(model, X, Y, cv=6)
  cross_validation=[]
  for value in cross_validation_tmp:
    new = round(value,3)
    cross_validation.append(new)

  #Vorhersagen für Bewertung erzeugen
  y_train_pred = model.predict(x_train)
  y_test_pred = model.predict(x_test)

  print('Bewertungsmetriken')
  print('#'*20)
  print(f"Vorhersage-Genauigkeit auf Basis der Trainingsdaten: {round(accuracy_score(y_train, y_train_pred),2)}")
  print(f"Vorhersage-Genauigkeit auf Basis der Testdaten: {round(accuracy_score(y_test, y_test_pred),2)}")
  print(f"Vorhersage-Genauigkeit nach Kreuz-Validierung: {round(sum(cross_validation)/len(cross_validation),2)}")
  print('*'*15)
  f1_score_train = round(f1_score(y_train_pred,y_train,zero_division=1.0,average='weighted'),2)
  f1_score_test = round(f1_score(y_test_pred,y_test,zero_division=1.0,average='weighted'),2)
  print(f"F1-Score auf Basis der Trainingsdaten: {f1_score_train}")
  print(f"F1-Score auf Basis der Testdaten: {f1_score_test}")
  print('*'*15)

# 3. Bestes Modell feintunen

In [5]:
# import machine learning libraries
import xgboost as xgb
from sklearn.metrics import accuracy_score


# import packages for hyperparameters tuning
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe


### 3.1 Merkmale reduzieren anhand Erkenntissen aus EDA --> nicht möglich, da Merkmalsgewichtung nicht einschätzbar war

## 3.2 Hyperparameter-Tuning mittels Bayesian Optimization with HYPEROPT für XGBoost Classifier

Bei der Optimierung geht es darum, eine Funktion mit minimalen Kosten zu finden, die eine insgesamt bessere Leistung eines Modells sowohl im Zugsatz als auch im Testsatz bestimmt.
In diesem Prozess trainieren wir das Modell mit verschiedenen möglichen Parameterbereichen, bis wir das am besten geeignete Modell erhalten.
Die Optimierung von Hyperparametern hilft bei der Bestimmung der optimal abgestimmten Parameter und der Rückgabe des am besten geeigneten Modells. Dies ist die beste Vorgehensweise beim Erstellen eines ML- oder DL-Modells.
In diesem Abschnitt besprechen wir eine der genauesten und erfolgreichsten Methoden zur Optimierung von Hyperparametern, nämlich die Bayes'sche Optimierung mit HYPEROPT.

HYPEROPT ist eine leistungsstarke Python-Bibliothek, die einen Hyperparameterraum von Werten durchsucht und die bestmöglichen Werte findet, die das Minimum der Verlustfunktion ergeben. Die Bayes'sche Optimierungstechnik verwendet Hyperopt, um die Hyperparameter des Modells abzustimmen. Hyperopt ist eine Python-Bibliothek, die zur Optimierung von Modellhyperparametern verwendet wird.

## Phasen
1. Domänenraum initialisieren --> Range an Eingabewerten, die getestet werden


In [6]:
space={'max_depth': hp.quniform("max_depth", 3, 18, 1),
        'gamma': hp.uniform ('gamma', 1,9),
        'reg_alpha' : hp.quniform('reg_alpha', 40,180,1),
        'reg_lambda' : hp.uniform('reg_lambda', 0,1),
        'colsample_bytree' : hp.uniform('colsample_bytree', 0.5,1),
        'min_child_weight' : hp.quniform('min_child_weight', 0, 10, 1),
        'n_estimators': 180,
        'seed': 0
    }


In [23]:
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RepeatedStratifiedKFold

XGBC = xgb.XGBClassifier()

# Hyper-Parameter für Raster
max_depth= [3, 18, 1]
gamma=  [1,9],
reg_alpha = [40,180,1],
reg_lambda = [0,1],
colsample_bytree = [0,1],
min_child_weight = [0, 10, 1],
n_estimators= [180]
seed= [0]

# define grid search
grid = dict(
    max_depth =	max_depth,
  gamma = gamma,
  reg_alpha = reg_alpha,
  reg_lambda = reg_lambda,
  colsample_bytree = colsample_bytree,
  min_child_weight = min_child_weight,
  n_estimators = n_estimators,
  seed = seed)

cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)

#Rastersuche
grid_search = GridSearchCV(estimator=XGBC, param_grid=grid, n_jobs=-1, cv=cv, scoring='accuracy',error_score=0)
grid_result = grid_search.fit(x_train,y_train)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

#Bewertung der ermittelten optimalen HP
print('Model-Bewertung mit Hyper-Parametern ermittelt mit GridSearchCV')
print(f'Vorhersage-Genauigkeit auf Basis der Trainingsdaten: {grid_result.score(x_train2,y_train2):.3f}')
print(f'Vorhersage-Genauigkeit auf Basis der Testdaten: {grid_result.score(x_test2,y_test2):.3f}')

ValueError: ignored

Die verfügbaren Hyperopt-Optimierungsalgorithmen sind:

hp.choice(label, options) – Gibt eine der Optionen zurück, die eine Liste oder ein Tupel sein sollte.

hp.randint(label, Upper) – Gibt eine zufällige Ganzzahl im Bereich [0, Upper) zurück.

hp.uniform(label, low, high) – Gibt einen Wert zurück, der gleichmäßig zwischen niedrig und hoch liegt.

hp.quniform(label, low, high, q) – Gibt einen Wert Round(uniform(low, high) / q) * q zurück, d. h. es rundet die Dezimalwerte und gibt eine ganze Zahl zurück.

hp.normal(label, mean, std) – Gibt einen realen Wert zurück, der normalverteilt mit Mittelwert und Standardabweichung Sigma ist.


2. Zielfunktion definieren: Funktion, die  realen Wert zurückgibt, den es zu minimieren gilt --> Validierungsfehler in Bezug auf die Hyperparameter minimieren. Wenn der wahre Wert die Genauigkeit ist, dann wollen wir ihn maximieren. Dann sollte die Funktion das Negativ dieser Metrik zurückgeben.

In [14]:
def objective(space):
    clf= xgb.XGBClassifier(
                    n_estimators =space['n_estimators'], max_depth = int(space['max_depth']), gamma = space['gamma'],
                    reg_alpha = int(space['reg_alpha']),min_child_weight=int(space['min_child_weight']),
                    colsample_bytree=int(space['colsample_bytree']))

    evaluation = [( x_train, y_train), ( x_test, y_test)]

    clf.fit(x_train, y_train,
            eval_set=evaluation, eval_metric="auc",
            early_stopping_rounds=10,verbose=False)

    pred = clf.predict(x_test)
    accuracy = accuracy_score(y_test, pred>0.5)
    print ("SCORE:", accuracy)
    return {'loss': -accuracy, 'status': STATUS_OK }

3. Optimierungsalgorithmus
Dies ist die Methode, mit der die Ersatzzielfunktion erstellt und die nächsten auszuwertenden Werte ausgewählt werden.

In [15]:
trials = Trials()

best_hyperparams = fmin(fn = objective,
                        space = space,
                        algo = tpe.suggest,
                        max_evals = 100,
                        trials = trials)

  0%|          | 0/100 [00:00<?, ?trial/s, best loss=?]





SCORE:
0.7939892878397143
  1%|          | 1/100 [00:01<01:58,  1.20s/trial, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
  2%|▏         | 2/100 [00:02<02:00,  1.23s/trial, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
  3%|▎         | 3/100 [00:03<01:50,  1.14s/trial, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
  4%|▍         | 4/100 [00:03<01:22,  1.16trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
  5%|▌         | 5/100 [00:04<01:06,  1.44trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
  6%|▌         | 6/100 [00:04<00:50,  1.84trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
  7%|▋         | 7/100 [00:05<00:47,  1.97trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
  8%|▊         | 8/100 [00:05<00:42,  2.17trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
  9%|▉         | 9/100 [00:05<00:36,  2.46trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 10%|█         | 10/100 [00:05<00:34,  2.59trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 11%|█         | 11/100 [00:07<00:52,  1.70trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 12%|█▏        | 12/100 [00:08<01:16,  1.15trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 13%|█▎        | 13/100 [00:08<01:01,  1.41trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 14%|█▍        | 14/100 [00:09<00:53,  1.61trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 15%|█▌        | 15/100 [00:09<00:45,  1.88trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 16%|█▌        | 16/100 [00:09<00:40,  2.08trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 17%|█▋        | 17/100 [00:10<00:34,  2.43trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 18%|█▊        | 18/100 [00:10<00:31,  2.60trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 19%|█▉        | 19/100 [00:11<00:32,  2.47trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 20%|██        | 20/100 [00:11<00:32,  2.48trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 21%|██        | 21/100 [00:11<00:31,  2.50trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 22%|██▏       | 22/100 [00:12<00:30,  2.57trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 23%|██▎       | 23/100 [00:12<00:30,  2.56trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 24%|██▍       | 24/100 [00:12<00:29,  2.53trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 25%|██▌       | 25/100 [00:13<00:28,  2.61trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 26%|██▌       | 26/100 [00:13<00:28,  2.56trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 27%|██▋       | 27/100 [00:14<00:27,  2.66trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 28%|██▊       | 28/100 [00:14<00:26,  2.71trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 29%|██▉       | 29/100 [00:14<00:27,  2.62trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 30%|███       | 30/100 [00:15<00:25,  2.70trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 31%|███       | 31/100 [00:15<00:25,  2.75trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 32%|███▏      | 32/100 [00:15<00:25,  2.69trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 33%|███▎      | 33/100 [00:16<00:24,  2.71trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 34%|███▍      | 34/100 [00:16<00:22,  2.93trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 35%|███▌      | 35/100 [00:16<00:23,  2.78trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 36%|███▌      | 36/100 [00:17<00:23,  2.72trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 37%|███▋      | 37/100 [00:17<00:23,  2.63trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 38%|███▊      | 38/100 [00:18<00:24,  2.58trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 39%|███▉      | 39/100 [00:18<00:24,  2.46trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 40%|████      | 40/100 [00:20<00:54,  1.09trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 41%|████      | 41/100 [00:21<00:44,  1.32trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 42%|████▏     | 42/100 [00:21<00:37,  1.54trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 43%|████▎     | 43/100 [00:21<00:31,  1.84trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 44%|████▍     | 44/100 [00:22<00:29,  1.87trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 45%|████▌     | 45/100 [00:22<00:26,  2.05trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
SCORE:
0.7939892878397143
 47%|████▋     | 47/100 [00:23<00:19,  2.65trial/s, best loss: -0.7939892878397143]







SCORE:
0.7939892878397143
 48%|████▊     | 48/100 [00:23<00:18,  2.80trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 49%|████▉     | 49/100 [00:23<00:18,  2.72trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 50%|█████     | 50/100 [00:24<00:18,  2.63trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 51%|█████     | 51/100 [00:24<00:19,  2.54trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 52%|█████▏    | 52/100 [00:25<00:18,  2.57trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 53%|█████▎    | 53/100 [00:25<00:18,  2.55trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 54%|█████▍    | 54/100 [00:26<00:18,  2.45trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 55%|█████▌    | 55/100 [00:26<00:17,  2.54trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 56%|█████▌    | 56/100 [00:26<00:16,  2.60trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 57%|█████▋    | 57/100 [00:27<00:17,  2.47trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 58%|█████▊    | 58/100 [00:27<00:21,  1.93trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 59%|█████▉    | 59/100 [00:28<00:21,  1.95trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 60%|██████    | 60/100 [00:28<00:18,  2.20trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 61%|██████    | 61/100 [00:29<00:16,  2.31trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
SCORE:
0.7939892878397143
 63%|██████▎   | 63/100 [00:29<00:13,  2.72trial/s, best loss: -0.7939892878397143]







SCORE:
0.7939892878397143
 64%|██████▍   | 64/100 [00:30<00:12,  2.85trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 65%|██████▌   | 65/100 [00:30<00:13,  2.63trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 66%|██████▌   | 66/100 [00:32<00:30,  1.13trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 67%|██████▋   | 67/100 [00:33<00:24,  1.36trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 68%|██████▊   | 68/100 [00:33<00:20,  1.58trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 69%|██████▉   | 69/100 [00:33<00:17,  1.77trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 70%|███████   | 70/100 [00:34<00:15,  1.91trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 71%|███████   | 71/100 [00:34<00:14,  2.05trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 72%|███████▏  | 72/100 [00:34<00:11,  2.34trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 73%|███████▎  | 73/100 [00:35<00:10,  2.50trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 74%|███████▍  | 74/100 [00:35<00:10,  2.60trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 75%|███████▌  | 75/100 [00:36<00:09,  2.59trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 76%|███████▌  | 76/100 [00:36<00:09,  2.51trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 77%|███████▋  | 77/100 [00:36<00:09,  2.47trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 78%|███████▊  | 78/100 [00:37<00:08,  2.54trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 79%|███████▉  | 79/100 [00:37<00:08,  2.52trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 80%|████████  | 80/100 [00:38<00:08,  2.47trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 81%|████████  | 81/100 [00:38<00:08,  2.35trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 82%|████████▏ | 82/100 [00:38<00:06,  2.65trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 83%|████████▎ | 83/100 [00:39<00:06,  2.49trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 84%|████████▍ | 84/100 [00:39<00:06,  2.63trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 85%|████████▌ | 85/100 [00:39<00:05,  2.77trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 86%|████████▌ | 86/100 [00:40<00:04,  2.91trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 87%|████████▋ | 87/100 [00:40<00:04,  2.82trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
SCORE:
0.7939892878397143
 89%|████████▉ | 89/100 [00:41<00:03,  3.38trial/s, best loss: -0.7939892878397143]







SCORE:
0.7939892878397143
 90%|█████████ | 90/100 [00:41<00:03,  3.24trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 91%|█████████ | 91/100 [00:41<00:02,  3.28trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 92%|█████████▏| 92/100 [00:42<00:02,  2.86trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 93%|█████████▎| 93/100 [00:42<00:02,  2.72trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 94%|█████████▍| 94/100 [00:44<00:05,  1.13trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 95%|█████████▌| 95/100 [00:45<00:03,  1.36trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 96%|█████████▌| 96/100 [00:45<00:02,  1.58trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 97%|█████████▋| 97/100 [00:45<00:01,  1.75trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 98%|█████████▊| 98/100 [00:46<00:01,  1.91trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
 99%|█████████▉| 99/100 [00:46<00:00,  2.01trial/s, best loss: -0.7939892878397143]





SCORE:
0.7939892878397143
100%|██████████| 100/100 [00:47<00:00,  2.12trial/s, best loss: -0.7939892878397143]


4. Ergebnisse: Bewertungs- oder Wertepaare, die der Algorithmus zum Erstellen des Modells verwendet

In [13]:
print("The best hyperparameters are : ","\n")
print(best_hyperparams)



The best hyperparameters are :  

{'colsample_bytree': 0.7754253144572327, 'gamma': 2.3428307819009326, 'max_depth': 9.0, 'min_child_weight': 9.0, 'reg_alpha': 87.0, 'reg_lambda': 0.31344499685938565}


In [26]:
XGB_final = xgb.XGBClassifier(colsample_bytree=0.7754253144572327,gamma=2.3428307819009326,max_depth=9,min_child_weight=9.0,reg_alpha=87.0,reg_lambda=0.31344499685938565)
XGB_final.fit(X,Y)


Model_Bewertung(XGB_final,x_train, y_train,x_test, y_test,X,Y)
Visualisierung_AUC(XGB_final,x_test,y_test)

Bewertungsmetriken
####################
Vorhersage-Genauigkeit auf Basis der Trainingsdaten: 0.8
Vorhersage-Genauigkeit auf Basis der Testdaten: 0.79
Vorhersage-Genauigkeit nach Kreuz-Validierung: 0.8
***************
F1-Score auf Basis der Trainingsdaten: 0.89
F1-Score auf Basis der Testdaten: 0.88
***************
