<a href="https://colab.research.google.com/github/sebastianiu/Prognosemodell_Online_Kreditzahlungsverkehr/blob/main/models/prod/prognose_model_prod.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1. Bilbiotheken und optmimierte Hyperparametereinstellungen laden

In [149]:
# Bibliotheken laden
import xgboost as xgb
from sklearn.metrics import accuracy_score,auc,roc_curve
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

best_hyperparams = {'learning_rate': 0.03486207277919493, 'max_depth': 5, 'n_estimators': 120, 'subsample': 0.9998090410526672}

# 2. Daten Laden

In [150]:
url = "https://github.com/sebastianiu/Prognosemodell_Online_Kreditzahlungsverkehr/raw/main/data/raw/PSP_Jan_Feb_2019.xlsx"
Datensatz = pd.read_excel(url)
Datensatz = Datensatz.rename(columns = {"Unnamed: 0":"id"})

# 3. Datenaufbereitung

In [179]:
# Bilbiotheken laden
from sklearn.preprocessing import LabelEncoder
from sklearn import datasets
from sklearn.datasets import make_classification
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from scipy.stats import randint
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from sklearn.model_selection import cross_val_score

# Datenaufbereitung
label_encoder_PSP = LabelEncoder()
label_encoder_country = LabelEncoder()
label_encoder_card = LabelEncoder()
label_encoder_weekday = LabelEncoder()

ML_Daten = Datensatz.filter(['amount','success','PSP','country','card','tmsp','3D_secured'], axis=1)

ML_Daten['country'] = label_encoder_country.fit_transform(ML_Daten['country'])
ML_Daten['PSP'] = label_encoder_PSP.fit_transform(ML_Daten['PSP'])
ML_Daten['card'] = label_encoder_card.fit_transform(ML_Daten['card'])

# Datumswerte in Tag/ Wochentag/ Stunde aufteilen
ML_Daten['weekday'] = ML_Daten['tmsp'].dt.day_name()
ML_Daten['weekday'] = label_encoder_weekday.fit_transform(ML_Daten['weekday'])
ML_Daten['day'] = ML_Daten['tmsp'].dt.strftime('%d').astype(int)
ML_Daten['hour'] = ML_Daten['tmsp'].dt.strftime('%H').astype(int)

# Separation in X Merkmale and Zielvariable Y
Y = ML_Daten['success']
X = ML_Daten.filter(['amount','PSP','3D_secured','card','country','weekday','day','hour'], axis=1)

# Aufteilung in Trainings- und Validierungsdatensatz
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)

# 4. Erstelle und trainiere Modell mit optimierten Hyperparametern

In [302]:
XGB_final = xgb.XGBClassifier(colsample_bytree = best_hyperparams.get('colsample_bytree') ,gamma = best_hyperparams.get('gamma'), max_depth = round(best_hyperparams.get('max_depth')),
                              min_child_weight = best_hyperparams.get('min_child_weight'),reg_alpha = best_hyperparams.get('reg_alpha'), reg_lambda = best_hyperparams.get('reg_lambda'))
XGB_final.fit(X,Y)

y_test_pred = XGB_final.predict(x_test)
y_train_pred = XGB_final.predict(x_train)
probs = XGB_final.predict_proba(x_test)
preds = probs[:,1]
fpr, tpr, threshold = roc_curve(y_test, preds)
auc_value = round(auc(fpr,tpr),2)

print('Bewertungsmetriken')
print('#'*20)
print(f"Vorhersage-Genauigkeit auf Basis der Trainingsdaten: {round(accuracy_score(y_train, y_train_pred),3)}")
print(f"Vorhersage-Genauigkeit auf Basis der Testdaten: {round(accuracy_score(y_test, y_test_pred),3)}")
print(f"AUC-Wert: {auc_value}")

Bewertungsmetriken
####################
Vorhersage-Genauigkeit auf Basis der Trainingsdaten: 0.809
Vorhersage-Genauigkeit auf Basis der Testdaten: 0.816
AUC-Wert: 0.77


In [305]:
Prognose_vergangen = pd.DataFrame({'y_true':y_test,'y_pred':y_test_pred})
Prognose_vergangen.to_csv('Prognose_vergangen.csv')

# 5. Testprognosedaten für Visualisierung in Dashboard erzeugen

In [246]:
# Neue Transaktionen per Zufall aus Bestand erzeugen
Transaktionen_sample = ML_Daten.sample(frac=0.01)

In [286]:
# Pro Datenzeile Duplikat für jedes PSP erzeugen, damit Prognosemodell Vorhersage für jeden PSP pro Transaktion erzeugt
daten1 =   Transaktionen_sample[['amount','PSP','3D_secured','card','country','weekday','day','hour']].copy(deep=True)
daten1.PSP = 'Moneycard'
daten2 =   Transaktionen_sample[['amount','PSP','3D_secured','card','country','weekday','day','hour']].copy(deep=True)
daten2.PSP = 'Goldcard'
daten3 =   Transaktionen_sample[['amount','PSP','3D_secured','card','country','weekday','day','hour']].copy(deep=True)
daten3.PSP = 'UK_Card'
daten4 =   Transaktionen_sample[['amount','PSP','3D_secured','card','country','weekday','day','hour']].copy(deep=True)
daten4.PSP = 'Simplecard'

# Alle Datenzeilen  konkantinieren = untereinander anfügen
Transaktionen_neu = pd.concat([daten1,daten2,daten3,daten4])

# PSP-Daten für Weitereverarbeitung wieder Enkodieren
Transaktionen_neu['PSP'] = label_encoder_PSP.fit_transform(Transaktionen_neu['PSP'])

Transaktionen_neu = Transaktionen_neu.reset_index()
del Transaktionen_neu['index']

In [297]:
print(Transaktionen_neu["country"].unique())

[0 1 2]


In [287]:
Transaktionen_neu.head()

Unnamed: 0,amount,PSP,3D_secured,card,country,weekday,day,hour
0,239,1,0,1,0,1,21,6
1,43,1,0,0,1,1,18,11
2,262,1,1,1,1,5,29,20
3,168,1,1,2,0,6,16,22
4,114,1,0,1,0,0,1,0


In [274]:
# für neue Transaktionen erzeugen
y_pred = XGB_final.predict(Transaktionen_neu)
y_pred_probs = XGB_final.predict_proba(Transaktionen_neu)
y_pred_probs = y_pred_probs[:,0]

Transaktionen_neu_pred = pd.DataFrame({'y_pred':y_pred,'y_pred_prob':y_pred_probs})

In [292]:
Transaktionen_neu = pd.concat([Transaktionen_neu,Transaktionen_neu_pred],axis=1)

In [298]:
Transaktionen_neu.head()

Unnamed: 0,amount,PSP,3D_secured,card,country,weekday,day,hour,y_pred,y_pred_prob
0,239,1,0,1,0,1,21,6,0,0.732072
1,43,1,0,0,1,1,18,11,0,0.62515
2,262,1,1,1,1,5,29,20,0,0.803845
3,168,1,1,2,0,6,16,22,0,0.68567
4,114,1,0,1,0,0,1,0,0,0.82595


In [299]:
Transaktionen_neu['country'] = label_encoder_country.inverse_transform(Transaktionen_neu['country'])
Transaktionen_neu['PSP'] = label_encoder_PSP.inverse_transform(Transaktionen_neu['PSP'])
Transaktionen_neu['card'] = label_encoder_card.inverse_transform(Transaktionen_neu['card'])
Transaktionen_neu['weekday'] = label_encoder_weekday.inverse_transform(Transaktionen_neu['weekday'])

In [300]:
Transaktionen_neu.head()

Unnamed: 0,amount,PSP,3D_secured,card,country,weekday,day,hour,y_pred,y_pred_prob
0,239,Moneycard,0,Master,Austria,Monday,21,6,0,0.732072
1,43,Moneycard,0,Diners,Germany,Monday,18,11,0,0.62515
2,262,Moneycard,1,Master,Germany,Tuesday,29,20,0,0.803845
3,168,Moneycard,1,Visa,Austria,Wednesday,16,22,0,0.68567
4,114,Moneycard,0,Master,Austria,Friday,1,0,0,0.82595


In [301]:
Transaktionen_neu.to_csv('Transaktionen_neu.csv')