# Plots

In [None]:
# Dummy placeholders: every name is bound to None so that the cells below
# reference defined variables (presumably to be replaced by real data and a
# fitted model before the snippets are actually used).
df = y = x = x_sm = None
x_train = x_test = None
y_train = y_test = None
model = None

#### Boxplot with a selection of predictors

In [None]:
# Boxplot of 'balance', one box per value of the binary 'student' column
import matplotlib.pyplot as plt

# Split the rows by the grouping column (0 -> 'No', 1 -> 'Yes')
df_no = df.loc[df['student'] == 0, :]
df_yes = df.loc[df['student'] == 1, :]

# Create figure and a single subplot
fig = plt.figure(figsize=(6, 5))
ax1 = fig.add_subplot(1, 1, 1)
ax1.boxplot([df_no['balance'], df_yes['balance']])
# The boxes are grouped by 'student', so the axis label has to say so
ax1.set_xlabel('Student')
ax1.set_ylabel('Balance')
ax1.set_xticklabels(['No', 'Yes'])

plt.show()

#### Heatmap for correlation

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Pairwise correlations of all columns except 'origin' and 'mpg01'
corr = df.drop(columns=['origin', 'mpg01']).corr()

# One figure with a single axes; the heatmap is drawn onto that axes
fig, ax1 = plt.subplots(figsize=(10, 8))
sns.heatmap(corr, ax=ax1)
plt.show()

#### Pairplot

In [None]:
import seaborn as sns

# Pairwise plots of every column except 'origin' and 'mpg01'
fig = sns.pairplot(df.drop(columns=['origin', 'mpg01']))
plt.show()

#### Parallel Coordinates Plot

In [None]:
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import matplotlib.pyplot as plt

# Column holding the class label; it colours the lines and is not rescaled
class_col = 'mpg01'
feature_cols = [col for col in df.columns if col != class_col]

scaler = MinMaxScaler()  # scale to [0, 1]

# Scale into a NEW frame: overwriting `df` in place would silently change the
# data seen by every other cell and make this cell non-idempotent.
df_scaled = pd.DataFrame(
    scaler.fit_transform(df[feature_cols]),
    columns=feature_cols,
    index=df.index,
)
df_scaled[class_col] = df[class_col]

# Plot parallel coordinates:
fig = plt.figure(figsize=(14, 6))
ax = fig.add_subplot(1, 1, 1)
pd.plotting.parallel_coordinates(df_scaled, class_col, ax=ax, color=('k', 'r'))

plt.show()

#### ROC Curve

In [None]:
import numpy as np
import pandas as pd
import statsmodels.api as sm

# Design matrices passed to `model.predict` below
x_train_sm, x_test_sm = sm.add_constant(x_train), sm.add_constant(x_test)

# Number of classification thresholds to evaluate
n = 100

# Evenly spaced thresholds covering [0, 1]
alpha = np.linspace(0, 1, num=n)

def class_a(alpha, probability):
    """Turn probabilities into 0/1 class labels using a threshold.

    Parameters
    ----------
    alpha : float
        Decision threshold.
    probability : array-like
        One-dimensional probabilities (pandas Series, NumPy array or list).

    Returns
    -------
    numpy.ndarray
        Integer array of the same length, 1 where ``probability > alpha``
        (strictly greater) and 0 otherwise.
    """
    # np.asarray makes the comparison positional and independent of the input
    # container, so the function no longer requires a Series with `.iloc`.
    return (np.asarray(probability) > alpha).astype(int)

# Helper returning both the false positive rate and the recall:
def ROC_data(x, y, model, alpha):
    """Return the false positive rate and the recall for one threshold.

    The output of ``model.predict(x)`` is converted to 0/1 labels with
    ``class_a(alpha, ...)`` and compared element by element with ``y``.

    Returns
    -------
    tuple
        ``(fpr, Recall)`` with ``fpr = fp / (fp + tn)`` and
        ``Recall = tp / (tp + fn)``.
    """
    predicted = class_a(alpha, model.predict(x))
    actual = np.asarray(y)

    predicted_pos = predicted == 1
    predicted_neg = predicted == 0

    # Confusion-matrix counts, expressed as boolean masks
    tp = (predicted_pos & (actual == 1)).sum()
    tn = (predicted_neg & (actual == 0)).sum()
    fp = (predicted_pos & (actual != 1)).sum()
    fn = (predicted_neg & (actual != 0)).sum()

    Recall = tp / (tp + fn)
    fpr = fp / (fp + tn)

    return fpr, Recall

# One (fpr, recall) pair per threshold, for the training and the test set
fpr_train = np.zeros(n)
Recall_train = np.zeros(n)
fpr_test = np.zeros(n)
Recall_test = np.zeros(n)

for i in range(n):
    threshold = alpha[i]
    train_rates = ROC_data(x_train_sm, y_train, model, threshold)
    test_rates = ROC_data(x_test_sm, y_test, model, threshold)
    fpr_train[i], Recall_train[i] = train_rates
    fpr_test[i], Recall_test[i] = test_rates

""" Plot ROC curve """
fig = plt.figure(figsize=(7, 6))
ax = fig.add_subplot(1, 1, 1)

# Draw everything through the Axes object instead of mixing Axes calls with
# the implicit pyplot state machine.
ax.plot(fpr_train, Recall_train, label='train')
ax.plot(fpr_test, Recall_test, label='test')
# Diagonal reference line from (0, 0) to (1, 1)
ax.plot([0, 1], [0, 1], ':', label='random guess')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('ROC Curve')
ax.legend()
plt.show()

def _trapezoid_area(fpr, recall):
    """Area under the curve through the points (fpr[i], recall[i]).

    Uses the trapezoidal rule. Each strip has width ``fpr[i] - fpr[i + 1]``,
    i.e. the points are expected in order of non-increasing ``fpr``, as
    produced by evaluating increasing thresholds.
    """
    widths = fpr[:-1] - fpr[1:]
    # Averaging both end points of a strip avoids the systematic
    # overestimate of a rectangle that only uses the higher-recall end.
    mean_heights = (recall[:-1] + recall[1:]) / 2
    return np.sum(widths * mean_heights)


AUC_train = _trapezoid_area(fpr_train, Recall_train)
AUC_test = _trapezoid_area(fpr_test, Recall_test)

print("AUC train:\n", np.round(AUC_train, 4),
"\nAUC test:\n", np.round(AUC_test, 4))

#### Plot Hyperplane for Support Vector Classifier

In [None]:
# Draw the separating line of the classifier `clf` on top of the data points
coefficients = clf.coef_[0]
beta1, beta2 = coefficients[0], coefficients[1]
beta0 = clf.intercept_[0]

# beta0 + beta1 * x1 + beta2 * x2 = 0, solved for x2
slope = -beta1 / beta2
offset = -beta0 / beta2

# Two points are enough to draw a straight line
x1_hyperplane = np.linspace(1, 4, 2)
x2_hyperplane = slope * x1_hyperplane + offset

fig, ax = plt.subplots(figsize=(8, 5))

ax.plot(x1_hyperplane, x2_hyperplane, '-k')
ax.scatter(x1, x2, c=y)

ax.set_xlabel('x1')
ax.set_ylabel('x2')
ax.set_title("Maximal margin Hyperplane")

plt.show()