In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.impute import SimpleImputer
from sklearn.model_selection import KFold, StratifiedKFold, cross_val_score, train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn.metrics import confusion_matrix, precision_recall_curve

In [2]:
def plot_distribution(feature, data=None):
    """Plot a 25-bin histogram of a single column.

    Parameters
    ----------
    feature : str
        Name of the column to plot. It is also concatenated into the title
        and used as the x-axis label.
    data : pandas.DataFrame, optional
        Frame that holds ``feature``. When omitted, the notebook-level
        variable ``dataframe`` is used, so existing one-argument calls keep
        working.

    Notes
    -----
    ``sns.histplot`` is called with its default statistic, which draws the
    number of observations per bin, so the y-axis is labelled 'Count'
    rather than 'Density'.
    """
    # Only fall back to the notebook global when no frame was passed in.
    frame = dataframe if data is None else data

    # Explicit figure/axes so the plot does not depend on pyplot's
    # "current axes" state.
    _, ax = plt.subplots(dpi=100)
    sns.histplot(frame[feature], color='tab:green', bins=25, ax=ax)
    ax.set_title('Histogram of ' + feature, fontsize=13, fontweight='bold')
    ax.set_xlabel(feature, size=12, fontweight='bold')
    ax.set_ylabel('Count', size=12, fontweight='bold')

In [3]:
def replace_missing(df, columns, strategy):
    """Impute NaN values in selected columns and return a new DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is NOT modified; the previous implementation dropped
        ``columns`` from the caller's frame in place.
    columns : str or list-like of str
        Column(s) to impute. A single column name is wrapped in a list so
        the imputer always receives a two-dimensional selection.
    strategy : str
        Passed straight through to ``SimpleImputer(strategy=...)``.

    Returns
    -------
    pandas.DataFrame
        The untouched columns of ``df`` followed by the imputed columns
        (the imputed columns are appended at the end, in the order given),
        sharing the index of ``df``.
    """
    columns = [columns] if isinstance(columns, str) else list(columns)

    imputer = SimpleImputer(missing_values=np.nan, strategy=strategy)
    selected = df[columns]
    imputed = pd.DataFrame(
        data=imputer.fit_transform(selected),
        columns=selected.columns,
        index=selected.index,
    )

    # DataFrame.drop without inplace=True returns a new frame, leaving the
    # caller's object intact.
    untouched = df.drop(columns=columns)
    return pd.concat([untouched, imputed], axis=1)

In [4]:
def Set_X_y(data, target):
    """Split a frame into a feature part and a target part.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing both the features and the target.
    target : label or list of labels
        Column label(s) to separate out.

    Returns
    -------
    tuple
        ``(X, y)`` where ``X`` is ``data`` with ``target`` dropped along the
        column axis and ``y`` is the ``.loc[:, target]`` selection.
    """
    features = data.drop(target, axis=1)
    response = data.loc[:, target]
    return features, response

In [5]:
def plot_heat_map(data):
    """Draw an annotated heat map of the pairwise column correlations.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose numeric and boolean columns are correlated with
        ``DataFrame.corr`` (default method). Columns of any other dtype are
        left out before the correlation is computed, because a bare
        ``data.corr()`` raises on such columns in pandas >= 2.0, where
        ``numeric_only`` defaults to False.

    Notes
    -----
    Correlations are rounded to two decimals for the cell annotations. The
    colour bar is drawn horizontally in its own, thin axes below the map.
    """
    # Explicit dtype selection works on every pandas version, unlike the
    # ``numeric_only`` keyword, which older releases of ``corr`` do not have.
    numeric = data.select_dtypes(include=["number", "bool"])
    corr = numeric.corr().round(2)

    # Two stacked axes: the large one for the map, the small one for the bar.
    grid_kws = {"height_ratios": (0.95, .05), "hspace": .4}
    fig, (ax, cbar_ax) = plt.subplots(2, gridspec_kw=grid_kws, figsize=(20, 20), dpi=150)
    sns.heatmap(corr, ax=ax, cbar_ax=cbar_ax, cmap="viridis",
                cbar_kws={"orientation": "horizontal"},
                linewidths=0.05, annot=True, annot_kws={"size": 25})
    ax.set_xticklabels(ax.get_xticklabels(), rotation=50, fontweight='bold', fontsize=25)
    ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontweight='bold', fontsize=25)
    cbar_ax.tick_params(labelsize=25)

In [6]:
def plot_confusion_matrix(actual, prediction, labels=None):
    """Plot the confusion matrix of ``prediction`` against ``actual``.

    Parameters
    ----------
    actual : array-like
        Ground-truth class labels.
    prediction : array-like
        Predicted class labels.
    labels : list-like, optional
        Class labels, in the order they should appear on both axes. When
        omitted, the sorted unique values found in ``actual`` and
        ``prediction`` are used, so the tick labels always match the matrix
        instead of being fixed to ``[0, 1]``. For 0/1 data this yields the
        same ``[0, 1]`` ticks as before.

    Notes
    -----
    Rows are the actual classes and columns the predicted classes, as
    labelled on the axes. The figure is shown with ``plt.show()``.
    """
    if labels is None:
        # Sorted unique labels over both inputs, converted to plain Python
        # values so the tick text is clean.
        labels = np.unique(
            np.concatenate([np.ravel(actual), np.ravel(prediction)])
        ).tolist()
    else:
        labels = list(labels)

    # Passing the labels explicitly pins the row/column order of the matrix
    # to the order of the tick labels.
    cnf_matrix = confusion_matrix(actual, prediction, labels=labels)

    fig, ax = plt.subplots(figsize=(5, 3), dpi=100)
    sns.heatmap(cnf_matrix, annot=True, xticklabels=labels, yticklabels=labels,
                cmap="viridis_r", annot_kws={"size": 20}, fmt='d', ax=ax)
    ax.set_xlabel('Predicted', size=13, fontweight='bold')
    ax.set_ylabel('Actual', size=13, fontweight='bold')
    ax.set_xticklabels(ax.get_xticklabels(), rotation=0, fontweight='bold', fontsize=13)
    ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontweight='bold', fontsize=13)
    plt.show()

In [7]:
def classification_metrics(actual, prediction):
    """Summarise classification quality in a one-row table.

    Parameters
    ----------
    actual : array-like
        Ground-truth labels.
    prediction : array-like
        Predicted labels.

    Returns
    -------
    pandas.DataFrame
        A single row (index ``0``) with the columns "Accuracy", "Recall",
        "Precision" and "F-score", each rounded to four decimals. The scores
        come from the corresponding ``sklearn.metrics`` functions called
        with their default arguments.
    """
    # (column name, scoring function) pairs, in output-column order.
    scorers = (
        ("Accuracy", metrics.accuracy_score),
        ("Recall", metrics.recall_score),
        ("Precision", metrics.precision_score),
        ("F-score", metrics.f1_score),
    )
    summary = {
        column: round(score(actual, prediction), 4)
        for column, score in scorers
    }
    return pd.DataFrame(summary, index=[0])