# Scorecardpy

Author: Shichen Xie

This package is a Python version of the R package scorecard. Its goal is to make the development of traditional credit risk scorecard models easier and more efficient by providing functions for some common tasks.

data partition (`split_df`)

variable selection (`iv`, `var_filter`)

weight of evidence (woe) binning (`woebin`, `woebin_plot`, `woebin_adj`, `woebin_ply`)

scorecard scaling (`scorecard`, `scorecard_ply`)

performance evaluation (`perf_eva`, `perf_psi`)

https://github.com/ShichenXie/scorecardpy/blob/master/scorecardpy/scorecard.py

https://pypi.org/project/scorecardpy/

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns 
import matplotlib.pyplot as plt

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _, entries in os.walk('/kaggle/input'):
    for entry in entries:
        print(os.path.join(folder, entry))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Number of rows to load from the CSV; use None to read the whole file.
nRowsRead = 1000
dat = pd.read_csv(
    '../input/cusersmarildownloadsgermancsv/german.csv',
    delimiter=';',
    encoding="ISO-8859-2",
    nrows=nRowsRead,
)
# Tag the frame with its source file name (a plain attribute on the object).
dat.dataframeName = 'german.csv'
nRow = dat.shape[0]
nCol = dat.shape[1]
print('There are {} rows and {} columns'.format(nRow, nCol))
# Last expression of the cell: preview the first rows.
dat.head()

In [None]:
!pip install scorecardpy

In [None]:
import scorecardpy as sc

In [None]:
# Variable-selection step: var_filter is called on the raw frame with
# "Creditability" as the target column name; the result is kept as dt_s.
# NOTE(review): the filtering criteria are the library defaults and are not
# visible here — confirm they are the intended ones.
dt_s = sc.var_filter(dat, y="Creditability")

In [None]:
# Show the first rows of the filtered data as the cell output.
dt_s.head()

In [None]:
# Boolean mask marking the target column, computed once and reused.
is_target = dt_s.columns == 'Creditability'
# X holds every column except the target; y holds only the target column
# (still two-dimensional, because it is selected with a mask).
X = dt_s.loc[:, ~is_target]
y = dt_s.loc[:, is_target]

In [None]:
# Data partition: the values of the mapping returned by split_df are
# unpacked, in order, into the train and test sets.
# NOTE(review): no ratio or seed is passed, so the library defaults apply — confirm.
train, test = sc.split_df(dt_s, 'Creditability').values()

In [None]:
# Report the shape of each partition, train first and then test.
for partition in (train, test):
    print(partition.shape)

In [None]:
# Weight-of-evidence binning of the variables in dt_s against the target.
# NOTE(review): the bins are fitted on the full filtered data (dt_s), not on
# `train` alone, so the test rows influence the binning — confirm this is intended.
bins = sc.woebin(dt_s, y="Creditability")

In [None]:
# Visualise the bins produced by the binning step.
sc.woebin_plot(bins)

In [None]:
# Apply the fitted bins to each partition separately.
# Presumably this replaces raw values with their WoE values — confirm against
# the scorecardpy documentation.
train_woe = sc.woebin_ply(train, bins)
test_woe = sc.woebin_ply(test, bins)

In [None]:
# Separate the target column from the feature columns in each
# WoE-transformed partition.
y_train = train_woe.loc[:, 'Creditability']
X_train = train_woe.loc[:, train_woe.columns != 'Creditability']
y_test = test_woe.loc[:, 'Creditability']
# Select the test features by the training feature labels rather than by a
# boolean mask built from train_woe's columns: the mask is only correct when
# both frames share the exact same column order, and would otherwise silently
# feed misaligned features to the model. Label-based selection guarantees the
# same columns in the same order as X_train and raises KeyError if one is
# missing from the test frame.
X_test = test_woe.loc[:, X_train.columns]

In [None]:
from sklearn.linear_model import LogisticRegression
# L1-penalised logistic regression fitted on the training features;
# n_jobs=-1 is passed to request parallel execution.
# NOTE(review): no random_state is passed; with solver='saga' the fitted
# coefficients may vary between runs — confirm whether reproducibility matters.
lr = LogisticRegression(penalty='l1', C=0.9, solver='saga', n_jobs=-1)
lr.fit(X_train, y_train)
# Print the fitted coefficients and the intercept for inspection.
print(lr.coef_)
print(lr.intercept_)

In [None]:
# Keep column 1 of predict_proba for each partition, i.e. the probability of
# the second entry in lr.classes_ (presumably the positive class — confirm).
train_pred = lr.predict_proba(X_train)[:,1]
test_pred = lr.predict_proba(X_test)[:,1]

In [None]:
# Performance evaluation of the predicted probabilities against the true
# labels, once per partition; `title` labels each result.
train_perf = sc.perf_eva(y_train, train_pred, title = "train")
test_perf = sc.perf_eva(y_test, test_pred, title = "test")

In [None]:
# Build the scorecard from the bins, the fitted model and the names of the
# training feature columns.
card = sc.scorecard(bins, lr, X_train.columns)
# Scoring: apply the card to the train partition, the test partition and the
# raw frame `dat`. only_total_score=False is passed, presumably so that
# per-variable scores are kept alongside the total — confirm.
train_score = sc.scorecard_ply(train, card, only_total_score=False, print_step=0, replace_blank_na=True, var_kp = None)
test_score = sc.scorecard_ply(test, card, only_total_score=False, print_step=0, replace_blank_na=True, var_kp = None)
score = sc.scorecard_ply(dat, card, only_total_score=False, print_step=0, replace_blank_na=True, var_kp = None)

In [None]:
# Print a heading (the text reads "Scorecard:"), then show the card as the
# cell output. print() already ends the line with a newline by default.
print('评分卡:')
card

In [None]:
# Show the scores computed for the whole dataset.
score

In [None]:
# perf_psi is given the scores and the true labels of both partitions; the
# two dictionaries use the same partition names as keys.
psi_scores = {'train': train_score, 'test': test_score}
psi_labels = {'train': y_train, 'test': y_test}
sc.perf_psi(score=psi_scores, label=psi_labels)

In [None]:
#Code by Olga Belitskaya https://www.kaggle.com/olgabelitskaya/sequential-data/comments
from IPython.display import display,HTML
# Colours, Google Fonts family names and pixel sizes available to the banner
# helper; the first of each pair is used as the helper's default.
c1, c2 = '#eb3434', '#eb3446'
f1, f2 = 'Akronim', 'Smokum'
fs1, fs2 = 30, 15


def dhtml(string, fontcolor=c1, font=f1, fontsize=fs1):
    """Display `string` as a styled <h1> heading in the notebook output.

    The markup imports `font` from fonts.googleapis.com with the
    ``3d-float`` effect and applies it, together with `fontcolor` and
    `fontsize` (in pixels), to the heading.

    string: value inserted into the heading through ``%s`` formatting.
    fontcolor: CSS colour used for the heading text.
    font: font family name, used in both the @import URL and font-family.
    fontsize: font size in pixels; converted with str().
    """
    fragments = [
        "<style>\n    @import 'https://fonts.googleapis.com/css?family=",
        font,
        "&effect=3d-float';</style>\n"
        "    <h1 class='font-effect-3d-float' style='font-family:",
        font,
        "; color:",
        fontcolor,
        "; font-size:",
        str(fontsize),
        # Only this last fragment carries the %s placeholder.
        "px;'>%s</h1>" % string,
    ]
    display(HTML("".join(fragments)))
    
    
# Render the sign-off banner with the default colour, font and size.
dhtml('Be patient. Marília Prata, @mpwolke was Here' )