# Introduction
Credit risk is the probability of a financial loss resulting from a borrower's failure to repay a loan. Essentially, credit risk refers to the risk that a lender may not receive the owed principal and interest, which results in an interruption of cash flows and increased costs for collection.


---


The `Bank Credit Scoring` dataset will be used to develop a model that predicts debtor default, where default is defined as a payment that is more than 90 days overdue.

In [None]:
# Mount Google Drive at /content/drive (Colab-specific helper module).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Necessary imports

In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant

import sklearn
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.impute import KNNImputer
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_curve, precision_recall_curve, roc_auc_score, auc

In [None]:
# Read the raw credit-scoring CSV from the mounted Drive folder.
raw_csv_path = "/content/drive/MyDrive/Projects/Bank credit risk assessment/bank_credit_scoring.csv"
df = pd.read_csv(raw_csv_path)

# (rows, columns) of the freshly loaded frame
df.shape

(18420, 18)

Translating Russian data into English

In [None]:
# List the column labels as loaded from the CSV (several are in Russian).
df.columns

Index(['Задолженность', 'Просрочка, дни', 'Первоначльный лимит', 'BIRTHDATE',
       'SEX', 'EDU', 'INCOME', 'TERM', 'Рейтинг кредитной истории', 'LV_AREA',
       'LV_SETTLEMENTNAME', 'INDUSTRYNAME', 'PDN', 'CLIENTID', 'SCORINGMARK',
       'UNDERAGECHILDRENCOUNT', 'VELCOMSCORING', 'FAMILYSTATUS'],
      dtype='object')

In [None]:
# Translate the Russian column headers to English and shorten SCORINGMARK.
# Only columns whose label actually changes are listed: DataFrame.rename leaves
# every label that is not a key of the mapping untouched, so the former
# identity entries ('SEX': 'SEX', ...) were no-ops.
column_translation = {
    'Задолженность': 'Debt',
    'Просрочка, дни': 'Delay, days',
    'Первоначльный лимит': 'Primary limit',  # (sic) spelled exactly as in the CSV header
    'Рейтинг кредитной истории': 'Credit history rating',
    'SCORINGMARK': 'Scoring',
}
df = df.rename(columns=column_translation)

# Summary of dtypes and non-null counts after renaming
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18420 entries, 0 to 18419
Data columns (total 18 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Debt                   18420 non-null  float64
 1   Delay, days            18420 non-null  int64  
 2   Primary limit          18420 non-null  float64
 3   BIRTHDATE              18420 non-null  object 
 4   SEX                    18420 non-null  object 
 5   EDU                    18420 non-null  object 
 6   INCOME                 18420 non-null  float64
 7   TERM                   18420 non-null  int64  
 8   Credit history rating  17488 non-null  object 
 9   LV_AREA                16297 non-null  object 
 10  LV_SETTLEMENTNAME      18374 non-null  object 
 11  INDUSTRYNAME           18420 non-null  object 
 12  PDN                    18420 non-null  float64
 13  CLIENTID               18420 non-null  int64  
 14  Scoring                16786 non-null  float64
 15  UN

In [None]:
# Frequency of each raw EDU category (used to build the translation map).
df["EDU"].value_counts()

Unnamed: 0_level_0,count
EDU,Unnamed: 1_level_1
Среднее специальное,9196
Высшее,6140
Среднее,2519
Неоконченное высшее,555
**Послевузовское,10


In [None]:
# Translate education levels to English.
# Series.map(dict) turns every value that is not a key of the dict into NaN, so
# running this cell a second time (when the column already holds the English
# labels) would null the whole column. Falling back to the current value keeps
# the cell idempotent and leaves any unexpected category untouched instead of
# silently discarding it.
edu_translation = {
    "Среднее специальное": "Vocational Secondary",
    "Высшее": "Higher Education",
    "Среднее": "Secondary Education",
    "Неоконченное высшее": "Incomplete Higher Education",
    "**Послевузовское": "Postgraduate",
}
df["EDU"] = df["EDU"].map(edu_translation).fillna(df["EDU"])

# Confirm the category counts are unchanged by the translation
df["EDU"].value_counts()

Unnamed: 0_level_0,count
EDU,Unnamed: 1_level_1
Vocational Secondary,9196
Higher Education,6140
Secondary Education,2519
Incomplete Higher Education,555
Postgraduate,10


In [None]:
# Frequency of each raw SEX value (used to build the translation map).
df["SEX"].value_counts()

Unnamed: 0_level_0,count
SEX,Unnamed: 1_level_1
Мужской,11249
Женский,7171


In [None]:
# Translate the SEX column to English.
# map() alone would turn anything missing from the dict into NaN (including the
# already-translated values on a re-run), so unmatched entries fall back to
# their current value.
sex_translation = {
    "Мужской": "Male",
    "Женский": "Female",
}
df["SEX"] = df["SEX"].map(sex_translation).fillna(df["SEX"])

# Confirm the counts are unchanged by the translation
df["SEX"].value_counts()

Unnamed: 0_level_0,count
SEX,Unnamed: 1_level_1
Male,11249
Female,7171


In [None]:










# Frequency of each raw LV_AREA value.
df["LV_AREA"].value_counts()

Unnamed: 0_level_0,count
LV_AREA,Unnamed: 1_level_1
г. Минск,4290
Минская область,2408
Витебская область,1154
Гомельская область,1025
Брестская область,1021
Могилевская область,876
МИНСКАЯ,733
ВИТЕБСКАЯ,702
ГОМЕЛЬСКАЯ,680
Гродненская область,674


In [None]:
# Normalize LV_AREA values before mapping: trim, collapse inner whitespace and
# upper-case, so that spelling variants share a single dictionary key.
# The .str accessor propagates missing values as NaN; the previous astype(str)
# converted them to the literal string "nan" before mapping.
lv_area_raw = df["LV_AREA"]
lv_area_normalized = (
    lv_area_raw.str.strip()
    .str.replace(r"\s+", " ", regex=True)
    .str.upper()
)

# Translation map keyed by the normalized spelling.
# Repeated keys and keys ending in whitespace (which can never match a
# stripped value) have been removed; each remaining key appears exactly once.
lv_area_translation = {
    "Г. МИНСК": "Minsk City",
    "Г.МИНСК": "Minsk City",
    "МИНСК": "Minsk City",

    "МИНСКАЯ ОБЛАСТЬ": "Minsk Region",
    "МИНСКАЯ ОБЛ": "Minsk Region",
    "МИНСКАЯ": "Minsk Region",
    "МИНСКАЯОБЛАСТЬ": "Minsk Region",

    "ВИТЕБСКАЯ ОБЛАСТЬ": "Vitebsk Region",
    "ВИТЕБСКАЯ": "Vitebsk Region",

    "ГОМЕЛЬСКАЯ ОБЛАСТЬ": "Gomel Region",
    "ГОМЕЛЬСКАЯ ОБЛ": "Gomel Region",
    "ГОМЕЛЬСКАЯ": "Gomel Region",

    "БРЕСТСКАЯ ОБЛАСТЬ": "Brest Region",
    "БРЕСТСКАЯ": "Brest Region",

    "ГРОДНЕНСКАЯ ОБЛАСТЬ": "Grodno Region",
    "ГРОДНЕНСКАЯ ОБЛ": "Grodno Region",
    "ГРОДНЕНСКАЯ": "Grodno Region",

    "МОГИЛЕВСКАЯ ОБЛАСТЬ": "Mogilev Region",
    "МОГИЛЕВСКАЯ": "Mogilev Region",
    "МОГИЛЁВСКАЯ": "Mogilev Region",
    "МОГИЛЕВСК": "Mogilev Region",
    "МОГИЛЁВСК": "Mogilev Region",
}

# Apply mapping. Values without a translation (including already-translated
# ones when the cell is re-run) keep their current value instead of becoming
# NaN; genuinely missing entries stay NaN.
df["LV_AREA"] = lv_area_normalized.map(lv_area_translation).fillna(lv_area_raw)

# Show result
df["LV_AREA"].value_counts()


Unnamed: 0_level_0,count
LV_AREA,Unnamed: 1_level_1
Minsk City,4296
Minsk Region,3483
Vitebsk Region,1991
Gomel Region,1825
Brest Region,1770
Mogilev Region,1658
Grodno Region,1274


In [None]:
# Frequency of each LV_AREA value after translation.
df["LV_AREA"].value_counts()

Unnamed: 0_level_0,count
LV_AREA,Unnamed: 1_level_1
Minsk City,4296
Minsk Region,3483
Vitebsk Region,1991
Gomel Region,1825
Brest Region,1770
Mogilev Region,1658
Grodno Region,1274


In [None]:
# Lookup table: settlement name as it appears in LV_SETTLEMENTNAME -> English
# (transliterated) name. Keys are matched exactly, so upper-case and
# title-case spellings of the same settlement are listed as separate keys.
# Later cells extend this dict in place with further batches.
belarus_placenames = {
    "КОСАКОВКА": "Kosakovka",
    "МИНСК": "Minsk",
    "ЁДКИ": "Yodki",
    "ФАНИПОЛЬ": "Fanipol",
    "Минск": "Minsk",
    "БРЕСТ": "Brest",
    "ГОМЕЛЬ": "Gomel",
    "РОМАНОВЩИНА": "Romanovshchina",
    "ГРОДНО": "Grodno",
    "ОШМЯНЫ": "Oshmyany",
    "Светлогорск": "Svetlogorsk",
    "КОБРИН": "Kobrin",
    "РУССКОЕ СЕЛО": "Russian Village",  # Translated literally rather than transliterated
    "КОСТРОВИЧИ": "Kostrovichi",
    "КРАСНОПОЛЬЕ": "Krasnopolye",
    "КОЛОДИЩИ": "Kolodishchi",
    "Брест": "Brest",
    "ЧЕРВЕНЬ": "Cherven",
    "РЕЧИЦА": "Rechitsa",
    "ЖОДИНО": "Zhodino",
    "ДОБРИНО": "Dobrino",
    "КЛЕЦК": "Kletsk",
    "Могилев": "Mogilev",
    "БОРИСОВ": "Borisov",
    "ПРОСКУРНИ": "Proskurni",
    "ЛЕСНОЙ": "Lesnoy",
    "ВЕРХНЕДВИНСК": "Verkhnedvinsk",
    "СОСНОВАЯ": "Sosnovaya",
    "БЛИЗНИЦА": "Bliznitsa",
    "Трилесино": "Trilesino",
    "МОЛОДЕЧНО": "Molodechno",
    "МАГДАЛИН": "Magdalin",
    "КОМИНТЕРН": "Komintern",
    "ДРОГИЧИН": "Drogichin",
    "МОГИЛЕВ": "Mogilev",
    "ПОЛОЦК": "Polotsk",
    "ОЗЯТЫ": "Ozyaty",
    "МАРЬИНА ГОРКА": "Maryina Gorka",
    "ПОСТАВЫ": "Postavy",
    "ДАРЕВО-ЧИЖ": "Darevo-Chizh",
    "СОЛИГОРСК": "Soligorsk",
    "ВИТЕБСК": "Vitebsk",
    "РП. РЕЧИЦА": "Rechitsa",
    "СМОЛЯНЫ": "Smolyany",
    "ДОМАЧЕВО": "Domachevo",
    "БОБРУЙСК": "Bobruysk",
    "ВЕРХОВИЧИ": "Verkhovichi",
    "КРАСНЫЙ ВОСХОД": "Red Dawn",  # Translated literally rather than transliterated
    "ЖЛОБИН": "Zhlobin",
    "ОБЕРОВЩИНА": "Oberovshchina",
    "Вилейка": "Vileyka",
    "БЫХОВ": "Bykhov",
    "РЕВЯТИЧИ": "Revyatichi",
    "БАРАНОВИЧИ": "Baranovichi",
    "ОГДЕМЕР": "Ogdemer",
    "Руденск": "Rudensk",
    "ЛИДА": "Lida",
    "РОССЬ": "Ross",
    "ВИЛЕЙКА": "Vileyka",
    "СЛОНИМ": "Slonim",
    "СТОЛБЦЫ": "Stolbtsy",
    "КРИЧЕВ": "Krichev",
    "БИГОСОВО": "Bigosovo",
    "Борисов": "Borisov",
    "СВИСЛОЧЬ": "Svisloch",
    "ШАЙТЕРОВО": "Shayterovo",
    "МЕДУХОВО": "Medukhovo",
    "ЗАМОСТОЧЬЕ": "Zamostochye",
    "ЛЕПЕЛЬ": "Lepel",
    "ЗАРЕЧЬЕ": "Zarechye",
    "ДУБРОВНО": "Dubrovno",
    "ДОБРУШ": "Dobrush",
    "ПОГОНЦЫ": "Pogontsy",
    "ЛОЗОВКА": "Lozovka",
    "ДВОРИЩЕ": "Dvorishche",
    "СЕННО": "Senno",
    "НЕМОЙТА": "Nemoyta",
    "ЗУБРЕВИЧИ": "Zubrevichi",
    "НОВОСЁЛКИ": "Novoselki",
    "ВЫСОЧАНЫ": "Vysochany",
    "ГОРА": "Gora",
    "ГОРОДОК": "Gorodok",
    "ЖИТКОВИЧИ": "Zhitkovichi",
    "МАЛЫЕ СЛОВЕНИ": "Malye Sloveni",
    "РЕЧКИ": "Rechki",
    "ЗАСЛОНОВО": "Zaslonovo",
    "СТАРЫЕ ДОРОГИ": "Starye Dorogi",
    "Седча": "Sedcha",
    "РОГАЧЕВ": "Rogachev",
    "РАДОШКОВИЧИ": "Radoshkovichi",
    "Лепель": "Lepel",
    "ПРУДОК": "Prudok",
    "МОШКАНЫ": "Moshkany",
    "КРАСНОСЕЛЬСКИЙ": "Krasnoselsky",
    "СЛАВГОРОД": "Slavgorod",
    "БОБР": "Bobr",
    "ЮШЕВИЧИ": "Yushevichi",
    "ЗАЛЕСЬЕ": "Zalesye",
    "БЕРЕЗОВКА": "Berezovka",
    "НАРОВЛЯ": "Narovlya",
    "ШАРКОВЩИНА": "Sharkovshchina",
    "БОГДАНОВКА": "Bogdanovka",
    "Наровля": "Narovlya",
    "ДОБРИНЁВО": "Dobrinevo",
    "БЕЛООЗЁРСК": "Beloozersk",
    "ОСОВЦЫ": "Osovtsy",
    "БИРЮЗОВО": "Biryuzovo",
    "ЗАБОЛОТЬЕ": "Zabolotye",
    "Пружаны": "Pruzhany",
    "ВЕДРИЦА": "Vedritsa",
    "ШКЛОВ": "Shklov",
    "Гомель": "Gomel",
    "НОВИНКА": "Novinka",
    "СТРОЧИЦА": "Strochitsa",
    "ОЛЬШАНЫ": "Olshany",
    "БУДА-КОШЕЛЕВО": "Buda-Koshelevo",
    "НОВОКОЛОСОВО": "Novokolosovo",
    "СЛУЦК": "Slutsk",
    "ЧЕЧЕРСК": "Chechersk",
    "КРАСНОЕ": "Krasnoye",
    "ЖДАНЫ": "Zhdany",
    "МОЗЫРЬ": "Mozyr",
    "ОРША": "Orsha",
    "ТИШОВКА": "Tishovka",
    "ГЛУСК": "Glusk",
    "ОРДАТЬ": "Ordat",
    "ГАТОВО": "Gatovo",
    "ДЕРКОВЩИНА": "Derkovshchina",
    "МЕЛЕШКОВИЧИ": "Meleshkovichi",
    "ДОКОЛЬ": "Dokol",
    "ДОВСК": "Dovsk",
    "ЖУХОВЦЫ": "Zhukhovtsy",
    "МИХАЛИШКИ": "Mikhalishki",
    "ДЗЕРЖИНСК": "Dzerzhinsk",
    "МИКАШЕВИЧИ": "Mikashevichi",
    "Дзержинск": "Dzerzhinsk",
    "СТАРАЯ РУДИЦА": "Staraya Ruditsa",
    "ПИНСК": "Pinsk",
    "ЧЕРНИ": "Cherni",
    "ЧИСТЬ": "Chist",
    "ШЕПЕЛЕВО": "Shepelevo",
    "МАЛОРИТА": "Malorita",
    "РОССОНЫ": "Rossony",
    "Гадиловичи": "Gadilovichi",
    "КАЛИНКОВИЧИ": "Kalinkovichi",
    "ЩУЧИН": "Shchuchin",
    "ПЕТРИКОВ": "Petrikov",
    "БЕЛЫНИЧИ": "Belynichi",
    "НОВОГРУДОК": "Novogrudok",
    "ВЫСОКОЕ": "Vysokoye",
    "БАЦЕВИЧИ": "Batsevichi",
    "ДРУЦК": "Drutsk",
    "Гродно": "Grodno",
    "МЕЖИСЕТКИ": "Mezhisetki",
    "ВОРОПАЕВО": "Voropayevo",
    "ИВЕНЕЦ": "Ivenets",
    "КРИВЧИЦЫ": "Krivchitsy",
    "Речица": "Rechitsa",
    "ЛУНИНЕЦ": "Luninets",
    "БРЮХОВЦЫ": "Bryukhovtsy",
    "ГОЛОВЧИЦЫ": "Golovchitsy",
    "НОВОПОЛОЦК": "Novopolotsk",
    "ГОРКИ": "Gorki",
    "ДРИЧИН": "Drichin",
    "ОСИПОВИЧИ": "Osipovichi",
    "СЁМКОВО": "Semkovo",
    "БЕРЁЗА": "Bereza",
    "ЯНКИ": "Yanki",
    "ДУБРОВО": "Dubrovo",
    "ВЕТКА": "Vetka",
    "СКОРОДЫ": "Skorody",
    "ЛЯХОВИЧИ": "Lyakhovichi",
    "Носилово": "Nosilovo",
    "КОПЦЕВИЧИ": "Koptsevichi",
    "Новополоцк": "Novopolotsk",
    "ПАСТОВИЧИ": "Pastovichi",
    "МЕШКОВО": "Meshkovo",
    "РОГАЧЁВ": "Rogachev",
    "МОСТЫ": "Mosty",
    "БАРСУКИ": "Barsuki",
    "БОРОВИЦА": "Borovitsa",
    "МИОРЫ": "Miory",
    "ПРИГОРОДНАЯ": "Prigorodnaya",
    "ОСТРОШИЦКИЙ ГОРОДОК": "Ostroshitsky Gorodok",
    "ЛАПУТЫ": "Laputy",
    "ГАЛЬЧУНЫ": "Galchuny",
    "БЕРЕЗИНО": "Berezino",
    "ЖУКНЕВО": "Zhuknevo"
}

# Second batch of settlement names (raw spelling -> English transliteration).
# Keys are matched exactly, so upper-case and title-case spellings of the same
# settlement appear as separate keys.
additional_placenames = {
    "МИХАНОВИЧИ": "Mikhanovichi",
    "АНДРЕЕВЦЫ": "Andreevtsy",
    "БЕНЯКОНИ": "Benyakoni",
    "Барсуки": "Barsuki",
    "КЛИМОВИЧИ": "Klimovichi",
    "ЗУБЕЛЕВИЧИ": "Zubelevichi",
    "ОБРУБ": "Obrub",
    "КОПЫЛЬ": "Kopyl",
    "ЧАУСЫ": "Chausy",
    "ШУМИЛИНО": "Shumilino",
    "ТОЛОЧИН": "Tolochin",
    "МИХЕЕВКА": "Mikheevka",
    "МУРАВЩИНА": "Muravshchina",
    "НИСИМКОВИЧИ": "Nisimkovichi",
    "ЛЮБАНЬ": "Lyuban",
    "Копище": "Kopishche",
    "Заславль": "Zaslavl",
    "ПУХОВИЧИ": "Pukhovichi",
    "Слуцк": "Slutsk",
    "ЛЕНИНО": "Lenino",
    "ОСИПОВЩИНА": "Osipovshchina",
    "ПОГОСТ": "Pogost",
    "МСТИСЛАВЛЬ": "Mstislavl",
    "БОГДАНОВО": "Bogdanovo",
    "УЗМЁНЫ": "Uzmeny",
    "БОЛЬШИЕ БОРТНИКИ": "Bolshie Bortniki",
    "ВЕЙНО": "Veyno",
    "Волковыск": "Volkovysk",
    "ЖАБИНКА": "Zhabinka",
    "ЗАСЛАВЛЬ": "Zaslavl",
    "ВАЛЕВКА": "Valevka",
    "КОПИЩЕ": "Kopishche",
    "ОЗАРИЧИ": "Ozarichi",
    "КРИВАЯ БЕРЕЗА": "Krivaya Bereza",
    "ЗАСОВЬЕ": "Zasovye",
    "КИРОВСК": "Kirovsk",
    "ВАСИЛЕВИЧИ": "Vasilevichi",
    "НЕСВИЖ": "Nesvizh",
    "БОЛЬШЕВИК": "Bolshevik",
    "КАЗАЗАЕВКА": "Kazazaevka",
    "ГРЕБЁНКА": "Grebenka",
    "Чериков": "Cherikov",
    "ХОТИЛЫ": "Khotily",
    "ВЫШЕМИР": "Vyshemir",
    "ЯНОВИЧИ": "Yanovichi",
    "Россоны": "Rossony",
    "ЗЕЛЬВА": "Zelva",
    "КРУПЕНИКИ": "Krupeniki",
    "ШЕРШУНЫ": "Shershuny",
    "ФАРИНОВО": "Farinovo",
    "СВЕТЛОГОРСК": "Svetlogorsk",
    "ГОВЯДЫ": "Govyady",
    "Дружный": "Druzhny",
    "Мосты": "Mosty",
    "КОРЕЛИЧИ": "Korelichi",
    "БАБИНО-2": "Babino-2",
    "Червень": "Cherven",
    "НЕГНЕВИЧИ": "Negnevichi",
    "КОПТИ": "Kopti",
    "Орешники": "Oreshniki",
    "РАДЮКИ": "Radyuki",
    "МИЧУРИНСКАЯ": "Michurinskaya",
    "РЕДЬКИ": "Redki",
    "СЛОБОДА": "Sloboda",
    "Витебск": "Vitebsk",
    "БЕЛАЯ": "Belaya",
    "БЕРЁЗКИ": "Berezki",
    "Яновичи": "Yanovichi",
    "КРУЛЕВЩИНА": "Krulevshchina",
    "НАРОВЧИЗНА": "Narovchizna",
    "РУДКОВЩИНА": "Rudkovshchina",
    "НОВОЛУКОМЛЬ": "Novolukoml",
    "ЗАЗЕРКА": "Zazerka",
    "БРАСЛАВ": "Braslav",
    "КАРАВАЙНИЦА": "Karavaynitsa",
    "Марьина Горка": "Maryina Gorka",
    "ЗАПОЛЬЕ": "Zapolye",
    "КОВЗАНЫ": "Kovzany",
    "ЮРКОВИЧИ": "Yurkovichi",  # was "Yurkovich": final "И" was dropped, unlike every other "-ВИЧИ" entry
    "ОСТРОВ": "Ostrov",
    "НЕМАН": "Neman",
    "ПРИЛУКИ": "Priluki",
    "ВЕЛИКИЙ ДВОР": "Veliky Dvor",
    "ОМЕЛЬНАЯ": "Omelnaya",
    "МИРНЫЙ": "Mirny",
    "СНОВ": "Snov",
    "Богатырево": "Bogatyrevo",
    "ГРИБАНЫ": "Gribany",
    "СТАРИЦА": "Staritsa",
    "ЧЕРЕНКИ": "Cherenki",
    "ПОВСТЫНЬ": "Povstyn",
    "ПАРАФЬЯНОВО": "Parafyanovo",
    "ЧЕРИКОВ": "Cherikov",
    "РУСИНО": "Rusino",
    "БОЛЬШАЯ БЕРЕСТОВИЦА": "Bolshaya Berestovitsa",
    "СТАНИСЛАВОВО": "Stanislavovo",
    "ЮЗУФОВО": "Yuzufovo",
    "ВОЛКОВЫСК": "Volkovysk",
    "НАЧА": "Nacha",
    "СЛАБОДКА": "Slabodka",
    "ВОЛЧИН": "Volchin",
    "ПЕСОЧНОЕ": "Pesochnoye",
    "ЦЕРКОВИЩЕ": "Tserkovishche",
    "ДВОР НИЗГОЛОВО": "Dvor Nizgolovo",
    "УДАРНЫЙ": "Udarny",
    "ЛУГОВАЯ СЛОБОДА": "Lugovaya Sloboda"
}

# Merge the second batch into the main lookup in place
# (dict.update overwrites the value of any key that already exists).
belarus_placenames.update(additional_placenames)

# Third batch of settlement names (raw spelling -> English transliteration),
# kept as a separate literal only because the full list is very long.
more_placenames = {
    "ПАЛИЧИН": "Palichin",
    "ГАРБУЗЫ": "Garbuzy",
    "ГАТОВИЧИ": "Gatovichi",
    "СТОЯЛОВО": "Stoyalovo",
    "РОВАНИЧИ": "Rovanichi",
    "НИКОЛАЕВО": "Nikolaevo",
    "ЛЕЛЬЧИЦЫ": "Lelchitsy",
    "СТАРОБИН": "Starobin",
    "ПРУЖАНЫ": "Pruzhany",
    "ПРИОЗЁРНАЯ": "Priozernaya",
    "ГОРОДЕЦ": "Gorodets",
    "ОСОВИНО": "Osovino",
    "ЛЕЩИЛОВО": "Leshchilovo",
    "ОСТРОВЕЦ": "Ostrovets",
    "ЧЕРНИКОВЩИНА": "Chernikovshchina",
    "ЮРАТИШКИ": "Yuratishki",
    "ГЛУБОКОЕ": "Glubokoye",
    "КАБАКИ": "Kabaki",
    "МАЛЕВИЧИ": "Malevichi",
    "ГАНЦЕВИЧИ": "Gantsevichi",
    "КРУГЛОЕ": "Krugloye",
    "ВИДОМЛЯ": "Vidomlya",
    "ДРУЯ": "Druya",
    "ЛОГОВИЩЕ": "Logovishche",
    "ОСТРОМЕЧЕВО": "Ostromechevo",
    "УЗДА": "Uzda",
    "ВЕРХЛЕС": "Verkhles",
    "Боровляны": "Borovlyany",
    "КОМАРОВИЧИ": "Komarovichi",
    "ВИШНЕВЕЦ": "Vishnevets",
    "Большой Тростенец": "Bolshoy Trostenets",
    "БЕШЕНКОВИЧИ": "Beshenkovichi",
    "ТЫЧИНКИ": "Tychinki",
    "ЧУРИЛОВИЧИ": "Churilovichi",
    "Журевичи": "Zhurevichi",
    "ЖИРОВИЧИ": "Zhirovichi",
    "НОВОСЕЛКИ": "Novoselki",
    "ГОЛВИНЦЫ": "Golvintsy",
    "ЖЕРЕБКОВИЧИ": "Zherebkovichi",
    "Воложин": "Volozhin",
    "БОГАТЫРЕВО": "Bogatyrevo",
    "Правдинский": "Pravdinsky",
    "СМОЛЕВИЧИ": "Smolevichi",
    "КУЗЬМИНИЧИ": "Kuzminichi",
    "КОММУНАР": "Kommunar",
    "КАМЕНЕЦ": "Kamenets",
    "МОЛЯТИЧИ": "Molyatichi",
    "БЕЛАЯ ЛИПА": "Belaya Lipa",
    "КРИВАЯ ГРЯДА": "Krivaya Gryada",
    "КВАСОВКА": "Kvasovka",
    "БОГУШЕВСК": "Bogushevsk",
    "СМОРГОНЬ": "Smorgon",
    "Мачулищи": "Machulishchi",
    "БРОДЫ": "Brody",
    "ОЗЕРЫ": "Ozery",
    "ВЕРЕСКОВО": "Vereskovo",
    "Жодино": "Zhodino",
    "СВЕТЛОСЕЛЬСКИЙ": "Svetloselsky",
    "ЛАПИЧИ": "Lapichi",
    "КОНСТАНТИНОВО": "Konstantinovo",
    "КОРЗУНЫ": "Korzuny",
    "ЛЮБОНИЧИ": "Lyubonichi",
    "СТРИГАНЕЦ": "Striganets",
    "БОЛЬШИЕ ЖУХОВИЧИ": "Bolshie Zhukhovichi",
    "ЛЮБКОВЩИНА": "Lyubkovshchina",
    "МАЧУЛИЩИ": "Machulishchi",
    "ХОЛХЛОВО": "Kholkhlovo",
    "СЕЛИЩЕ": "Selishche",
    "МИР": "Mir",
    "ЮРКОВЩИНА": "Yurkovshchina",
    "УЗЛЯНЫ": "Uzlyany",
    "ПАВЛОВИЧИ": "Pavlovichi",
    "ПРОКИСЕЛЬ": "Prokisel",
    "ХМЕЛЕВО": "Khmelevo",
    "КОПАТКЕВИЧИ": "Kopatkevichi",
    "СТОДОЛИЧИ": "Stodolichi",
    "ГУЗГАЛОВКА": "Guzgalovka",
    "ЛОБАЧЕВКА": "Lobachevka",
    "СВИСТЕЛКИ": "Svistelki"
}

# Merge the third batch into the main lookup in place.
belarus_placenames.update(more_placenames)

# Final batch of settlement names (raw spelling -> English transliteration).
final_placenames = {
    "БУЙНИЧИ": "Buynichi",
    "ЖДАНОВИЧИ": "Zhdanovichi",
    "ИВАЦЕВИЧИ": "Ivatsevichi",
    "Логойск": "Logoysk",
    "КЛИЧЕВ": "Klichev",
    "ЛЫСКОВО": "Lyskovo",
    "БОРОВЛЯНЫ": "Borovlyany",
    "СТАРОЕ ЛЯДНО": "Staroye Lyadno",
    "ПЕРВОМАЙСКИЙ": "Pervomaysky",
    "Энергетиков": "Energetikov",
    "ЧЕРНЕВИЧИ": "Chernevichi",
    "КОРЕНЁВКА": "Korenevka",
    "ЛУТЫ": "Luty",
    "ОБОЛЬ": "Obol",
    "КРИНИЦА": "Krinitsa",
    "ЛОЕВ": "Loyev",
    "ИЗАБЕЛИН": "Izabelin",
    "ГУРНОФЕЛЬ": "Gurnofel",
    "СЕДЕНЕВИЧИ": "Sedenevichi",
    "Малорита": "Malorita",
    "СТАНЬКОВО": "Stankovo",
    "ЧЕРНЫЙ БОР": "Cherny Bor",
    "БОРОВКА": "Borovka",
    "ЩИТОМИРИЧИ": "Shchitomirichi",
    "БОБРОВИЧИ": "Bobrovichi",
    "Осиповичи": "Osipovichi",
    "ГУДЕВИЧИ": "Gudevichi",
    "КРИВИЧИ": "Krivichi",
    "СОРОЧИ": "Sorochi",
    "БОРЗДОВКА": "Borzdovka",
    "ШИЛОВ УГОЛ": "Shilov Ugol",
    "ЯТВЕЗЬ": "Yatvez",
    "ЧЕРНАВЧИЦЫ": "Chernavchitsy",
    "ДЕЩЕНКА": "Deshchenka",
    "ПЕСКИ": "Peski",
    "ХОЛМЕЧ": "Kholmech",
    "ПЕРЕВОЛОКА": "Perevoloka",
    "СКИДЕЛЬ": "Skidel",
    "КРУПИЦА": "Krupitsa",
    "ПЛОТНИЦА": "Plotnitsa",
    "ОСТРОШИЦЫ": "Ostroshitsy",
    "КУЛАКИ": "Kulaki",
    "ЧИЖЕВЩИНА": "Chizhevshchina",
    "НЕКРАШЕВИЧИ": "Nekrashevichi",
    "Радошковичи": "Radoshkovichi",
    "ГУРКИ": "Gurki",
    "ЖИГУЛИ": "Zhiguli",
    "ЗАДРОВЬЕ": "Zadrovye",
    "КОПЫСЬ": "Kopys",
    "СМИЛОВИЧИ": "Smilovichi",
    "МАДЕЙКИ": "Madeyki",
    "КОСТЮКОВИЧИ": "Kostyukovichi",
    "ВИШОВ": "Vishov",
    "ГРАМОЩЕ": "Gramoshche",
    "ДРУЖНЫЙ": "Druzhny",
    "ЗАБЫЧАНЬЕ": "Zabychanye",
    "ЯНЧУКИ": "Yanchuki",
    "ТОРГУНЫ": "Torguny",
    "ЛОЙКИ": "Loyki",
    "ГОРТОЛЬ": "Gortol",
    "ЛИОЗНО": "Liozno",
    "Молодечно": "Molodechno",
    "ГРИЧИНО": "Grichino",
    "КОВАЛИ": "Kovali",
    "РОЖАНКА": "Rozhanka",
    "БОРЗДИЛЫ": "Borzdily",
    "КРОТОВ": "Krotov",
    "БОЛЬШИЕ СВИРЯНКИ": "Bolshie Sviryanki"
}

# Merge the final batch into the main lookup in place.
belarus_placenames.update(final_placenames)

# Translate LV_SETTLEMENTNAME to English using the merged lookup.
# Keys are matched on a trimmed, upper-cased spelling so that case or padding
# variants ("Минск" / "МИНСК") resolve to the same entry even when only one of
# them is listed in the dictionary.
settlement_lookup = {
    russian.strip().upper(): english
    for russian, english in belarus_placenames.items()
}
settlement_raw = df["LV_SETTLEMENTNAME"]
settlement_en = settlement_raw.str.strip().str.upper().map(settlement_lookup)

# map() yields NaN for anything without a translation. Fall back to the current
# value so unknown settlements are not silently discarded and re-running the
# cell (when the column is already English) is a no-op. Missing entries stay NaN.
df["LV_SETTLEMENTNAME"] = settlement_en.fillna(settlement_raw)

# Show the translated distribution
df["LV_SETTLEMENTNAME"].value_counts()

Unnamed: 0_level_0,count
LV_SETTLEMENTNAME,Unnamed: 1_level_1
Minsk,5043
Mogilev,780
Gomel,751
Vitebsk,676
Brest,541
...,...
Ostroshitsy,1
Gurki,1
Borzdily,1
Krotov,1


In [None]:
# Frequency of each raw INDUSTRYNAME value (used to build the translation map).
df["INDUSTRYNAME"].value_counts()

Unnamed: 0_level_0,count
INDUSTRYNAME,Unnamed: 1_level_1
Производство,4323
Торговля,3100
АПК,1858
Услуги населению,1166
Строительство,1154
Наука и образование,1150
Транспорт/перевозки,1077
МВД/МЧС/МО,1053
Медицина и здравоохранение,981
Финансы и страхование,801


In [None]:
# Translate employer industry names to English.
# map() alone would replace every value missing from the dict with NaN -
# including the already-translated labels if this cell is run twice - so
# unmatched entries fall back to their current value.
industry_translation = {
    "Производство": "Manufacturing/Production",
    "Торговля": "Trade/Commerce",
    "АПК": "Agro-Industrial Complex",
    "Услуги населению": "Public Services",
    "Строительство": "Construction",
    "Наука и образование": "Science and Education",
    "Транспорт/перевозки": "Transport/Transportation",
    "МВД/МЧС/МО": "Ministry of Internal Affairs/Ministry of Emergency Situations/Ministry of Defense",
    "Медицина и здравоохранение": "Medicine and Healthcare",
    "Финансы и страхование": "Finance and Insurance",
    "Государственное управление": "Public Administration",
    "Информационные технологии": "Information Technology",
    "Иное": "Other",
    "Культура и искусство": "Culture and Art",
    "Спорт и туризм": "Sports and Tourism",
    "Пенсионер": "Pensioner/Retiree",
}
df["INDUSTRYNAME"] = df["INDUSTRYNAME"].map(industry_translation).fillna(df["INDUSTRYNAME"])

In [None]:
# Frequency of each INDUSTRYNAME value after translation.
df["INDUSTRYNAME"].value_counts()

Unnamed: 0_level_0,count
INDUSTRYNAME,Unnamed: 1_level_1
Manufacturing/Production,4323
Trade/Commerce,3100
Agro-Industrial Complex,1858
Public Services,1166
Construction,1154
Science and Education,1150
Transport/Transportation,1077
Ministry of Internal Affairs/Ministry of Emergency Situations/Ministry of Defense,1053
Medicine and Healthcare,981
Finance and Insurance,801


In [None]:
# Preview the first ten rows of the translated frame.
# NOTE(review): the rendered rows include CLIENTID and BIRTHDATE - confirm this output is safe to share.
df.head(10)

Unnamed: 0,Debt,"Delay, days",Primary limit,BIRTHDATE,SEX,EDU,INCOME,TERM,Credit history rating,LV_AREA,LV_SETTLEMENTNAME,INDUSTRYNAME,PDN,CLIENTID,Scoring,UNDERAGECHILDRENCOUNT,VELCOMSCORING,FAMILYSTATUS
0,6063.5,3,7000.0,1983-07-08,Male,Vocational Secondary,703.07,60,A1,Gomel Region,Kosakovka,Agro-Industrial Complex,0.98,919517,233.0,0,,1
1,3765.04,0,5000.0,1987-12-19,Female,Higher Education,1693.68,60,B2,,Minsk,Finance and Insurance,0.17,539353,,1,,1
2,2067.66,0,2650.0,1966-02-28,Female,Secondary Education,724.49,60,C2,Grodno Region,Yodki,Medicine and Healthcare,0.4,818913,,0,,2
3,2370.39,0,3000.0,1972-02-18,Female,Vocational Secondary,1045.84,60,A3,Minsk Region,Fanipol,Public Administration,0.29,691598,216.0,0,,2
4,2280.55,0,3000.0,1997-02-23,Female,Higher Education,1092.65,60,B3,,Minsk,Medicine and Healthcare,0.63,425193,,0,,2
5,839.91,0,1000.0,1988-02-08,Male,Higher Education,3341.23,60,C1,Brest Region,Brest,Manufacturing/Production,0.1,476562,246.0,0,,2
6,1065.19,0,2000.0,1990-05-17,Female,Higher Education,983.6,24,C2,Minsk City,Minsk,Public Services,0.25,914643,74.0,1,,2
7,2675.67,62,3000.0,1999-08-13,Female,Vocational Secondary,1076.35,60,D1,Gomel Region,Gomel,Manufacturing/Production,0.16,918315,85.0,0,,2
8,2496.93,0,3500.0,1982-01-11,Male,Vocational Secondary,1195.75,36,,Minsk Region,Romanovshchina,Agro-Industrial Complex,0.12,922922,166.0,3,,1
9,5024.11,0,5800.0,1973-04-13,Male,Vocational Secondary,3260.93,60,D2,Minsk City,Minsk,Manufacturing/Production,0.4,926420,136.0,3,,1


In [None]:
# Persist the translated frame next to the raw file (row index is not written).
translated_csv_path = "/content/drive/MyDrive/Projects/Bank credit risk assessment/bank_credit_scoring_en.csv"
df.to_csv(translated_csv_path, index=False)

In [6]:
# Median of two concatenated lists.
# NOTE(review): looks like a leftover scratch cell unrelated to the translation steps - confirm whether it is still needed.
import statistics

a = [1, 3]
b = [2]
c = a + b
statistics.median(c)

2