# Credit Risk Modeling Project

This project involves building a credit risk model using the German Credit Data dataset. The goal is to predict the creditworthiness of individuals based on various features.

## Dataset

The dataset used is the [German Credit Data](https://archive.ics.uci.edu/dataset/144/statlog+german+credit+data), which contains information about individuals and their credit history.


In [None]:
# Inject a CSS rule so that long cell outputs scroll inside a 300px-high box.
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing `display` from `IPython.core.display` is deprecated since
# IPython 7.14.
from IPython.display import display, HTML
display(HTML("<style>.output_scroll { height: 300px; overflow-y: scroll; }</style>"))

In [None]:
!pip install ucimlrepo

In [38]:
from ucimlrepo import fetch_ucirepo

# Fetch the Statlog (German Credit Data) dataset (UCI id 144).
statlog_german_credit_data = fetch_ucirepo(id=144)

# Data (as pandas dataframes).
# Work on explicit copies: later cells rename and overwrite columns of `x`
# in place, which would otherwise modify the frames held by the fetched
# dataset object and can trigger pandas' SettingWithCopyWarning.
x = statlog_german_credit_data.data.features.copy()
y = statlog_german_credit_data.data.targets.copy()

# Dataset-level metadata.
print(statlog_german_credit_data.metadata)

# Per-variable information.
print(statlog_german_credit_data.variables)


{'uci_id': 144, 'name': 'Statlog (German Credit Data)', 'repository_url': 'https://archive.ics.uci.edu/dataset/144/statlog+german+credit+data', 'data_url': 'https://archive.ics.uci.edu/static/public/144/data.csv', 'abstract': 'This dataset classifies people described by a set of attributes as good or bad credit risks. Comes in two formats (one all numeric). Also comes with a cost matrix', 'area': 'Social Science', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 1000, 'num_features': 20, 'feature_types': ['Categorical', 'Integer'], 'demographics': ['Other', 'Marital Status', 'Age', 'Occupation'], 'target_col': ['class'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 1994, 'last_updated': 'Thu Aug 10 2023', 'dataset_doi': '10.24432/C5NC77', 'creators': ['Hans Hofmann'], 'intro_paper': None, 'additional_info': {'summary': 'Two datasets are provided.  the original dataset, in the form provided by

In [40]:
# Descriptive names for the 20 feature columns, assigned positionally.
feature_names = [
    'Status of Checking Account',
    'Duration in Months',
    'Credit History',
    'Purpose',
    'Credit Amount',
    'Savings Account',
    'Employment Duration',
    'Installment Rate',
    'Personal Status',
    'Debtors',
    'Residence Years',
    'Property',
    'Age',
    'Other Plans',
    'Housing',
    'Existing Credits',
    'Job',
    'Maintenance',
    'Telephone',
    'Foreign Worker',
]
x.columns = feature_names

# Preview the first five rows under the new column names.
print(x.head(n=5))

  Status of Checking Account  Duration in Months Credit History Purpose  \
0                        A11                   6            A34     A43   
1                        A12                  48            A32     A43   
2                        A14                  12            A34     A46   
3                        A11                  42            A32     A42   
4                        A11                  24            A33     A40   

   Credit Amount Savings Account Employment Duration  Installment Rate  \
0           1169             A65                 A75                 4   
1           5951             A61                 A73                 2   
2           2096             A61                 A74                 2   
3           7882             A61                 A74                 2   
4           4870             A61                 A73                 3   

  Personal Status Debtors  Residence Years Property  Age Other Plans Housing  \
0             A93    A10

In [None]:
# Mapping the categorical variables to human readable values.
#
# Each dictionary translates the dataset's attribute codes (e.g. 'A11') into
# a descriptive label. Codes that are not listed in a dictionary are left
# unchanged by `Series.replace`.

checking_account_mapping = {
    'A11': '< 0 DM',
    'A12': '0 <= ... < 200 DM',
    'A13': '>= 200 DM',
    'A14': 'no checking account',
}

credit_history_mapping = {
    'A30': 'no credits taken/all paid',
    'A31': 'all credits paid back duly',
    'A32': 'existing credits paid back duly',
    'A33': 'delay in paying off',
    'A34': 'critical account/other credits existing',
}

# Purpose codes per the UCI attribute documentation: A47 is 'vacation'
# (documented as possibly not occurring in the data), so retraining,
# business and others are A48, A49 and A410 respectively.
purpose_mapping = {
    'A40': 'car (new)',
    'A41': 'car (used)',
    'A42': 'furniture/equipment',
    'A43': 'radio/television',
    'A44': 'domestic appliances',
    'A45': 'repairs',
    'A46': 'education',
    'A47': 'vacation',
    'A48': 'retraining',
    'A49': 'business',
    'A410': 'others',
}

savings_account_mapping = {
    'A61': '< 100 DM',
    'A62': '100 <= ... < 500 DM',
    'A63': '500 <= ... < 1000 DM',
    'A64': '>= 1000 DM',
    'A65': 'unknown/no savings account',
}

employment_mapping = {
    'A71': 'unemployed',
    'A72': '< 1 year',
    'A73': '1 <= ... < 4 years',
    'A74': '4 <= ... < 7 years',
    'A75': '>= 7 years',
}

personal_status_mapping = {
    'A91': 'male : divorced/separated',
    'A92': 'female : divorced/separated/married',
    'A93': 'male : single',
    'A94': 'male : married/widowed',
    'A95': 'female : single',
}

debtors_mapping = {
    'A101': 'none',
    'A102': 'co-applicant',
    'A103': 'guarantor',
}

property_mapping = {
    'A121': 'real estate',
    'A122': 'building society savings/life insurance',
    'A123': 'car or other',
    'A124': 'unknown/no property',
}

plans_mapping = {
    'A141': 'bank',
    'A142': 'stores',
    'A143': 'none',
}

housing_mapping = {
    'A151': 'rent',
    'A152': 'own',
    'A153': 'for free',
}

job_mapping = {
    'A171': 'unemployed/unskilled - non-resident',
    'A172': 'unskilled - resident',
    'A173': 'skilled employee/official',
    'A174': 'management/self-employed/highly qualified',
}

telephone_mapping = {
    'A191': 'none',
    'A192': 'yes, registered under the customer’s name',
}

foreign_worker_mapping = {
    'A201': 'yes',
    'A202': 'no',
}

# Column name -> code/label dictionary, applied in a single pass below.
categorical_mappings = {
    'Status of Checking Account': checking_account_mapping,
    'Credit History': credit_history_mapping,
    'Purpose': purpose_mapping,
    'Savings Account': savings_account_mapping,
    'Employment Duration': employment_mapping,
    'Personal Status': personal_status_mapping,
    'Debtors': debtors_mapping,
    'Property': property_mapping,
    'Other Plans': plans_mapping,
    'Housing': housing_mapping,
    'Job': job_mapping,
    'Telephone': telephone_mapping,
    'Foreign Worker': foreign_worker_mapping,
}

for column_name, code_to_label in categorical_mappings.items():
    x[column_name] = x[column_name].replace(code_to_label)



In [43]:
# Show the first five rows of the feature frame.
print(x.head(n=5))

  Status of Checking Account  Duration in Months  \
0                     < 0 DM                   6   
1          0 <= ... < 200 DM                  48   
2        no checking account                  12   
3                     < 0 DM                  42   
4                     < 0 DM                  24   

                            Credit History              Purpose  \
0  critical account/other credits existing     radio/television   
1          existing credits paid back duly     radio/television   
2  critical account/other credits existing            education   
3          existing credits paid back duly  furniture/equipment   
4                      delay in paying off            car (new)   

   Credit Amount             Savings Account Employment Duration  \
0           1169  unknown/no savings account          >= 7 years   
1           5951                    < 100 DM  1 <= ... < 4 years   
2           2096                    < 100 DM  4 <= ... < 7 years   
3           

In [None]:
# Adjusting the Mapping to machine readable values
#
# NOTE(review): despite the heading, the dictionaries below hold the same
# human-readable labels as the mapping in the preceding cell, so values that
# were already relabelled there are left untouched by this cell. Confirm
# whether a numeric/ordinal encoding was intended here instead.

checking_account_mapping = {
    'A11': '< 0 DM',
    'A12': '0 <= ... < 200 DM',
    'A13': '>= 200 DM',
    'A14': 'no checking account',
}

credit_history_mapping = {
    'A30': 'no credits taken/all paid',
    'A31': 'all credits paid back duly',
    'A32': 'existing credits paid back duly',
    'A33': 'delay in paying off',
    'A34': 'critical account/other credits existing',
}

# Purpose codes per the UCI attribute documentation: A47 is 'vacation'
# (documented as possibly not occurring in the data), so retraining,
# business and others are A48, A49 and A410 respectively.
purpose_mapping = {
    'A40': 'car (new)',
    'A41': 'car (used)',
    'A42': 'furniture/equipment',
    'A43': 'radio/television',
    'A44': 'domestic appliances',
    'A45': 'repairs',
    'A46': 'education',
    'A47': 'vacation',
    'A48': 'retraining',
    'A49': 'business',
    'A410': 'others',
}

savings_account_mapping = {
    'A61': '< 100 DM',
    'A62': '100 <= ... < 500 DM',
    'A63': '500 <= ... < 1000 DM',
    'A64': '>= 1000 DM',
    'A65': 'unknown/no savings account',
}

employment_mapping = {
    'A71': 'unemployed',
    'A72': '< 1 year',
    'A73': '1 <= ... < 4 years',
    'A74': '4 <= ... < 7 years',
    'A75': '>= 7 years',
}

personal_status_mapping = {
    'A91': 'male : divorced/separated',
    'A92': 'female : divorced/separated/married',
    'A93': 'male : single',
    'A94': 'male : married/widowed',
    'A95': 'female : single',
}

debtors_mapping = {
    'A101': 'none',
    'A102': 'co-applicant',
    'A103': 'guarantor',
}

property_mapping = {
    'A121': 'real estate',
    'A122': 'building society savings/life insurance',
    'A123': 'car or other',
    'A124': 'unknown/no property',
}

plans_mapping = {
    'A141': 'bank',
    'A142': 'stores',
    'A143': 'none',
}

housing_mapping = {
    'A151': 'rent',
    'A152': 'own',
    'A153': 'for free',
}

job_mapping = {
    'A171': 'unemployed/unskilled - non-resident',
    'A172': 'unskilled - resident',
    'A173': 'skilled employee/official',
    'A174': 'management/self-employed/highly qualified',
}

telephone_mapping = {
    'A191': 'none',
    'A192': 'yes, registered under the customer’s name',
}

foreign_worker_mapping = {
    'A201': 'yes',
    'A202': 'no',
}

# (column, mapping) pairs, applied in the listed order.
column_mapping_pairs = [
    ('Status of Checking Account', checking_account_mapping),
    ('Credit History', credit_history_mapping),
    ('Purpose', purpose_mapping),
    ('Savings Account', savings_account_mapping),
    ('Employment Duration', employment_mapping),
    ('Personal Status', personal_status_mapping),
    ('Debtors', debtors_mapping),
    ('Property', property_mapping),
    ('Other Plans', plans_mapping),
    ('Housing', housing_mapping),
    ('Job', job_mapping),
    ('Telephone', telephone_mapping),
    ('Foreign Worker', foreign_worker_mapping),
]

for column_name, code_to_label in column_mapping_pairs:
    x[column_name] = x[column_name].replace(code_to_label)



In [35]:
# Display the first five rows of the target frame (single 'class' column).
y.head(n=5)

Unnamed: 0,class
0,1
1,2
2,1
3,1
4,2


In [18]:
# Count how many rows fall into each target class.
class_counts = y.value_counts()
print(class_counts)

class
1        700
2        300
Name: count, dtype: int64
