# Uczenie maszynowe - laboratorium 1

## Dataset: Polish companies bankruptcy

- Source: https://archive.ics.uci.edu/dataset/365/polish+companies+bankruptcy+data


In [2]:
from pandas import DataFrame
from ucimlrepo import fetch_ucirepo
from typing import NamedTuple, List, Dict, Any
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.display import display


# Type hints describing the object returned by `fetch_ucirepo`, based on:
# https://github.com/uci-ml-repo/ucimlrepo?tab=readme-ov-file#parameters
class UciMlData(NamedTuple):
    """Type hint for the ``data`` member of a fetched UCI ML Repository dataset.

    The field descriptions below follow the ucimlrepo README and should be
    confirmed there; this class itself only declares names and types.
    """

    # presumably the ID columns of the dataset -- TODO confirm against README
    ids: DataFrame
    # Feature columns of the dataset
    features: DataFrame
    # Target columns of the dataset
    targets: DataFrame
    # presumably IDs, features and targets combined -- TODO confirm
    original: DataFrame
    # presumably the names of all columns -- TODO confirm
    headers: List[str]


class UciMlDataset(NamedTuple):
    """Type hint used to annotate the value returned by ``fetch_ucirepo``.

    NOTE(review): this declares the expected attribute layout only; whether
    ``fetch_ucirepo`` returns an actual instance of this class is not
    established in this notebook -- confirm.
    """

    # The dataset's data frames (see ``UciMlData``)
    data: UciMlData
    # Dataset-level metadata; its keys are not shown in this notebook
    metadata: Dict[str, Any]
    # presumably one row of information per variable -- TODO confirm
    variables: DataFrame

In [9]:
# Fetch the "Polish companies bankruptcy" dataset (UCI ML Repository id 365):
# https://archive.ics.uci.edu/dataset/365/polish+companies+bankruptcy+data
polish_companies_bankruptcy: UciMlDataset = fetch_ucirepo(id=365)

# Feature frame and target frame, bound in a single unpacking step.
X, y = (
    polish_companies_bankruptcy.data.features,
    polish_companies_bankruptcy.data.targets,
)

In [19]:
# Preview the first rows of the features, then of the targets.
display(X.head(), y.head())

# Size and dtype summary of the feature frame.
print("Number of features: ", len(X.columns))
print("Number of samples: ", len(X.index))
print("Feature types: ", X.dtypes.unique())

Unnamed: 0,year,A1,A2,A3,A4,A5,A6,A7,A8,A9,...,A55,A56,A57,A58,A59,A60,A61,A62,A63,A64
0,1,0.20055,0.37951,0.39641,2.0472,32.351,0.38825,0.24976,1.3305,1.1389,...,348690.0,0.12196,0.39718,0.87804,0.001924,8.416,5.1372,82.658,4.4158,7.4277
1,1,0.20912,0.49988,0.47225,1.9447,14.786,0.0,0.25834,0.99601,1.6996,...,2304.6,0.1213,0.42002,0.853,0.0,4.1486,3.2732,107.35,3.4,60.987
2,1,0.24866,0.69592,0.26713,1.5548,-1.1523,0.0,0.30906,0.43695,1.309,...,6332.7,0.24114,0.81774,0.76599,0.69484,4.9909,3.951,134.27,2.7185,5.2078
3,1,0.081483,0.30734,0.45879,2.4928,51.952,0.14988,0.092704,1.8661,1.0571,...,20545.0,0.054015,0.14207,0.94598,0.0,4.5746,3.6147,86.435,4.2228,5.5497
4,1,0.18732,0.61323,0.2296,1.4063,-7.3128,0.18732,0.18732,0.6307,1.1559,...,3186.6,0.13485,0.48431,0.86515,0.12444,6.3985,4.3158,127.21,2.8692,7.898


Unnamed: 0,class
0,0
1,0
2,0
3,0
4,0


Number of features:  65
Number of samples:  43405
Feature types:  [dtype('int64') dtype('float64')]


In [3]:
# Human-readable descriptions of the feature columns A1..A64. Each entry is a
# [column name, formula] pair; the `year` column of the feature frame has no
# entry here.
# NOTE(review): presumably transcribed from the dataset's documentation --
# verify the wording against the source page.
feature_description = [
    ["A1", "net profit / total assets"],
    ["A2", "total liabilities / total assets"],
    ["A3", "working capital / total assets"],
    ["A4", "current assets / short-term liabilities"],
    [
        "A5",
        "[(cash + short-term securities + receivables - short-term liabilities) / (operating expenses - depreciation)] * 365",
    ],
    ["A6", "retained earnings / total assets"],
    ["A7", "EBIT / total assets"],
    ["A8", "book value of equity / total liabilities"],
    ["A9", "sales / total assets"],
    ["A10", "equity / total assets"],
    ["A11", "(gross profit + extraordinary items + financial expenses) / total assets"],
    ["A12", "gross profit / short-term liabilities"],
    ["A13", "(gross profit + depreciation) / sales"],
    ["A14", "(gross profit + interest) / total assets"],
    ["A15", "(total liabilities * 365) / (gross profit + depreciation)"],
    ["A16", "(gross profit + depreciation) / total liabilities"],
    ["A17", "total assets / total liabilities"],
    ["A18", "gross profit / total assets"],
    ["A19", "gross profit / sales"],
    ["A20", "(inventory * 365) / sales"],
    ["A21", "sales (n) / sales (n-1)"],
    ["A22", "profit on operating activities / total assets"],
    ["A23", "net profit / sales"],
    ["A24", "gross profit (in 3 years) / total assets"],
    ["A25", "(equity - share capital) / total assets"],
    ["A26", "(net profit + depreciation) / total liabilities"],
    ["A27", "profit on operating activities / financial expenses"],
    ["A28", "working capital / fixed assets"],
    ["A29", "logarithm of total assets"],
    ["A30", "(total liabilities - cash) / sales"],
    ["A31", "(gross profit + interest) / sales"],
    ["A32", "(current liabilities * 365) / cost of products sold"],
    ["A33", "operating expenses / short-term liabilities"],
    ["A34", "operating expenses / total liabilities"],
    ["A35", "profit on sales / total assets"],
    ["A36", "total sales / total assets"],
    ["A37", "(current assets - inventories) / long-term liabilities"],
    ["A38", "constant capital / total assets"],
    ["A39", "profit on sales / sales"],
    ["A40", "(current assets - inventory - receivables) / short-term liabilities"],
    [
        "A41",
        "total liabilities / ((profit on operating activities + depreciation) * (12/365))",
    ],
    ["A42", "profit on operating activities / sales"],
    ["A43", "rotation receivables + inventory turnover in days"],
    ["A44", "(receivables * 365) / sales"],
    ["A45", "net profit / inventory"],
    ["A46", "(current assets - inventory) / short-term liabilities"],
    ["A47", "(inventory * 365) / cost of products sold"],
    # NOTE(review): EBITDA is conventionally computed by adding depreciation
    # back; the "- depreciation" wording in A48 and A49 should be checked
    # against the dataset description before relying on it.
    ["A48", "EBITDA (profit on operating activities - depreciation) / total assets"],
    ["A49", "EBITDA (profit on operating activities - depreciation) / sales"],
    ["A50", "current assets / total liabilities"],
    ["A51", "short-term liabilities / total assets"],
    # NOTE(review): the description below ends with an unbalanced ")". It
    # otherwise mirrors A32 with "short-term" in place of "current"
    # liabilities -- confirm against the dataset description.
    ["A52", "(short-term liabilities * 365) / cost of products sold)"],
    ["A53", "equity / fixed assets"],
    ["A54", "constant capital / fixed assets"],
    ["A55", "working capital"],
    ["A56", "(sales - cost of products sold) / sales"],
    [
        "A57",
        "(current assets - inventory - short-term liabilities) / (sales - gross profit - depreciation)",
    ],
    ["A58", "total costs /total sales"],
    ["A59", "long-term liabilities / equity"],
    ["A60", "sales / inventory"],
    ["A61", "sales / receivables"],
    ["A62", "(short-term liabilities *365) / sales"],
    ["A63", "sales / short-term liabilities"],
    ["A64", "sales / fixed assets"],
]

# Render the descriptions as a two-column table (Feature, Description).
display(pd.DataFrame(feature_description, columns=["Feature", "Description"]))

Unnamed: 0,Feature,Description
0,A1,net profit / total assets
1,A2,total liabilities / total assets
2,A3,working capital / total assets
3,A4,current assets / short-term liabilities
4,A5,[(cash + short-term securities + receivables -...
...,...,...
59,A60,sales / inventory
60,A61,sales / receivables
61,A62,(short-term liabilities *365) / sales
62,A63,sales / short-term liabilities
