In [4]:
from typing import Optional

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn.datasets  # IRIS, WINE
from scipy.io import arff
from sklearn.preprocessing import StandardScaler
from ucimlrepo import fetch_ucirepo

In [40]:
from typing import NamedTuple, Any


class UciMlData(NamedTuple):
    """Typed view of the `data` attribute of a `fetch_ucirepo(...)` result.

    Used only as a type hint in this notebook. ucimlrepo sets a role's frame
    to ``None`` (not an empty frame) when the dataset declares no variables
    with that role, so the role-based fields are optional.
    """

    # ID columns; None for datasets that have no ID variables.
    ids: Optional[pd.DataFrame]
    # Feature columns; None if the dataset declares no feature variables.
    features: Optional[pd.DataFrame]
    # Target columns; None if the dataset declares no target variables.
    targets: Optional[pd.DataFrame]
    # The full table the role-based frames are sliced from.
    original: pd.DataFrame
    # Column names of `original`.
    # NOTE(review): may be a pandas Index rather than a plain list at runtime — confirm.
    headers: list[str]


class UciMlDataset(NamedTuple):
    """Typing aid for the objects assigned from `fetch_ucirepo(...)` in this notebook.

    Only used as a variable annotation; the notebook reads `data.features`
    and `data.targets` from the annotated objects.
    """

    # Container holding the feature/target frames (see `UciMlData`).
    data: UciMlData
    # Dataset-level metadata — presumably a string-keyed mapping; the keys are not shown here.
    metadata: dict[str, Any]
    # NOTE(review): presumably one row per dataset variable — confirm against ucimlrepo.
    variables: pd.DataFrame

## IRIS


In [43]:
# Fetch the Iris dataset through ucimlrepo and keep features and targets as
# separate frames.
iris_dataset: UciMlDataset = fetch_ucirepo("iris")
X_iris = iris_dataset.data.features
y_iris = iris_dataset.data.targets

# The fetched object has no `feature_names` / `target_names` attributes (both
# lookups printed None), so derive the names from the frames themselves.
print("features: ", X_iris.columns.tolist())
print("shape", X_iris.shape)
print("target names: ", y_iris["class"].unique().tolist())

display(X_iris.head())
display(y_iris.head())

features:  None
shape (150, 4)
target names:  None


Unnamed: 0,sepal length,sepal width,petal length,petal width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


Unnamed: 0,class
0,Iris-setosa
1,Iris-setosa
2,Iris-setosa
3,Iris-setosa
4,Iris-setosa


## SEEDS


In [47]:
# Column labels for the header-less seeds file, in file order.
seeds_columns = [
    "area",
    "perimeter",
    "compactness",
    "length",
    "width",
    "asymmetry",
    "groove",
    "class",
]
seeds_dataset = pd.read_csv(
    "data/seeds_dataset.txt", sep=",", header=None, names=seeds_columns
)

# Split into a single-column target frame and a frame with everything else.
y_seeds = seeds_dataset["class"].to_frame()
X_seeds = seeds_dataset.drop("class", axis=1)

print(seeds_dataset.shape)
display(X_seeds.head())
display(y_seeds.head())

(210, 8)


Unnamed: 0,area,perimeter,compactness,length,width,asymmetry,groove
0,15.26,14.84,0.871,5.763,3.312,2.221,5.22
1,14.88,14.57,0.8811,5.554,3.333,1.018,4.956
2,14.29,14.09,0.905,5.291,3.337,2.699,4.825
3,13.84,13.94,0.8955,5.324,3.379,2.259,4.805
4,16.14,14.99,0.9034,5.658,3.562,1.355,5.175


Unnamed: 0,class
0,1
1,1
2,1
3,1
4,1


## Glass


In [53]:
# Fetch the "glass" dataset through ucimlrepo and keep features and targets
# as separate frames.
glass_dataset: UciMlDataset = fetch_ucirepo("glass")
X_glass, y_glass = glass_dataset.data.features, glass_dataset.data.targets

# Quick look: feature-frame dimensions, then the first rows of each frame.
print(X_glass.shape)
display(X_glass.head(), y_glass.head())

(214, 9)


Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe
0,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0
1,1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0


Unnamed: 0,Type_of_glass
0,1
1,1
2,1
3,1
4,1


## PCB (Polish Companies Bankruptcy)


In [54]:
# This dataset is looked up by its numeric repository id rather than by name.
pcb_dataset: UciMlDataset = fetch_ucirepo(id=365)
X_pcb, y_pcb = pcb_dataset.data.features, pcb_dataset.data.targets

# Quick look: feature-frame dimensions, then the first rows of each frame.
print(X_pcb.shape)
display(X_pcb.head(), y_pcb.head())

(43405, 65)


Unnamed: 0,year,A1,A2,A3,A4,A5,A6,A7,A8,A9,...,A55,A56,A57,A58,A59,A60,A61,A62,A63,A64
0,1,0.20055,0.37951,0.39641,2.0472,32.351,0.38825,0.24976,1.3305,1.1389,...,348690.0,0.12196,0.39718,0.87804,0.001924,8.416,5.1372,82.658,4.4158,7.4277
1,1,0.20912,0.49988,0.47225,1.9447,14.786,0.0,0.25834,0.99601,1.6996,...,2304.6,0.1213,0.42002,0.853,0.0,4.1486,3.2732,107.35,3.4,60.987
2,1,0.24866,0.69592,0.26713,1.5548,-1.1523,0.0,0.30906,0.43695,1.309,...,6332.7,0.24114,0.81774,0.76599,0.69484,4.9909,3.951,134.27,2.7185,5.2078
3,1,0.081483,0.30734,0.45879,2.4928,51.952,0.14988,0.092704,1.8661,1.0571,...,20545.0,0.054015,0.14207,0.94598,0.0,4.5746,3.6147,86.435,4.2228,5.5497
4,1,0.18732,0.61323,0.2296,1.4063,-7.3128,0.18732,0.18732,0.6307,1.1559,...,3186.6,0.13485,0.48431,0.86515,0.12444,6.3985,4.3158,127.21,2.8692,7.898


Unnamed: 0,class
0,0
1,0
2,0
3,0
4,0
