In [None]:
!pip install nflows

In [None]:
!git clone https://github.com/pragatischdv/data-accuracy-score.git
%cd data-accuracy-score

In [6]:
from sklearn.datasets import load_iris
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import numpy as np
import math
import torch
from torch import nn
from torch import optim
from sklearn.decomposition import PCA
from nflows.flows.base import Flow
from nflows.distributions.normal import StandardNormal
from nflows.transforms.base import CompositeTransform
from nflows.transforms.autoregressive import MaskedAffineAutoregressiveTransform
from nflows.transforms.coupling import AffineCouplingTransform
from nflows.transforms.linear import NaiveLinear
from nflows.transforms.permutations import ReversePermutation
def warn(*args, **kwargs):
    pass
import warnings
warnings.warn = warn
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from das import evaluate_das

In [7]:
# Load Iris dataset
iris = load_iris()

# Create DataFrame
df = pd.DataFrame(iris.data, columns=iris.feature_names)

# The target is already numeric, but let's simulate label encoding from names
species_names = pd.Categorical.from_codes(iris.target, iris.target_names)
le = LabelEncoder()
df["species"] = le.fit_transform(species_names)

print(df.head())
print("\nLabel mapping:", dict(zip(le.classes_, le.transform(le.classes_))))

   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                5.1               3.5                1.4               0.2   
1                4.9               3.0                1.4               0.2   
2                4.7               3.2                1.3               0.2   
3                4.6               3.1                1.5               0.2   
4                5.0               3.6                1.4               0.2   

   species  
0        0  
1        0  
2        0  
3        0  
4        0  

Label mapping: {'setosa': np.int64(0), 'versicolor': np.int64(1), 'virginica': np.int64(2)}


In [8]:
input_data = df.drop("species", axis=1)
output_data = np.array(df["species"])

In [9]:
MAX_CLASS_INT = 2
MIN_CLASS_INT = 0
N_SAMPLES = len(df)

In [10]:
pca = PCA(n_components = 1)
input_data = pca.fit_transform(input_data)

In [11]:
final_data = []
for i in range(len(output_data)):
  final_data.append([input_data[i][0], output_data[i]])

final_data = np.array(final_data)

In [12]:
num_layers = 7
base_dist = StandardNormal(shape=[2])
num_iter = 1000

In [13]:
transforms = []
for _ in range(num_layers):
     transforms.append(MaskedAffineAutoregressiveTransform(features=2,
                                                            hidden_features=4))

transform = CompositeTransform(transforms)

flow = Flow(transform, base_dist)
optimizer = optim.Adam(flow.parameters())

In [14]:
for i in range(num_iter):
    #x, y = datasets.make_circles(n_samples=300, factor=0.5, noise=0.05)
    x = torch.tensor(final_data, dtype=torch.float32)
    optimizer.zero_grad()
    loss = -flow.log_prob(inputs=x).mean()
    loss.backward()
    optimizer.step()

In [15]:
samples = flow.sample(N_SAMPLES)
samples = samples.detach().numpy()

y_sample = samples[:, 1]
for i in range(N_SAMPLES):
  y_sample[i] = math.floor(y_sample[i])
  if y_sample[i] < MIN_CLASS_INT:
    y_sample[i] = MIN_CLASS_INT
  elif y_sample[i] > MAX_CLASS_INT:
    y_sample[i] = MAX_CLASS_INT

In [16]:
res_cls = evaluate_das(
    final_data[:, 0].reshape(-1, 1), final_data[:, 1].reshape(-1, 1),
    samples[:, 0].reshape(-1, 1), samples[:, 1].reshape(-1, 1),
    final_data[:, 0].reshape(-1, 1),  None,
    task="classification",
    estimator=GaussianNB()
)
print("CLASSIFICATION RESULT:", res_cls)

CLASSIFICATION RESULT: {'task': 'classification', 'estimator': 'GaussianNB', 'das': 61.33333333333333}
