In [None]:
!pip install nflows

In [2]:
import pandas as pd
import numpy as np
import math

import torch
from torch import nn
from torch import optim
from sklearn.decomposition import PCA
 
from nflows.flows.base import Flow
from nflows.distributions.normal import StandardNormal
from nflows.transforms.base import CompositeTransform
from nflows.transforms.autoregressive import MaskedAffineAutoregressiveTransform
from nflows.transforms.coupling import AffineCouplingTransform
from nflows.transforms.linear import NaiveLinear
from nflows.transforms.permutations import ReversePermutation
def warn(*args, **kwargs):
    pass
import warnings
warnings.warn = warn
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

In [7]:
data = pd.read_csv("/content/iris_flower.csv")
print(data.head())

   sepal_length  sepal_width  petal_length  petal_width  type
0           5.1          3.5           1.4          0.2     1
1           4.9          3.0           1.4          0.2     1
2           4.7          3.2           1.3          0.2     1
3           4.6          3.1           1.5          0.2     1
4           5.0          3.6           1.4          0.2     1


In [8]:
input_data = data.drop("type", axis=1)
output_data = np.array(data["type"])

In [9]:
MAX_CLASS_INT = 3
MIN_CLASS_INT = 1
N_SAMPLES = len(data)

In [10]:
pca = PCA(n_components = 1)
input_data = pca.fit_transform(input_data)

In [11]:
final_data = []
for i in range(len(output_data)):
  final_data.append([input_data[i][0], output_data[i]])

final_data = np.array(final_data)

In [12]:
num_layers = 7
base_dist = StandardNormal(shape=[2])
num_iter = 10000

Masked Autoregrssive Flow

In [13]:
transforms = []
for _ in range(num_layers):
     transforms.append(MaskedAffineAutoregressiveTransform(features=2, 
                                                            hidden_features=4))

transform = CompositeTransform(transforms)

flow = Flow(transform, base_dist)
optimizer = optim.Adam(flow.parameters())

In [14]:
for i in range(num_iter):
    #x, y = datasets.make_circles(n_samples=300, factor=0.5, noise=0.05)
    x = torch.tensor(final_data, dtype=torch.float32)
    optimizer.zero_grad()
    loss = -flow.log_prob(inputs=x).mean()
    loss.backward()
    optimizer.step()

Autoregressive Flow with Coupling layer

In [15]:
mask = [1,1]
class Net(nn.Module):

    def __init__(self, in_channel, out_channels):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(in_channel, in_channel),
            nn.ReLU(),
            nn.Linear(in_channel, in_channel),
            nn.ReLU(),
            nn.Linear(in_channel, out_channels),
        )

    def forward(self, inp, context=None):
        return self.net(inp)

def getNet(in_channel, out_channels):
        return Net(in_channel, out_channels)
transforms_cl = []
for _ in range(num_layers):
     transforms_cl.append(AffineCouplingTransform(mask, getNet))

transform_cl = CompositeTransform(transforms_cl)

flow_cl = Flow(transform_cl, base_dist)
optimizer_cl = optim.Adam(flow_cl.parameters())

In [16]:
for i in range(num_iter):
    #x, y = datasets.make_circles(n_samples=300, factor=0.5, noise=0.05)
    x = torch.tensor(final_data, dtype=torch.float32)
    optimizer_cl.zero_grad()
    loss_cl = -flow_cl.log_prob(inputs=x).mean()
    loss_cl.backward()
    optimizer_cl.step()

Linear Flow

In [17]:
transforms_l = []

for _ in range(num_layers):
     transforms_l.append(ReversePermutation(features=2))
     transforms_l.append(NaiveLinear(features=2))

transform_l = CompositeTransform(transforms_l)

flow_l = Flow(transform_l, base_dist)
optimizer_l = optim.Adam(flow_l.parameters())

The boolean parameter 'some' has been replaced with a string parameter 'mode'.
Q, R = torch.qr(A, some)
should be replaced with
Q, R = torch.linalg.qr(A, 'reduced' if some else 'complete') (Triggered internally at  ../aten/src/ATen/native/BatchLinearAlgebra.cpp:1980.)
  q, _ = torch.qr(x)


In [18]:
for i in range(num_iter):
    #x, y = datasets.make_circles(n_samples=300, factor=0.5, noise=0.05)
    x = torch.tensor(final_data, dtype=torch.float32)
    optimizer_l.zero_grad()
    loss_l = -flow_l.log_prob(inputs=x).mean()
    loss_l.backward()
    optimizer_l.step()

In [19]:
def guassianNB_accuracy(flow):
  max_accuracy = 0
  for i in range(100):
    samples = flow.sample(N_SAMPLES)
    samples = samples.detach().numpy()

    y_sample = samples[:, 1]
    for i in range(N_SAMPLES):
      y_sample[i] = math.floor(y_sample[i])
      if y_sample[i] < MIN_CLASS_INT:
        y_sample[i] = MIN_CLASS_INT
      elif y_sample[i] > MAX_CLASS_INT:
        y_sample[i] = MAX_CLASS_INT

    model_with_real_data = GaussianNB()
    model_with_sampled_data = GaussianNB()

    model_with_real_data.fit(final_data[:, 0].reshape(-1, 1), final_data[:, 1].reshape(-1, 1))
    model_with_sampled_data.fit(samples[:, 0].reshape(-1, 1), samples[:, 1].reshape(-1, 1))

    pred_data_real = model_with_real_data.predict(final_data[:, 0].reshape(-1, 1))
    pred_data_sampled = model_with_sampled_data.predict(final_data[:, 0].reshape(-1, 1))

    accuracy = accuracy_score(pred_data_real,pred_data_sampled)

    if max_accuracy < accuracy:
      max_accuracy = accuracy

  print(max_accuracy)



In [20]:
def logistic_regression_accuracy(flow):  
  max_accuracy_lr = 0
  for i in range(100):
    samples = flow.sample(N_SAMPLES)
    samples = samples.detach().numpy()

    y_sample = samples[:, 1]
    for i in range(N_SAMPLES):
      y_sample[i] = math.floor(y_sample[i])
      if y_sample[i] < MIN_CLASS_INT:
        y_sample[i] = MIN_CLASS_INT
      elif y_sample[i] > MAX_CLASS_INT:
        y_sample[i] = MAX_CLASS_INT

    model_with_real_data = LogisticRegression(random_state = 0)
    model_with_sampled_data = LogisticRegression(random_state = 0)

    model_with_real_data.fit(final_data[:, 0].reshape(-1, 1), final_data[:, 1].reshape(-1, 1))
    model_with_sampled_data.fit(samples[:, 0].reshape(-1, 1), samples[:, 1].reshape(-1, 1))

    pred_data_real = model_with_real_data.predict(final_data[:, 0].reshape(-1, 1))
    pred_data_sampled = model_with_sampled_data.predict(final_data[:, 0].reshape(-1, 1))

    accuracy = accuracy_score(pred_data_real,pred_data_sampled)

    if max_accuracy_lr < accuracy:
      max_accuracy_lr = accuracy

  print(max_accuracy_lr)

In [21]:
def knn_accuracy(flow):
  max_accuracy_knn = 0
  for i in range(100):
    samples = flow.sample(N_SAMPLES)
    samples = samples.detach().numpy()

    y_sample = samples[:, 1]
    for i in range(N_SAMPLES):
      y_sample[i] = math.floor(y_sample[i])
      if y_sample[i] < MIN_CLASS_INT:
        y_sample[i] = MIN_CLASS_INT
      elif y_sample[i] > MAX_CLASS_INT:
        y_sample[i] = MAX_CLASS_INT

    model_with_real_data = KNeighborsClassifier(n_neighbors=7)
    model_with_sampled_data = KNeighborsClassifier(n_neighbors=7)

    model_with_real_data.fit(final_data[:, 0].reshape(-1, 1), final_data[:, 1].reshape(-1, 1))
    model_with_sampled_data.fit(samples[:, 0].reshape(-1, 1), samples[:, 1].reshape(-1, 1))

    pred_data_real = model_with_real_data.predict(final_data[:, 0].reshape(-1, 1))
    pred_data_sampled = model_with_sampled_data.predict(final_data[:, 0].reshape(-1, 1))

    accuracy = accuracy_score(pred_data_real,pred_data_sampled)

    if max_accuracy_knn < accuracy:
      max_accuracy_knn = accuracy

  print(max_accuracy_knn)

In [22]:
def random_forest_accuracy(flow):  
  max_accuracy_rf = 0
  for i in range(100):
    samples = flow.sample(N_SAMPLES)
    samples = samples.detach().numpy()

    y_sample = samples[:, 1]
    for i in range(N_SAMPLES):
      y_sample[i] = math.floor(y_sample[i])
      if y_sample[i] < MIN_CLASS_INT:
        y_sample[i] = MIN_CLASS_INT
      elif y_sample[i] > MAX_CLASS_INT:
        y_sample[i] = MAX_CLASS_INT

    model_with_real_data = RandomForestClassifier(n_estimators = 100)
    model_with_sampled_data = RandomForestClassifier(n_estimators = 100)

    model_with_real_data.fit(final_data[:, 0].reshape(-1, 1), final_data[:, 1].reshape(-1, 1))
    model_with_sampled_data.fit(samples[:, 0].reshape(-1, 1), samples[:, 1].reshape(-1, 1))

    pred_data_real = model_with_real_data.predict(final_data[:, 0].reshape(-1, 1))
    pred_data_sampled = model_with_sampled_data.predict(final_data[:, 0].reshape(-1, 1))

    accuracy = accuracy_score(pred_data_real,pred_data_sampled)

    if max_accuracy_rf < accuracy:
      max_accuracy_rf = accuracy

  print(max_accuracy_rf) #0.3

In [23]:
guassianNB_accuracy(flow)
guassianNB_accuracy(flow_cl)
guassianNB_accuracy(flow_l)

0.8
0.6466666666666666


torch.linalg.solve has its arguments reversed and does not return the LU factorization.
To get the LU factorization see torch.lu, which can be used with torch.lu_solve or torch.lu_unpack.
X = torch.solve(B, A).solution
should be replaced with
X = torch.linalg.solve(A, B) (Triggered internally at  ../aten/src/ATen/native/BatchLinearAlgebra.cpp:859.)
  outputs, lu = torch.solve(outputs.t(), self._weight)  # Linear-system solver.


0.7733333333333333


In [24]:
logistic_regression_accuracy(flow)
logistic_regression_accuracy(flow_cl)
logistic_regression_accuracy(flow_l)

0.7666666666666667
0.6533333333333333
0.74


In [25]:
knn_accuracy(flow)
knn_accuracy(flow_cl)
knn_accuracy(flow_l)

0.8666666666666667
0.5066666666666667
0.7666666666666667


In [26]:
random_forest_accuracy(flow)
random_forest_accuracy(flow_cl)
random_forest_accuracy(flow_l)

0.76
0.5533333333333333
0.7533333333333333
