diff --git a/examples/benchmark_vegas.py b/examples/benchmark_vegas.py new file mode 100644 index 0000000..d830c5d --- /dev/null +++ b/examples/benchmark_vegas.py @@ -0,0 +1,93 @@ +import vegas +import numpy as np +import gvar +import torch + +dim = 4 +nitn = 10 +ninc = 1000 + + +@vegas.lbatchintegrand +def f_batch(x): + dx2 = 0.0 + for d in range(dim): + dx2 += (x[:, d] - 0.5) ** 2 + return np.exp(-200 * dx2) + # ans = np.empty((x.shape[0], 3), float) + # dx2 = 0.0 + # for d in range(dim): + # dx2 += (x[:, d] - 0.5) ** 2 + # ans[:, 0] = np.exp(-200 * dx2) + # ans[:, 1] = x[:, 0] * ans[:, 0] + # ans[:, 2] = x[:, 0] ** 2 * ans[:, 0] + # return ans + + +def smc(f, map, neval, dim): + "integrates f(y) over dim-dimensional unit hypercube" + y = np.random.uniform(0, 1, (neval, dim)) + jac = np.empty(y.shape[0], float) + x = np.empty(y.shape, float) + map.map(y, x, jac) + fy = jac * f(x) + return (np.average(fy), np.std(fy) / neval**0.5) + + +def mc(f, neval, dim): + "integrates f(y) over dim-dimensional unit hypercube" + y = np.random.uniform(0, 1, (neval, dim)) + fy = f(y) + return (np.average(fy), np.std(fy) / neval**0.5) + + +m = vegas.AdaptiveMap(dim * [[0, 1]], ninc=ninc) +ny = 20000 +# torch.manual_seed(0) +# y = torch.rand((ny, dim), dtype=torch.float64).numpy() +y = np.random.uniform(0.0, 1.0, (ny, dim)) # 1000 random y's + +x = np.empty(y.shape, float) # work space +jac = np.empty(y.shape[0], float) +f2 = np.empty(y.shape[0], float) + +for itn in range(10): # 5 iterations to adapt + m.map(y, x, jac) # compute x's and jac + + f2 = (jac * f_batch(x)) ** 2 + m.add_training_data(y, f2) # adapt + # if itn == 0: + # print(np.array(memoryview(m.sum_f))) + # print(np.array(memoryview(m.n_f))) + m.adapt(alpha=0.5) + + +# with map +r = smc(f_batch, m, 50_000, dim) +print(" SMC + map:", f"{r[0]} +- {r[1]}") + +# without map +r = mc(f_batch, 50_000, dim) +print("SMC (no map):", f"{r[0]} +- {r[1]}") + + +# vegas with adaptive stratified sampling +print("VEGAS using adaptive stratified sampling") +integ = vegas.Integrator(dim * [[0, 1]]) +training = integ(f_batch, nitn=10, neval=20000) # adapt grid + +# final analysis +result = integ(f_batch, nitn=1, neval=50_000, adapt=False) +print(result) +result = integ(f_batch, nitn=5, neval=10_000, adapt=False) +print(result) +result = integ(f_batch, nitn=5, neval=10_000) +print(result) +# print("I[0] =", result[0], " I[1] =", result[1], " I[2] =", result[2]) +# print("Q = %.2f\n" % result.Q) +# print(" =", result[1] / result[0]) +# print( +# "sigma_x**2 = - **2 =", +# result[2] / result[0] - (result[1] / result[0]) ** 2, +# ) +# print("\ncorrelation matrix:\n", gv.evalcorr(result)) diff --git a/src/base.py b/src/base.py new file mode 100644 index 0000000..07055b1 --- /dev/null +++ b/src/base.py @@ -0,0 +1,54 @@ +import torch +from torch import nn +import numpy as np + + +class BaseDistribution(nn.Module): + """ + Base distribution of a flow-based model + Parameters do not depend of target variable (as is the case for a VAE encoder) + """ + + def __init__(self, bounds, device="cpu", dtype=torch.float64): + super().__init__() + self.dtype = dtype + # self.bounds = bounds + if isinstance(bounds, (list, np.ndarray)): + self.bounds = torch.tensor(bounds, dtype=dtype, device=device) + elif isinstance(bounds, torch.Tensor): + self.bounds = bounds + else: + raise ValueError("Unsupported map specification") + self.dim = self.bounds.shape[0] + self.device = device + + def sample(self, nsamples=1, **kwargs): + """Samples from base distribution + + Args: + num_samples: 
Number of samples to draw from the distriubtion + + Returns: + Samples drawn from the distribution + """ + raise NotImplementedError + + +class Uniform(BaseDistribution): + """ + Multivariate uniform distribution + """ + + def __init__(self, bounds, device="cpu", dtype=torch.float64): + super().__init__(bounds, device, dtype) + self._rangebounds = self.bounds[:, 1] - self.bounds[:, 0] + + def sample(self, nsamples=1, **kwargs): + # torch.manual_seed(0) # test seed + u = ( + torch.rand((nsamples, self.dim), device=self.device, dtype=self.dtype) + * self._rangebounds + + self.bounds[:, 0] + ) + log_detJ = torch.log(self._rangebounds).sum().repeat(nsamples) + return u, log_detJ diff --git a/src/integrators.py b/src/integrators.py index 7084970..4b93ad9 100644 --- a/src/integrators.py +++ b/src/integrators.py @@ -1,214 +1,261 @@ from typing import Callable, Union, List, Tuple, Dict import torch from utils import RAvg -from maps import Map, Affine +from maps import Map, Linear, CompositeMap +from base import Uniform import gvar +import numpy as np class Integrator: """ - Base class for all integrators. + Base class for all integrators. This class is designed to handle integration tasks + over a specified domain (bounds) using a sampling method (q0) and optional + transformation maps. """ def __init__( self, - # bounds: Union[List[Tuple[float, float]], np.ndarray], - map, + maps=None, + bounds=None, + q0=None, neval: int = 1000, - batch_size: int = None, + nbatch: int = None, device="cpu", - # device=torch.device("cuda" if torch.cuda.is_available() else "cpu"), + dtype=torch.float64, ): - if not isinstance(map, Map): - map = Affine(map) - - self.dim = map.dim - self.map = map + self.dtype = dtype + if maps: + if not self.dtype == maps.dtype: + raise ValueError("Float type of maps should be same as integrator.") + self.bounds = maps.bounds + else: + if not isinstance(bounds, (list, np.ndarray)): + raise TypeError("bounds must be a list or a NumPy array.") + self.bounds = torch.tensor(bounds, dtype=dtype, device=device) + + self.dim = len(self.bounds) + if not q0: + q0 = Uniform(self.bounds, device=device, dtype=dtype) + self.q0 = q0 + self.maps = maps self.neval = neval - if batch_size is None: - self.batch_size = neval + if nbatch is None: + self.nbatch = neval self.neval = neval else: - self.batch_size = batch_size - self.neval = -(-neval // batch_size) * batch_size + self.nbatch = nbatch + self.neval = -(-neval // nbatch) * nbatch self.device = device def __call__(self, f: Callable, **kwargs): raise NotImplementedError("Subclasses must implement this method") + def sample(self, nsample, **kwargs): + u, log_detJ = self.q0.sample(nsample) + if not self.maps: + return u, log_detJ + else: + u, log_detj = self.maps.forward(u) + return u, log_detJ + log_detj + class MonteCarlo(Integrator): def __init__( self, - map, - nitn: int = 10, + maps=None, + bounds=None, + q0=None, neval: int = 1000, - batch_size: int = None, + nbatch: int = None, device="cpu", - adapt=False, - alpha=0.5, + dtype=torch.float64, ): - super().__init__(map, neval, batch_size, device) - self.adapt = adapt - self.alpha = alpha - self.nitn = nitn + super().__init__(maps, bounds, q0, neval, nbatch, device, dtype) def __call__(self, f: Callable, **kwargs): - u = torch.rand(self.batch_size, self.dim, device=self.device) - x, _ = self.map.forward(u) + x, _ = self.sample(1) f_values = f(x) - f_size = len(f_values) if isinstance(f_values, (list, tuple)) else 1 - type_fval = f_values.dtype if f_size == 1 else type(f_values[0].dtype) - - 
mean = torch.zeros(f_size, dtype=type_fval, device=self.device) - var = torch.zeros(f_size, dtype=type_fval, device=self.device) - # var = torch.zeros((f_size, f_size), dtype=type_fval, device=self.device) - - result = RAvg(weighted=self.adapt) - epoch = self.neval // self.batch_size - - for itn in range(self.nitn): - mean[:] = 0 - var[:] = 0 - for _ in range(epoch): - y = torch.rand( - self.batch_size, self.dim, dtype=torch.float64, device=self.device - ) - x, jac = self.map.forward(y) - - f_values = f(x) - batch_results = self._multiply_by_jacobian(f_values, jac) + if isinstance(f_values, (list, tuple)) and isinstance( + f_values[0], torch.Tensor + ): + f_size = len(f_values) + type_fval = f_values[0].dtype + elif isinstance(f_values, torch.Tensor): + f_size = 1 + type_fval = f_values.dtype + else: + raise TypeError( + "f must return a torch.Tensor or a list/tuple of torch.Tensor." + ) - mean += torch.mean(batch_results, dim=-1) / epoch - var += torch.var(batch_results, dim=-1) / (self.neval * epoch) + epoch = self.neval // self.nbatch + values = torch.zeros((self.nbatch, f_size), dtype=type_fval, device=self.device) - if self.adapt: - self.map.add_training_data(y, batch_results**2) + for iepoch in range(epoch): + x, log_detJ = self.sample(self.nbatch) + f_values = f(x) + batch_results = self._multiply_by_jacobian(f_values, torch.exp(log_detJ)) - result.sum_neval += self.neval - result.add(gvar.gvar(mean.item(), (var**0.5).item())) - if self.adapt: - self.map.adapt(alpha=self.alpha) + values += batch_results / epoch - return result + results = np.array([RAvg() for _ in range(f_size)]) + for i in range(f_size): + _mean = values[:, i].mean().item() + _std = values[:, i].std().item() / self.nbatch**0.5 + results[i].add(gvar.gvar(_mean, _std)) + if f_size == 1: + return results[0] + else: + return results def _multiply_by_jacobian(self, values, jac): # if isinstance(values, dict): # return {k: v * torch.exp(log_det_J) for k, v in values.items()} if isinstance(values, (list, tuple)): - return torch.stack([v * jac for v in values]) + return torch.stack([v * jac for v in values], dim=-1) else: - return values * jac + return torch.stack([values * jac], dim=-1) + + +def random_walk(dim, bounds, device, dtype, u, **kwargs): + rangebounds = bounds[:, 1] - bounds[:, 0] + step_size = kwargs.get("step_size", 0.2) + step_sizes = rangebounds * step_size + step = torch.empty(dim, device=device, dtype=dtype).uniform_(-1, 1) * step_sizes + new_u = (u + step - bounds[:, 0]) % rangebounds + bounds[:, 0] + return new_u + + +def uniform(dim, bounds, device, dtype, u, **kwargs): + rangebounds = bounds[:, 1] - bounds[:, 0] + return torch.rand_like(u) * rangebounds + bounds[:, 0] + + +def gaussian(dim, bounds, device, dtype, u, **kwargs): + mean = kwargs.get("mean", torch.zeros_like(u)) + std = kwargs.get("std", torch.ones_like(u)) + return torch.normal(mean, std) class MCMC(MonteCarlo): def __init__( self, - map: Map, - nitn: int = 10, + maps=None, + bounds=None, + q0=None, neval=10000, - batch_size=None, - n_burnin=500, + nbatch=None, + nburnin=500, device="cpu", - adapt=False, - alpha=0.5, + dtype=torch.float64, ): - super().__init__(map, nitn, neval, batch_size, device, adapt, alpha) - self.n_burnin = n_burnin + super().__init__(maps, bounds, q0, neval, nbatch, device, dtype) + self.nburnin = nburnin + if maps is None: + self.maps = Linear([(0, 1)] * self.dim, device=device) + self._rangebounds = self.bounds[:, 1] - self.bounds[:, 0] def __call__( self, f: Callable, - proposal_dist="global_uniform", + 
proposal_dist: Callable = uniform, thinning=1, - mix_rate=0.0, + mix_rate=0.5, **kwargs, ): epsilon = 1e-16 # Small value to ensure numerical stability - vars_shape = (self.batch_size, self.dim) - current_y = torch.rand(vars_shape, dtype=torch.float64, device=self.device) - current_x, current_jac = self.map.forward(current_y) + epoch = self.neval // self.nbatch + current_y, current_jac = self.q0.sample(self.nbatch) + current_x, detJ = self.maps.forward(current_y) + current_jac += detJ + current_jac = torch.exp(current_jac) current_fval = f(current_x) + if isinstance(current_fval, (list, tuple)) and isinstance( + current_fval[0], torch.Tensor + ): + f_size = len(current_fval) + current_fval = sum(current_fval) + + def _integrand(x): + return sum(f(x)) + elif isinstance(current_fval, torch.Tensor): + f_size = 1 + + def _integrand(x): + return f(x) + else: + raise TypeError( + "f must return a torch.Tensor or a list/tuple of torch.Tensor." + ) + type_fval = current_fval.dtype + current_weight = mix_rate / current_jac + (1 - mix_rate) * current_fval.abs() current_weight.masked_fill_(current_weight < epsilon, epsilon) - # current_fval.masked_fill_(current_fval.abs() < epsilon, epsilon) - - proposed_y = torch.empty_like(current_y) - proposed_x = torch.empty_like(proposed_y) - new_fval = torch.empty_like(current_fval) - new_weight = torch.empty_like(current_weight) - - f_size = len(current_fval) if isinstance(current_fval, (list, tuple)) else 1 - type_fval = current_fval.dtype if f_size == 1 else type(current_fval[0].dtype) - mean = torch.zeros(f_size, dtype=type_fval, device=self.device) - mean_ref = torch.zeros_like(mean) - var = torch.zeros(f_size, dtype=type_fval, device=self.device) - var_ref = torch.zeros_like(mean) - - result = RAvg(weighted=self.adapt) - result_ref = RAvg(weighted=self.adapt) - - epoch = self.neval // self.batch_size - n_meas = 0 - for itn in range(self.nitn): - for i in range(epoch): - proposed_y[:] = self._propose(current_y, proposal_dist, **kwargs) - proposed_x[:], new_jac = self.map.forward(proposed_y) - - new_fval[:] = f(proposed_x) - new_weight = mix_rate / new_jac + (1 - mix_rate) * new_fval.abs() - - acceptance_probs = new_weight / current_weight * new_jac / current_jac - - accept = ( - torch.rand(self.batch_size, dtype=torch.float64, device=self.device) - <= acceptance_probs + + n_meas = epoch // thinning + + def one_step(current_y, current_x, current_weight, current_jac): + proposed_y = proposal_dist( + self.dim, self.bounds, self.device, self.dtype, current_y, **kwargs + ) + proposed_x, new_jac = self.maps.forward(proposed_y) + new_jac = torch.exp(new_jac) + + new_fval = _integrand(proposed_x) + new_weight = mix_rate / new_jac + (1 - mix_rate) * new_fval.abs() + new_weight.masked_fill_(new_weight < epsilon, epsilon) + + acceptance_probs = new_weight / current_weight * new_jac / current_jac + + accept = ( + torch.rand(self.nbatch, dtype=self.dtype, device=self.device) + <= acceptance_probs + ) + + current_y = torch.where(accept.unsqueeze(1), proposed_y, current_y) + # current_fval = torch.where(accept, new_fval, current_fval) + current_x = torch.where(accept.unsqueeze(1), proposed_x, current_x) + current_weight = torch.where(accept, new_weight, current_weight) + current_jac = torch.where(accept, new_jac, current_jac) + return current_y, current_x, current_weight, current_jac + + for i in range(self.nburnin): + current_y, current_x, current_weight, current_jac = one_step( + current_y, current_x, current_weight, current_jac + ) + + values = 
torch.zeros((self.nbatch, f_size), dtype=type_fval, device=self.device) + refvalues = torch.zeros(self.nbatch, dtype=type_fval, device=self.device) + + for imeas in range(n_meas): + for j in range(thinning): + current_y, current_x, current_weight, current_jac = one_step( + current_y, current_x, current_weight, current_jac ) - current_y = torch.where(accept.unsqueeze(1), proposed_y, current_y) - current_fval = torch.where(accept, new_fval, current_fval) - current_weight = torch.where(accept, new_weight, current_weight) - current_jac = torch.where(accept, new_jac, current_jac) - - if i < self.n_burnin and (self.adapt or itn == 0): - continue - elif i % thinning == 0: - n_meas += 1 - batch_results = current_fval / current_weight - - mean += torch.mean(batch_results, dim=-1) / epoch - var += torch.var(batch_results, dim=-1) / epoch - - batch_results_ref = 1 / (current_jac * current_weight) - mean_ref += torch.mean(batch_results_ref, dim=-1) / epoch - var_ref += torch.var(batch_results_ref, dim=-1) / epoch - - if self.adapt: - self.map.add_training_data( - current_y, (current_fval * current_jac) ** 2 - ) - result.sum_neval += self.neval - result.add(gvar.gvar(mean.item(), ((var / n_meas) ** 0.5).item())) - result_ref.sum_neval += self.batch_size - result_ref.add( - gvar.gvar(mean_ref.item(), ((var_ref / n_meas) ** 0.5).item()) + batch_results = self._multiply_by_jacobian( + f(current_x), 1.0 / current_weight ) + batch_results_ref = 1 / (current_jac * current_weight) + + values += batch_results / n_meas + refvalues += batch_results_ref / n_meas + + results = np.array([RAvg() for _ in range(f_size)]) + results_ref = RAvg() + + mean_ref = refvalues.mean().item() + std_ref = refvalues.std().item() / self.nbatch**0.5 + + results_ref.add(gvar.gvar(mean_ref, std_ref)) + for i in range(f_size): + _mean = values[:, i].mean().item() + _std = values[:, i].std().item() / self.nbatch**0.5 + results[i].add(gvar.gvar(_mean, _std)) - if self.adapt: - self.map.adapt(alpha=self.alpha) - - return result / result_ref - - def _propose(self, u, proposal_dist, **kwargs): - if proposal_dist == "random_walk": - step_size = kwargs.get("step_size", 0.2) - return (u + (torch.rand_like(u) - 0.5) * step_size) % 1.0 - elif proposal_dist == "global_uniform": - return torch.rand_like(u) - # elif proposal_dist == "gaussian": - # mean = kwargs.get("mean", torch.zeros_like(u)) - # std = kwargs.get("std", torch.ones_like(u)) - # return torch.normal(mean, std) + if f_size == 1: + return results[0] / results_ref * self._rangebounds.prod() else: - raise ValueError(f"Unknown proposal distribution: {proposal_dist}") + return results / results_ref * self._rangebounds.prod().item() diff --git a/src/maps.py b/src/maps.py index 9ab2881..02f5fb1 100644 --- a/src/maps.py +++ b/src/maps.py @@ -1,81 +1,93 @@ -import torch import numpy as np +import torch +from torch import nn +from base import Uniform +import sys + +TINY = 10 ** (sys.float_info.min_10_exp + 50) -class Map: - def __init__(self, map_spec, device="cpu"): - # if isinstance(map_spec, dict): - # self.map_spec = { - # k: torch.tensor(v, device=device) for k, v in map_spec.items() - # } - if isinstance(map_spec, (list, np.ndarray)): - self.map_spec = torch.tensor(map_spec, dtype=torch.float64, device=device) +class Map(nn.Module): + def __init__(self, bounds, device="cpu", dtype=torch.float64): + super().__init__() + if isinstance(bounds, (list, np.ndarray)): + self.bounds = torch.tensor(bounds, dtype=dtype, device=device) else: raise ValueError("Unsupported map specification") - 
self.dim = self.map_spec.shape[0] + self.dim = self.bounds.shape[0] self.device = device + self.dtype = dtype - def forward(self, y): + def forward(self, u): raise NotImplementedError("Subclasses must implement this method") def inverse(self, x): raise NotImplementedError("Subclasses must implement this method") - def log_det_jacobian(self, y): - raise NotImplementedError("Subclasses must implement this method") - -class Affine(Map): - def __init__(self, map_spec, device="cpu"): - super().__init__(map_spec, device) - self._A = self.map_spec[:, 1] - self.map_spec[:, 0] - self._jac1 = torch.prod(self._A) +class CompositeMap(Map): + def __init__(self, maps, device="cpu", dtype=torch.float64): + if not maps: + raise ValueError("Maps can not be empty.") + super().__init__(maps[-1].bounds, device, dtype) + self.maps = maps - def forward(self, y): - return y * self._A + self.map_spec[:, 0], self._jac1.repeat(y.shape[0]) + def forward(self, u): + log_detJ = torch.zeros(len(u), device=u.device, dtype=self.dtype) + for map in self.maps: + u, log_detj = map.forward(u) + log_detJ += log_detj + return u, log_detJ def inverse(self, x): - return (x - self.map_spec[:, 0]) / self._A, self._jac1.repeat(x.shape[0]) + log_detJ = torch.zeros(len(x), device=x.device, dtype=self.dtype) + for i in range(len(self.maps) - 1, -1, -1): + x, log_detj = self.maps[i].inverse(x) + log_detJ += log_detj + return x, log_detJ - def log_det_jacobian(self, y): - return torch.log(self._jac1) * y.shape[0] +class Linear(Map): + def __init__(self, bounds, device="cpu", dtype=torch.float64): + super().__init__(bounds, device, dtype) + self._A = self.bounds[:, 1] - self.bounds[:, 0] + self._jac1 = torch.prod(self._A) -class AdaptiveMap(Map): - def __init__(self, map_spec, alpha=0.5, device="cpu"): - super().__init__(map_spec, device) - self.alpha = alpha - - def add_training_data(self, y, f): - pass + def forward(self, u): + return u * self._A + self.bounds[:, 0], torch.log(self._jac1.repeat(u.shape[0])) - def adapt(self, alpha=0.0): - pass + def inverse(self, x): + return (x - self.bounds[:, 0]) / self._A, torch.log( + self._jac1.repeat(x.shape[0]) + ) -class Vegas(AdaptiveMap): - def __init__(self, map_spec, ninc=1000, alpha=0.5, device="cpu"): - super().__init__(map_spec, alpha, device) +class Vegas(Map): + def __init__(self, bounds, ninc=1000, alpha=0.5, device="cpu", dtype=torch.float64): + super().__init__(bounds, device, dtype) # self.nbin = nbin - + self.alpha = alpha if isinstance(ninc, int): self.ninc = torch.ones(self.dim, dtype=torch.int32, device=device) * ninc else: self.ninc = torch.tensor(ninc, dtype=torch.int32, device=device) self.inc = torch.empty( - self.dim, self.ninc.max(), dtype=torch.float64, device=self.device + self.dim, self.ninc.max(), dtype=self.dtype, device=self.device ) self.grid = torch.empty( - self.dim, self.ninc.max() + 1, dtype=torch.float64, device=self.device + self.dim, self.ninc.max() + 1, dtype=self.dtype, device=self.device ) + self._A = self.bounds[:, 1] - self.bounds[:, 0] + self._jaclinear = torch.prod(self._A) + for d in range(self.dim): self.grid[d, : self.ninc[d] + 1] = torch.linspace( - self.map_spec[d, 0], - self.map_spec[d, 1], + self.bounds[d, 0], + self.bounds[d, 1], self.ninc[d] + 1, - dtype=torch.float64, + dtype=self.dtype, device=self.device, ) self.inc[d, : self.ninc[d]] = ( @@ -83,8 +95,29 @@ def __init__(self, map_spec, ninc=1000, alpha=0.5, device="cpu"): ) self.clear() - def add_training_data(self, y, f): - """Add training data ``f`` for ``y``-space points ``y``. 
+ def train(self, nsamples, f, epoch=5, alpha=0.5): + q0 = Uniform(self.bounds, device=self.device, dtype=self.dtype) + u, log_detJ0 = q0.sample(nsamples) + + fval = f(u) + f_size = len(fval) if isinstance(fval, (list, tuple)) else 1 + if f_size > 1: + + def _integrand(x): + return sum(f(x)) + else: + + def _integrand(x): + return f(x) + + for _ in range(epoch): + x, log_detJ = self.forward(u) + f2 = torch.exp(2 * (log_detJ + log_detJ0)) * _integrand(x) ** 2 + self.add_training_data(u, f2) + self.adapt(alpha) + + def add_training_data(self, u, fval): + """Add training data ``f`` for ``u``-space points ``u``. Accumulates training data for later use by ``self.adapt()``. Grid increments will be made smaller in regions where @@ -93,19 +126,21 @@ def add_training_data(self, y, f): when ``f`` is constant across the grid. Args: - y (tensor): ``y`` values corresponding to the training data. - ``y`` is a contiguous 2-d tensor, where ``y[j, d]`` + u (tensor): ``u`` values corresponding to the training data. + ``u`` is a contiguous 2-d tensor, where ``u[j, d]`` is for points along direction ``d``. f (tensor): Training function values. ``f[j]`` corresponds to - point ``y[j, d]`` in ``y``-space. + point ``u[j, d]`` in ``u``-space. """ if self.sum_f is None: self.sum_f = torch.zeros_like(self.inc) - self.n_f = torch.zeros_like(self.inc) + 1e-10 - iy = torch.floor(y * self.ninc).long() + self.n_f = torch.zeros_like(self.inc) + TINY + iu = (u - self.bounds[:, 0]) / self._A * self.ninc + iu = torch.floor(iu).long() for d in range(self.dim): - self.sum_f[d, iy[:, d]] += torch.abs(f) - self.n_f[d, iy[:, d]] += 1 + indices = iu[:, d] + self.sum_f[d].scatter_add_(0, indices, fval.abs()) + self.n_f[d].scatter_add_(0, indices, torch.ones_like(fval)) def adapt(self, alpha=0.0): """Adapt grid to accumulated training data. 
@@ -167,9 +202,9 @@ def adapt(self, alpha=0.0): ) sum_f = torch.sum(tmp_f[:ninc]) if sum_f > 0: - avg_f[:ninc] = tmp_f[:ninc] / sum_f + 1e-10 + avg_f[:ninc] = tmp_f[:ninc] / sum_f + TINY else: - avg_f[:ninc] = 1e-10 + avg_f[:ninc] = TINY avg_f[:ninc] = ( -(1 - avg_f[:ninc]) / torch.log(avg_f[:ninc]) ) ** alpha @@ -218,101 +253,80 @@ def clear(self): self.n_f = None @torch.no_grad() - def forward(self, y): - y = y.to(self.device) - y_ninc = y * self.ninc - iy = torch.floor(y_ninc).long() - dy_ninc = y_ninc - iy - - x = torch.empty_like(y) - jac = torch.ones(y.shape[0], device=x.device) + def forward(self, u): + u = u.to(self.device) + u_ninc = u * self.ninc + # iu = torch.floor(u_ninc).long() + iu = (u - self.bounds[:, 0]) / self._A * self.ninc + iu = torch.floor(iu).long() + du_ninc = u_ninc - torch.floor(u_ninc).long() + + x = torch.empty_like(u) + jac = torch.ones(u.shape[0], device=x.device) # self.jac.fill_(1.0) for d in range(self.dim): - # Handle the case where iy < ninc + # Handle the case where iu < ninc ninc = self.ninc[d] - mask = iy[:, d] < ninc + mask = iu[:, d] < ninc if mask.any(): x[mask, d] = ( - self.grid[d, iy[mask, d]] - + self.inc[d, iy[mask, d]] * dy_ninc[mask, d] + self.grid[d, iu[mask, d]] + + self.inc[d, iu[mask, d]] * du_ninc[mask, d] ) - jac[mask] *= self.inc[d, iy[mask, d]] * ninc + jac[mask] *= self.inc[d, iu[mask, d]] * ninc - # Handle the case where iy >= ninc + # Handle the case where iu >= ninc mask_inv = ~mask if mask_inv.any(): x[mask_inv, d] = self.grid[d, ninc] jac[mask_inv] *= self.inc[d, ninc - 1] * ninc - return x, jac + return x, torch.log(jac / self._jaclinear) @torch.no_grad() def inverse(self, x): # self.jac.fill_(1.0) x = x.to(self.device) - y = torch.empty_like(x) + u = torch.empty_like(x) jac = torch.ones(x.shape[0], device=x.device) for d in range(self.dim): ninc = self.ninc[d] - iy = torch.searchsorted(self.grid[d, :], x[:, d].contiguous(), right=True) + iu = torch.searchsorted(self.grid[d, :], x[:, d].contiguous(), right=True) - mask_valid = (iy > 0) & (iy <= ninc) - mask_lower = iy <= 0 - mask_upper = iy > ninc + mask_valid = (iu > 0) & (iu <= ninc) + mask_lower = iu <= 0 + mask_upper = iu > ninc - # Handle valid range (0 < iy <= ninc) + # Handle valid range (0 < iu <= ninc) if mask_valid.any(): - iyi_valid = iy[mask_valid] - 1 - y[mask_valid, d] = ( - iyi_valid - + (x[mask_valid, d] - self.grid[d, iyi_valid]) - / self.inc[d, iyi_valid] + iui_valid = iu[mask_valid] - 1 + u[mask_valid, d] = ( + iui_valid + + (x[mask_valid, d] - self.grid[d, iui_valid]) + / self.inc[d, iui_valid] ) / ninc - jac[mask_valid] *= self.inc[d, iyi_valid] * ninc + jac[mask_valid] *= self.inc[d, iui_valid] * ninc - # Handle lower bound (iy <= 0)\ + # Handle lower bound (iu <= 0)\ if mask_lower.any(): - y[mask_lower, d] = 0.0 + u[mask_lower, d] = 0.0 jac[mask_lower] *= self.inc[d, 0] * ninc - # Handle upper bound (iy > ninc) + # Handle upper bound (iu > ninc) if mask_upper.any(): - y[mask_upper, d] = 1.0 + u[mask_upper, d] = 1.0 jac[mask_upper] *= self.inc[d, ninc - 1] * ninc - return y, jac - - @torch.no_grad() - def log_det_jacobian(self, y): - y = y.to(self.device) - y_ninc = y * self.ninc - iy = torch.floor(y_ninc).long() - - jac = torch.ones(y.shape[0], device=x.device) - for d in range(self.dim): - # Handle the case where iy < ninc - mask = iy[:, d] < self.ninc - if mask.any(): - jac[mask] *= self.inc[d, iy[mask, d]] * self.ninc - - # Handle the case where iy >= ninc - mask_inv = ~mask - if mask_inv.any(): - jac[mask_inv] *= self.inc[d, self.ninc - 1] * 
self.ninc + return u, torch.log(jac / self._jaclinear) - return torch.sum(torch.log(jac), dim=-1) +# class NormalizingFlow(Map): +# def __init__(self, bounds, flow_model, device="cpu"): +# super().__init__(bounds, device) +# self.flow_model = flow_model.to(device) -class NormalizingFlow(AdaptiveMap): - def __init__(self, map_spec, flow_model, alpha=0.5, device="cpu"): - super().__init__(map_spec, alpha, device) - self.flow_model = flow_model.to(device) - - def forward(self, u): - return self.flow_model.forward(u)[0] - - def inverse(self, x): - return self.flow_model.inverse(x)[0] +# def forward(self, u): +# return self.flow_model.forward(u) - def log_det_jacobian(self, u): - return self.flow_model.forward(u)[1] +# def inverse(self, x): +# return self.flow_model.inverse(x) diff --git a/src/mc_test.py b/src/mc_test.py index 3219f62..9172c8b 100644 --- a/src/mc_test.py +++ b/src/mc_test.py @@ -1,28 +1,19 @@ import torch from integrators import MonteCarlo, MCMC -from maps import Vegas, Affine +from maps import Vegas, Linear from utils import set_seed, get_device set_seed(42) -device = get_device() +# device = get_device() +device = torch.device("cpu") def test_nothing(): - pass + pass + def unit_circle_integrand(x): inside_circle = (x[:, 0] ** 2 + x[:, 1] ** 2 < 1).double() - # return { - # "scalar": inside_circle, - # "vector": torch.stack([inside_circle, 2 * inside_circle], dim=1), - # "matrix": torch.stack( - # [ - # inside_circle.unsqueeze(1).repeat(1, 3), - # (2 * inside_circle).unsqueeze(1).repeat(1, 3), - # ], - # dim=1, - # ), - # } return inside_circle @@ -30,48 +21,201 @@ def half_sphere_integrand(x): return torch.clamp(1 - (x[:, 0] ** 2 + x[:, 1] ** 2), min=0) * 2 -dim = 2 -map_spec = [(-1, 1), (-1, 1)] +def integrand_list(x): + return [unit_circle_integrand(x), half_sphere_integrand(x) / 2] -affine_map = Affine(map_spec, device=device) -# vegas_map = Vegas(map_spec, device=device) -# Monte Carlo integration -print("Calculate the area of the unit circle using Monte Carlo integration...") +def integrand_list1(x): + dx2 = torch.zeros(x.shape[0], dtype=x.dtype, device=x.device) + for d in range(4): + dx2 += (x[:, d] - 0.5) ** 2 + f = torch.exp(-200 * dx2) + return [f, f * x[:, 0], f * x[:, 0] ** 2] -mc_integrator = MonteCarlo(affine_map, neval=400000, batch_size=1000, device=device) -res = mc_integrator(unit_circle_integrand) -print("Plain MC Integral results:") -print(f" Integral: {res.mean}") -print(f" Error: {res.sdev}") -res = MonteCarlo(map_spec, neval=400000, batch_size=1000, device=device)( - unit_circle_integrand -) -print("Plain MC Integral results:") -print(f" Integral: {res.mean}") -print(f" Error: {res.sdev}") +def sharp_peak(x): + dx2 = torch.zeros(x.shape[0], dtype=x.dtype, device=x.device) + for d in range(4): + dx2 += (x[:, d] - 0.5) ** 2 + return torch.exp(-200 * dx2) + + +dim = 2 +bounds = [(-1, 1), (-1, 1)] +n_eval = 400000 +n_batch = 10000 +n_therm = 10 + +vegas_map = Vegas(bounds, device=device, ninc=10) + +# True value of pi +print(f"pi = {torch.pi} \n") + +# Start Monte Carlo integration, including plain-MC, MCMC, vegas, and vegas-MCMC +mc_integrator = MonteCarlo( + bounds=bounds, + neval=n_eval, + nbatch=n_batch, + device=device, +) mcmc_integrator = MCMC( - map_spec, neval=400000, batch_size=1000, n_burnin=100, device=device + bounds=bounds, neval=n_eval, nbatch=n_batch, nburnin=n_therm, device=device ) + +print("Calculate the area of the unit circle f(x1, x2) in the bounds [-1, 1]^2...") +res = mc_integrator(unit_circle_integrand) +print("Plain MC Integral 
results: ", res) + res = mcmc_integrator(unit_circle_integrand, mix_rate=0.5) -print("MCMC Integral results:") -print(f" Integral: {res.mean}") -print(f" Error: {res.sdev}") +print("MCMC Integral results: ", res) + +vegas_map.train(20000, unit_circle_integrand, alpha=0.5) +vegas_integrator = MonteCarlo( + maps=vegas_map, + neval=n_eval, + nbatch=n_batch, + # nbatch=n_eval, + device=device, +) +res = vegas_integrator(unit_circle_integrand) +print("VEGAS Integral results: ", res) + +vegasmcmc_integrator = MCMC( + maps=vegas_map, + neval=n_eval, + nbatch=n_batch, + nburnin=n_therm, + device=device, +) +res = vegasmcmc_integrator(unit_circle_integrand, mix_rate=0.5) +print("VEGAS-MCMC Integral results: ", res, "\n") -# True value of pi -print(f"True value of pi: {torch.pi:.6f}") + +print( + r"Calculate the integral g(x1, x2) = $2 \max(1-(x_1^2+x_2^2), 0)$ in the bounds [-1, 1]^2..." +) res = mc_integrator(half_sphere_integrand) +print("Plain MC Integral results: ", res) + +res = mcmc_integrator(half_sphere_integrand, mix_rate=0.5) +print("MCMC Integral results:", res) + +# train the vegas map +vegas_map.train(20000, half_sphere_integrand, epoch=10, alpha=0.5) + +res = vegas_integrator(half_sphere_integrand) +print("VEGAS Integral results: ", res) + +res = vegasmcmc_integrator(half_sphere_integrand, mix_rate=0.5) +print("VEGAS-MCMC Integral results: ", res) + + +print("\nCalculate the integral [f(x1, x2), g(x1, x2)/2] in the bounds [-1, 1]^2") +# Two integrands +res = mc_integrator(integrand_list) print("Plain MC Integral results:") -print(f" Integral: {res.mean}") -print(f" Error: {res.sdev}") +print(" Integral 1: ", res[0]) +print(" Integral 2: ", res[1]) +res = mcmc_integrator(integrand_list, mix_rate=0.5) +print("MCMC Integral results:") +print(f" Integral 1: ", res[0]) +print(f" Integral 2: ", res[1]) + +# print("VEAGS map is trained for g(x1, x2)") +vegas_map.train(20000, integrand_list, epoch=10, alpha=0.5) +res = vegas_integrator(integrand_list) +print("VEGAS Integral results:") +print(" Integral 1: ", res[0]) +print(" Integral 2: ", res[1]) + +res = vegasmcmc_integrator(integrand_list, mix_rate=0.5) +print("VEGAS-MCMC Integral results:") +print(" Integral 1: ", res[0]) +print(" Integral 2: ", res[1]) + +print("\nCalculate the integral [h(X), x1 * h(X), x1^2 * h(X)] in the bounds [0, 1]^4") +print("h(X) = exp(-200 * (x1^2 + x2^2 + x3^2 + x4^2))") + +bounds = [(0, 1)] * 4 +mc_integrator = MonteCarlo( + bounds=bounds, + neval=n_eval, + nbatch=n_batch, + # nbatch=n_eval, + device=device, +) mcmc_integrator = MCMC( - map_spec, neval=400000, batch_size=1000, n_burnin=100, device=device + bounds=bounds, neval=n_eval, nbatch=n_batch, nburnin=n_therm, device=device +) +print("Plain MC Integral results:") +res = mc_integrator(integrand_list1) +print( + " I[0] =", + res[0], + " I[1] =", + res[1], + " I[2] =", + res[2], + " I[1]/I[0] =", + res[1] / res[0], ) -res = mcmc_integrator(half_sphere_integrand, mix_rate=0.5) print("MCMC Integral results:") -print(f" Integral: {res.mean}") -print(f" Error: {res.sdev}") +res = mcmc_integrator(integrand_list1, mix_rate=0.5) +print( + " I[0] =", + res[0], + " I[1] =", + res[1], + " I[2] =", + res[2], + " I[1]/I[0] =", + res[1] / res[0], +) + +vegas_map = Vegas(bounds, device=device) +print("train VEGAS map for h(X)...") +# vegas_map.train(20000, sharp_peak, epoch=10, alpha=0.5) +vegas_map.train(20000, integrand_list1, epoch=10, alpha=0.5) + +print("VEGAS Integral results:") +vegas_integrator = MonteCarlo( + maps=vegas_map, + neval=n_eval, + nbatch=n_batch, + 
# nbatch=n_eval, + device=device, +) +res = vegas_integrator(integrand_list1) +print( + " I[0] =", + res[0], + " I[1] =", + res[1], + " I[2] =", + res[2], + " I[1]/I[0] =", + res[1] / res[0], +) + +print("VEGAS-MCMC Integral results:") +vegasmcmc_integrator = MCMC( + maps=vegas_map, + neval=n_eval, + nbatch=n_batch, + nburnin=n_therm, + device=device, +) +res = vegasmcmc_integrator(integrand_list1, mix_rate=0.5) +print( + " I[0] =", + res[0], + " I[1] =", + res[1], + " I[2] =", + res[2], + " I[1]/I[0] =", + res[1] / res[0], +) diff --git a/src/vegas_test.py b/src/vegas_test.py new file mode 100644 index 0000000..4ca34a5 --- /dev/null +++ b/src/vegas_test.py @@ -0,0 +1,108 @@ +import torch +from integrators import MonteCarlo, MCMC +from maps import Vegas, Linear +from utils import set_seed, get_device + +# set_seed(42) +# device = get_device() +device = torch.device("cpu") + + +def integrand_list1(x): + dx2 = torch.zeros(x.shape[0], dtype=x.dtype, device=x.device) + for d in range(4): + dx2 += (x[:, d] - 0.5) ** 2 + f = torch.exp(-200 * dx2) + return [f, f * x[:, 0], f * x[:, 0] ** 2] + + +def sharp_peak(x): + dx2 = torch.zeros(x.shape[0], dtype=x.dtype, device=x.device) + for d in range(4): + dx2 += (x[:, d] - 0.5) ** 2 + return torch.exp(-200 * dx2) + + +def func(x): + return torch.log(x[:, 0]) / torch.sqrt(x[:, 0]) + + +ninc = 1000 +n_eval = 50000 +n_batch = 10000 +n_therm = 10 + +print("\nCalculate the integral log(x)/x^0.5 in the bounds [0, 1]") + +print("train VEGAS map") +vegas_map = Vegas([(0, 1)], device=device, ninc=ninc) +vegas_map.train(20000, func, epoch=10, alpha=0.5) + +vegas_integrator = MonteCarlo( + maps=vegas_map, + neval=1000000, + nbatch=n_batch, + device=device, +) +res = vegas_integrator(func) +print("VEGAS Integral results: ", res) + +vegasmcmc_integrator = MCMC( + maps=vegas_map, + neval=1000000, + nbatch=n_batch, + nburnin=n_therm, + device=device, +) +res = vegasmcmc_integrator(func, mix_rate=0.5) +print("VEGAS-MCMC Integral results: ", res) + +# Start Monte Carlo integration, including plain-MC, MCMC, vegas, and vegas-MCMC +print("\nCalculate the integral [h(X), x1 * h(X), x1^2 * h(X)] in the bounds [0, 1]^4") +print("h(X) = exp(-200 * (x1^2 + x2^2 + x3^2 + x4^2))") + +bounds = [(0, 1)] * 4 +vegas_map = Vegas(bounds, device=device, ninc=ninc) +print("train VEGAS map for h(X)...") +vegas_map.train(20000, sharp_peak, epoch=10, alpha=0.5) +# print(vegas_map.extract_grid()) + +print("VEGAS Integral results:") +vegas_integrator = MonteCarlo( + maps=vegas_map, + neval=n_eval, + # nbatch=n_batch, + nbatch=n_eval, + device=device, +) +res = vegas_integrator(integrand_list1) +print( + " I[0] =", + res[0], + " I[1] =", + res[1], + " I[2] =", + res[2], + " I[1]/I[0] =", + res[1] / res[0], +) + +print("VEGAS-MCMC Integral results:") +vegasmcmc_integrator = MCMC( + maps=vegas_map, + neval=n_eval, + nbatch=n_batch, + nburnin=n_therm, + device=device, +) +res = vegasmcmc_integrator(integrand_list1, mix_rate=0.5) +print( + " I[0] =", + res[0], + " I[1] =", + res[1], + " I[2] =", + res[2], + " I[1]/I[0] =", + res[1] / res[0], +)
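
For reference, below is a minimal usage sketch of the API introduced by this patch; it is not part of the diff itself. It assumes the modules in src/ (integrators.py, maps.py, base.py) are on the import path, and the integrand names (gauss_peak, gauss_moments), sample counts, and ninc value are illustrative only, mirroring the settings used in mc_test.py.

import torch
from integrators import MonteCarlo, MCMC
from maps import Vegas

device = torch.device("cpu")
bounds = [(0, 1)] * 2  # integration domain


def gauss_peak(x):
    # batched integrand: x has shape (nbatch, dim)
    dx2 = (x - 0.5).pow(2).sum(dim=1)
    return torch.exp(-200 * dx2)


# plain Monte Carlo directly over the bounds
plain_mc = MonteCarlo(bounds=bounds, neval=400_000, nbatch=10_000, device=device)
print("plain MC:", plain_mc(gauss_peak))

# adapt a VEGAS map to the integrand, then importance-sample through it
vegas_map = Vegas(bounds, ninc=100, device=device)
vegas_map.train(20_000, gauss_peak, epoch=10, alpha=0.5)

vegas_mc = MonteCarlo(maps=vegas_map, neval=400_000, nbatch=10_000, device=device)
print("VEGAS MC:", vegas_mc(gauss_peak))

# Markov-chain sampling with the same map; mix_rate blends the flat and
# |f|-weighted proposals, and nburnin update steps are discarded before measuring
vegas_mcmc = MCMC(
    maps=vegas_map, neval=400_000, nbatch=10_000, nburnin=10, device=device
)
print("VEGAS MCMC:", vegas_mcmc(gauss_peak, mix_rate=0.5))


# Integrands may also return a list of tensors; the result is then an array of
# RAvg objects, one per component, so ratios of integrals can be formed directly.
def gauss_moments(x):
    f = gauss_peak(x)
    return [f, f * x[:, 0]]


res = vegas_mc(gauss_moments)
print("I[0] =", res[0], " I[1]/I[0] =", res[1] / res[0])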