sdv-dev · echo66 · Oct 10, 2018 · Oct 10, 2018 · Oct 10, 2018 · Oct 10, 2018
diff --git a/copulas/bivariate/base.py b/copulas/bivariate/base.py
@@ -182,20 +182,26 @@ def partial_derivative(self, X, y=0):
         """
         raise NotImplementedError
 
-    def sample(self, n_samples):
+    def sample(self, n_samples, seed=None):
         """Generate specified `n_samples` of new data from model. `v~U[0,1],v~C^-1(u|v)`
 
         Args:
             n_samples: `int`, amount of samples to create.
 
+            seed: `int` or None, the seed for the random numbers generator. When None,
+                the global `numpy.random` stream is used.
+
         Returns:
             np.ndarray: Array of length `n_samples` with generated data from the model.
         """
         if self.tau > 1 or self.tau < -1:
             raise ValueError("The range for correlation measure is [-1,1].")
 
-        v = np.random.uniform(0, 1, n_samples)
-        c = np.random.uniform(0, 1, n_samples)
+        # A seeded call draws from a private generator, so the global NumPy stream is
+        # never modified; an unseeded call keeps using the global stream.
+        rng = np.random if seed is None else np.random.RandomState(seed)
+        v = rng.uniform(0, 1, n_samples)
+        c = rng.uniform(0, 1, n_samples)
 
         u = self.percent_point(c, v)
         return np.column_stack((u, v))

diff --git a/copulas/multivariate/gaussian.py b/copulas/multivariate/gaussian.py
@@ -180,12 +180,14 @@ def func(*args):
         ranges = [[lower_bound, val] for val in X]
         return integrate.nquad(func, ranges)[0]
 
-    def sample(self, num_rows=1):
-        """Creates sintentic values stadistically similar to the original dataset.
+    def sample(self, num_rows=1, seed=None):
+        """Creates synthetic values statistically similar to the original dataset.
 
         Args:
             num_rows: `int` amount of samples to generate.
 
+            seed: `int` or None, the seed for the random numbers generator.
+
         Returns:
-            np.ndarray: Sampled data.
+            pd.DataFrame: Sampled data.
 
@@ -194,16 +196,22 @@ def sample(self, num_rows=1):
         means = np.zeros(self.covariance.shape[0])
         size = (num_rows,)
 
-        # clean up cavariance matrix
+        # clean up covariance matrix
         clean_cov = np.nan_to_num(self.covariance)
-        samples = np.random.multivariate_normal(means, clean_cov, size=size)
+
+        # A seeded call draws from a private generator, so the global NumPy stream is
+        # never modified; an unseeded call keeps using the global stream.
+        rng = np.random if seed is None else np.random.RandomState(seed)
+        samples = rng.multivariate_normal(means, clean_cov, size=size)
+
         # run through cdf and inverse cdf
         for i, (label, distrib) in enumerate(self.distribs.items()):
             # use standard normal's cdf
             res[label] = stats.norm.cdf(samples[:, i])
 
             # use original distributions inverse cdf
             res[label] = distrib.percent_point(res[label])
+
         return pd.DataFrame(data=res)
 
     def to_dict(self):

diff --git a/copulas/multivariate/vine.py b/copulas/multivariate/vine.py
@@ -1,5 +1,5 @@
 import logging
-from random import randint
+import random
 
 import numpy as np
 from scipy import optimize
@@ -81,11 +81,23 @@ def get_likelihood(self, uni_matrix):
 
         return np.sum(values)
 
-    def sample(self, num_rows=1):
-        """Generating samples from vine model."""
-        unis = np.random.uniform(0, 1, self.n_var)
+    def sample(self, num_rows=1, seed=None):
+        """Generating samples from vine model.
+
+        Args:
+            seed: `int` or None, the seed for the random numbers generators. When None,
+                the global `numpy.random` and `random` streams are used.
+        """
+        # Seeded calls draw from private generators, so the global `numpy.random` and
+        # `random` streams are never modified; unseeded calls keep using the global ones.
+        np_rng = np.random if seed is None else np.random.RandomState(seed)
+        py_rng = random if seed is None else random.Random(seed)
+
+        unis = np_rng.uniform(0, 1, self.n_var)
+
         # randomly select a node to start with
-        first_ind = randint(0, self.n_var - 1)
+        first_ind = py_rng.randint(0, self.n_var - 1)
+
         adj = self.trees[0].get_adjacent_matrix()
         visited = []
         explore = [first_ind]

diff --git a/copulas/univariate/gaussian.py b/copulas/univariate/gaussian.py
@@ -82,16 +82,22 @@ def percent_point(self, U):
         """
         return norm.ppf(U, loc=self.mean, scale=self.std)
 
-    def sample(self, num_samples=1):
+    def sample(self, num_samples=1, seed=None):
         """Returns new data point based on model.
 
         Arguments:
-            n_samples: `int`
+            num_samples: `int`, amount of samples to generate.
+
+            seed: `int` or None, the seed for the random numbers generator. When None,
+                the global `numpy.random` stream is used.
 
         Returns:
             np.ndarray: Generated samples
         """
-        return np.random.normal(self.mean, self.std, num_samples)
+        # A seeded call draws from a private generator, so the global NumPy stream is
+        # never modified; an unseeded call keeps using the global stream.
+        rng = np.random if seed is None else np.random.RandomState(seed)
+        return rng.normal(self.mean, self.std, num_samples)
 
     def to_dict(self):
         return {

diff --git a/copulas/univariate/kde.py b/copulas/univariate/kde.py
@@ -70,16 +70,30 @@ def percent_point(self, U):
 
         return scipy.optimize.brentq(self.cumulative_distribution, -1000.0, 1000.0, args=(U))
 
-    def sample(self, num_samples=1):
+    def sample(self, num_samples=1, seed=None):
         """Samples new data point based on model.
 
         Args:
             num_samples: `int` number of points to be sampled
+
+            seed: `int` or None, the seed for the random numbers generator. When None,
+                the global `numpy.random` state is left untouched.
 
         Returns:
             samples: a list of datapoints sampled from the model
         """
-        return self.model.resample(num_samples)
+        if seed is None:
+            # Nothing to seed: sample without touching the global NumPy state.
+            return self.model.resample(num_samples)
+
+        # Seed the global NumPy generator only for the duration of the call and
+        # restore the caller's state afterwards, even if sampling raises.
+        state = np.random.get_state()
+        np.random.seed(seed)
+        try:
+            return self.model.resample(num_samples)
+        finally:
+            np.random.set_state(state)
 
     @classmethod
     def from_dict(cls, copula_dict):