Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added seed for random numbers generators #62

Closed
wants to merge 11 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
10 changes: 9 additions & 1 deletion copulas/bivariate/base.py
Expand Up @@ -182,20 +182,28 @@ def partial_derivative(self, X, y=0):
"""
raise NotImplementedError

def sample(self, n_samples, seed=None):
    """Generate specified `n_samples` of new data from model. `v~U[0,1],v~C^-1(u|v)`

    Args:
        n_samples: `int`, amount of samples to create.

        seed: `int` or None, the seed for the random numbers generator.
            When given, the draws come from a private generator seeded with
            it, so the global `np.random` state is never touched. When None,
            the draws come from the global `np.random` stream, so a seed the
            caller set with `np.random.seed` is still honoured.

    Returns:
        np.ndarray: `np.column_stack((u, v))`, one row per generated sample.

    Raises:
        ValueError: If `self.tau` is outside of [-1, 1].
    """
    if self.tau > 1 or self.tau < -1:
        raise ValueError("The range for correlation measure is [-1,1].")

    # A private RandomState yields the same values as seeding the global
    # generator would, without the save/seed/restore dance that leaked a
    # reseeded global state whenever a draw raised.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Order matters for reproducibility: `v` is drawn before `c`.
    v = rng.uniform(0, 1, n_samples)
    c = rng.uniform(0, 1, n_samples)

    u = self.percent_point(c, v)
    return np.column_stack((u, v))
Expand Down
15 changes: 13 additions & 2 deletions copulas/multivariate/gaussian.py
Expand Up @@ -180,12 +180,14 @@ def func(*args):
ranges = [[lower_bound, val] for val in X]
return integrate.nquad(func, ranges)[0]

def sample(self, num_rows=1):
def sample(self, num_rows=1, seed=None):
"""Creates sintentic values stadistically similar to the original dataset.

Args:
num_rows: `int` amount of samples to generate.

seed: `int` or None, the seed for the random numbers generator.

Returns:
np.ndarray: Sampled data.

Expand All @@ -194,16 +196,25 @@ def sample(self, num_rows=1):
means = np.zeros(self.covariance.shape[0])
size = (num_rows,)

# clean up cavariance matrix
# clean up covariance matrix
clean_cov = np.nan_to_num(self.covariance)

s = np.random.get_state()

np.random.seed(seed)

samples = np.random.multivariate_normal(means, clean_cov, size=size)

np.random.set_state(s)

# run through cdf and inverse cdf
for i, (label, distrib) in enumerate(self.distribs.items()):
# use standard normal's cdf
res[label] = stats.norm.cdf(samples[:, i])

# use original distributions inverse cdf
res[label] = distrib.percent_point(res[label])

return pd.DataFrame(data=res)

def to_dict(self):
Expand Down
21 changes: 18 additions & 3 deletions copulas/multivariate/vine.py
@@ -1,5 +1,6 @@
import logging
from random import randint
# from random import randint, seed, getstate, setstate
import random

import numpy as np
from scipy import optimize
Expand Down Expand Up @@ -81,11 +82,25 @@ def get_likelihood(self, uni_matrix):

return np.sum(values)

def sample(self, num_rows=1):
def sample(self, num_rows=1, seed=None):
"""Generating samples from vine model."""
s1 = np.random.get_state()

s2 = random.getstate()

np.random.seed(seed)

random.setstate(seed)

unis = np.random.uniform(0, 1, self.n_var)

# randomly select a node to start with
first_ind = randint(0, self.n_var - 1)
first_ind = random.randint(0, self.n_var - 1)

np.random.seed(s1)

random.setstate(s2)

adj = self.trees[0].get_adjacent_matrix()
visited = []
explore = [first_ind]
Expand Down
14 changes: 12 additions & 2 deletions copulas/univariate/gaussian.py
Expand Up @@ -82,16 +82,26 @@ def percent_point(self, U):
"""
return norm.ppf(U, loc=self.mean, scale=self.std)

def sample(self, num_samples=1, seed=None):
    """Returns new data point based on model.

    Arguments:
        num_samples: `int`, amount of samples to draw.

        seed: `int` or None, the seed for the random numbers generator.
            When given, the draws come from a private generator seeded with
            it, so the global `np.random` state is never touched. When None,
            the draws come from the global `np.random` stream, so a seed the
            caller set with `np.random.seed` is still honoured.

    Returns:
        np.ndarray: Generated samples
    """
    # A private RandomState yields the same values as seeding the global
    # generator would, but cannot leave the global state modified if the
    # draw raises.
    rng = np.random if seed is None else np.random.RandomState(seed)
    return rng.normal(self.mean, self.std, num_samples)

def to_dict(self):
return {
Expand Down
14 changes: 12 additions & 2 deletions copulas/univariate/kde.py
Expand Up @@ -70,16 +70,26 @@ def percent_point(self, U):

return scipy.optimize.brentq(self.cumulative_distribution, -1000.0, 1000.0, args=(U))

def sample(self, num_samples=1, seed=None):
    """Samples new data point based on model.

    Args:
        num_samples: `int` number of points to be sampled

        seed: `int` or None, the seed for the random numbers generator.
            When None, the global `np.random` state is left alone, so a seed
            the caller set with `np.random.seed` is still honoured.

    Returns:
        samples: whatever `self.model.resample(num_samples)` returns
    """
    if seed is None:
        return self.model.resample(num_samples)

    # `resample` is called without a generator argument, so the only way to
    # seed it is through the global state.
    # NOTE(review): assumes `self.model.resample` draws from the global
    # `np.random` generator; confirm against the model's implementation.
    saved_state = np.random.get_state()
    np.random.seed(seed)
    try:
        return self.model.resample(num_samples)
    finally:
        # Restore the caller's stream even when `resample` raises.
        np.random.set_state(saved_state)

@classmethod
def from_dict(cls, copula_dict):
Expand Down