In [6]:
import torch
import numpy as np
import matplotlib.pyplot as plt

from hmmlearn import hmm
from torch.distributions import uniform

import sys
sys.path.append("../")



In [493]:
#Generate Datasets using HMM

def random_transmat(n_states):
    """Draw a random row-stochastic transition matrix.

    Entries are sampled with ``np.random.rand`` and every row is divided by
    its own sum, so each row adds up to 1.

    Args:
        n_states: number of hidden states.

    Returns:
        np.ndarray of shape (n_states, n_states).
    """
    weights = np.random.rand(n_states, n_states)
    # keepdims keeps the row totals as a column so the division broadcasts per row.
    row_totals = weights.sum(axis=1, keepdims=True)
    return weights / row_totals

def random_startprob(n_states):
    """Draw a random initial-state distribution.

    Args:
        n_states: number of hidden states.

    Returns:
        np.ndarray of shape (n_states,) whose entries sum to 1.
    """
    weights = np.random.rand(n_states)
    return weights / np.sum(weights)

def random_means(n_features, n_states=None):
    """Draw random integer emission means in the range [0, 5).

    An HMM needs one mean vector per hidden state, i.e. a matrix of shape
    (n_states, n_features).

    Args:
        n_features: dimensionality of each observation.
        n_states: number of hidden states. Defaults to ``n_features`` so that
            existing one-argument calls still get the square matrix they
            received before.

    Returns:
        Integer np.ndarray of shape (n_states, n_features).
    """
    if n_states is None:
        n_states = n_features
    return np.random.randint(5, size=(n_states, n_features))

def generate_hmm(n_states, n_features , n_samples, length):
    """Sample sequences from a randomly parameterised Gaussian HMM.

    Args:
        n_states: number of hidden states of the HMM.
        n_features: dimensionality of each observation.
        n_samples: number of independent sequences to draw.
        length: number of time steps per sequence.

    Returns:
        Tuple ``(dataset, states)`` where ``dataset`` stacks the sampled
        observations of every sequence and ``states`` holds the matching
        hidden-state path of every sequence.
    """
    model = hmm.GaussianHMM(n_components=n_states, covariance_type="full")
    model.startprob_ = random_startprob(n_states)
    model.transmat_ = random_transmat(n_states)

    # Emission parameters are indexed by hidden STATE: one mean vector and one
    # covariance matrix per state. Sizing them by n_features (as before) only
    # worked by accident when n_states <= n_features.
    model.means_ = np.random.randint(5, size=(n_states, n_features))
    model.covars_ = np.tile(np.identity(n_features), (n_states, 1, 1))

    dataset = []
    states = []
    for _ in range(n_samples):
        X, Z = model.sample(length)
        dataset.append(np.asarray(X))
        states.append(Z)

    return np.stack(dataset), np.array(states)

def generate_time_dependent_flip(n_samples, length, startprob, transmat):
    """Sample Markov-chain state paths to be used as label-flip masks.

    A Gaussian HMM is used only as a convenient Markov-chain sampler: the
    emitted observations are discarded and only the hidden-state paths are
    returned.

    Args:
        n_samples: number of independent paths to draw.
        length: number of time steps per path.
        startprob: initial-state distribution, one entry per state.
        transmat: state transition matrix.

    Returns:
        np.ndarray stacking the ``n_samples`` sampled state paths.
    """
    startprob = np.asarray(startprob)
    transmat = np.asarray(transmat)
    # Take the number of states from the arguments rather than from a
    # notebook-level global, so the function works on a fresh kernel.
    n_states = len(startprob)

    model = hmm.GaussianHMM(n_components=n_states, covariance_type="full")
    model.startprob_ = startprob
    model.transmat_ = transmat

    # The emissions are never used; they only need one mean and one covariance
    # per state so that the model is internally consistent.
    n_dummy_features = 2
    model.means_ = np.zeros((n_states, n_dummy_features))
    model.covars_ = np.tile(np.identity(n_dummy_features), (n_states, 1, 1))

    # model.sample returns (observations, states); keep only the states.
    paths = [np.asarray(model.sample(length)[1]) for _ in range(n_samples)]
    return np.stack(paths)


In [481]:
#Injecting Noise into Labels

#Given a flip_mask, flip an input
def flip(array, flip_mask):
    """Return a copy of ``array`` with the entries selected by ``flip_mask`` flipped.

    The flip is an element-wise logical XOR, so a non-zero mask entry turns a
    0 label into 1 and a non-zero label into 0. The input is NOT modified:
    writing the result back into ``array`` would overwrite the caller's
    clean labels with the noisy ones.

    Args:
        array: array-like of binary labels.
        flip_mask: array-like of the same length; non-zero means "flip".

    Returns:
        New np.ndarray with the same dtype as ``array``.
    """
    labels = np.asarray(array)
    flipped = np.logical_xor(labels, flip_mask)
    # logical_xor yields booleans; cast back so the labels keep their dtype.
    return flipped.astype(labels.dtype)

#Class Independent / Time Independent
def flip_labels_basic(array, flip_probability):
    """Flip each label independently with probability ``flip_probability``.

    Args:
        array: 1-D array of binary labels.
        flip_probability: probability in [0, 1] that any given label is flipped.

    Returns:
        The result of ``flip(array, flip_mask)`` for the sampled mask.
    """
    # Use the requested probability; it was previously ignored in favour of a
    # hard-coded 0.5.
    flip_mask = np.random.binomial(1, flip_probability, len(array))
    return flip(array, flip_mask)

#Class Dependent / Time Independent
def flip_labels_class(array, flip_probability_0, flip_probability_1):
    """Flip labels with a probability that depends on the label's class.

    Args:
        array: 1-D array of binary labels.
        flip_probability_0: flip probability applied to labels equal to 0.
        flip_probability_1: flip probability applied to every other label.

    Returns:
        The result of ``flip(array, flip_mask)`` for the sampled mask.
    """
    labels = np.asarray(array)
    # Per-position flip probability, chosen by the label at that position.
    per_label_probability = np.where(labels == 0, flip_probability_0, flip_probability_1)
    # A single vectorized draw replaces one RNG call per element.
    flip_mask = np.random.binomial(1, per_label_probability)
    return flip(array, flip_mask)

#Class Independent / Time Dependent
def flip_labels_time(array, startprob, transmat):
    """Flip labels using a mask sampled from a Markov chain.

    Args:
        array: 1-D array of binary labels.
        startprob: initial-state distribution of the mask chain.
        transmat: transition matrix of the mask chain.

    Returns:
        The result of ``flip(array, mask)`` for the sampled mask.
    """
    n_steps = len(array)
    # Ask for a single path of the right length and take that one row.
    sampled_masks = generate_time_dependent_flip(1, n_steps, startprob, transmat)
    return flip(array, sampled_masks[0])


#Class Dependent / Time Dependent
#This can be achieved by careful design of the transition matrix (transmat)

In [482]:
# Smoke test: 10 sequences of 100 steps from a 2-state HMM with 3-D observations.
dataset, Z = generate_hmm(n_states=2, n_features=3, n_samples=10, length=100)

In [483]:
# Hidden-state paths: one row per sampled sequence, one column per time step.
Z.shape

(10, 100)

In [495]:
# Start distribution and transition matrix of a two-state Markov chain.
startprob = random_startprob(2)
# NOTE(review): both rows are identical, so the next-state distribution does not
# depend on the current state; confirm this is intended for a time-dependent
# noise model.
transmat = np.array([[0.95, 0.05],
                    [0.95, 0.05]])


In [496]:
def generate_dataset(n_states, n_features, n_samples, length, train_ratio, method,
                     flip_probability=None, flip_probability_0=None, flip_probability_1=None,
                     startprob=None, transmat=None):
    """Generate HMM sequences with clean and noisy labels, split into train/test.

    Args:
        n_states: number of hidden states of the generating HMM.
        n_features: dimensionality of each observation.
        n_samples: number of sequences to generate.
        length: number of time steps per sequence.
        train_ratio: fraction of the sequences assigned to the training split.
        method: label-noise model, one of ``"basic"`` (uses ``flip_probability``),
            ``"class"`` (uses ``flip_probability_0`` / ``flip_probability_1``) or
            ``"time"`` (uses ``startprob`` / ``transmat``).
        flip_probability: flip probability for ``"basic"``.
        flip_probability_0: flip probability of 0-labels for ``"class"``.
        flip_probability_1: flip probability of the other labels for ``"class"``.
        startprob: start distribution of the flip-mask chain for ``"time"``.
        transmat: transition matrix of the flip-mask chain for ``"time"``.

    Returns:
        Tuple ``(x_train, y_train_true, y_train_flipped,
        x_test, y_test_true, y_test_flipped)``.

    Raises:
        ValueError: if ``method`` is not one of the supported noise models.
    """
    # Pick the noise model up front so an unknown method fails loudly instead
    # of silently producing empty "flipped" arrays.
    if method == "basic":
        def corrupt(labels):
            return flip_labels_basic(labels, flip_probability)
    elif method == "class":
        def corrupt(labels):
            return flip_labels_class(labels, flip_probability_0, flip_probability_1)
    elif method == "time":
        def corrupt(labels):
            return flip_labels_time(labels, startprob, transmat)
    else:
        raise ValueError(
            "Unknown method %r; expected 'basic', 'class' or 'time'." % (method,)
        )

    # Generate data
    dataset, states_true = generate_hmm(n_states, n_features, n_samples, length)

    # Single split index shared by observations and both label sets.
    n_train = int(train_ratio * n_samples)

    # Hand each flip helper a copy: the rows of states_true are views, and an
    # in-place flip would overwrite the clean labels returned below.
    states_flipped = [corrupt(item.copy()) for item in states_true]

    x_train = dataset[:n_train]
    x_test = dataset[n_train:]

    y_train_true = states_true[:n_train]
    y_test_true = states_true[n_train:]

    y_train_flipped = np.array(states_flipped[:n_train])
    y_test_flipped = np.array(states_flipped[n_train:])

    return x_train, y_train_true, y_train_flipped, x_test, y_test_true, y_test_flipped

In [486]:
# Experiment: class-independent / time-independent label noise.
n_states = 2
n_features = 3
n_samples = 100
length = 1000
train_ratio = 0.7
method = "basic"
flip_probability = 0.1

# The remaining noise parameters keep their None defaults for this method.
(x_train, y_train_true, y_train_flipped,
 x_test, y_test_true, y_test_flipped) = generate_dataset(
    n_states, n_features, n_samples, length, train_ratio, method,
    flip_probability=flip_probability,
)

In [487]:
# Sanity check on the training observations produced by the cell above.
x_train.shape

(70, 1000, 3)

In [488]:
# Experiment: class-dependent / time-independent label noise.
n_states = 2
n_features = 3
n_samples = 100
length = 1000
train_ratio = 0.7
method = "class"
flip_probability_0 = 0.1
flip_probability_1 = 0.2

# The remaining noise parameters keep their None defaults for this method.
(x_train, y_train_true, y_train_flipped,
 x_test, y_test_true, y_test_flipped) = generate_dataset(
    n_states, n_features, n_samples, length, train_ratio, method,
    flip_probability_0=flip_probability_0,
    flip_probability_1=flip_probability_1,
)

In [492]:
# Sanity check on the training observations produced by the cell above.
x_train.shape

(70, 1000, 3)

In [498]:
# Experiment: class-independent / time-dependent label noise.
n_states = 2
n_features = 3
n_samples = 100
length = 1000
train_ratio = 0.7
method = "time"

# Markov chain that drives the flip mask.
startprob = random_startprob(2)
transmat = np.array([[0.95, 0.05],
                     [0.95, 0.05]])

# The remaining noise parameters keep their None defaults for this method.
(x_train, y_train_true, y_train_flipped,
 x_test, y_test_true, y_test_flipped) = generate_dataset(
    n_states, n_features, n_samples, length, train_ratio, method,
    startprob=startprob,
    transmat=transmat,
)




In [499]:
# Sanity check on the noisy training labels produced by the cell above.
y_train_flipped.shape

(70, 1000)