In [2]:
import numpy as np
import matplotlib.pyplot as plt
import theano
import theano.tensor as T
import csv

In [125]:
def generate2DGausData(dataPoints, size = 100, display = True, ofName = ''):
    """Sample one labelled 2-D Gaussian cluster per entry of ``dataPoints``.

    Parameters
    ----------
    dataPoints : sequence of [mean, cov, value]
        One entry per cluster, e.g.
        ``[[mean1, cov1, value1], [mean2, cov2, value2]]``. ``mean`` is a
        length-2 vector, ``cov`` a 2x2 covariance matrix and ``value`` the
        label stored with every point of that cluster. Only the truthiness
        of ``value`` is used when plotting (truthy -> red, falsy -> yellow).
    size : int, optional
        Number of points drawn for each cluster.
    display : bool, optional
        When true, scatter-plot every cluster and show the figure.
    ofName : str, optional
        Path of a CSV file to write the points to, one ``value,x,y`` row per
        point. Leave empty to skip writing a file.

    Returns
    -------
    list of [value, x, y]
        All sampled points, cluster after cluster, in sampling order.
    """
    # newline='' is what the csv module expects for files it writes to;
    # without it rows end in "\r\r\n" on Windows.
    out_file = open(ofName, "w", newline="") if ofName else None
    points = []
    try:
        writer = csv.writer(out_file, delimiter=',') if out_file is not None else None
        for cluster in dataPoints:
            mean, cov, value = cluster[0], cluster[1], cluster[2]
            x, y = np.random.multivariate_normal(mean, cov, size).T
            rows = [[value, xi, yi] for xi, yi in zip(x, y)]
            points.extend(rows)
            if writer is not None:
                writer.writerows(rows)
            if display:
                plt.scatter(x, y, 20, 'r' if value else 'y')
    finally:
        # Close the file even if sampling or plotting raised, and before a
        # possibly blocking plt.show().
        if out_file is not None:
            out_file.close()
    if display:
        plt.show()
    return points

#Example on how to use
'''
mean1 = [0,0]
cov1 = [[10,0],[0,10]]
cluster1 = [mean1,cov1, 1]

mean2 = [30,30]
cov2 = [[10,0],[0,10]]
cluster2 = [mean2,cov2, 0]

clusters = [cluster1, cluster2]
x = generate2DGausData(clusters)
# or
x = generate2DGausData(clusters, 100,False, 'test.txt')
'''

"\nmean1 = [0,0]\ncov1 = [[10,0],[0,10]]\ncluster1 = [mean1,cov1, 1]\n\nmean2 = [30,30]\ncov2 = [[10,0],[0,10]]\ncluster2 = [mean2,cov2, 0]\n\nclusters = [cluster1, cluster2]\nx = generate2DGausData(clusters)\n# or\nx = generate2DGausData(clusters, 100,False, 'test.txt')\n"

In [128]:
def generateLinearData(line, bias, dataRange, epsilon, size = 500, display = True, ofName = ''):
    """Generate 2-D points that a logistic model labels confidently.

    ``size`` candidate points are drawn uniformly from the square
    ``[dataRange[0], dataRange[1]]`` on both axes. For each candidate
    ``f = sigmoid(x1 * (line[0] + bias[0]) + x2 * (line[1] + bias[1]))`` is
    evaluated; the point is kept with label 1 when ``1 - f <= epsilon``, with
    label 0 when ``f < epsilon``, and discarded otherwise. The result may
    therefore hold fewer than ``size`` points.

    Parameters
    ----------
    line : sequence of 2 numbers
        Weight vector.
    bias : sequence of 2 numbers
        Offsets added element-wise to ``line`` before the dot product.
    dataRange : sequence of 2 numbers
        ``[low, high]`` bounds used for both coordinates.
    epsilon : float
        Expected to lie in (0, 0.5]; how close the logistic output must be
        to 0 or 1 for a point to be kept.
    size : int, optional
        Number of candidate points to draw.
    display : bool, optional
        When true, scatter-plot the kept points (label 1 red, label 0
        yellow) and show the figure.
    ofName : str, optional
        Path of a CSV file to write the kept points to, one ``label,x1,x2``
        row per point. Leave empty to skip writing a file.

    Returns
    -------
    list of [label, x1, x2]
        The kept points in the order they were drawn.
    """
    # All x1 values are drawn before any x2 value so seeded runs stay reproducible.
    x1 = np.random.uniform(dataRange[0], dataRange[1], size)
    x2 = np.random.uniform(dataRange[0], dataRange[1], size)

    w1 = line[0] + bias[0]
    w2 = line[1] + bias[1]
    # Plain NumPy sigmoid over all candidates at once. exp() overflowing to
    # inf is fine here: it simply yields f == 0.
    with np.errstate(over='ignore'):
        f = 1.0 / (1.0 + np.exp(-(x1 * w1 + x2 * w2)))

    points = []
    for a, b, p in zip(x1, x2, f):
        if 1 - p <= epsilon:
            points.append([1, a, b])
        elif p < epsilon:
            points.append([0, a, b])

    if ofName:
        # Rows are written exactly as returned, so the label stored in the
        # file always matches the label in ``points``. newline='' is what
        # the csv module expects for files it writes to.
        with open(ofName, "w", newline="") as out_file:
            csv.writer(out_file, delimiter=',').writerows(points)

    if display:
        for label, colour in ((1, 'r'), (0, 'y')):
            xs = [pt[1] for pt in points if pt[0] == label]
            ys = [pt[2] for pt in points if pt[0] == label]
            if xs:
                plt.scatter(xs, ys, 20, colour)
        plt.show()
    return points

#example:
# x = generateLinearData([1,1],[0,0], [-5,5],0.01, 500, True,'test2.txt')