[View in Colaboratory](https://colab.research.google.com/github/rekharajct/Machine-Learning/blob/master/GAN_Cluster_fc1_26_Oct_2018_v2.ipynb)

In [0]:
import tensorflow as tf

import numpy as np
#from training_data import *
import seaborn as sb
import matplotlib.pyplot as plt
sb.set()
tf.reset_default_graph()
import numpy as np
import math
from scipy.stats import multivariate_normal
from time import time
from sklearn.metrics import accuracy_score

def cluster_assign(X,K,m,p,s):
    """X:data, K:no of comps, m:means, p:pi's, s:sigmas"""

    N=X.shape[0] #number of data points
    d=X.shape[1]            
    Gamma=np.zeros((N,K)) #to store gamma value
    N_mu_sigma=np.zeros((N,K))#to store Normal pdf of each of data point for every k
                
    for i in range(N):
        
        for j in range(K):
                y_cov = s[j]*s[j]*np.eye(d)
                min_eig = np.min(np.real(np.linalg.eigvals(y_cov)))
                if min_eig < 0:
                  y_cov -= min_eig * np.eye(*y_cov.shape)
                N_mu_sigma[i,j]=multivariate_normal.pdf(X[i],m[j],y_cov)
                          
    #compute gamma
    Gamma=p*N_mu_sigma
    #normalize gamma
    row_sums = Gamma.sum(axis=1)
    Gamma_norm = Gamma/row_sums[:, np.newaxis]
    #Find k's
    result=np.argmax(Gamma_norm,axis=1)
    return Gamma,result


def generate_gmm_data(size,dim, K, means, alphas, pi):
    """Generates synthetic data of a given size from a Gaussian Mixture Model 
    given means , standard deviation and pi"""
    
    covars={}
    result = np.empty((size, dim))
    for i in range(K):
      covars[i]=alphas[i]*np.eye(dim)
    components = np.arange(K)
    for i in range(size):
        comp = np.random.choice(components, p=pi)

        result[i] = np.random.multivariate_normal(means[comp], covars[comp])
    return result
      

  
def sample_Z(m, n):
    return np.random.normal(0, 1., size=[m, n])

def generator_Z(X,x_dim,K,mu_dim,reuse=False):
    with tf.variable_scope("GAN/GeneratorZ",reuse=reuse):
        h = tf.layers.dense(X,x_dim,activation=tf.nn.leaky_relu)
        #h2 = tf.layers.dense(h1,8,activation=tf.nn.leaky_relu)
        h = tf.layers.dense(h,8,activation=tf.nn.leaky_relu)
        h = tf.layers.dense(h,16,tf.nn.leaky_relu)
        h = tf.layers.dense(h,32)
        pi = tf.layers.dense(h, K,tf.nn.softmax)
        sd = tf.layers.dense(h,K)
        mu = tf.layers.dense(h,mu_dim)

    return mu, tf.exp(sd), pi

"""
def generator_X(Z,x_dim,z_dim,reuse=False):
    with tf.variable_scope("GAN/Generator",reuse=reuse):
        h1 = tf.layers.dense(Z,z_dim,activation=tf.nn.leaky_relu)
        h2 = tf.layers.dense(h1,8,activation=tf.nn.leaky_relu)
        h3 = tf.layers.dense(h2,16,activation=tf.nn.leaky_relu)
        out = tf.layers.dense(h3,x_dim)

    return out
"""
def discriminator(X,x_dim,reuse=False):
    with tf.variable_scope("GAN/Discriminator",reuse=reuse):
        h = tf.layers.dense(X,x_dim,activation=tf.nn.leaky_relu)
        h = tf.layers.dense(h,32,activation=tf.nn.leaky_relu)
        h = tf.layers.dense(h,16,tf.nn.leaky_relu)
        h = tf.layers.dense(h,8,tf.nn.leaky_relu)
        out = tf.layers.dense(h,1)

    return out, h

def GAN_cluster(data, batch_size,K, nd_steps, ng_steps, epochs,x_dim,mu_dim,y):
  with tf.device('/gpu:0'):
    print(tf.test.gpu_device_name())
    X = tf.placeholder(tf.float32,[None,x_dim])
    #G = tf.placeholder(tf.float32,[None,x_dim])
    means, alphas, pis = generator_Z(X,x_dim,K,mu_dim)
    mean_avg = tf.reshape(tf.reduce_mean(means,0),[K,x_dim])
    
    alpha_avg = tf.reduce_mean(alphas,0)
    pi_avg = tf.nn.softmax(tf.reduce_mean(pis,0))
    
    tfd = tf.contrib.distributions
    ### Create a mixture of two Bivariate Gaussians:

    gm = tfd.MixtureSameFamily(
    mixture_distribution=tfd.Categorical(
        probs=pi_avg),
    components_distribution=tfd.MultivariateNormalDiag(
        loc=mean_avg,
        scale_identity_multiplier=alpha_avg))
    G = gm.sample(batch_size)
    #lp =gm.mixture_distribution.logits(G) 
    
    
    #G_sample = generate_gmm_data(batch_size,x_dim, K, mean_avg, sd_avg, pi_avg)
    r_logits, r_rep = discriminator(X,x_dim)
    f_logits, g_rep = discriminator(G,x_dim,reuse=True)

    disc_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=r_logits,
                labels=tf.ones_like(r_logits)) + tf.nn.sigmoid_cross_entropy_with_logits(logits=f_logits,
                                                                       labels=tf.zeros_like(f_logits)))
    gen_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=f_logits,
                                                                      labels=tf.ones_like(f_logits)))

    gen_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,scope="GAN/GeneratorZ")
    disc_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,scope="GAN/Discriminator")

    gen_step = tf.train.RMSPropOptimizer(learning_rate=0.001).minimize(gen_loss,var_list = gen_vars) # G Train step
    disc_step = tf.train.RMSPropOptimizer(learning_rate=0.001).minimize(disc_loss,var_list = disc_vars) # D Train step



  # sess = tf.Session(config=config)
  sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
  tf.global_variables_initializer().run(session=sess)

  

  #x_plot = sample_data(n=batch_size)

  #f = open('loss_logs.csv','w')
  #f.write('Iteration,Discriminator Loss,Generator Loss\n')

  num_batches = data.shape[0]/batch_size
  batches = np.vsplit(data,num_batches)
  for i in range(epochs):
    # Provide chunks one by one
    
    for X_batch in batches:
        
        #discriminator loss
        for _ in range(nd_steps):
          _, dloss = sess.run([disc_step, disc_loss], feed_dict={X: X_batch})
        
                
        rrep_dstep, grep_dstep = sess.run([r_rep, g_rep], feed_dict={X: X_batch})

        for _ in range(ng_steps):
          _, gloss = sess.run([gen_step, gen_loss], feed_dict={X: X_batch})

        rrep_gstep, grep_gstep = sess.run([r_rep, g_rep], feed_dict={X: X_batch})

        #if i%10 == 0:
        #    f.write("%d,%f,%f\n"%(i,dloss,gloss))

    if (i+1)%100==0:
        print("Epochs: %d\t Discriminator loss: %.4f\t Generator loss: %.4f"%(i+1,dloss,gloss))
        #print(batchrows)
        plt.figure()
        X_batch = data
        print(X_batch.shape)
        #mus , pis, SDs
        Mus, Alphas, Pis= sess.run([mean_avg, alpha_avg,pi_avg],feed_dict={X:X_batch})
        #print(lp)
        
        #Generate data from GMM
        g_plot = sess.run(G, feed_dict={X: X_batch})
        xax = plt.scatter(X_batch[:,0], X_batch[:,1])
        gax = plt.scatter(g_plot[:,0],g_plot[:,1])

        plt.legend((xax,gax), ("Real Data","Generated Data"))
        plt.title('Samples at Iteration %d'%i)
        #plt.tight_layout()
        #plt.savefig('../plots/iterations/iteration_%d.png'%i)
        plt.show()
        plt.close()
        print("Alphas:",Alphas)
        
        gamma, clusters = cluster_assign(X_batch,K,Mus,Pis,abs(Alphas))
        acc = accuracy_score(clusters, y)
        print("accuracy",acc)
        
        fig, ax = plt.subplots()
        datasets = []
        for i in range(K):
          # Assigned indices
          assigned_indices = (clusters == i)
          datasets.append(X_batch[assigned_indices, :])
          

        #Now let's put the scatter plots onto the scene.
        colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
        for j,data in enumerate(datasets):
               ax.scatter(data[:, 0], data[:, 1], color=colors[j])

        ax.set_title("cluster assignments")

        plt.show()

  sess.close()

#generate data
from sklearn.datasets import make_blobs
#centers = [(-5, -5), (5, -5)]
#centers = [(-5, -5), (5, -5), (-5, 10)]

#centers = [(-5, -5), (5, -5), (-5, 10),(5,10)]
#centers = [(-5, -5), (5, -5),(1,1), (-5, 10),(5,10),(10,1),(12,10)]
centers = [(-5, -5), (5, -5),(1,1), (-5, 10),(5,10),(10,1)]

N = 10000
d= 2
X, y = make_blobs(n_samples=N, n_features=d, cluster_std=1.0,
                  centers=centers, shuffle=True, random_state=42)
#normalize the data 
from sklearn import preprocessing
#X = preprocessing.normalize(X)
batch_size = 100
nd_steps = 2
ng_steps = 2
epochs = 300
K =6
x_dim = X.shape[1]
print(x_dim)
mu_dim = K*x_dim
t0=time() #time of training start
GAN_cluster(X, batch_size,K, nd_steps, ng_steps, epochs,x_dim,mu_dim, y)
t=time()-t0
print("Training time:",t," seconds")

In [3]:
import tensorflow as tf
tfd = tf.contrib.distributions
gm = tfd.MixtureSameFamily(
    mixture_distribution=tfd.Categorical(
        probs=[0.3, 0.7]),
    components_distribution=tfd.Normal(
      loc=[-1., 1],       # One for each component.
      scale=[0.1, 0.5]))  # And same here.

x= gm.sample(1)
z=gm.prob(x)

sess = tf.Session()
y=sess.run(z)
print(gm.sample_class_c())

AttributeError: ignored