## Checking network updates for different batches ##

### shuffling the dataset at each optimization step ###

In [3]:
import tensorflow as tf
from tensorflow.keras.layers import Dense
import numpy as np
import matplotlib.pyplot as plt
import os
from tqdm import tqdm_notebook as tqdm
tf.keras.backend.set_floatx('float64')
from misc import *
from collections import deque
from datetime import datetime
import random

In [5]:
def Prob(alpha, beta, n):
    p0 = np.exp(-(alpha-beta)**2)
    if n == 0:
        return p0
    else:
        return 1-p0

def qval(beta, n, guess):
    #dolinar guessing rule (= max-likelihood for L=1, careful sign of \beta)
    alpha = 0.4
    pn = np.sum([Prob(g*alpha, beta, n) for g in [-1,1]])
    return Prob(guess*alpha, beta, n)/pn


In [7]:
def createdataset_l2(number_rews_per_beta_per_outcome_per_guess=10**2,number_of_betas = 10):
    #we call it l2 so it does not interfer with the fist tutoral..
    dataset = []
    labels=[]
    global betas_train
    global number_of_samples
    number_of_samples = number_rews_per_beta_per_outcome_per_guess
    betas_train = np.linspace(-1.5,1.5,number_of_betas)
    for i in range(len(betas_train)):
        beta = betas_train[i]
        for outcome in [0.,1.]:
            for guess in [-1.,1.]:

                for k in range(number_rews_per_beta_per_outcome_per_guess):
                    mean_bernoulli = qval(beta, outcome, guess)
                    reward = np.random.choice([1.,0,],1,p=[mean_bernoulli, 1.-mean_bernoulli])[0]

                    dataset.append([beta, outcome, guess, reward])
    return np.array(dataset)

In [8]:
def plot_results(histo_preds, mean_values, title=None):
    plt.figure(figsize=(15,15))
    ax1 =  plt.subplot2grid((1,2),(0,0))
    ax2 =  plt.subplot2grid((1,2),(0,1))

    betas = np.arange(-1.5,1.5,.01)
    ax1.plot(betas_train,[mean_values[str(b)]["0"] for b in range(len(betas_train))],'--',alpha=0.85,c="red", linewidth=5, label="Q(n1=0,"+r'$\beta$'+"; g=-1)")
    ax1.plot(betas_train,[mean_values[str(b)]["1"] for b in range(len(betas_train))],'--',alpha=0.85,c="blue",  linewidth=5,label="Q(n1=0,"+r'$\beta$'+"; g=1)")

    ax2.plot(betas_train,[mean_values[str(b)]["2"] for b in range(len(betas_train))],'--',alpha=0.85,c="red",  linewidth=5,label="Q(n1=1,"+r'$\beta$'+"; g=-1)")
    ax2.plot(betas_train,[mean_values[str(b)]["3"] for b in range(len(betas_train))],'--',alpha=0.85,c="blue",  linewidth=5,label="Q(n1=1,"+r'$\beta$'+"; g=1)")

    
    for predictions in histo_preds.values():
        ax1.plot(betas_train,predictions["values"]["0"],alpha=0.5, linewidth=5, label="epoch: "+str(predictions["epoch_number"])) #, label=r'$\hat{Q}$'+"(n1=0,"+r'$\beta$'+"; g=-1)")
        ax1.plot(betas_train,predictions["values"]["1"],alpha=0.5, linewidth=5)#, label="epoch: "+str(predictions["epoch_number"]))#,label=r'$\hat{Q}$'+"(n1=0,"+r'$\beta$'+"; g=1)")

        ax2.plot(betas_train,predictions["values"]["2"] ,alpha=0.5,  linewidth=5)#, label="epoch: "+str(predictions["epoch_number"]))#label=r'$\hat{Q}$'+"(n1=1,"+r'$\beta$'+"; g=-1)")
        ax2.plot(betas_train,predictions["values"]["3"] ,alpha=0.5,  linewidth=5, label="epoch: "+str(predictions["epoch_number"]))#,label=r'$\hat{Q}$'+"(n1=1,"+r'$\beta$'+"; g=1)")
    
    #Now we take the last and plot it in bold!
    ax1.plot(betas_train,predictions["values"]["0"],alpha=0.85, c="black",linewidth=5, label="epoch: "+str(predictions["epoch_number"])) #, label=r'$\hat{Q}$'+"(n1=0,"+r'$\beta$'+"; g=-1)")
    ax1.plot(betas_train,predictions["values"]["1"],alpha=0.85, c="black", linewidth=5)#, label="epoch: "+str(predictions["epoch_number"]))#,label=r'$\hat{Q}$'+"(n1=0,"+r'$\beta$'+"; g=1)")

    ax2.plot(betas_train,predictions["values"]["2"] ,alpha=0.85, c="black", linewidth=5)#, label="epoch: "+str(predictions["epoch_number"]))#label=r'$\hat{Q}$'+"(n1=1,"+r'$\beta$'+"; g=-1)")
    ax2.plot(betas_train,predictions["values"]["3"] ,alpha=0.85,  c="black",linewidth=5, label="epoch: "+str(predictions["epoch_number"]))#,label=r'$\hat{Q}$'+"(n1=1,"+r'$\beta$'+"; g=1)")
        
    ax1.scatter(betas_train,predictions["values"]["0"],alpha=0.85, c="black",s=150)
    ax1.scatter(betas_train,predictions["values"]["1"],alpha=0.85, c="black",s=150)

    ax2.scatter(betas_train,predictions["values"]["2"],alpha=0.85, c="black",s=150)
    ax2.scatter(betas_train,predictions["values"]["3"],alpha=0.85, c="black",s=150)

        
    for ax in [ax1, ax2]:
        ax.set_xlabel(r'$\beta$', size=20)
        ax.legend(prop={"size":15})
    if title != None:
        plt.suptitle(title, size=50)
    return

In [9]:
def plot_loss(loss, title=None):
    plt.figure(figsize=(15,15))
    ax1 =  plt.subplot2grid((1,1),(0,0))

    ax1.plot(np.arange(1,len(loss)+1),loss,'--',alpha=0.85,c="red", linewidth=5)#, label="Q(n1=0,"+r'$\beta$'+"; g=-1)")

    for ax in [ax1]:
        ax.set_xlabel("epoch", size=20)
        ax.legend(prop={"size":15})
    if title != None:
        plt.suptitle(title, size=50)
    return

## veamos como es el valor de la loss che ##

In [10]:
def learning_problem(rews_per_beta, epochs=10, number_of_betas=10, lr=10**(-3), batch_size=2., seed_val = 0.1,shuffling_in_each_epoch= True,
                     valreg=0.01,dropout=0.01, optimizer_name="Adam", retraining=False, printing_losses=False, optimizer_object=None, net_object=None):
    start = datetime.now()
    dataset = createdataset_l2(rews_per_beta, number_of_betas=number_of_betas)
    dataset_copy = dataset.copy()
    
    splits = len(dataset)/batch_size
    batch_size_here = batch_size

    if not retraining:
        net = Net(seed_val=seed_val,valreg=valreg, dropout=dropout)
    
        if optimizer_name == "SGD":
            optimizer = tf.keras.optimizers.SGD(lr=lr)
        else:
            optimizer = tf.keras.optimizers.Adam(lr=lr)
    else:
        optimizer=optimizer_object
        net=net_object
        print("Re-training!!!!")
    
    print("Seed_val: {}\nOptimizer: {}\nReal number of splits (len(dataset) after spliting): {}\nlen(data[0]: {}\nBatch size: {}\nepochs: {}\nNumber_of_betas: {}\nrews_per_beta: {}\nNumber of data points: {}\nShuffling in each epoch\nRegularizer value: {}\nDropout rate (first and second layer): {}: ".format(
            seed_val,optimizer.get_config()["name"],len(dataset),len(dataset[0]),batch_size_here,epochs,number_of_betas,rews_per_beta,rews_per_beta*4*number_of_betas, shuffling_in_each_epoch, valreg,dropout)
         )
    print("")
    histo_preds = {} #here i save the predictions
    lossavg = []
    for epoch in tqdm(range(epochs)):
        epoch_loss_avg = tf.keras.metrics.Mean()

        if (shuffling_in_each_epoch == True)|(epoch==0):
            np.random.shuffle(dataset) #shuffling the dataset
            dataset = np.split(dataset, splits + len(dataset)%splits) #spliting the dataset!
        if epoch==0:
            print("len(data[0]: {})".format(len(dataset[0])))
        for indba,batch in enumerate(dataset):
            with tf.GradientTape() as tape:
                tape.watch(net.trainable_variables)
                preds = net(batch[:,[0,1,2]]) #make predictions in batch
                rews = batch[:,3]
                loss_mean = tf.keras.losses.MSE(rews,preds)
                loss = tf.math.reduce_sum(loss_mean)/len(preds)

                grads = tape.gradient(loss, net.trainable_variables)
                optimizer.apply_gradients(zip(grads, net.trainable_variables))
            epoch_loss_avg(loss)
            if printing_losses:
                if indba%(len(dataset)/10)==0:
                    print("loss_mean: {}\nloss: {}\n\n".format(loss_mean,loss))
        lossavg.append(epoch_loss_avg.result())
            
        if epoch%(epochs/10)==0:
            print("Epoch {:03d}: Loss: {:.7f}".format(epoch,epoch_loss_avg.result()))

        ### concatenate the dataset again so we shuffle again #### 
        if shuffling_in_each_epoch == True:

            d = dataset[0]
            for k in range(1,len(dataset)):
                d = np.concatenate((d,dataset[k]))

            dataset = d

        if (epoch % int(epochs/5) == 0)|(epoch==epochs-1):

            histo_preds[str(epoch)] ={} 
            histo_preds[str(epoch)]["epoch_number"] = epoch
            histo_preds[str(epoch)]["values"] = {}

            index=0
            for n1 in [0.,1.]:
                for guess in [-1.,1.]:
                    foo =np.array([[b,n1,guess] for b in betas_train]) #betas_train defined as global in create_dataset_l2()
                    histo_preds[str(epoch)]["values"][str(index)] = np.squeeze(net(foo))
                    index+=1
    
    
    dataavg = np.split(dataset_copy, len(betas_train))

    mean_values = {}
    for index_beta in range(len(betas_train)):
        mean_values[str(index_beta)] = {}

    for index_beta, beta in enumerate(betas_train):
        sp = np.split(dataavg[index_beta],4)
        for index_ng in range(4):
            mean_values[str(index_beta)][str(index_ng)]=np.mean(sp[index_ng][:,3])

    print("Tardé: ", datetime.now()-start)
    return histo_preds, mean_values, net, optimizer, lossavg
    
    
#in this case the length is 500*4*30, 4 'cause the n1 and guess, 30 'cause the len(betas_train)

** so it apperars to stuck at the mean value of the sum of all rewards, which is not what we want **

In [20]:

class Net(tf.keras.Model):
    def __init__(self, valreg=0.01, seed_val=0.1, dropout=0.01):
        super(Net,self).__init__()
        self.dropout = dropout
        self.l1 = Dense(10, input_shape=(3,),kernel_initializer=tf.random_uniform_initializer(minval=-seed_val, maxval=seed_val),
        bias_initializer = tf.random_uniform_initializer(minval=-seed_val, maxval=seed_val),
        kernel_regularizer=tf.keras.regularizers.l1(valreg),
    activity_regularizer=tf.keras.regularizers.l2(valreg))

        self.l2 = Dense(330, kernel_regularizer=tf.keras.regularizers.l1(valreg),
    activity_regularizer=tf.keras.regularizers.l2(valreg),
    kernel_initializer=tf.random_uniform_initializer(minval=-seed_val, maxval=seed_val),
    bias_initializer = tf.random_uniform_initializer(minval=-seed_val, maxval=seed_val))
        self.l3 = Dense(330, kernel_regularizer=tf.keras.regularizers.l1(valreg),
    activity_regularizer=tf.keras.regularizers.l2(valreg),
    kernel_initializer=tf.random_uniform_initializer(minval=-seed_val, maxval=seed_val),
    bias_initializer = tf.random_uniform_initializer(minval=-seed_val, maxval=seed_val))

        self.l4 = Dense(33, kernel_regularizer=tf.keras.regularizers.l1(valreg),
    activity_regularizer=tf.keras.regularizers.l2(valreg),
    kernel_initializer=tf.random_uniform_initializer(minval=-seed_val, maxval=seed_val),
    bias_initializer = tf.random_uniform_initializer(minval=-seed_val, maxval=seed_val))

        self.l5 = Dense(1, kernel_regularizer=tf.keras.regularizers.l1(valreg),
    activity_regularizer=tf.keras.regularizers.l2(valreg),
    kernel_initializer=tf.random_uniform_initializer(minval=-seed_val, maxval=seed_val),
    bias_initializer = tf.random_uniform_initializer(minval=-seed_val, maxval=seed_val))

    def call(self, input):
        feat = tf.nn.relu(self.l1(input))
        feat = tf.nn.dropout(feat, rate=self.dropout)
        #feat = tf.nn.relu(self.l4(feat))
        feat = tf.nn.sigmoid(self.l5(feat))
        return feat

    def predict(self, history_one):
        inp = np.expand_dims(history_one, axis=0)
        return np.squeeze(self(inp).numpy())

    def __str__(self):
        return self.name


In [15]:
histo_preds, mean_values, net, optimizer, losses = learning_problem(10,epochs=10*4*10*10, batch_size=10*4*10,seed_val=0.01,
                                               number_of_betas=10, shuffling_in_each_epoch=True, valreg=0.005, dropout=0.001, optimizer_name="Adam", lr=10**-2, retraining=False,printing_losses=False,
                                                                    optimizer_object=optimizer,net_object=net
                                                                   ) 
plot_results(histo_preds,mean_values, title="Rews per history-action pair: 10")

NameError: name 'optimizer' is not defined

In [38]:
def createdataset_l2(number_rews_per_beta_per_outcome_per_guess=10**2,number_of_betas = 10):
    #we call it l2 so it does not interfer with the fist tutoral..
    dataset = []
    labels=[]
    global betas_train
    global number_of_samples
    number_of_samples = number_rews_per_beta_per_outcome_per_guess
    betas_train = np.linspace(-1.5,1.5,number_of_betas)
    for i in range(len(betas_train)):
        beta = betas_train[i]
        for outcome in [0.,1.]:
            for guess in [-1.,1.]:

                for k in range(number_rews_per_beta_per_outcome_per_guess):
                    mean_bernoulli = qval(beta, outcome, guess)
                    reward = np.random.choice([1.,0,],1,p=[mean_bernoulli, 1.-mean_bernoulli])[0]

                    dataset.append([beta, outcome, guess, reward])
    return np.array(dataset)

In [53]:
f = np.array([1,2,3,34,5])
np.random.shuffle(f)
np.split(f,len(f))

[array([5]), array([2]), array([1]), array([3]), array([34])]

array([ 5,  2,  1,  3, 34])

In [71]:
class DataSet():
    def __init__(self, rpbgo=10, nbetas=10):
        #rpbgo = rewards per beta-guess-outcome
        self.nbetas=nbetas
        self.rpbgo=rpbgo
        self.betas = np.linspace(-1.5,1.5,nbetas)
        self.size = self.nbetas*self.rpbgo*4.
        d = []
        for b in self.betas:
            for outcome in [0.,1,]:
                for guess in [-1.,1.]:
                    for k in range(self.rpbgo):
                        mean_bernoulli = qval(b, outcome, guess)
                        reward = np.random.choice([1.,0,],1,p=[mean_bernoulli, 1.-mean_bernoulli])[0]

                        d.append([b, outcome, guess, reward])
        self.data_unsplitted = d

    def batched_shuffled_dataset(self,splits):
        datacopy = self.data_unsplitted.copy()
        np.random.shuffle(datacopy)
        datacopy = np.split(datacopy, splits + len(datacopy)%splits)
        return datacopy

NameError: name 'size' is not defined

In [68]:
dataset = DataSet()

In [69]:
dataset.bat

AttributeError: 'DataSet' object has no attribute 'batched_shuffled_dataset'

In [47]:

        
def shuffle(dataset):
    if dataset["splitted"]=="not":
        d = dataset["training"][0]
        for k in range(1,len(dataset)):
            d = np.concatenate((d,dataset[k]))
        dataset = d
        dataset["training"] ="yes"
    else:
        dataset["training"] = np.random.shuffle(dataset["training"])
    return dataset
        

In [45]:
net = Net()
net(dataset[0][:,[0,1,2]])
init=[]
for k in net.trainable_variables:
    init.append(k)



dataset={"splitted":"not"}
dataset["training"] = createdataset_l2(10, number_of_betas=10)


In [41]:
opt = tf.keras.optimizers.Adam(0.001)
dataset = createdataset_l2(10)

for epoch in range(10):

    dataset = np.split(dataset,len(dataset))

    for k,_ in enumerate(dataset):

        with tf.GradientTape() as tape:
            tape.watch(net.trainable_variables)
            preds = net(dataset[k][:,[0,1,2]])
            labs = dataset[k][:,3]
            loss = tf.square(preds,labs)
        print(tf.reduce_mean(loss))

    dataset=reshuffle(dataset)


tf.Tensor(0.25295867266335764, shape=(), dtype=float64)
tf.Tensor(0.25295867266335764, shape=(), dtype=float64)
tf.Tensor(0.25295867266335764, shape=(), dtype=float64)
tf.Tensor(0.25295867266335764, shape=(), dtype=float64)
tf.Tensor(0.25295867266335764, shape=(), dtype=float64)
tf.Tensor(0.25295867266335764, shape=(), dtype=float64)
tf.Tensor(0.251504303073999, shape=(), dtype=float64)
tf.Tensor(0.25295867266335764, shape=(), dtype=float64)
tf.Tensor(0.25295867266335764, shape=(), dtype=float64)
tf.Tensor(0.25295867266335764, shape=(), dtype=float64)
tf.Tensor(0.24945528310113763, shape=(), dtype=float64)
tf.Tensor(0.24945528310113763, shape=(), dtype=float64)
tf.Tensor(0.24785268182072673, shape=(), dtype=float64)
tf.Tensor(0.24945528310113763, shape=(), dtype=float64)
tf.Tensor(0.24945528310113763, shape=(), dtype=float64)
tf.Tensor(0.24945528310113763, shape=(), dtype=float64)
tf.Tensor(0.24945528310113763, shape=(), dtype=float64)
tf.Tensor(0.24945528310113763, shape=(), dtype=flo

tf.Tensor(0.24716596777536717, shape=(), dtype=float64)
tf.Tensor(0.24716596777536717, shape=(), dtype=float64)
tf.Tensor(0.24716596777536717, shape=(), dtype=float64)
tf.Tensor(0.24716596777536717, shape=(), dtype=float64)
tf.Tensor(0.24716596777536717, shape=(), dtype=float64)
tf.Tensor(0.24716596777536717, shape=(), dtype=float64)
tf.Tensor(0.24716596777536717, shape=(), dtype=float64)
tf.Tensor(0.24672372567380432, shape=(), dtype=float64)
tf.Tensor(0.24716596777536717, shape=(), dtype=float64)
tf.Tensor(0.25018283210895764, shape=(), dtype=float64)
tf.Tensor(0.25018283210895764, shape=(), dtype=float64)
tf.Tensor(0.25018283210895764, shape=(), dtype=float64)
tf.Tensor(0.25018283210895764, shape=(), dtype=float64)
tf.Tensor(0.25018283210895764, shape=(), dtype=float64)
tf.Tensor(0.25018283210895764, shape=(), dtype=float64)
tf.Tensor(0.25018283210895764, shape=(), dtype=float64)
tf.Tensor(0.25018283210895764, shape=(), dtype=float64)
tf.Tensor(0.25018283210895764, shape=(), dtype=f

tf.Tensor(0.24576037419644722, shape=(), dtype=float64)
tf.Tensor(0.24576037419644722, shape=(), dtype=float64)
tf.Tensor(0.24576037419644722, shape=(), dtype=float64)
tf.Tensor(0.24576037419644722, shape=(), dtype=float64)
tf.Tensor(0.24576037419644722, shape=(), dtype=float64)
tf.Tensor(0.24576037419644722, shape=(), dtype=float64)
tf.Tensor(0.24576037419644722, shape=(), dtype=float64)
tf.Tensor(0.24576037419644722, shape=(), dtype=float64)
tf.Tensor(0.24484632383798152, shape=(), dtype=float64)
tf.Tensor(0.24484632383798152, shape=(), dtype=float64)
tf.Tensor(0.24484632383798152, shape=(), dtype=float64)
tf.Tensor(0.24484632383798152, shape=(), dtype=float64)
tf.Tensor(0.24484632383798152, shape=(), dtype=float64)
tf.Tensor(0.24484632383798152, shape=(), dtype=float64)
tf.Tensor(0.24484632383798152, shape=(), dtype=float64)
tf.Tensor(0.24484632383798152, shape=(), dtype=float64)
tf.Tensor(0.24484632383798152, shape=(), dtype=float64)
tf.Tensor(0.24484632383798152, shape=(), dtype=f