<a href="https://colab.research.google.com/github/shailymishra/Paper-Presentation-Summary-Implementation/blob/main/RegretNet_in_pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
## Imports
import torch

import os
import numpy as np
import pandas as pd
import pandas.util.testing as tm
from tqdm import tqdm
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from torch import nn, optim

import torch.nn.functional as F


  import sys


In [None]:
## Config

import os
import os.path as osp
import numpy as np


# `pip install easydict` if you don't have it
from easydict import EasyDict as edict

# Global experiment configuration. `cfg` is an alias for the same EasyDict
# object as `__C`; later cells read settings through `cfg.<section>.<name>`.
__C = edict()
cfg = __C

# Output-dir to write log-files and save model
__C.dir_name = os.path.join("experiments", "additive_1x2_uniform")

# Auction params
__C.num_agents = 1
__C.num_items = 2
__C.distribution_type = "uniform"
__C.agent_type = "additive"

# Save data for restore.
__C.save_data = False

# Neural Net parameters
__C.net = edict()    
# initialization g - glorot, h - he + u - uniform, n - normal [gu, gn, hu, hn]
# NOTE(review): the RegretNet class visible in this file does not read
# `net.init`; it always applies Xavier (Glorot) uniform initialisation.
__C.net.init = "gu"
# activations ["tanh", "sigmoid", "relu"]
# num_a_layers, num_p_layers - total number of hidden_layers + output_layer, [a - alloc, p - pay]
# num_a_units, num_p_units - layer widths from input to output, so each list
# has num_*_layers + 1 entries, [a - alloc, p - pay]
# num_a_activation, num_p_activation - one activation name per layer
# NOTE(review): the final "softmax" runs over all num_items*num_agents
# allocation outputs at once - confirm that is the intended normalisation.
__C.net.num_a_layers = 3
__C.net.num_a_activation = ["tanh", "tanh", "softmax"]
__C.net.num_a_units = [__C.num_items*__C.num_agents, 100,100,__C.num_items*__C.num_agents]
__C.net.num_p_layers = 3
__C.net.num_p_activation = ["tanh", "tanh", "sigmoid"]
__C.net.num_p_units = [__C.num_items*__C.num_agents ,100,100,  __C.num_agents]

# Train parameters
__C.train = edict()

# Random seed
__C.train.seed = 42
# Iteration from which training begins. Use restore_iter = 0 (the default) to
# train from scratch; restore_iter > 0 resumes training from restore_iter
# [needs saved model]
__C.train.restore_iter = 0
# max iters to train 
# __C.train.max_iter = 400000  ##CHANGED
__C.train.max_iter = 100
# Learning rate of network param updates
__C.train.learning_rate = 1e-3
# Regularization
__C.train.wd = None

""" Train-data params """
# Choose between fixed and online. If online, set adv_reuse to False
__C.train.data = "fixed"
# Number of batches
# __C.train.num_batches = 5000  ##CHANGED
__C.train.num_batches = 1
# Train batch size
# __C.train.batch_size = 128 ##CHANGED
__C.train.batch_size = 5


""" Train-misreport params """
# Cache-misreports after misreport optimization
__C.train.adv_reuse = True
# Number of misreport initialization for training
__C.train.num_misreports = 1
# Number of steps for misreport computation
__C.train.gd_iter = 25
# Learning rate of misreport computation
__C.train.gd_lr = 0.1

""" Lagrange Optimization params """
# Initial update rate
__C.train.update_rate = 1.0
# Initial Lagrange weights
__C.train.w_rgt_init_val = 5.0
# Lagrange update frequency
# __C.train.update_frequency = 100 ##CHANGED
__C.train.update_frequency = 10
# Value by which update rate is incremented
__C.train.up_op_add = 50.0
# Frequency at which update rate is incremented
__C.train.up_op_frequency = 10000


""" train summary and save params"""
# Number of models to store on disk
__C.train.max_to_keep = 25
# Frequency at which models are saved
__C.train.save_iter = 20000 
# Train stats print frequency
# __C.train.print_iter = 1000   ##changed
__C.train.print_iter = 10
   

""" Validation params """
__C.val = edict()
# Number of steps for misreport computation
# __C.val.gd_iter = 2000   ##changed
__C.val.gd_iter = 20
# Learning rate for misreport computation
__C.val.gd_lr = 0.1
# Number of validation batches
__C.val.num_batches = 20
# Frequency at which validation is performed
# __C.val.print_iter = 10000   ##changed
__C.val.print_iter = 10
# Validation data mode (presumably "fixed" or "online", as for train.data - TODO confirm)
__C.val.data = "fixed"

""" Test params """
# Test set
__C.test = edict()
# Test Seed
__C.test.seed = 100
# Model to be evaluated
__C.test.restore_iter = 400000
# Number of misreports
__C.test.num_misreports = 1000
# Number of steps for misreport computation
__C.test.gd_iter = 2000
# Learning rate for misreport computation
__C.test.gd_lr = 0.1
# Test data
__C.test.data = "online"
# Number of test batches
__C.test.num_batches = 100
# Test batch size
__C.test.batch_size = 100
# Save Output
__C.test.save_output = False


# Validation reuses the training batch size and misreport count
__C.val.batch_size = __C.train.batch_size
__C.val.num_misreports = __C.train.num_misreports

# Compute number of samples (must stay below the assignments they read from)
__C.train.num_instances = __C.train.num_batches * __C.train.batch_size
__C.val.num_instances = __C.val.num_batches * __C.val.batch_size
__C.test.num_instances = __C.test.num_batches * __C.test.batch_size


In [None]:
## Create Data

## data n_samples x n_agents x n_items
## n_samples x 1 x 2

def generate_random_X(shape):
    """Return a NumPy array with the given shape, sampled uniformly from [0, 1).

    `shape` is any iterable of ints; samples come from NumPy's global RNG,
    so call `np.random.seed` first for reproducible draws.
    """
    dims = tuple(shape)
    return np.random.rand(*dims)

def generate_random_ADV(shape):
    """Return a NumPy array with the given shape, sampled uniformly from [0, 1).

    Uses NumPy's global RNG; `shape` is unpacked into the individual
    dimension sizes.
    """
    samples = np.random.rand(*shape)
    return samples

def preprocessdata(data):
    """Flatten per-agent valuations into one feature row per sample.

    Args:
        data: tensor whose first axis indexes samples, e.g.
            (n_samples, n_agents, n_items).

    Returns:
        A tensor of shape (n_samples, n_agents * n_items). A 2-D input is
        returned with its shape unchanged.

    The previous `squeeze(1)` only produced this layout when n_agents == 1;
    with more agents it left the tensor 3-D. Flattening every trailing axis
    gives the same result in the single-agent case and also handles several
    agents.
    """
    return torch.flatten(data, start_dim=1)




In [None]:
## Create Net

class RegretNet(nn.Module):
    """Allocation and payment networks for an auction, plus the training losses.

    Two independent fully connected stacks read the same flattened bid
    vector of width num_agents * num_items:

    * ``allocationNetwork`` outputs one value per (agent, item) pair;
    * ``paymentNetwork`` outputs one value per agent, which ``forward``
      multiplies by that agent's allocated value to obtain the payment.

    Args:
        config: object exposing ``num_items``, ``num_agents``,
            ``net.num_{a,p}_units``, ``net.num_{a,p}_layers``,
            ``net.num_{a,p}_activation``, ``train.num_misreports``,
            ``train.update_rate`` and ``train.w_rgt_init_val``.
    """

    def __init__(self, config):
        super().__init__()
        self.num_items = config.num_items
        self.num_agents = config.num_agents
        self.num_a_units = config.net.num_a_units
        self.num_a_layers = config.net.num_a_layers
        self.num_a_activation = config.net.num_a_activation
        self.num_p_units = config.net.num_p_units
        self.num_p_layers = config.net.num_p_layers
        self.num_p_activation = config.net.num_p_activation
        self.num_misreports = config.train.num_misreports

        self.relu = nn.ReLU()

        self.update_rate = config.train.update_rate
        self.w_rgt_init_val = config.train.w_rgt_init_val

        # One Lagrange multiplier per agent. Kept as a plain float32 tensor
        # (not an nn.Parameter) so it is not part of self.parameters().
        self.w_rgt = torch.full(
            (self.num_agents,), float(self.w_rgt_init_val), dtype=torch.float32
        )

        print('   w_rgt  ', self.w_rgt)

        # Lookup from the activation names used in the config to modules.
        # Softmax gets an explicit dim: the last axis is the one the implicit
        # form selected for the 2-D batches used here, without the warning.
        self.activation = {
            'sigmoid': nn.Sigmoid(),
            'tanh': nn.Tanh(),
            'softmax': nn.Softmax(dim=-1),
            'relu': self.relu,
        }

        ## Layers
        self.allocationNetwork = nn.ModuleList(
            [nn.Linear(self.num_a_units[i], self.num_a_units[i + 1])
             for i in range(self.num_a_layers)]
        )
        self.paymentNetwork = nn.ModuleList(
            [nn.Linear(self.num_p_units[i], self.num_p_units[i + 1])
             for i in range(self.num_p_layers)]
        )

        # Xavier-uniform weights and zero biases; allocation layers first,
        # then payment layers, so a seeded run draws in the same order.
        for layer in self.allocationNetwork:
            nn.init.xavier_uniform_(layer.weight)
            nn.init.zeros_(layer.bias)
        for layer in self.paymentNetwork:
            nn.init.xavier_uniform_(layer.weight)
            nn.init.zeros_(layer.bias)

    def _run_stack(self, layers, activation_names, x):
        """Apply each linear layer followed by its configured activation."""
        out = x
        for i, layer in enumerate(layers):
            out = self.activation[activation_names[i]](layer(out))
        return out

    def forward(self, x):
        """Compute allocations and payments for a batch of bids.

        Args:
            x: tensor of shape (n_samples, num_agents * num_items).

        Returns:
            (allocation, payment): allocation has the same shape as ``x``;
            payment has shape (n_samples, num_agents) and equals the payment
            network output times each agent's allocated value under ``x``.
        """
        allocation = self._run_stack(self.allocationNetwork, self.num_a_activation, x)
        payment = self._run_stack(self.paymentNetwork, self.num_p_activation, x)

        n_samples = allocation.shape[0]
        alloc_x_val = torch.reshape(
            allocation * x, (n_samples, self.num_agents, self.num_items)
        )
        # Sum over items: the value each agent receives at the reported bids.
        payment = payment * torch.sum(alloc_x_val, dim=2)
        return allocation, payment

    def compute_rev(self, payment):
        """Return the batch mean of the total payment collected per sample."""
        return torch.mean(torch.sum(payment, dim=1))

    def compute_utility(self, x, allocation, payment):
        """Return per-agent utility: allocated value under ``x`` minus payment.

        Args:
            x: valuations, shape (n_samples, num_agents * num_items).
            allocation: same shape as ``x``.
            payment: shape (n_samples, num_agents).

        Returns:
            Tensor of shape (n_samples, num_agents).
        """
        n_samples = x.shape[0]
        alloc_x_val = torch.reshape(
            allocation * x, (n_samples, self.num_agents, self.num_items)
        )
        return torch.sum(alloc_x_val, dim=2) - payment

    def compute_regret(self, x, misreports, utility_true):
        """Estimate each agent's regret from a set of candidate misreports.

        Args:
            x: true valuations, shape (n_samples, num_agents * num_items).
            misreports: indexable along its first axis by misreport id; each
                entry has n_samples rows and is flattened to
                (n_samples, num_agents * num_items) before the forward pass.
            utility_true: utility under truthful bids,
                shape (n_samples, num_agents).

        Returns:
            (regret, misreports_utility): regret has shape (num_agents,);
            misreports_utility has shape
            (num_misreports, n_samples, num_agents).
        """
        per_misreport_utility = []
        for i in range(self.num_misreports):
            bids = torch.flatten(misreports[i], start_dim=1)
            # Run this module itself; the earlier code went through a
            # module-level variable named `net`, which only worked when the
            # instance happened to be bound to that global name.
            mis_allocation, mis_payment = self(bids)
            # Outcome comes from the misreport, value is judged at the truth.
            per_misreport_utility.append(
                self.compute_utility(x, mis_allocation, mis_payment)
            )
        misreports_utility = torch.stack(per_misreport_utility)

        difference = self.relu(misreports_utility - utility_true)
        # Best gain over all misreports for each (sample, agent).
        max_difference, _ = torch.max(difference, dim=0)
        # Average over samples (dim 0) so there is one regret per agent,
        # matching w_rgt element-wise. Averaging over dim 1 gave one value
        # per sample, which made the Lagrangian term grow with batch size.
        regret = torch.mean(max_difference, dim=0)
        return regret, misreports_utility

    def loss_function(self, allocation, payment, train_data, train_misreports_data):
        """Build the three training objectives and record summary metrics.

        Returns:
            (loss_1, loss_2, loss_3):
            loss_1 = -revenue + quadratic regret penalty + Lagrangian term;
            loss_2 = -(sum of utilities obtained by misreporting);
            loss_3 = -(Lagrangian term).

        Also stores ``self.metrics`` and the parallel ``self.metric_names``.
        """
        revenue = self.compute_rev(payment)
        utility = self.compute_utility(train_data, allocation, payment)
        regret, misreports_utility = self.compute_regret(
            train_data, train_misreports_data, utility
        )

        print(' revenue is ', revenue)

        rgt_mean = torch.mean(regret)
        rgt_penalty = self.update_rate * torch.sum(torch.square(regret)) / 2.0
        lag_loss = torch.sum(self.w_rgt * regret)

        loss_1 = -revenue + rgt_penalty + lag_loss
        loss_2 = -torch.sum(misreports_utility)
        loss_3 = -lag_loss

        self.metrics = [revenue, rgt_mean, rgt_penalty, lag_loss, loss_1,
                        torch.mean(self.w_rgt), self.update_rate]
        self.metric_names = ["Revenue", "Regret", "Reg_Loss", "Lag_Loss",
                             "Net_Loss", "w_rgt_mean", "update_rate"]

        return loss_1, loss_2, loss_3



In [None]:
## Create train , val , test


# Seed both RNGs: NumPy drives the sampled valuations and misreports, torch
# drives the weight initialisation inside RegretNet.
np.random.seed(cfg.train.seed)
torch.manual_seed(cfg.train.seed)
net = RegretNet(cfg)

train_data_shape = [cfg.train.num_instances , cfg.num_agents, cfg.num_items]
train_adv_shape = [cfg.train.num_misreports, cfg.train.num_instances, cfg.num_agents, cfg.num_items]

# Valuations: sampled, converted to float32, then run through preprocessdata
# to get the layout the network consumes.
train_data = generate_random_X(train_data_shape)
train_data = torch.from_numpy(train_data).float()
train_data = preprocessdata(train_data)

# Initial misreports, one set per misreport slot.
train_misreports_data = generate_random_ADV(train_adv_shape)
train_misreports_data = torch.from_numpy(train_misreports_data).float()

# Independent, trainable copy of the misreports. clone().detach() is the
# supported way to copy an existing tensor; torch.tensor(tensor) warns.
temp = [train_misreports_data.clone().detach().requires_grad_(True)]
optimizer_1 = optim.Adam(net.parameters(), cfg.train.learning_rate)
optimizer_2 = optim.Adam(temp, lr=cfg.train.gd_lr)


allocation, payment = net(train_data)

loss1, loss2, loss3 = net.loss_function(allocation, payment, train_data, train_misreports_data)

print(' Update for Misreports')
for step in range(cfg.train.gd_iter):
    print(step)
    optimizer_2.zero_grad()
    loss1, loss2, loss3 = net.loss_function(allocation, payment, train_data, temp[0])
    # Only loss2 is back-propagated here. This also accumulates gradients on
    # the network weights, so call optimizer_1.zero_grad() before any later
    # weight update.
    loss2.backward()
    optimizer_2.step()
    # Project the misreports back into [0, 1] without recording the clamp
    # in the autograd graph.
    with torch.no_grad():
        temp[0].clamp_(0, 1)
    print()
    print(temp)
    print()

print('___________________________________________________')
for metric_name, metric_value in zip(net.metric_names, net.metrics):
    print( metric_name , ' : ' , metric_value )
print('___________________________________________________')


# optimizer_1.zero_grad()
# loss1.backward()
# optimizer_1.step()
# allocation , payment  = net(train_data)


# optimizer.zero_grad()
#     output = model(input)
#     loss = loss_fn(output, target)
#     loss.backward()
#     optimizer.step()


# loss1, loss2,loss3 = net.loss_function(allocation, payment ,  train_data,  train_misreports_data)
# optimizer_1.zero_grad()
# loss1.backward()
# r = optimizer_1.step()
# print(' loss 1 ', loss1 , ' loss 2 ', loss2 , ' loss 3 ', loss3)

# print('________________________')
# print(train_misreports_data)
# print('________________________')

  

            # Optimizer


# print('___________________________________________________')
# for i in range(len(net.metric_names)):
#   print( net.metric_names[i] , ' : ' , net.metrics[i] )
# print('___________________________________________________')

# revenue = net.compute_rev(payment)
# utility = net.compute_utility(train_data, allocation, payment)

# regret = net.compute_regret(train_misreports_data , utility)



### TODO: update the loss and check that it still behaves correctly
### TODO: then loop over batches
### TODO: then save the model, and ... (note left unfinished)


   w_rgt   tensor([5.])
 revenue is  tensor(0.2515, grad_fn=<MeanBackward0>)
 Update for Misreports
0
 revenue is  tensor(0.2515, grad_fn=<MeanBackward0>)

[tensor([[[[0.0000, 0.8699]],

         [[0.7324, 0.1123]],

         [[0.0818, 0.0834]],

         [[0.2042, 0.4248]],

         [[0.3319, 0.1912]]]], requires_grad=True)]

1
 revenue is  tensor(0.2515, grad_fn=<MeanBackward0>)

[tensor([[[[0.0000, 0.7699]],

         [[0.6324, 0.0123]],

         [[0.0000, 0.0000]],

         [[0.1042, 0.3247]],

         [[0.2319, 0.0912]]]], requires_grad=True)]

2
 revenue is  tensor(0.2515, grad_fn=<MeanBackward0>)

[tensor([[[[0.0000, 0.6698]],

         [[0.5324, 0.0000]],

         [[0.0000, 0.0000]],

         [[0.0042, 0.2246]],

         [[0.1320, 0.0000]]]], requires_grad=True)]

3
 revenue is  tensor(0.2515, grad_fn=<MeanBackward0>)

[tensor([[[[0.0000, 0.5695]],

         [[0.4324, 0.0000]],

         [[0.0000, 0.0000]],

         [[0.0000, 0.1245]],

         [[0.0320, 0.0000]]]], re



In [None]:
# Toy walk-through of the utility / regret arithmetic on hand-written tensors:
# 2 samples, 3 agents, 2 items, 2 candidate misreports.
n_samples, n_items, n_agents = 2, 2, 3


def _framed_print(value):
    """Print `value` with a rule line above and below it."""
    rule = '________________________________________________________________'
    print(rule)
    print(value)
    print(rule)


# Each row is one sample, laid out agent by agent with n_items entries each.
allocation = torch.tensor([[0.4 , 0.2 , 0.5 , 0.6, 0.7,0.4 ], [0.1 , 0.3 , 0.7 , 0.2,0.1,0.2 ] ])
valuation = torch.tensor([[0.1 , 0.2 , 0.5 , 0.3 , 0.8,0.6], [0.54 , 0.2 , 0.7 , 0.8, 0.8,0.9 ] ])
payment = torch.tensor([[0.13 , 0.33, 0.6], [0.76, 0.65, 0.6]])

# Value received per (sample, agent, item); utility sums over items and
# subtracts the payment.
allocXval = (allocation * valuation).reshape(n_samples, n_agents, n_items)
utility = allocXval.sum(dim=2) - payment
_framed_print(utility)

# Leading axis indexes the misreport; the remaining axes mirror the tensors above.
v_misreports = torch.tensor([
    [[0.3 , 0.24 , 0.25 , 0.38 , 0.38,0.16], [0.5 , 0.12 , 0.47 , 0.28, 0.81,0.5 ]],
    [[0.5 , 0.25 , 0.25 , 0.73 , 0.877,0.61], [0.4 , 0.22 , 0.37 , 0.38, 0.5,0.3 ]],
])
# The misreport allocations use exactly the same numbers as the misreported values.
a_misreports = v_misreports.clone()
p_misreports = torch.tensor([
    [[0.7 , 0.7 , 0.7], [0.5 , 0.12 , 0.47]],
    [[0.5 , 0.25 , 0.25], [0.4 , 0.22 , 0.37]],
])

n_misreports = 2

allocXvalu_misreports = (a_misreports * v_misreports).reshape(
    n_misreports, n_samples, n_agents, n_items
)
u_misreports = allocXvalu_misreports.sum(dim=3) - p_misreports
_framed_print(u_misreports)

# Positive part of the gain from each misreport over the first utility tensor.
relu = nn.ReLU()
difference = relu(u_misreports - utility)
_framed_print(difference)

# Largest gain across misreports for every (sample, agent).
maxdiff, indices = difference.max(dim=0)
_framed_print(maxdiff)

# Mean over dim 1 (the agent axis): one number per sample.
regret = maxdiff.mean(dim=1)
_framed_print(regret)


# utility2 = [ net.compute_utility(v_misreports[i], a_misreports[i],  p_misreports[i] ) for i in range(n_misreports)]
# print('________________________________________________________________')
# utility2 = torch.stack(utility2)
# print(utility2)
# print('________________________________________________________________')


# print(misreports.shape)




# print('payment')
# print(payment)
# print('__________________________________________')
# print(' Revenue ', torch.mean(torch.sum(payment, dim=1)))
# print('__________________________________________')
# utility =  torch.sum( allocXval, dim=2) - payment
# print('__________________________________________')
# print(utility)
# print('__________________________________________')


# print('allocation')
# print(allocation)
# print('__________________________________________')

# print('valuation')
# print(valuation)
# print('__________________________________________')

# print('__________________________________________')
# print(torch.sum(allocXval, dim=2))
# print('__________________________________________')



