In [1]:
import os

from utils import *
from algorithms import *
from agents import *

In [2]:
from copy import deepcopy
class Log:
    """Step- and round-level history of a trading game, plus state construction.

    Three frames are kept:

    * ``stepData``     -- one raw row per step (see ``addStep``),
    * ``stepDataNorm`` -- the public part of each step, standardised with the
      mean/std vectors loaded in ``__init__``,
    * ``roundData``    -- one row per round (see ``addRound``).
    """

    # Column groups shared by the raw and the normalised step frames.
    _BID_IDX_COLS = ['currentBidIdx_0', 'currentBidIdx_1', 'currentBidIdx_2', 'currentBidIdx_3']
    _ASK_IDX_COLS = ['currentAskIdx_0', 'currentAskIdx_1', 'currentAskIdx_2', 'currentAskIdx_3']
    _BID_COLS = ['bids_0', 'bids_1', 'bids_2', 'bids_3']
    _ASK_COLS = ['asks_0', 'asks_1', 'asks_2', 'asks_3']
    _PUBLIC_COLS = (['step', 'currentBid', 'currentAsk', 'buy', 'sell', 'price', 'sale']
                    + _BID_IDX_COLS + _ASK_IDX_COLS + _BID_COLS + _ASK_COLS)

    def __init__(self, gameData,disclosure,depth):
        """Create empty logs and load the normalisation statistics.

        Args:
            gameData: sequence unpacked as (gameTypes, numBuyers, numSellers,
                numTokens, numRounds, numPeriods, numSteps, seed).
            disclosure: list of step-log column names exposed by ``disclose``
                and used to build states.
            depth: number of most recent normalised steps included in a state.
        """
        self.gameData = gameData
        (self.gameTypes, self.numBuyers, self.numSellers, self.numTokens,
         self.numRounds, self.numPeriods, self.numSteps, self.seed) = gameData
        self.stepData = pd.DataFrame(columns=['rnd', 'period'] + self._PUBLIC_COLS + ['bprofit', 'sprofit'])
        self.stepDataNorm = pd.DataFrame(columns=list(self._PUBLIC_COLS))
        self.roundData = pd.DataFrame(columns=['rnd', 'buyerValues', 'sellerCosts', 'demand', 'supply', 'prices', 'peq', 'qeq',
                                              'buyerReservationPrices', 'sellerReservationPrices', 'buyerSurplus', 'sellerSurplus',
                                              'totalSurplus', 'buyerSurplusFrac', 'sellerSurplusFrac'])
        self.disclosure = disclosure
        self.depth = depth
        # NOTE(review): these files are read from the working directory, while the
        # statistics cell at the end of this notebook writes to 'data/' and uses
        # '_buyers' / '_sellers' suffixes for the internal data -- confirm which
        # files are meant to be loaded here.
        self.meanPublicData = pd.read_csv('meanPublicData.csv').values.reshape(-1)
        self.stdPublicData = pd.read_csv('stdPublicData.csv').values.reshape(-1)
        self.meanInternalData = pd.read_csv('meanInternalData.csv').values.reshape(-1)
        self.stdInternalData = pd.read_csv('stdInternalData.csv').values.reshape(-1)

    def addStep(self, stepData):
        """Append one step to ``stepData`` and its standardised form to ``stepDataNorm``.

        Args:
            stepData: row in ``stepData`` column order. Its slice ``[2:-2]``
                (everything except rnd, period, bprofit, sprofit) is the public
                part that gets standardised.
        """
        self.stepData.loc[len(self.stepData.index)] = stepData
        normRow = pd.Series(
            (np.array(stepData[2:-2]) - self.meanPublicData)/(self.stdPublicData + 1e-8),
            index=self.stepDataNorm.columns)
        # Missing values are replaced on the new row only (earlier rows were
        # already filled when they were added): -5 for bids, price and sale,
        # +5 for asks.
        normRow[self._BID_COLS] = normRow[self._BID_COLS].fillna(-5)
        normRow[self._ASK_COLS] = normRow[self._ASK_COLS].fillna(5)
        normRow[['price', 'sale']] = normRow[['price', 'sale']].fillna(-5)
        self.stepDataNorm.loc[len(self.stepDataNorm.index)] = normRow.values

    def addRound(self, roundData):
        """Append one row (in ``roundData`` column order) to the round log."""
        self.roundData.loc[len(self.roundData.index)] = roundData

    def generateState(self, agent, cnt, numStates):
        """Build the flat state vector for ``agent``.

        Args:
            agent: object exposing ``periodTrades`` and ``roundTokens[0..3]``.
            cnt: step counter; a real state is produced only if ``cnt >= depth``.
            numStates: length of the all-zero state returned otherwise.

        Returns:
            list: the last ``depth`` rows of the disclosed normalised columns,
            flattened, followed by the 5 standardised internal values; or
            ``numStates`` zeros when ``cnt < depth``.
        """
        if cnt < self.depth:
            return [0.0]*numStates
        publicDataNorm = self.stepDataNorm[self.disclosure].iloc[-self.depth:]
        internalData = np.array([[agent.periodTrades, agent.roundTokens[0], agent.roundTokens[1],
                                  agent.roundTokens[2], agent.roundTokens[3]]])
        internalDataNorm = (internalData - self.meanInternalData)/(self.stdInternalData+1e-8)
        return publicDataNorm.values.reshape(-1).tolist() + internalDataNorm.reshape(-1).tolist()

    def disclose(self):
        """Return the disclosed columns of the most recent raw step."""
        return self.stepData[self.disclosure].iloc[-1]

    def getPeriod(self, rnd, period):
        """Return the raw step rows belonging to the given round and period."""
        return self.stepData[(self.stepData.rnd==rnd) & (self.stepData.period==period)]

    def getPeriodActivity(self, rnd, period):
        """Return bids, asks, prices and cumulative sale counts for one period.

        The step log stores bids and asks in the split columns ``bids_0..3`` /
        ``asks_0..3`` (there is no ``bids`` / ``asks`` column), so the arrays
        are assembled from those.

        Returns:
            tuple of np.ndarray: (bids, asks, prices, sales). ``bids`` and
            ``asks`` have one row per step and one column per ``bids_i`` /
            ``asks_i``; ``sales`` is the running count of steps with price > 0.
        """
        periodData = self.getPeriod(rnd, period)
        periodBids = periodData[self._BID_COLS].values.tolist()
        periodAsks = periodData[self._ASK_COLS].values.tolist()
        periodPrices = list(periodData.price)
        periodSales = np.cumsum(np.where(periodData.price > 0,1,0))
        return np.array(periodBids), np.array(periodAsks), np.array(periodPrices), np.array(periodSales)

    def getRound(self, rnd):
        """Return the round-log rows whose ``rnd`` equals the given round."""
        return self.roundData[(self.roundData.rnd==rnd)]

    def getRoundList(self, rnd):
        """Return the first matching round row as a list, without the ``rnd`` entry."""
        return self.getRound(rnd).values.tolist()[0][1:]

    def findCurrentTraders(self):
        """Add ``currentBidIdx`` / ``currentAskIdx`` columns to ``stepData``.

        Each is the numeric suffix of the ``currentBidIdx_i`` / ``currentAskIdx_i``
        column holding the row maximum. ``stepData`` is modified in place.
        """
        df = self.stepData
        for target, flagCols in (('currentBidIdx', self._BID_IDX_COLS),
                                 ('currentAskIdx', self._ASK_IDX_COLS)):
            # expand=False yields a Series (not a one-column DataFrame).
            self.stepData[target] = (df[flagCols].idxmax(axis=1)
                                     .str.extract(r'(\d+)', expand=False).astype(int))
        

In [3]:
# Setup game parameters
numRounds, numPeriods, numSteps = 100, 1, 16
numBuyers, numSellers, numTokens = 4, 4, 4
gameTypes, seed = '1111', None
# Order matters: Log.__init__ unpacks gameData positionally in exactly this order.
gameData = [gameTypes, numBuyers, numSellers, numTokens, numRounds, numPeriods, numSteps, seed]
# Step-log columns that are disclosed to the agents and used to build states.
disclosure = ['step', 'currentBid', 'currentAsk', 'buy', 'sell', 'price', 'sale',
              'currentBidIdx_0', 'currentBidIdx_1', 'currentBidIdx_2', 'currentBidIdx_3',
              'currentAskIdx_0', 'currentAskIdx_1', 'currentAskIdx_2', 'currentAskIdx_3',
              'bids_0', 'bids_1', 'bids_2', 'bids_3',
              'asks_0', 'asks_1', 'asks_2', 'asks_3']
depth = 1
# Log.generateState returns `depth` rows of the disclosed columns plus 5 internal
# values (periodTrades and roundTokens[0..3]). The all-zero warm-up state it returns
# before `depth` steps exist must have the same length, so derive it instead of
# hard-coding (the previous literal 32 did not match the 28 values actually produced).
numInternalStates = 5
numStates = depth * len(disclosure) + numInternalStates
# NOTE(review): every agent is created with index=0, as in the original cell --
# confirm against the agents module whether each agent needs a distinct index.
buyers = [ZeroIntelligence(gameData, disclosure, index=0, buyer=1, reinforcer=0)
          for _ in range(numBuyers)]
sellers = [ZeroIntelligence(gameData, disclosure, index=0, buyer=0, reinforcer=0)
           for _ in range(numSellers)]
log = Log(gameData, disclosure, depth)
print_interval = numPeriods
# Per-step records filled by the training loop (buyer 0 / seller 0 views).
state_record = []
state_record2 = []
reward_record = []
rnd = 0
cnt = 0

In [4]:
# Training loop
for rnd in range(numRounds):
    print(rnd)
    startRounds(gameData, log, buyers, sellers, rnd)
    for period in range(numPeriods):
        startPeriods(buyers, sellers)
        period_bids = []
        for step in range(numSteps):
            startSteps(buyers, sellers)

            # State seen by buyer 0 / seller 0 before offers are collected;
            # `cnt` is the number of steps logged so far.
            buyers[0].state = log.generateState(buyers[0], cnt, numStates)
            sellers[0].state = log.generateState(sellers[0], cnt, numStates)

            bids, asks = collectOffers(buyers, sellers)
            period_bids.append(np.round(bids[0]))
            currentAsk, currentAskIdx, currentBid, currentBidIdx = bestOffers(bids, asks)
            price, buy, sell = trade(buyers, sellers, currentAsk, currentAskIdx, currentBid, currentBidIdx)

            # A positive price is treated as a completed trade (it is also what the
            # 'sale' column records below).
            bprofit, sprofit = 0, 0
            if price > 0:
                buyers[currentBidIdx].transact(price)
                sellers[currentAskIdx].transact(price)
                bprofit = buyers[currentBidIdx].stepProfits
                sprofit = sellers[currentAskIdx].stepProfits

            # Public part of the step, in the column order used by Log.stepDataNorm.
            # Built once and reused for the log row and both state records.
            publicRow = [step, currentBid, currentAsk,  buy, sell, price, price>0,
                         currentBidIdx==0, currentBidIdx==1, currentBidIdx==2, currentBidIdx==3,
                         currentAskIdx==0, currentAskIdx==1, currentAskIdx==2, currentAskIdx==3,
                         bids[0], bids[1], bids[2], bids[3],
                         asks[0], asks[1], asks[2], asks[3]]

            log.addStep([rnd, period] + publicRow + [bprofit, sprofit])
            observe(buyers, sellers, log.disclose())
            endSteps(buyers, sellers)

            # The step has been logged, so count it before building newState.
            # Previously newState used the stale counter and was all zeros at
            # cnt == depth - 1 even though `depth` steps were available, so it
            # disagreed with the `state` generated at the start of the next step.
            cnt += 1
            buyers[0].newState = log.generateState(buyers[0], cnt, numStates)
            sellers[0].newState = log.generateState(sellers[0], cnt, numStates)

            isLastStep = step == numSteps - 1
            buyers[0].done = isLastStep
            sellers[0].done = isLastStep

            agentsObserve(buyers, sellers)
            agentsTrain(buyers, sellers)

            # Raw (un-normalised) records: public row + the agent's internal values,
            # read after endSteps / agentsObserve / agentsTrain have run.
            state_record.append(publicRow + [buyers[0].periodTrades,
                                             buyers[0].roundTokens[0], buyers[0].roundTokens[1],
                                             buyers[0].roundTokens[2], buyers[0].roundTokens[3]])
            state_record2.append(publicRow + [sellers[0].periodTrades,
                                              sellers[0].roundTokens[0], sellers[0].roundTokens[1],
                                              sellers[0].roundTokens[2], sellers[0].roundTokens[3]])
        endPeriods(buyers, sellers)
    endRounds(buyers, sellers)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99


In [9]:
# Collect states
# Turn the per-step records gathered by the training loop into 2-D arrays
# (one row per step): `states` from buyer 0's records, `states2` from seller 0's.
states, states2 = (np.array(record) for record in (state_record, state_record2))
print(states.shape, states2.shape)

(1600, 28) (1600, 28)


In [11]:
# Persist NaN-aware per-column mean / std of the recorded states as one-row CSVs.
def saveColumnStats(values, columns, stem, outDir='data'):
    """Write column means and standard deviations of ``values`` to CSV.

    Args:
        values: 2-D array, one row per recorded step.
        columns: column names for the written files (one per column of ``values``).
        stem: file-name stem; files are ``<outDir>/mean<stem>.csv`` and
            ``<outDir>/std<stem>.csv``.
        outDir: target directory; created if it does not exist.

    Returns:
        tuple: (mean_df, std_df), the two one-row DataFrames that were written.
    """
    # to_csv does not create missing directories, so make sure the target exists.
    os.makedirs(outDir, exist_ok=True)
    mean_df = pd.DataFrame(np.nanmean(values, axis=0).reshape(1, -1), columns=columns)
    mean_df.to_csv('{}/mean{}.csv'.format(outDir, stem), index=False)
    std_df = pd.DataFrame(np.nanstd(values, axis=0).reshape(1, -1), columns=columns)
    std_df.to_csv('{}/std{}.csv'.format(outDir, stem), index=False)
    return mean_df, std_df

# common states
# The last 5 columns of each record are the agent's internal values; everything
# before them is the public part shared by buyers and sellers.
cols = ['step', 'currentBid', 'currentAsk', 'buy', 'sell', 'price', 'sale',
        'currentBidIdx_0', 'currentBidIdx_1', 'currentBidIdx_2', 'currentBidIdx_3',
        'currentAskIdx_0', 'currentAskIdx_1', 'currentAskIdx_2', 'currentAskIdx_3',
        'bids_0', 'bids_1', 'bids_2', 'bids_3',
        'asks_0', 'asks_1', 'asks_2', 'asks_3']
mean_df, std_df = saveColumnStats(states[:, :-5], cols, 'PublicData')

# internal states buyer
cols = ['trades', 'token0', 'token1', 'token2', 'token3']
mean_df, std_df = saveColumnStats(states[:, -5:], cols, 'InternalData_buyers')

# internal states seller
cols = ['trades', 'token0', 'token1', 'token2', 'token3']
mean_df, std_df = saveColumnStats(states2[:, -5:], cols, 'InternalData_sellers')
