In [None]:
from utils import * 
from algorithms import *
from agents import *

In [None]:
from copy import deepcopy
class Log:
    """Store per-step and per-round auction records and build agent states.

    Three tables are maintained:

    * ``stepData``     - one raw row per step (see ``addStep``).
    * ``stepDataNorm`` - the public part of each step, standardised with the
      statistics loaded from ``meanPublicData.csv`` / ``stdPublicData.csv``.
    * ``roundData``    - one row per round (see ``addRound``).
    """

    # Public step columns, in the order they appear in a step record between
    # the leading ['rnd', 'period'] and the trailing ['bprofit', 'sprofit'].
    _PUBLIC_COLUMNS = ['step', 'currentBid', 'currentAsk', 'buy', 'sell', 'price', 'sale',
                       'currentBidIdx_0', 'currentBidIdx_1', 'currentBidIdx_2', 'currentBidIdx_3',
                       'currentAskIdx_0', 'currentAskIdx_1', 'currentAskIdx_2', 'currentAskIdx_3',
                       'bids_0', 'bids_1', 'bids_2', 'bids_3',
                       'asks_0', 'asks_1', 'asks_2', 'asks_3']

    # Value written into the normalised table when an entry is NaN:
    # -5 for bids, price and sale, +5 for asks.
    _NAN_FILL = {'bids_0': -5, 'bids_1': -5, 'bids_2': -5, 'bids_3': -5,
                 'asks_0': 5, 'asks_1': 5, 'asks_2': 5, 'asks_3': 5,
                 'price': -5, 'sale': -5}

    def __init__(self, gameData, disclosure, depth):
        """Create empty tables and load the normalisation statistics.

        Args:
            gameData: sequence unpacked as (gameTypes, numBuyers, numSellers,
                numTokens, numRounds, numPeriods, numSteps, seed).
            disclosure: list of step column names exposed by ``disclose`` and
                used as the public part of the state in ``generateState``.
            depth: number of most recent steps included in a state.

        Reads ``meanPublicData.csv``, ``stdPublicData.csv``,
        ``meanInternalData.csv`` and ``stdInternalData.csv`` from the current
        working directory; pandas raises if any of them is missing.
        """
        self.gameData = gameData
        (self.gameTypes, self.numBuyers, self.numSellers, self.numTokens,
         self.numRounds, self.numPeriods, self.numSteps, self.seed) = gameData
        self.stepData = pd.DataFrame(
            columns=['rnd', 'period'] + self._PUBLIC_COLUMNS + ['bprofit', 'sprofit'])
        self.stepDataNorm = pd.DataFrame(columns=list(self._PUBLIC_COLUMNS))
        self.roundData = pd.DataFrame(columns=['rnd', 'buyerValues', 'sellerCosts', 'demand', 'supply', 'prices', 'peq', 'qeq',
                                               'buyerReservationPrices', 'sellerReservationPrices', 'buyerSurplus', 'sellerSurplus',
                                               'totalSurplus', 'buyerSurplusFrac', 'sellerSurplusFrac'])
        self.disclosure = disclosure
        self.depth = depth
        # Each statistics file is flattened to a 1-D vector.
        self.meanPublicData = pd.read_csv('meanPublicData.csv').values.reshape(-1)
        self.stdPublicData = pd.read_csv('stdPublicData.csv').values.reshape(-1)
        self.meanInternalData = pd.read_csv('meanInternalData.csv').values.reshape(-1)
        self.stdInternalData = pd.read_csv('stdInternalData.csv').values.reshape(-1)

    def addStep(self, stepData):
        """Append one step to ``stepData`` and its standardised form to ``stepDataNorm``.

        Args:
            stepData: list ordered like the ``stepData`` columns. The slice
                ``stepData[2:-2]`` (everything between rnd/period and
                bprofit/sprofit) is the public part that gets standardised.
        """
        self.stepData.loc[len(self.stepData.index)] = stepData
        publicRaw = np.array(stepData[2:-2], dtype=float)
        # The small constant guards against division by a zero std.
        normRow = (publicRaw - self.meanPublicData) / (self.stdPublicData + 1e-8)
        # Replace NaNs in the new row only; earlier rows were already filled
        # when they were appended, so rescanning the whole table is not needed.
        for pos, col in enumerate(self.stepDataNorm.columns):
            if col in self._NAN_FILL and np.isnan(normRow[pos]):
                normRow[pos] = self._NAN_FILL[col]
        self.stepDataNorm.loc[len(self.stepDataNorm.index)] = normRow

    def addRound(self, roundData):
        """Append one row, ordered like the ``roundData`` columns."""
        self.roundData.loc[len(self.roundData.index)] = roundData

    def generateState(self, agent, cnt, numStates):
        """Build the flat state vector for ``agent``.

        Args:
            agent: object exposing ``periodTrades`` and ``roundTokens``
                (indices 0..3 are read).
            cnt: step counter supplied by the caller; a real state is only
                built once ``cnt >= depth``.
            numStates: length of the all-zero vector returned otherwise.

        Returns:
            list of floats: the last ``depth`` rows of the disclosed
            normalised columns, flattened, followed by the five standardised
            internal values; or ``[0.0] * numStates`` when ``cnt < depth``.
        """
        if cnt < self.depth:
            return [0.0] * numStates
        publicDataNorm = self.stepDataNorm[self.disclosure].iloc[-self.depth:]
        internalData = np.array([[agent.periodTrades, agent.roundTokens[0], agent.roundTokens[1],
                                  agent.roundTokens[2], agent.roundTokens[3]]])
        internalDataNorm = (internalData - self.meanInternalData) / (self.stdInternalData + 1e-8)
        return publicDataNorm.values.reshape(-1).tolist() + internalDataNorm.reshape(-1).tolist()

    def disclose(self):
        """Return the disclosed columns of the most recent raw step."""
        return self.stepData[self.disclosure].iloc[-1]

    def getPeriod(self, rnd, period):
        """Return the raw step rows belonging to round ``rnd`` and period ``period``."""
        return self.stepData[(self.stepData.rnd == rnd) & (self.stepData.period == period)]

    def getPeriodActivity(self, rnd, period):
        """Return bids, asks, prices and cumulative sale counts for one period.

        Returns:
            tuple of numpy arrays ``(bids, asks, prices, sales)``. ``bids`` and
            ``asks`` have one row per step and one column per ``bids_*`` /
            ``asks_*`` column; ``sales`` is the running count of steps whose
            price is greater than zero.
        """
        periodData = self.getPeriod(rnd, period)
        # The table stores one column per trader (bids_0.., asks_0..); there is
        # no single 'bids' or 'asks' column to read.
        bidCols = [c for c in periodData.columns if str(c).startswith('bids_')]
        askCols = [c for c in periodData.columns if str(c).startswith('asks_')]
        periodBids = periodData[bidCols].values.tolist()
        periodAsks = periodData[askCols].values.tolist()
        periodPrices = periodData['price'].tolist()
        periodSales = np.cumsum(np.where(periodData['price'] > 0, 1, 0))
        return np.array(periodBids), np.array(periodAsks), np.array(periodPrices), np.array(periodSales)

    def getRound(self, rnd):
        """Return the ``roundData`` rows whose ``rnd`` equals ``rnd``."""
        return self.roundData[(self.roundData.rnd == rnd)]

    def getRoundList(self, rnd):
        """Return the first matching round as a list without its ``rnd`` entry.

        Raises IndexError when no row matches ``rnd``.
        """
        return self.getRound(rnd).values.tolist()[0][1:]

    def _activeTraderIndex(self, prefix):
        """Return, per step, the numeric suffix of the flagged ``prefix_N`` column."""
        flagCols = [prefix + '_' + str(i) for i in range(4)]
        # Cast to float so idxmax works even when the columns are object dtype.
        flags = self.stepData[flagCols].astype(float)
        # expand=False yields a Series (not a one-column DataFrame).
        return flags.idxmax(axis=1).str.extract(r'(\d+)', expand=False).astype(int)

    def findCurrentTraders(self):
        """Add integer ``currentBidIdx`` / ``currentAskIdx`` columns to ``stepData``.

        Each value is the suffix of the first ``currentBidIdx_N`` (respectively
        ``currentAskIdx_N``) column holding the row maximum.
        """
        self.stepData['currentBidIdx'] = self._activeTraderIndex('currentBidIdx')
        self.stepData['currentAskIdx'] = self._activeTraderIndex('currentAskIdx')
        

In [47]:
# Display the first buyer's `state` attribute (assigned from
# log.generateState(...) inside the simulation loop).
buyers[0].state

[3.3074754516431075,
 -3.1512128898849205,
 0.8005870050672768,
 0.0,
 -12.87115471753893,
 -1.2101800897731843,
 0.0,
 1.7320507675688783,
 -0.5788898592897574,
 -0.5619514737911224,
 -0.5912086343754535,
 1.7274442414662483,
 -0.5665721031281139,
 -0.5634918294041426,
 -0.6004533298668724,
 -1.0609974134129248,
 -5.0,
 -1.3882105308727541,
 -5.0,
 -0.4692353140852919,
 1.7047658206546161,
 5.0,
 5.0,
 2.430666668890428,
 0.2437797236313841,
 0.06281408490897304,
 0.6060200194677918,
 -3.0636546817258115]

In [None]:
# --- Simulation configuration ------------------------------------------------
numRounds, numPeriods, numSteps = 1000, 1, 16
numBuyers, numSellers, numTokens = 4, 4, 4
gameTypes, seed = '1111', None
gameData = [gameTypes, numBuyers, numSellers, numTokens, numRounds, numPeriods, numSteps, seed]
disclosure = ['step', 'currentBid', 'currentAsk', 'buy', 'sell', 'price', 'sale',
              'currentBidIdx_0', 'currentBidIdx_1', 'currentBidIdx_2', 'currentBidIdx_3',
              'currentAskIdx_0', 'currentAskIdx_1', 'currentAskIdx_2', 'currentAskIdx_3',
              'bids_0', 'bids_1', 'bids_2', 'bids_3',
              'asks_0', 'asks_1', 'asks_2', 'asks_3']
depth = 1
# Log.generateState returns len(disclosure) values per remembered step plus
# five internal values (periodTrades and four roundTokens). Deriving the
# length keeps the all-zero fallback state the same size as a real state.
numStates = len(disclosure) * depth + 5

# NOTE(review): every agent is created with index=0, exactly as before.
# Confirm against ZeroIntelligence whether `index` should differ per agent.
buyers = [ZeroIntelligence(gameData, disclosure, index=0, buyer=1, reinforcer=0)
          for _ in range(numBuyers)]
sellers = [ZeroIntelligence(gameData, disclosure, index=0, buyer=0, reinforcer=0)
           for _ in range(numSellers)]
log = Log(gameData, disclosure, depth)
state_record = []   # one row per step: public step values + buyers[0] internals
reward_record = []
cnt = 0             # global step counter passed to log.generateState
print_interval = 100

# --- Simulation loop -----------------------------------------------------------
for rnd in range(numRounds):
    startRounds(gameData, log, buyers, sellers, rnd)
    score1, score2 = 0.0, 0.0
    ss1, ss2 = 0.0, 0.0
    for period in range(numPeriods):
        startPeriods(buyers, sellers)
        period_bids = []
        for step in range(numSteps):
            # start
            startSteps(buyers, sellers)

            # set state
            buyers[0].state = log.generateState(buyers[0], cnt, numStates)

            # trade
            bids, asks = collectOffers(buyers, sellers)
            period_bids.append(np.round(bids[0]))
            currentAsk, currentAskIdx, currentBid, currentBidIdx = bestOffers(bids, asks)
            price, buy, sell = trade(buyers, sellers, currentAsk, currentAskIdx, currentBid, currentBidIdx)
            bprofit, sprofit = 0, 0
            if price > 0:
                buyers[currentBidIdx].transact(price)
                sellers[currentAskIdx].transact(price)
                bprofit = buyers[currentBidIdx].stepProfits
                sprofit = sellers[currentAskIdx].stepProfits
            # Track the profits of buyers 0 and 1 only.
            if currentBidIdx == 0:
                score1 += bprofit
                ss1 += bprofit
            elif currentBidIdx == 1:
                score2 += bprofit
                ss2 += bprofit

            # Public part of the step record, built once and shared by the
            # log row and the state_record row so the two cannot drift apart.
            publicRow = [step, currentBid, currentAsk, buy, sell, price, price > 0,
                         currentBidIdx == 0, currentBidIdx == 1, currentBidIdx == 2, currentBidIdx == 3,
                         currentAskIdx == 0, currentAskIdx == 1, currentAskIdx == 2, currentAskIdx == 3,
                         bids[0], bids[1], bids[2], bids[3],
                         asks[0], asks[1], asks[2], asks[3]]
            log.addStep([rnd, period] + publicRow + [bprofit, sprofit])
            observe(buyers, sellers, log.disclose())
            endSteps(buyers, sellers)
            buyers[0].newState = log.generateState(buyers[0], cnt, numStates)
            cnt += 1
            # The episode for buyers[0] ends on the last step of the period.
            buyers[0].done = (step == numSteps - 1)

            agentsObserve(buyers, sellers)
            agentsTrain(buyers, sellers)
            # Internal values are read after observe/train, as before.
            state_record.append(publicRow + [buyers[0].periodTrades,
                                             buyers[0].roundTokens[0], buyers[0].roundTokens[1],
                                             buyers[0].roundTokens[2], buyers[0].roundTokens[3]])
        endPeriods(buyers, sellers)
        # NOTE(review): the condition tests `period`, which is always 0 when
        # numPeriods == 1, so this prints every round, and the accumulators are
        # reset at the start of every round. Confirm whether `rnd` was intended.
        if period % print_interval == 0:
            print('\t', ss1 / print_interval, ss2 / print_interval)
            ss1 = 0
            ss2 = 0
            print(step, round(score1 / print_interval, 2), round(score2 / print_interval, 2))
            score1 = 0.0
            score2 = 0.0

    endRounds(buyers, sellers)

In [None]:
# Display the first buyer's `state` attribute as left by the last
# executed simulation step.
buyers[0].state

In [None]:
# Add the integer 'currentBidIdx' / 'currentAskIdx' columns to log.stepData,
# then display the last 500 rows of the normalised step table.
log.findCurrentTraders()
log.stepDataNorm.tail(500)

In [None]:
# NOTE(review): the Log class defined in this notebook has no graphLearning
# method, so this call presumably relies on a Log implementation from
# elsewhere (e.g. utils) - confirm which class `log` is an instance of.
log.graphLearning(rolling_window = 1, trackBuyersIdx=[0], trackSellersIdx=[],  type = 'rnd')

In [None]:
# NOTE(review): graphLearning is not defined on the Log class in this
# notebook; verify the method exists on `log` before running this cell.
log.graphLearning(rolling_window = 10, trackBuyersIdx=[0], trackSellersIdx=[0])

In [None]:
# Convert the per-step records collected by the simulation loop into an array.
states = np.array(state_record)


In [None]:
# Write the column-wise mean and standard deviation (NaNs ignored) of the
# public step features to the CSV files that Log.__init__ loads.
states = np.array(state_record)
print(states.shape)
cols = (['step', 'currentBid', 'currentAsk', 'buy', 'sell', 'price', 'sale']
        + ['currentBidIdx_0', 'currentBidIdx_1', 'currentBidIdx_2', 'currentBidIdx_3']
        + ['currentAskIdx_0', 'currentAskIdx_1', 'currentAskIdx_2', 'currentAskIdx_3']
        + ['bids_0', 'bids_1', 'bids_2', 'bids_3']
        + ['asks_0', 'asks_1', 'asks_2', 'asks_3'])

# The last five columns of `states` are dropped here; they are exported separately.
publicMean = np.nanmean(states, axis=0)[:-5]
mean_df = pd.DataFrame(publicMean[np.newaxis, :], columns=cols)
mean_df.to_csv('meanPublicData.csv', index=False)
print(mean_df.shape)

publicStd = np.nanstd(states, axis=0)[:-5]
std_df = pd.DataFrame(publicStd[np.newaxis, :], columns=cols)
std_df.to_csv('stdPublicData.csv', index=False)
print(std_df.shape)

In [None]:
# Display the one-row frame of means most recently assigned to mean_df.
mean_df

In [None]:
# Write the column-wise mean and standard deviation (NaNs ignored) of the
# last five columns of `states` to the internal-statistics CSV files.
# Note: this rebinds cols, mean_df and std_df.
cols = ['trades', 'token0', 'token1', 'token2', 'token3']

internalMean = np.nanmean(states, axis=0)[-5:]
mean_df = pd.DataFrame(internalMean[np.newaxis, :], columns=cols)
mean_df.to_csv('meanInternalData.csv', index=False)

internalStd = np.nanstd(states, axis=0)[-5:]
std_df = pd.DataFrame(internalStd[np.newaxis, :], columns=cols)
std_df.to_csv('stdInternalData.csv', index=False)

In [None]:
# Display the one-row frame of means most recently assigned to mean_df.
mean_df

In [None]:
# Display the one-row frame of standard deviations most recently assigned to std_df.
std_df

In [None]:
# Display the global `buy`, last assigned from trade(...) in the simulation loop.
buy