# Future predict

Given a replay of expert trajectories (s, a), try to predict the next state s' from the current state s.

This may help to provide a better behavioral-cloning (BC) network, or to roll out imagined trajectories.

In [11]:
# Selects whether the action columns are appended to the state as model input
# (read by MyModel and dataToBatches); when False the input is the state alone.
INCLUDE_ACTION_INPUT = False

# When True, cleanPlayerStates / cleanBallStates apply the norm*State helpers,
# and cleanPlayerActions applies normPlayerActions.
# NOTE(review): this cell carries a later execution count than the cells that
# read these flags — move it above them so Restart & Run All works.
NORMALIZE_STATE = True
NORMALIZE_ACTIONS = True

In [1]:
import math
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import sys

sys.path.append('D:/projects/carball')

from tqdm.notebook import tqdm

import carball
from carball.controls.controls import ControlsCreator
from carball.json_parser import Game

# NN 
import random

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter

device = torch.device("cpu")

In [2]:
%matplotlib inline
plt.rcParams['figure.figsize'] = (16, 10)


In [3]:
%%javascript
// Replace the output area's _should_scroll check with one that always returns
// false (presumably so long cell outputs are never put in a scroll box).
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

In [4]:
def replayToGame(replayID):
    """Decompile ``replays/<replayID>.replay`` and wrap it in a carball ``Game``.

    Prints the path being loaded, decompiles the replay with
    ``carball.decompile_replay`` and initializes a fresh ``Game`` from the
    decompiled data.

    Returns:
        The initialized ``Game`` object.
    """
    replayPath = os.path.join("replays", "%s.replay" % replayID)
    print ("Loading...\n\t%s" % replayPath)

    decompiled = carball.decompile_replay(replayPath)
    parsedGame = Game()
    parsedGame.initialize(loaded_json=decompiled)
    return parsedGame

# Load the replay named "noBoost1v1_1"; GAME is read as a module-level global
# by the cells below.
GAME = replayToGame("noBoost1v1_1")
# Parses action-related properties into their actual actions
ControlsCreator().get_controls(GAME)

Loading...
	replays\noBoost1v1_1.replay


Could not find field_of_view in camera settings for Sundown
Could not find height in camera settings for Sundown
Could not find pitch in camera settings for Sundown
Could not find distance in camera settings for Sundown
Could not find stiffness in camera settings for Sundown
Could not find swivel_speed in camera settings for Sundown
Could not find transition_speed in camera settings for Sundown
  rhs[1] / (T_p + np.sign(rhs[1]) * omega[1] * D_p),
  rhs[2] / (T_y - np.sign(rhs[2]) * omega[2] * D_y)


In [5]:
# Sanity-check the loaded game: exactly two players are required.
nPlayers = len(GAME.players)
#assert nPlayers == 6, "Only 3v3 modes supported, this has %d players" % nPlayers
assert nPlayers == 2, "Only 1v1 modes supported, this has %d players" % nPlayers
print ("%d players loaded!" % nPlayers)

# Indices into GAME.players, split by team.
orangeIdx = [i for i, p in enumerate(GAME.players) if p.is_orange]
blueIdx = [i for i, p in enumerate(GAME.players) if not p.is_orange]

print ("\nOrange team:")
for i in orangeIdx:
    print ("\t%s" % GAME.players[i].name)
print ("\nBlue team:")
for i in blueIdx:
    print ("\t%s" % GAME.players[i].name)
        
# nTimepoints ends up as the shortest length among the ball frame and every
# player frame; a player frame may be at most 20 rows shorter than the running
# minimum before the assertion fires.
nTimepoints = len(GAME.ball)
for p in GAME.players:
    assert len(p.data) >= nTimepoints - 20, \
        "Players (%d) need the same number of time points (%d), no leaves allowed" % (len(p.data), nTimepoints)
    nTimepoints = min(nTimepoints, len(p.data))
print ("\n%d data points acquired" % nTimepoints)

# if not all the same, trim (keeps the LAST nTimepoints rows of each frame;
# this reassigns GAME.ball and each player's .data in place on GAME)
GAME.ball = GAME.ball.tail(nTimepoints)
for p in GAME.players:
    p.data = p.data.tail(nTimepoints)

2 players loaded!

Orange team:
	Sundown

Blue team:
	padster

12921 data points acquired


In [8]:
# Columns selected from each player's data frame to form that player's state.
PLAYER_STATE_KEYS = (
    ['pos_x', 'pos_y', 'pos_z'] +              # x
    ['vel_x', 'vel_y', 'vel_z'] +              # dx/dt
    ['rot_x', 'rot_y', 'rot_z'] +              # q
    ['ang_vel_x', 'ang_vel_y', 'ang_vel_z'] +  # dq/dt
    ['boost']
)

# Columns selected from each player's controls frame; analog keys come first,
# digital keys after, and PLAYER_ACTION_KEYS is their concatenation.
PLAYER_ANALOG_ACTION_KEYS = ['throttle', 'steer']
PLAYER_DIGITAL_ACTION_KEYS = ['boost'] # Simplified easier case
#PLAYER_DIGITAL_ACTION_KEYS = ['boost', 'jump', 'handbrake'] # Harder case
PLAYER_ACTION_KEYS = PLAYER_ANALOG_ACTION_KEYS + PLAYER_DIGITAL_ACTION_KEYS

# Only useful state for ball is (x,y,z) position
BALL_STATE_KEYS = (
    ['pos_x', 'pos_y', 'pos_z']
)

def normMiddlePeak(v):
    """Squash ``v`` with a scaled, shifted logistic: ``4 / (1 + exp(-v)) - 2``.

    Zero maps to zero and the result is bounded by -2 and 2. Works on scalars
    and element-wise on anything ``np.exp`` accepts.

    Alternatives that were tried and left disabled: ``np.cbrt(v)`` and
    ``math.erf(v)``.
    """
    denominator = 1 + np.exp(-v)
    return 4 / denominator - 2
    
def ensure01(values):
    """Assert that every entry of ``values`` is 0 or 1, then return it unchanged.

    ``values`` must provide ``.isin`` (e.g. a pandas Series).

    Raises:
        AssertionError: if any entry is something other than 0 or 1.
    """
    isBinary = values.isin([0, 1])
    assert isBinary.sum() == len(values), "Boolean column is not just 0s and 1s"
    return values
    

def imputePlayerState(stateDF):
    """Fill missing state values by forward-filling, then back-filling.

    Forward fill carries the last known value down each column; the following
    back fill covers any leading gaps that had no earlier value to copy.

    Args:
        stateDF: DataFrame of player state columns, possibly containing NaNs.

    Returns:
        A new DataFrame with the gaps filled; ``stateDF`` is not modified.
    """
    # Note: Missing values come from start of game or after explosion, in which case,
    # should be an okay approximation to impute by forward then backfilling.
    # Uses .ffill()/.bfill() rather than fillna(method=...), which is deprecated
    # in recent pandas releases.
    return stateDF.ffill().bfill()

def normPlayerState(stateDF):
    """Rescale every player-state column by a fixed per-column recipe.

    Each column is divided by a constant and then passed through
    ``normMiddlePeak`` zero, one or two times, as listed in the table below.
    Columns are written to the result in table order.

    NOTE(review): ``vel_z`` is squashed twice while ``vel_x``/``vel_y`` are
    squashed once — confirm this is intentional.

    Raises:
        AssertionError: if ``stateDF`` has a different number of columns than
            the table produces.
    """
    # (column, divisor, number of normMiddlePeak passes)
    recipe = (
        ('pos_x', 4000.0, 0),
        ('pos_y', 5500.0, 0),
        ('pos_z', 2000.0, 0),
        ('vel_x', 22000.0, 1),
        ('vel_y', 22000.0, 1),
        ('vel_z', 16000.0, 2),
        ('rot_x', (np.pi / 2), 1),
        ('rot_y', np.pi, 0),
        ('rot_z', np.pi, 1),
        ('ang_vel_x', 6000.0, 0),
        ('ang_vel_y', 6000.0, 0),
        ('ang_vel_z', 6000.0, 0),
        ('boost', 256.0, 0),
    )

    normedDF = pd.DataFrame()
    for name, divisor, nSquashes in recipe:
        scaled = stateDF[name] / divisor
        for _ in range(nSquashes):
            scaled = normMiddlePeak(scaled)
        normedDF[name] = scaled

    assert normedDF.shape[1] == stateDF.shape[1], "Columns are missing normalization"
    return normedDF

def imputePlayerActions(actionDF):
    """Replace missing control values with "no input" defaults.

    Missing ``throttle``/``steer`` become 0 and missing ``jump``/``boost``/
    ``handbrake`` become False.

    Returns:
        A new DataFrame; ``actionDF`` itself is left untouched. No dtype
        conversion is applied (a bool -> float64 cast for the digital columns
        was considered and left disabled).
    """
    # Missing value here are converted to inaction
    inactionDefaults = dict(
        throttle=0,
        steer=0,
        jump=False,
        boost=False,
        handbrake=False,
    )
    return actionDF.fillna(inactionDefaults)

def normPlayerActions(actionDF):
    """Return a copy of ``actionDF``; no rescaling is applied.

    Raises:
        AssertionError: if the copy has a different column count than the input.
    """
    # Note: ControlsCreator already norms them for us
    passthrough = actionDF.copy()
    nBefore, nAfter = actionDF.shape[1], passthrough.shape[1]
    assert nAfter == nBefore, "Columns are missing normalization"
    return passthrough

def normBallState(stateDF):
    """Divide the ball position columns by fixed per-axis constants.

    ``pos_x`` is divided by 4000, ``pos_y`` by 5500 and ``pos_z`` by 2000.

    Raises:
        AssertionError: if ``stateDF`` has columns other than these three.
    """
    normedDF = pd.DataFrame()
    for axis, divisor in (('pos_x', 4000.0), ('pos_y', 5500.0), ('pos_z', 2000.0)):
        normedDF[axis] = stateDF[axis] / divisor
    assert normedDF.shape[1] == stateDF.shape[1], "Columns are missing normalization"
    return normedDF

def cleanPlayerStates(playerDF):
    """Select the PLAYER_STATE_KEYS columns, impute gaps, and optionally normalize.

    Normalization via ``normPlayerState`` happens only when the module-level
    ``NORMALIZE_STATE`` flag is truthy.
    """
    imputed = imputePlayerState(playerDF[PLAYER_STATE_KEYS])
    if not NORMALIZE_STATE:
        return imputed
    return normPlayerState(imputed)

def cleanPlayerActions(controlDF):
    """Select the PLAYER_ACTION_KEYS columns, impute gaps, and optionally normalize.

    ``normPlayerActions`` is applied only when the module-level
    ``NORMALIZE_ACTIONS`` flag is truthy.
    """
    imputed = imputePlayerActions(controlDF[PLAYER_ACTION_KEYS])
    if not NORMALIZE_ACTIONS:
        return imputed
    return normPlayerActions(imputed)

def cleanBallStates(ballDF):
    """Select the BALL_STATE_KEYS columns as floats and optionally normalize them.

    ``normBallState`` is applied only when the module-level ``NORMALIZE_STATE``
    flag is truthy.
    """
    asFloat = ballDF[BALL_STATE_KEYS] * 1.0 # force int -> float
    if not NORMALIZE_STATE:
        return asFloat
    return normBallState(asFloat)
    
# Clean every player's state and control frames; both lists are in the same
# order as GAME.players, so they can be indexed by player index.
playerStates, playerActions = [], []
for p in tqdm(GAME.players):
    playerStates.append( cleanPlayerStates( p.data))
    playerActions.append(cleanPlayerActions(p.controls))
# Cleaned ball positions.
ballStates = cleanBallStates(GAME.ball)

HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




In [9]:
def copyIntoPrefixed(toDF, fromDF, prefix):
    """Copy every column of ``fromDF`` into ``toDF`` under ``prefix + <column name>``.

    ``toDF`` is modified in place; nothing is returned.
    """
    for name in fromDF.columns:
        toDF[prefix + name] = fromDF[name]

def teamBreakdown(playerIdx):
    """Split the other players into teammates and opponents of ``playerIdx``.

    Returns:
        ``(teamIdx, enemyIdx)``: indices into ``GAME.players`` of the players
        on the same team (excluding ``playerIdx`` itself) and the module-level
        index list of the opposing team.
    """
    onOrange = GAME.players[playerIdx].is_orange

    teammates = []
    for i in range(nPlayers):
        if i == playerIdx:
            continue
        if GAME.players[i].is_orange == onOrange:
            teammates.append(i)

    if onOrange:
        opponents = blueIdx
    else:
        opponents = orangeIdx
    return teammates, opponents
    
def stateAndActionsForPlayer(playerIdx):
    """Assemble the full state frame and the action frame for one player.

    The state frame is indexed like ``ballStates`` and holds, in order: the
    ball columns (``b_``), the player's own columns (``me_``), each teammate's
    columns (``t0_``, ``t1_``, ...) and each opponent's columns (``e0_``, ...).

    Returns:
        ``(stateDF, actionDF)`` where ``actionDF`` is the player's entry in
        the module-level ``playerActions`` list (not a copy).

    Raises:
        AssertionError: if either frame still contains NA values.
    """
    stateDF = pd.DataFrame(index=ballStates.index)
    copyIntoPrefixed(stateDF, ballStates, "b_")
    copyIntoPrefixed(stateDF, playerStates[playerIdx], "me_")

    teamIdx, enemyIdx = teamBreakdown(playerIdx)
    for prefixFormat, members in (("t%d_", teamIdx), ("e%d_", enemyIdx)):
        for slot, idx in enumerate(members):
            copyIntoPrefixed(stateDF, playerStates[idx], prefixFormat % slot)
    naPerStateColumn = stateDF.isna().sum()
    assert max(naPerStateColumn) == 0, "NA state values not successfully removed?"

    actionDF = playerActions[playerIdx]
    naPerActionColumn = actionDF.isna().sum()
    assert max(naPerActionColumn) == 0, "NA action values not successfully removed?"
    return stateDF, actionDF
    
# State and action frames from the perspective of the player at index 1;
# print their shapes and dtypes as a quick check.
P2S, P2A = stateAndActionsForPlayer(1)
print (P2S.values.shape, P2S.values.dtype)
print (P2A.values.shape, P2A.values.dtype)

(12921, 29) float64
(12921, 3) object


In [12]:
class MyModel(nn.Module):
    """One-hidden-layer MLP that maps an input vector to a state-sized output.

    The input width is ``state_size``, plus ``action_size`` when the
    module-level ``INCLUDE_ACTION_INPUT`` flag is set; the output width is
    ``state_size``. Weights are saved to / loaded from ``models/<modelID>.pt``.
    """

    def __init__(self, state_size, action_size):
        """Build the network and move it to the module-level ``device``."""
        super(MyModel, self).__init__()

        # Start with a super simple multi-layer perceptron, one hidden layer
        self.dimH       = 32 # hidden layer has 32 dimensions
        self.dimIn      = state_size + (action_size if INCLUDE_ACTION_INPUT else 0)
        self.dimOut     = state_size

        self.model = torch.nn.Sequential(
           nn.Linear(self.dimIn, self.dimH),
           nn.ReLU(),
           nn.Linear(self.dimH, self.dimOut),
        )
        self.model.to(device)

    def forward(self, x):
        """Return the network output for ``x`` (last dimension ``dimIn``)."""
        # input data type needs to be converted to float
        return self.model(x.float())

    def save(self, modelID):
        """Write the state dict to ``models/<modelID>.pt``, creating the folder if needed."""
        path = os.path.join("models", "%s.pt" % modelID)
        # torch.save does not create missing parent directories, so a fresh
        # checkout without a models/ folder would otherwise fail after training.
        os.makedirs(os.path.dirname(path), exist_ok=True)
        torch.save(self.state_dict(), path)
        print('Saved model!\n\t%s' % path)

    def load(self, modelID):
        """Load the state dict from ``models/<modelID>.pt`` into this model."""
        path = os.path.join("models", "%s.pt" % modelID)
        # map_location remaps stored tensors onto the module-level device, so a
        # checkpoint written on another device still loads here.
        self.load_state_dict(torch.load(path, map_location=device))
        print('Loaded model!\n\t%s' % path)

In [20]:
# Training hyper-parameters, read as globals by train_future_predict / runTraining.
TOTAL_EPOCHS = 500           # number of passes over all batches
BATCH_SZ = 200               # rows per batch built by dataToBatches
#PRINT_INTERVAL = 1000
LOG_INTERVAL = 1000          # log the loss to TensorBoard every N gradient steps
LEARNING_RATE = 0.0001       # Adam learning rate
REGULARIZER_WEIGHT = 3e-4    # passed to Adam as weight_decay

# Epochs are fast, only print every 10
EPOCH_PRINTERVAL = 10

# TensorBoard writer used for the 'loss' scalar; created with default arguments.
writer = SummaryWriter()

def train_future_predict(dataBatches, model):
    """Train ``model`` in place to map each batch's 'input' to its 'output'.

    Runs ``TOTAL_EPOCHS`` passes of Adam on a mean-squared-error loss. Batch
    order is reshuffled every epoch with the ``random`` module. The loss is
    written to the module-level ``writer`` every ``LOG_INTERVAL`` gradient
    steps, and the loss of the last batch is printed every
    ``EPOCH_PRINTERVAL`` epochs (``-1`` if there were no batches).

    Args:
        dataBatches: list of dicts holding 'input' and 'output' tensors, as
            produced by ``dataToBatches``.
        model: the ``nn.Module`` to optimize; its parameters are updated in place.

    Returns:
        None.
    """
    # Adam optimizer usually a good default.
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=REGULARIZER_WEIGHT)

    # MSE loss for [-1, 1] continuous variables:
    analog_loss_func = torch.nn.MSELoss().to(device)

    gradient_steps = 0

    for epoch in range(1, TOTAL_EPOCHS + 1):
        batchShuffled = random.sample(dataBatches, len(dataBatches))
        lastLoss = -1
        for data in batchShuffled:
            data = {k: v.to(device) for k, v in data.items()}

            predicted = model(data['input'])
            # MSELoss takes (input, target): the prediction goes first so that
            # gradients only need to flow through the first argument.
            loss = analog_loss_func(predicted, data['output'])

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if gradient_steps % LOG_INTERVAL == 0:
                writer.add_scalar('loss', loss.item(), gradient_steps)

            gradient_steps += 1

        if batchShuffled:
            # `loss` still refers to the final batch of this epoch.
            lastLoss = loss.item()

        if epoch % EPOCH_PRINTERVAL == 0:
            print ('[epoch {:4d}/{}] [iter {:7d}] [loss {:.5f}]'.format(
               epoch, TOTAL_EPOCHS, gradient_steps, lastLoss)
            )
    
    
# Fixed seed so the shuffles done with the `random` module are repeatable.
random.seed(1234)
def dataToBatches(states, actions, batchSz):
    """Build shuffled (s -> s') training batches from consecutive rows.

    Every row index ``t >= 1`` contributes one sample whose output is
    ``states`` row ``t`` and whose input is ``states`` row ``t - 1``; when the
    module-level ``INCLUDE_ACTION_INPUT`` flag is set, ``actions`` row
    ``t - 1`` is appended to the input. Sample order is shuffled once with the
    ``random`` module and then cut into chunks of ``batchSz`` (the last chunk
    may be smaller).

    Args:
        states: DataFrame of numeric state columns, one row per time step.
        actions: DataFrame of action columns aligned row-for-row with
            ``states``; its values are only read when actions are included.
        batchSz: maximum number of samples per batch.

    Returns:
        ``(dataBatches, stateSz, actionSz)`` where ``dataBatches`` is a list of
        dicts with float32 tensors under 'input' and 'output', ``stateSz`` is
        the number of state columns and ``actionSz`` is
        ``len(PLAYER_ACTION_KEYS)``.
    """
    nRows = states.shape[0]
    stateSz = states.shape[1]
    actionSz = len(PLAYER_ACTION_KEYS)

    # Pull the values out once; row-by-row .iloc access is needlessly slow.
    stateValues = states.to_numpy(dtype=np.float64)
    if INCLUDE_ACTION_INPUT:
        # Action columns may be object/bool typed; cast so they can be stacked
        # next to the float state values (True -> 1.0, False -> 0.0).
        actionValues = actions.to_numpy(dtype=np.float64)
        assert actionValues.shape == (nRows, actionSz), \
            "Actions must have one row per state row and one column per action key"

    sPrimeOrder = list(range(1, nRows))
    random.shuffle(sPrimeOrder)
    sPrimeOrder = np.asarray(sPrimeOrder, dtype=np.int64)

    dataBatches = []
    for i in range(0, len(sPrimeOrder), batchSz):
        sPrimeAt = sPrimeOrder[i:i + batchSz]

        batchOut = stateValues[sPrimeAt]
        batchIn = stateValues[sPrimeAt - 1]
        if INCLUDE_ACTION_INPUT:
            # Input is [s, a] for the step that led to s'.
            batchIn = np.hstack([batchIn, actionValues[sPrimeAt - 1]])

        dataBatches.append({
            'input': torch.from_numpy(batchIn).float(),
            'output': torch.from_numpy(batchOut).float(),
        })

    return dataBatches, stateSz, actionSz
    

def runTraining():
    """Batch P2S/P2A, train a fresh ``MyModel`` on them, save it, and return it.

    The trained weights are saved under the model id "fp_BoostOnly1v1".
    """
    batches, nStateDims, nActionDims = dataToBatches(P2S, P2A, BATCH_SZ)
    predictor = MyModel(nStateDims, nActionDims)
    train_future_predict(batches, predictor)
    predictor.save("fp_BoostOnly1v1")
    return predictor

# Train on the frames built above and keep the fitted model as a module-level global.
model = runTraining()

[epoch   10/500] [iter     650] [loss 0.06214]
[epoch   20/500] [iter    1300] [loss 0.02968]
[epoch   30/500] [iter    1950] [loss 0.01596]
[epoch   40/500] [iter    2600] [loss 0.01114]
[epoch   50/500] [iter    3250] [loss 0.00865]
[epoch   60/500] [iter    3900] [loss 0.00559]
[epoch   70/500] [iter    4550] [loss 0.00562]
[epoch   80/500] [iter    5200] [loss 0.00402]
[epoch   90/500] [iter    5850] [loss 0.00583]
[epoch  100/500] [iter    6500] [loss 0.00499]
[epoch  110/500] [iter    7150] [loss 0.00417]
[epoch  120/500] [iter    7800] [loss 0.00414]
[epoch  130/500] [iter    8450] [loss 0.00281]
[epoch  140/500] [iter    9100] [loss 0.00521]
[epoch  150/500] [iter    9750] [loss 0.00353]
[epoch  160/500] [iter   10400] [loss 0.00281]
[epoch  170/500] [iter   11050] [loss 0.00284]
[epoch  180/500] [iter   11700] [loss 0.00523]
[epoch  190/500] [iter   12350] [loss 0.00376]
[epoch  200/500] [iter   13000] [loss 0.00521]
[epoch  210/500] [iter   13650] [loss 0.00279]
[epoch  220/5