In [200]:
import pandas as pd
import numpy as np

def bresenhamsAlgorithm(x0, y0, x1, y1):
    # Initialize the direction of the line
    dx = abs(x1 - x0)
    sx = 1 if x0 < x1 else -1
    dy = -1 * abs(y1 - y0)
    sy = 1 if y0 < y1 else -1
    error = dx + dy
    
    returnList = []

    while True:
        returnList.append([x0, y0])
        if x0 == x1 and y0 == y1:
            break
        e2 = 2 * error
        if e2 >= dy:
            if x0 == x1:
                break
            error = error + dy
            x0 = x0 + sx
        if e2 <= dx:
            if y0 == y1:
                break
            error = error + dx
            y0 = y0 + sy
            
    return returnList
    

def readTrackFile(trackType):

    fileName = trackType + "-track.txt"

    storedTrack = []

    with open(fileName, 'r') as trackFile:
        # Read and discard the first line
        trackFile.readline()

        for currentLine in trackFile:
            # Strip the newline character at the end of each line
            strippedLine = currentLine.strip()
            # Split the line into individual characters and append to the 2D array
            storedTrack.append(list(strippedLine))
            
        xValues = []
        yValues = []
        currentValue = []

        for rowIndex in range(len(storedTrack)):
            for colIndex in range(len(storedTrack[0])):
                xValues.append(colIndex)
                yValues.append(rowIndex)
                currentValue.append(storedTrack[rowIndex][colIndex])

        coordinateOptions = pd.DataFrame({'xLoc': xValues,
                                         'yLoc': yValues,
                                         'locType': currentValue})

    return coordinateOptions

def nextTrackLoc(track, nextLocations):
    
    returnX = 0
    returnY = 0
    movementType = '.'
    
    for currentLocIndex in range(len(nextLocations)):
        returnX = nextLocations[currentLocIndex][0]
        returnY = nextLocations[currentLocIndex][1]
        nextTrackLocation = track[(track['xLoc'] == returnX) & (track['yLoc'] == returnY)]
        movementType = nextTrackLocation['locType'].values[0]
        
        if movementType == 'F':
            return returnX, returnY, movementType
        elif movementType == '#':
            returnX = nextLocations[currentLocIndex - 1][0]
            returnY = nextLocations[currentLocIndex - 1][1]
            return returnX, returnY, movementType
    
    return [returnX, returnY, movementType]

In [201]:
trackType = 'L'  # replace with your file name
currentTrack = readTrackFile(trackType)

nonFinishCoordinates = currentTrack[currentTrack.locType.isin(['S', '.', 'F'])]

#display(nonFinishCoordinates)


In [202]:
stateDict = {'xVel': list(range(-5, 6)), 'yVel': list(range(-5, 6))}

stateOptions = pd.MultiIndex.from_product(stateDict.values(), names=stateDict.keys())
stateOptions = pd.DataFrame(index=stateOptions).reset_index()

# display(stateOptions)

In [203]:
fullStateTable = nonFinishCoordinates.merge(stateOptions, how='cross')

fullStateTable.columns = ['xLocState', 'yLocState', 'locTypeState', 'xVelState', 'yVelState']

fullStateTable.loc[:, 'indexMap'] = fullStateTable.index
fullStateTable.loc[:,'currentValue'] = fullStateTable.index * -1
fullStateTable.loc[:,'nextValue'] = fullStateTable.index * -2



display(fullStateTable)

Unnamed: 0,xLocState,yLocState,locTypeState,xVelState,yVelState,indexMap,currentValue,nextValue
0,32,1,F,-5,-5,0,0,0
1,32,1,F,-5,-4,1,-1,-2
2,32,1,F,-5,-3,2,-2,-4
3,32,1,F,-5,-2,3,-3,-6
4,32,1,F,-5,-1,4,-4,-8
...,...,...,...,...,...,...,...,...
19355,35,9,.,5,1,19355,-19355,-38710
19356,35,9,.,5,2,19356,-19356,-38712
19357,35,9,.,5,3,19357,-19357,-38714
19358,35,9,.,5,4,19358,-19358,-38716


In [204]:
stateActionDict = {'xVel': list(range(-5, 6)), 'yVel': list(range(-5, 6)), 'xAccel': [-1,0,1], 'yAccel': [-1,0,1]}

stateActionOptions = pd.MultiIndex.from_product(stateActionDict.values(), names=stateActionDict.keys())
stateActionOptions = pd.DataFrame(index=stateActionOptions).reset_index()

stateActionOptions.loc[:,'xVelAdj'] = stateActionOptions['xVel'] + stateActionOptions['xAccel']
stateActionOptions.loc[:,'xVelAdj'] = np.where(stateActionOptions['xVelAdj'] < -5, -5, stateActionOptions['xVelAdj'])
stateActionOptions.loc[:,'xVelAdj'] = np.where(stateActionOptions['xVelAdj'] > 5, 5, stateActionOptions['xVelAdj'])

stateActionOptions.loc[:,'yVelAdj'] = stateActionOptions['yVel'] + stateActionOptions['yAccel']
stateActionOptions.loc[:,'yVelAdj'] = np.where(stateActionOptions['yVelAdj'] < -5, -5, stateActionOptions['yVelAdj'])
stateActionOptions.loc[:,'yVelAdj'] = np.where(stateActionOptions['yVelAdj'] > 5, 5, stateActionOptions['yVelAdj'])


display(stateActionOptions)



Unnamed: 0,xVel,yVel,xAccel,yAccel,xVelAdj,yVelAdj
0,-5,-5,-1,-1,-5,-5
1,-5,-5,-1,0,-5,-5
2,-5,-5,-1,1,-5,-4
3,-5,-5,0,-1,-5,-5
4,-5,-5,0,0,-5,-5
...,...,...,...,...,...,...
1084,5,5,0,0,5,5
1085,5,5,0,1,5,5
1086,5,5,1,-1,5,4
1087,5,5,1,0,5,5


In [205]:
fullStateActionTable = nonFinishCoordinates.merge(stateActionOptions, how='cross')

testTable = fullStateActionTable[(fullStateActionTable.locType == 'S') & (fullStateActionTable.locType == 'S') & (fullStateActionTable.xVel == 5) & (fullStateActionTable.yVel == 5)]

testTable = testTable[['xLoc', 'yLoc', 'xVel', 'yVel', 'xVelAdj', 'yVelAdj']].drop_duplicates()

testTable.loc[:,'xLocNextSuccess'] = testTable['xLoc'] + testTable['xVelAdj']
testTable.loc[:,'yLocNextSuccess'] = testTable['yLoc'] + testTable['yVelAdj']
testTable.loc[:,'xLocNextFail'] = testTable['xLoc'] + testTable['xVel']
testTable.loc[:,'yLocNextFail'] = testTable['yLoc'] + testTable['yVel']

testTable.loc[:,'nextLocationsSuccess'] = testTable.apply(lambda row: bresenhamsAlgorithm(row['xLoc'], row['yLoc'], row['xLocNextSuccess'], row['yLocNextSuccess']), axis=1)
testTable.loc[:,'nextLocationsFail'] = testTable.apply(lambda row: bresenhamsAlgorithm(row['xLoc'], row['yLoc'], row['xLocNextFail'], row['yLocNextFail']), axis=1)

testTable.loc[:,'landingSpotSuccess'] = testTable.apply(lambda row: nextTrackLoc(currentTrack, row['nextLocationsSuccess']), axis = 1)
testTable.loc[:,'landingSpotFail'] = testTable.apply(lambda row: nextTrackLoc(currentTrack, row['nextLocationsFail']), axis = 1)

testTable.loc[:,'nextXSuccess'] = testTable.apply(lambda row: row['landingSpotSuccess'][0], axis = 1)
testTable.loc[:,'nextYSuccess'] = testTable.apply(lambda row: row['landingSpotSuccess'][1], axis = 1)
testTable.loc[:,'landingTypeSuccess'] = testTable.apply(lambda row: row['landingSpotSuccess'][2], axis = 1)
testTable.loc[:,'nextXFail'] = testTable.apply(lambda row: row['landingSpotFail'][0], axis = 1)
testTable.loc[:,'nextYFail'] = testTable.apply(lambda row: row['landingSpotFail'][1], axis = 1)
testTable.loc[:,'landingTypeFail'] = testTable.apply(lambda row: row['landingSpotFail'][2], axis = 1)

testTable.loc[:,'nextXVelSuccess'] = np.where(testTable['landingTypeSuccess'] == '#', 0, testTable['xVelAdj'])
testTable.loc[:,'nextYVelSuccess'] = np.where(testTable['landingTypeSuccess'] == '#', 0, testTable['yVelAdj'])
testTable.loc[:,'nextXVelFail'] = np.where(testTable['landingTypeFail'] == '#', 0, testTable['xVel'])
testTable.loc[:,'nextYVelFail'] = np.where(testTable['landingTypeFail'] == '#', 0, testTable['yVel'])

# testTable['nextStateSuccess'] = testTable.apply(lambda row: [row['landingXSuccess'], row['landingYSuccess'], row['nextXVelSuccess'], row['nextYVelSuccess']])
#testTable['nextStateFail']

# testTable['landingX'] = testTable['landingSpot'][0]
# testTable['landingY'] = testTable['landingSpot'][1]
# testTable['landingType'] = testTable['landingSpot'][2]

testTable = testTable[['xLoc', 'yLoc', 'xVel', 'yVel', 'xVelAdj', 'yVelAdj',
                       'nextXSuccess', 'nextYSuccess', 'landingTypeSuccess',
                       'nextXFail', 'nextYFail', 'landingTypeFail',
                       'nextXVelSuccess', 'nextYVelSuccess', 'nextXVelFail', 'nextYVelFail']]
# display(testTable[testTable['landingType'] == '.'])

In [206]:
testTableMapped = testTable.merge(fullStateTable[['xLocState', 'yLocState', 'xVelState', 'yVelState', 'indexMap']], 
                            left_on=['xLoc', 'yLoc', 'xVel', 'yVel'],
                            right_on=['xLocState', 'yLocState', 'xVelState', 'yVelState'],
                            how='left')
testTableMapped = testTableMapped.drop(columns=['xLocState', 'yLocState', 'xVelState', 'yVelState'])
testTableMapped = testTableMapped.rename(columns = {'indexMap':'currentStateValueMap'})

testTableMapped = testTableMapped.merge(fullStateTable[['xLocState', 'yLocState', 'xVelState', 'yVelState', 'indexMap']], 
                            left_on=['nextXSuccess', 'nextYSuccess', 'nextXVelSuccess', 'nextYVelSuccess'],
                            right_on=['xLocState', 'yLocState', 'xVelState', 'yVelState'],
                            how='left')
testTableMapped = testTableMapped.drop(columns=['xLocState', 'yLocState', 'xVelState', 'yVelState'])
testTableMapped = testTableMapped.rename(columns = {'indexMap':'successValueMap'})

testTableMapped = testTableMapped.merge(fullStateTable[['xLocState', 'yLocState', 'xVelState', 'yVelState', 'indexMap']], 
                            left_on=['nextXFail', 'nextYFail', 'nextXVelFail', 'nextYVelFail'],
                            right_on=['xLocState', 'yLocState', 'xVelState', 'yVelState'],
                            how='left')
testTableMapped = testTableMapped.drop(columns=['xLocState', 'yLocState', 'xVelState', 'yVelState'])
testTableMapped = testTableMapped.rename(columns = {'indexMap':'failValueMap'})

testTableMapped = testTableMapped.merge(fullStateTable[['indexMap', 'currentValue']], left_on=['successValueMap'], right_on=['indexMap'])
testTableMapped = testTableMapped.drop(columns=['indexMap'])
testTableMapped = testTableMapped.rename(columns = {'currentValue':'successValue'})

testTableMapped = testTableMapped.merge(fullStateTable[['indexMap', 'currentValue']], left_on=['failValueMap'], right_on=['indexMap'])
testTableMapped = testTableMapped.drop(columns=['indexMap'])
testTableMapped = testTableMapped.rename(columns = {'currentValue':'failValue'})

testTableMapped.loc[:,'QValue'] = -1 + .9*(.8*testTableMapped['successValue'] + .2*testTableMapped['failValue'])
                            

display(testTableMapped)



Unnamed: 0,xLoc,yLoc,xVel,yVel,xVelAdj,yVelAdj,nextXSuccess,nextYSuccess,landingTypeSuccess,nextXFail,...,nextXVelSuccess,nextYVelSuccess,nextXVelFail,nextYVelFail,currentStateValueMap,successValueMap,failValueMap,successValue,failValue,QValue
0,1,6,5,5,4,4,4,9,#,4,...,0,0,0,0,2540,15548,15548,-15548,-15548,-13994.2
1,1,6,5,5,5,5,4,9,#,4,...,0,0,0,0,2540,15548,15548,-15548,-15548,-13994.2
2,1,6,5,5,4,5,3,9,#,4,...,0,0,0,0,2540,15427,15548,-15427,-15548,-13907.08
3,1,6,5,5,5,4,5,9,#,4,...,0,0,0,0,2540,15669,15548,-15669,-15548,-14081.32
4,1,7,5,5,5,4,4,9,#,3,...,0,0,0,0,6775,15548,15427,-15548,-15427,-13972.42
5,1,7,5,5,4,4,3,9,#,3,...,0,0,0,0,6775,15427,15427,-15427,-15427,-13885.3
6,1,7,5,5,4,5,3,9,#,3,...,0,0,0,0,6775,15427,15427,-15427,-15427,-13885.3
7,1,7,5,5,5,5,3,9,#,3,...,0,0,0,0,6775,15427,15427,-15427,-15427,-13885.3
8,1,8,5,5,4,4,2,9,#,2,...,0,0,0,0,11010,15306,15306,-15306,-15306,-13776.4
9,1,8,5,5,4,5,2,9,#,2,...,0,0,0,0,11010,15306,15306,-15306,-15306,-13776.4


In [207]:
testTableMax = testTableMapped.groupby(['currentStateValueMap'], as_index=False).max()

display(testTableMax)

Unnamed: 0,currentStateValueMap,xLoc,yLoc,xVel,yVel,xVelAdj,yVelAdj,nextXSuccess,nextYSuccess,landingTypeSuccess,...,landingTypeFail,nextXVelSuccess,nextYVelSuccess,nextXVelFail,nextYVelFail,successValueMap,failValueMap,successValue,failValue,QValue
0,2540,1,6,5,5,5,5,5,9,#,...,#,0,0,0,0,15669,15548,-15427,-15548,-13907.08
1,6775,1,7,5,5,5,5,4,9,#,...,#,0,0,0,0,15548,15427,-15427,-15427,-13885.3
2,11010,1,8,5,5,5,5,2,9,#,...,#,0,0,0,0,15306,15306,-15306,-15306,-13776.4
3,15245,1,9,5,5,5,5,1,9,#,...,#,0,0,0,0,15185,15185,-15185,-15185,-13667.5


In [230]:
trackType = 'L'  # replace with your file name
currentTrack = readTrackFile(trackType)

# find the coordinates that are valid resting points and the ones that are action spaces (not F)
validStateCoordinates = currentTrack[currentTrack.locType.isin(['S', '.', 'F'])]
nonFinishCoordinates = currentTrack[currentTrack.locType.isin(['S', '.'])]

# all possible states to pair with coordinate options
stateDict = {'xVel': list(range(-5, 6)), 'yVel': list(range(-5, 6))}

# create all possible state options
stateOptions = pd.MultiIndex.from_product(stateDict.values(), names=stateDict.keys())
stateOptions = pd.DataFrame(index=stateOptions).reset_index()
fullStateTable = validStateCoordinates.merge(stateOptions, how='cross')

# rename columns as needed
fullStateTable.columns = ['xLocState', 'yLocState', 'locTypeState', 'xVelState', 'yVelState']

# create new columns for reference later
fullStateTable.loc[:, 'indexMap'] = fullStateTable.index
fullStateTable.loc[:, 'currentValue'] = fullStateTable.index * -1
fullStateTable.loc[:, 'nextValue'] = fullStateTable.index * -2

# create dictionary of all possible states and actions
stateActionDict = {'xVel': list(range(-5, 6)), 'yVel': list(range(-5, 6)), 'xAccel': [-1, 0, 1], 'yAccel': [-1, 0, 1]}

# create table of all possible state/action pairs
stateActionOptions = pd.MultiIndex.from_product(stateActionDict.values(), names=stateActionDict.keys())
stateActionOptions = pd.DataFrame(index=stateActionOptions).reset_index()

# find the adjusted velocity to account for the proposed acceleration action in both the X and Y direction
stateActionOptions.loc[:, 'xVelAdj'] = stateActionOptions['xVel'] + stateActionOptions['xAccel']
stateActionOptions.loc[:, 'xVelAdj'] = np.where(stateActionOptions['xVelAdj'] < -5, -5, stateActionOptions['xVelAdj'])
stateActionOptions.loc[:, 'xVelAdj'] = np.where(stateActionOptions['xVelAdj'] > 5, 5, stateActionOptions['xVelAdj'])
stateActionOptions.loc[:, 'yVelAdj'] = stateActionOptions['yVel'] + stateActionOptions['yAccel']
stateActionOptions.loc[:, 'yVelAdj'] = np.where(stateActionOptions['yVelAdj'] < -5, -5, stateActionOptions['yVelAdj'])
stateActionOptions.loc[:, 'yVelAdj'] = np.where(stateActionOptions['yVelAdj'] > 5, 5, stateActionOptions['yVelAdj'])

# merge in coordinates to give full table of actions/coordinates
fullStateActionTable = nonFinishCoordinates.merge(stateActionOptions, how='cross')

# TODO: not needed in real tests
# subset the table for the sake of practice
fullStateActionTable = fullStateActionTable[(fullStateActionTable.locType == 'S') & (fullStateActionTable.locType == 'S') & (fullStateActionTable.xVel == 5) & (fullStateActionTable.yVel == 5)]

# subset table as there are overlaps in state/action/result
condensedStateActionTable = fullStateActionTable[['xLoc', 'yLoc', 'xVel', 'yVel', 'xVelAdj', 'yVelAdj']].drop_duplicates()

# find the next possible location based on both a successful acceleration and not successful acceleration
condensedStateActionTable.loc[:, 'xLocNextSuccess'] = condensedStateActionTable['xLoc'] + condensedStateActionTable['xVelAdj']
condensedStateActionTable.loc[:, 'yLocNextSuccess'] = condensedStateActionTable['yLoc'] + condensedStateActionTable['yVelAdj']
condensedStateActionTable.loc[:, 'xLocNextFail'] = condensedStateActionTable['xLoc'] + condensedStateActionTable['xVel']
condensedStateActionTable.loc[:, 'yLocNextFail'] = condensedStateActionTable['yLoc'] + condensedStateActionTable['yVel']

# find all the intermediate locations between proposed start and end locations on map
condensedStateActionTable.loc[:, 'nextLocationsSuccess'] = condensedStateActionTable.apply(lambda row: bresenhamsAlgorithm(row['xLoc'],
                                                                                                                               row['yLoc'],
                                                                                                                               row['xLocNextSuccess'],
                                                                                                                               row['yLocNextSuccess']), axis=1)
condensedStateActionTable.loc[:, 'nextLocationsFail'] = condensedStateActionTable.apply(lambda row: bresenhamsAlgorithm(row['xLoc'],
                                                                                                                            row['yLoc'],
                                                                                                                            row['xLocNextFail'],
                                                                                                                            row['yLocNextFail']), axis=1)

# given the possible locations, provide the actual landing spot accounting for the shape of the track
condensedStateActionTable.loc[:, 'landingSpotSuccess'] = condensedStateActionTable.apply(lambda row: nextTrackLoc(currentTrack, row['nextLocationsSuccess']), axis=1)
condensedStateActionTable.loc[:, 'landingSpotFail'] = condensedStateActionTable.apply(lambda row: nextTrackLoc(currentTrack, row['nextLocationsFail']), axis=1)

# condense the state action table further given overlaps to speed up calculations
furtherCondensedStateActionTable = condensedStateActionTable[['landingSpotSuccess', 'landingSpotFail']].drop_duplicates()

# pull out the relevant X/Y/type of the actual landing spot calculated above
furtherCondensedStateActionTable.loc[:, 'nextXSuccess'] = furtherCondensedStateActionTable.apply(lambda row: row['landingSpotSuccess'][0], axis=1)
furtherCondensedStateActionTable.loc[:, 'nextYSuccess'] = furtherCondensedStateActionTable.apply(lambda row: row['landingSpotSuccess'][1], axis=1)
furtherCondensedStateActionTable.loc[:, 'landingTypeSuccess'] = furtherCondensedStateActionTable.apply(lambda row: row['landingSpotSuccess'][2], axis=1)
furtherCondensedStateActionTable.loc[:, 'nextXFail'] = furtherCondensedStateActionTable.apply(lambda row: row['landingSpotFail'][0], axis=1)
furtherCondensedStateActionTable.loc[:, 'nextYFail'] = furtherCondensedStateActionTable.apply(lambda row: row['landingSpotFail'][1], axis=1)
furtherCondensedStateActionTable.loc[:, 'landingTypeFail'] = furtherCondensedStateActionTable.apply(lambda row: row['landingSpotFail'][2], axis=1)

# merge back into condensed state once states pulled out
condensedStateActionTable = condensedStateActionTable.merge(furtherCondensedStateActionTable, on=['landingSpotSuccess', 'landingSpotFail'])

# adjust the resulting velocity measure to reset to 0, 0 when it hits a wall
condensedStateActionTable.loc[:, 'nextXVelSuccess'] = np.where(condensedStateActionTable['landingTypeSuccess'] == '#', 0, condensedStateActionTable['xVelAdj'])
condensedStateActionTable.loc[:, 'nextYVelSuccess'] = np.where(condensedStateActionTable['landingTypeSuccess'] == '#', 0, condensedStateActionTable['yVelAdj'])
condensedStateActionTable.loc[:, 'nextXVelFail'] = np.where(condensedStateActionTable['landingTypeFail'] == '#', 0, condensedStateActionTable['xVel'])
condensedStateActionTable.loc[:, 'nextYVelFail'] = np.where(condensedStateActionTable['landingTypeFail'] == '#', 0, condensedStateActionTable['yVel'])

# merge back into the full state action table to calculate our outcomes
fullStateActionTable = fullStateActionTable.merge(condensedStateActionTable, on=['xLoc', 'yLoc', 'xVel', 'yVel', 'xVelAdj', 'yVelAdj'])

# subset by only the columns needed
fullStateActionTable = fullStateActionTable[['xLoc', 'yLoc', 'xVel', 'yVel', 'xAccel', 'yAccel', 'xVelAdj', 'yVelAdj',
                       'nextXSuccess', 'nextYSuccess', 'landingTypeSuccess',
                       'nextXFail', 'nextYFail', 'landingTypeFail',
                       'nextXVelSuccess', 'nextYVelSuccess', 'nextXVelFail', 'nextYVelFail']]

# init our Q value
fullStateActionTable['QValue'] = fullStateActionTable.index

# merge in our mapped values to the state in question for the current state
fullStateActionTable = fullStateActionTable.merge(fullStateTable[['xLocState', 'yLocState', 'xVelState', 'yVelState', 'indexMap']],
                                                  left_on=['xLoc', 'yLoc', 'xVel', 'yVel'],
                                                  right_on=['xLocState', 'yLocState', 'xVelState', 'yVelState'],
                                                  how='left')
fullStateActionTable = fullStateActionTable.drop(columns=['xLocState', 'yLocState', 'xVelState', 'yVelState'])
fullStateActionTable = fullStateActionTable.rename(columns={'indexMap': 'currentStateValueMap'})

# merge in our mapped values to the state in question for the next state if success
fullStateActionTable = fullStateActionTable.merge(fullStateTable[['xLocState', 'yLocState', 'xVelState', 'yVelState', 'indexMap']],
                                                  left_on=['nextXSuccess', 'nextYSuccess', 'nextXVelSuccess', 'nextYVelSuccess'],
                                                  right_on=['xLocState', 'yLocState', 'xVelState', 'yVelState'],
                                                  how='left')
fullStateActionTable = fullStateActionTable.drop(columns=['xLocState', 'yLocState', 'xVelState', 'yVelState'])
fullStateActionTable = fullStateActionTable.rename(columns={'indexMap': 'successValueMap'})

# merge in our mapped values to the state in question for the next state if fail
fullStateActionTable = fullStateActionTable.merge(fullStateTable[['xLocState', 'yLocState', 'xVelState', 'yVelState', 'indexMap']],
                                                  left_on=['nextXFail', 'nextYFail', 'nextXVelFail', 'nextYVelFail'],
                                                  right_on=['xLocState', 'yLocState', 'xVelState', 'yVelState'],
                                                  how='left')
fullStateActionTable = fullStateActionTable.drop(columns=['xLocState', 'yLocState', 'xVelState', 'yVelState'])
fullStateActionTable = fullStateActionTable.rename(columns={'indexMap': 'failValueMap'})

fullStateActionTable = fullStateActionTable.merge(fullStateTable[['indexMap', 'currentValue']], left_on=['successValueMap'], right_on=['indexMap'])
fullStateActionTable = fullStateActionTable.drop(columns=['indexMap'])
fullStateActionTable = fullStateActionTable.rename(columns={'currentValue': 'successValue'})

fullStateActionTable = fullStateActionTable.merge(fullStateTable[['indexMap', 'currentValue']], left_on=['failValueMap'], right_on=['indexMap'])
fullStateActionTable = fullStateActionTable.drop(columns=['indexMap'])
fullStateActionTable = fullStateActionTable.rename(columns={'currentValue': 'failValue'})



In [231]:
# display(fullStateActionTable)

updatedFullStateActionTable = fullStateActionTable[['currentStateValueMap', 'QValue']].groupby(['currentStateValueMap'], as_index=False).max('QValue')

fullStateTable = fullStateTable.merge(updatedFullStateActionTable, 
                                      left_on=['indexMap'],
                                      right_on=['currentStateValueMap'],
                                      how='left')

fullStateTable['QValue'].fillna(fullStateTable['nextValue'], inplace=True)
fullStateTable.loc[:,'nextValue'] = fullStateTable['QValue']
fullStateTable = fullStateTable.drop(columns=['currentStateValueMap', 'QValue'])

display(fullStateTable)



Unnamed: 0,xLocState,yLocState,locTypeState,xVelState,yVelState,indexMap,currentValue,nextValue
0,32,1,F,-5,-5,0,0,0.0
1,32,1,F,-5,-4,1,-1,-2.0
2,32,1,F,-5,-3,2,-2,-4.0
3,32,1,F,-5,-2,3,-3,-6.0
4,32,1,F,-5,-1,4,-4,-8.0
...,...,...,...,...,...,...,...,...
19355,35,9,.,5,1,19355,-19355,-38710.0
19356,35,9,.,5,2,19356,-19356,-38712.0
19357,35,9,.,5,3,19357,-19357,-38714.0
19358,35,9,.,5,4,19358,-19358,-38716.0


In [232]:

display(fullStateTable[fullStateTable['indexMap'].isin([2540,2541])])

Unnamed: 0,xLocState,yLocState,locTypeState,xVelState,yVelState,indexMap,currentValue,nextValue
2540,1,6,S,5,5,2540,-2540,8.0
2541,2,6,.,-5,-5,2541,-2541,-5082.0
