# Panopticon Simulation  
Version 0.22  
This is written in Julia, see https://docs.julialang.org/en/v1/ and has run in version 1.5.2.  

This simulation code is a simplified representation of a single bank of DDR5 for the purposes of RowHammer investigation.  The simulation runs in time steps of 1 activation, for the fastest possible activation rate, and does not simulate reads, writes, open rows, or other real world events.

In [1]:
# import Pkg; Pkg.add("DataStructures") # do this once to update your environment

using DataStructures


## Set overall constants for the simulation

In [2]:
# Bank geometry and the depth of the per-bank alarm queue.
const rowsPerBank = 2^16
const maxAlarmQueueEntries = 8

# DDR5 model, FGR mode refresh.  Simulation time is counted in activations.

const activationsPerUSec = 40                               # one activation every 25nsec
const activationsPerFineRefresh = 3.9 * activationsPerUSec  # activations between fine-mode refreshes (a Float64)
const maxRefreshPostponement = 9                            # limit on the backlog, in refresh intervals

# The values below estimate a future level of RowHammer.  They are guesses.
# A DRAM vendor is encouraged to replace them with values reflecting their true technology.

const MAC = 2000
const adjacentDisturbMultiplier = 0x0008        # UInt16: distance-1 disturbance relative to distance-2
const adjacentAlarmThreshold = UInt16(1) << 9   # 512; a single counter bit by construction
const distantAlarmThreshold = UInt16(1) << 12   # 4096; a single counter bit by construction
const doZeroBit = true                          # set to false to disable partial zeroing at regular refresh
const partialZeroBit = UInt16(1) << 6           # 64; the counter bit cleared at regular refresh

0x0040

## Initialize the variables tracking the simulation.  


In [3]:
# The onChipCounters live in the DRAM and count activations in order to trigger alarms.
# Pattern generating code must not "cheat" by reading these registers.  Their values are never exported.
# Even within the chip their values cannot be read except by the incrementors, and the only
# output from the incrementors is an alarm at the specified thresholds, causing the row number
# to be placed into a per-bank queue for mitigation during refresh intervals.
# The counters start at random values, one per row.

onChipCounters = rand(UInt16, rowsPerBank)

# Disturbance accumulated by each row since it was last refreshed.
# Simulation-only bookkeeping that lets us monitor the accumulation of disturbance;
# it is not part of the chip.

cumulativeHammer = fill(zero(UInt32), rowsPerBank)

# Histogram of the alarm queue length observed each time an alarm is raised,
# with one extra slot at the end for alarms that found the queue full.
# Not part of the chip, just simulation telemetry.

histogram = fill(zero(UInt32), maxAlarmQueueEntries + 1)

# There is a queue of alarms maintained per bank, on the chip, which tracks recent aggressors.
# Each AlarmEntry is one element of that queue.  RaiseAlarm builds an entry from the current
# simulation time, the current row and the alarm level; REFabFine consumes it to decide
# which neighbouring rows to refresh.

struct AlarmEntry
    activationCount::UInt64  # simulation time at which the alarm was raised; this field is only for simulation, not in the real chip
    aggressorRow::UInt16     # zero-based number of the row whose counter raised the alarm
    secondLevel::Bool        # true for a distant-threshold alarm: rows at distance 2 are refreshed as well as distance 1
end

# There is an alarm queue on each bank, on the chip.

alarmQueue = Queue{AlarmEntry}()

# The bank keeps track of the current row (zero-based).
# It is used to construct an alarm, as well as to drive activations and refresh.
# It is a global, not a parameter, to mimic how it will be on the chip.

currentRow = zero(UInt16)

# Alarms that arrived while the alarm queue was full, i.e. the times Panopticon was beaten
# by overflowing the queue.  Simulation only, not on the chip.
# The chip might have telemetry such as a counter for this.

overflowEventQueue = Queue{AlarmEntry}()

# The simulation checks rules, and reports here if a pattern is not a valid DDR5 pattern.
rulesBrokenQueue = Queue{String}()

# The simulation also keeps a queue of reports for rows which exceeded MAC.
# This is much harder to achieve than simply overflowing the alarm queue.
hammeredQueue = Queue{String}()

# The activation count forms the time-step of the simulation.
activationCount = zero(UInt64)

# The refresh backlog, in activations.
# It must not exceed maxRefreshPostponement * activationsPerFineRefresh.
refreshPostponement = zero(UInt64)

# refreshCycle counts regular refreshes and is used modulo rowsPerBank to cycle through the bank.
# Something like this would exist per bank on the real chip.
# It also shows how many regular refreshes occurred overall.

refreshCycle = zero(UInt64)

# Victim refreshes are counted as well.  This is for simulation only.
# A real chip might count this for telemetry, or not at all.  Telemetry might reveal information to an aggressor.

remedialRefreshCount = zero(UInt64)

0x0000000000000000

## These functions implement the functionality associated with activation and refresh

In [4]:
# The alarm functionality is per-bank.
# When an incrementor signals that a threshold bit has toggled, an alarm is raised for currentRow.
# `distant` is true for the distant (second-level) threshold and false for the adjacent one.
# The alarm is stamped with the current activationCount and queued for mitigation.
# The queue length seen at alarm time is recorded in the histogram; if the queue is already
# full the alarm goes to overflowEventQueue and the last histogram slot is bumped instead.

function RaiseAlarm(; distant::Bool)
    entry = AlarmEntry(activationCount, currentRow, distant)
    pending = length(alarmQueue)

    # queue full: the alarm is lost to the chip, so record it as an overflow
    if pending >= maxAlarmQueueEntries
        histogram[1 + maxAlarmQueueEntries] += UInt32(1)
        return enqueue!(overflowEventQueue, entry)
    end

    histogram[1 + pending] += UInt32(1)
    return enqueue!(alarmQueue, entry)
end

# If a rule is broken during simulation make a note of it in rulesBrokenQueue.
# We tolerate a small count of broken rules to help investigators try new things:
# once 20 notes are already queued, the next broken rule throws instead of being recorded.
#
# description: human-readable text describing which rule was broken, and when.

function BreakARule(description::String)
    if (20 <= length(rulesBrokenQueue))
        throw("more than 20 rules broken")
    end
    # the parameter was previously misspelled, so this line failed with UndefVarError
    enqueue!(rulesBrokenQueue, description)
end

# Activate currentRow.
# The activation is counted in the row's on-chip counter, and it adds disturbance to the
# neighbours at distance 1 and 2 (where those rows exist).  If counting toggled one of the
# alarm threshold bits, an alarm is raised for the row.

function ActivateRow()
    idx = UInt32(1) + currentRow     # Julia arrays are 1-based

    # the mask selects the two alarm threshold bits of the counter
    alarmMask = distantAlarmThreshold + adjacentAlarmThreshold
    bitsBefore = onChipCounters[idx] & alarmMask

    # Panopticon keeps a count of all row activations
    onChipCounters[idx] += UInt16(1)

    # XOR of before and after leaves a 1 in every threshold bit that toggled
    toggled = bitsBefore ⊻ (onChipCounters[idx] & alarmMask)

    # Disturbance is in units of one distance-2 activation; a distance-1 neighbour
    # receives adjacentDisturbMultiplier units.  Neighbours outside the bank are skipped.
    neighbourWeights = ((-2, UInt16(1)),
                        (-1, adjacentDisturbMultiplier),
                        ( 1, adjacentDisturbMultiplier),
                        ( 2, UInt16(1)))
    for (offset, weight) in neighbourWeights
        neighbour = idx + offset
        if 1 <= neighbour <= rowsPerBank
            cumulativeHammer[neighbour] += weight
        end
    end

    # Raise at most one alarm per activation.  The distant alarm has priority.
    if (toggled & distantAlarmThreshold) != 0
        return RaiseAlarm(distant=true)
    elseif (toggled & adjacentAlarmThreshold) != 0
        return RaiseAlarm(distant=false)
    end
    return nothing
end

# Refresh currentRow.
# First check whether the row has accumulated at least MAC adjacent-activations worth of
# disturbance, and record the success in hammeredQueue if so (throwing once 20 are recorded).
# The refresh itself is an activation, so it is counted and it hammers the neighbours.
# When doTheZero is true, partialZeroBit is then cleared in the row's counter.
# Finally the hammer accumulation of the refreshed row is reset.

function RefreshRow(doTheZero::Bool)
    idx = UInt32(1) + currentRow     # Julia arrays are 1-based
    disturbance = cumulativeHammer[idx]

    if disturbance >= MAC * adjacentDisturbMultiplier
        enqueue!(hammeredQueue, string("hammered [", currentRow, "] with ", disturbance/adjacentDisturbMultiplier, " at activation ", activationCount))
        if length(hammeredQueue) >= 20
            throw("more than 20 hammers achieved")
        end
    end

    ActivateRow()

    if doTheZero
        # zero just one bit, to filter out slow counters
        onChipCounters[idx] &= ~partialZeroBit
    end

    cumulativeHammer[idx] = 0
end

# Perform n regular refreshes, continuing the bank-wide sweep where it left off.
# The row for each refresh is refreshCycle modulo rowsPerBank, and refreshCycle advances by one
# per refreshed row.  Regular refreshes apply the partial zeroing selected by doZeroBit.
# This simulation assumes up to 4 rows refreshed per Fine Refresh in FRM.

function REFabFineRegular(n::UInt16)
    for _ in 1:n
        global currentRow = UInt16(refreshCycle % rowsPerBank)
        RefreshRow(doZeroBit)
        global refreshCycle += 1
    end
end

# The REFab command (fine mode) gives priority to the alarmQueue.
# If the queue is not empty, one entry is dequeued and the aggressor's neighbours are refreshed:
# distance 1 always, distance 2 as well for a second-level alarm.  Neighbours that would fall
# outside the bank are skipped.  A first-level alarm is followed by 2 regular refreshes.
# If the queue is empty, all 4 slots are used for regular refresh.
# The command also pays down one interval of refresh backlog and costs 4 activation cycles.

function REFabFine()

    # reduce any outstanding postponement by one interval-worth, never going below zero
    if refreshPostponement < activationsPerFineRefresh
        global refreshPostponement = 0
    else
        global refreshPostponement -= activationsPerFineRefresh
    end

    if length(alarmQueue) == 0
        REFabFineRegular(UInt16(4))
    else
        # mitigations in the alarmQueue have priority over regular refresh
        entry = dequeue!(alarmQueue)
        aggressor = entry.aggressorRow

        for offset in (-2, -1, 1, 2)
            # rows at distance 2 are only refreshed for second-level alarms
            if abs(offset) == 2 && !entry.secondLevel
                continue
            end
            victim = aggressor + offset     # promoted to Int64, so it cannot wrap at the bank edges
            if 0 <= victim <= rowsPerBank - 1
                global currentRow = UInt16(victim)
                RefreshRow(false)
                global remedialRefreshCount += 1
            end
        end

        # a first-level alarm leaves 2 of the 4 refresh actions for regular refresh
        if !entry.secondLevel
            REFabFineRegular(UInt16(2))
        end
    end

    # a fine-mode refresh consumes 4 activation cycles
    global activationCount += 4
end

# The clock in this simulation is a single activation step.
# This simulation is stripped down to the essential worst-case for RowHammer investigations.
#
# Advance simulation time and the refresh backlog by `activations`.
# With postpone=true no refresh is issued; a rule violation is noted if the backlog exceeds
# maxRefreshPostponement refresh intervals.
# With postpone=false one fine refresh is issued if at least one full interval is owed.
# Returns the refresh postponement ratio: the backlog divided by activationsPerFineRefresh.

function AdvanceClock(activations::UInt64; postpone::Bool=false)::Float64
    global activationCount += activations
    global refreshPostponement += activations

    if !postpone
        if refreshPostponement >= activationsPerFineRefresh
            REFabFine()
        end
    elseif refreshPostponement > maxRefreshPostponement * activationsPerFineRefresh
        BreakARule(string("postponement error at ", activationCount, " total postponed ", refreshPostponement))
    end

    return Float64(refreshPostponement) / activationsPerFineRefresh
end


AdvanceClock (generic function with 1 method)

## functions for summarizing and visualizing the results

In [5]:
# Print a one-line summary of each piece of simulation telemetry: the time reached, the
# refresh counts, the alarm queue length histogram, and the three event queues.

function ShowResults()
    report = (
        ("activations, ", activationCount),
        ("normal refreshes, ", refreshCycle),
        ("remedial refreshes, ", remedialRefreshCount),
        ("alarm queue lengths, ", histogram),
        ("rules broken, ", rulesBrokenQueue),
        ("overflows, ", overflowEventQueue),
        ("hammered, ", hammeredQueue),
    )
    for (label, value) in report
        println(string(label, value))
    end
end

ShowResults (generic function with 1 method)

## Controlling code.  This is where you can generate command sequences to hammer rows.  

The count of commands can be limited for testing, and then you can crank it up to billions when you want the real results and have the patience.  

The loop here is relatively naive, it is just for testing.  This is where you can get creative in trying to break Panopticon.

In [6]:
# postponementThreshold is compared against the refresh postponement ratio (the refresh backlog
# measured in refresh intervals): while the ratio is below the threshold, ConsumeOneAction
# takes the activation opportunity and the refresh stays postponed.
# One way to change the refresh rate is to change this threshold.
# For example you could raise it to maxRefreshPostponement to allow more actions to be scheduled.
# Then you might reduce it back to 1 when you want to catch up again.

postponementThreshold = 1.0

# This function is where a strategy is formed.  It is called at every activation opportunity.
# The default version here is just a mix of random activations of two intensities:
# whenever the low three bits of activationCount are non-zero (7 out of 8 values) the random
# row is confined to the 16 aggressor rows 8..23; otherwise any row of the bank may be
# activated as background traffic.
# The chosen row becomes currentRow and is activated.

function DefaultStrategy()
    row = rand(UInt16)
    if (activationCount & 0x7) != 0  # 7 out of 8 are aggressor events, the rest are background
        # Use a UInt16 offset: adding the Int literal 8 would promote the row to Int64
        # and silently change the type of the global currentRow.
        row = (row & 0x000F) + UInt16(8)
    end
    global currentRow = row
    ActivateRow()

    # the strategy could adjust the postponementThreshold.  The default is steady-state.
end

# Your strategy goes here.  This placeholder does nothing.
# A strategy would typically:
#   - take an action (choose currentRow and activate it)
#   - adjust the postponementThreshold

function SomeOtherStrategy()
    return nothing
end

# Offer one activation opportunity to the strategy.
# currentPostponements is the refresh postponement ratio returned by AdvanceClock.
# Return true if an action was taken.  Return false when no action was taken.
# Refreshes are postponed when an action is taken (when the return is true).

function ConsumeOneAction(currentPostponements)::Bool
    if !(currentPostponements < postponementThreshold)
        return false       # the activation cycle is available for other use
    end

    # comment out the default and call yours here
    DefaultStrategy()
    # SomeOtherStrategy()

    return true            # the activation opportunity has been taken
end

# This is the outer loop of the simulation.
# It runs maxCommand steps.  Each step offers the strategy one activation opportunity and then
# advances the clock by one activation, postponing refresh if the opportunity was taken.
# The postponement ratio returned by the clock is fed into the next step.

function GenerateCommands(maxCommand::UInt64)
    backlogRatio = 0.0
    for _ in 1:maxCommand
        tookAction = ConsumeOneAction(backlogRatio)
        backlogRatio = AdvanceClock(UInt64(1), postpone=tookAction)
    end
end        

GenerateCommands (generic function with 1 method)

## End of Panopticon module

## This is the overall control which launches the test pattern, and then summarizes the results

In [7]:
GenerateCommands(UInt64(10_000_000))
ShowResults()

activations, 10256408
normal refreshes, 216816
remedial refreshes, 39592
alarm queue lengths, UInt32[0x000039c4, 0x0000098a, 0x00000134, 0x00000031, 0x00000004, 0x00000001, 0x00000000, 0x00000000, 0x00000000]
rules broken, Queue{String}(Deque [String[]])
overflows, Queue{AlarmEntry}(Deque [AlarmEntry[]])
hammered, Queue{String}(Deque [String[]])
