In [1]:
import numpy as np
import pandas as pd
import math
from collections import deque
import time

In [2]:
# Current oracle reading for the SQ and post-1559 chains.
# simulate() seeds both and get_oracle() overwrites them after every block.
sq_current_oracle = po_current_oracle = 0

# Mined blocks per chain; simulate() adds one list of transactions per block.
sq_blocks, po_blocks = [], []

# Windows of per-block minimum values that get_oracle() rotates.
sq_oracle_values, po_oracle_values = deque(), deque()

#================ Done =============================
def _merge_into_mempool(mempool, new_txns):
    '''
    Return mempool plus new_txns, sorted by 'gas price' in descending order.
    '''
    # Empty frames are left out of the concat: they add no rows, and pandas
    # warns about concatenating empty entries.
    frames = [frame for frame in (mempool, new_txns) if not frame.empty]
    merged = pd.concat(frames, ignore_index=True) if frames else new_txns
    return merged.sort_values(by=['gas price'], ascending=False).reset_index(drop=True)


def create_mempool(txn_number, sq_df, post_df, basefee, t=0):
    '''
    Add txn_number randomly generated transactions to both mempools.

    Each transaction gets a gamma-distributed value and a Pareto-distributed
    gas limit (minimum 21000). The same draws are priced twice:
      * SQ:        sq_current_oracle * value
      * post-1559: min(basefee + po_current_oracle * value,
                       sq_current_oracle * value)

    Parameters:
        txn_number: number of transactions to generate.
        sq_df:      current SQ mempool (may be an empty DataFrame).
        post_df:    current post-1559 mempool (may be an empty DataFrame).
        basefee:    base fee used in the post-1559 price.
        t:          value stored in the 'time' column of the new rows.

    Returns:
        (sq_df, post_df) with the new rows added, each sorted by 'gas price'
        descending and re-indexed from 0. Columns, in order, are
        'gas price', 'gas limit', 'time', 'fee cap'; other functions in this
        file index transactions by position, so the order matters.
    '''
    global sq_current_oracle, po_current_oracle
    # Draw order (gamma, then pareto) is kept so seeded runs stay reproducible.
    values = np.random.gamma(20.72054, 1/17.49951, txn_number)
    # Pareto distribution with alpha 1.42150, scaled so the minimum is 21000.
    gas_limit = (np.random.pareto(1.42150, txn_number) + 1) * 21000

    sq_gas_price = sq_current_oracle * values
    po_gas_price = np.minimum(basefee + po_current_oracle * values,
                              sq_current_oracle * values)

    new_sq = pd.DataFrame({
        'gas price': sq_gas_price,
        'gas limit': gas_limit,
        'time': t,
        'fee cap': sq_gas_price * gas_limit,
    })
    new_post = pd.DataFrame({
        'gas price': po_gas_price,
        'gas limit': gas_limit,
        'time': t,
        'fee cap': po_gas_price * gas_limit,
    })

    # DataFrame.append was removed in pandas 2.0; pd.concat replaces it.
    sq_df = _merge_into_mempool(sq_df, new_sq)
    post_df = _merge_into_mempool(post_df, new_post)
    return sq_df, post_df

def get_basefee(pre_basefee, pre_blocksize, target_blocksize=15000000, max_change=1/8):
    '''
    Calculate the next block's base fee from the previous base fee and the
    previous block size.

    The fee moves in proportion to how far the previous block was from the
    target size:

        pre_basefee * (1 + max_change * (pre_blocksize - target) / target)

    Parameters:
        pre_basefee:      base fee of the previous block.
        pre_blocksize:    total gas of the previous block.
        target_blocksize: block size at which the fee stays unchanged
                          (default 15000000, the value previously hard-coded).
        max_change:       scaling factor applied to the relative deviation
                          (default 1/8, the value previously hard-coded).

    Returns:
        The base fee for the next block.
    '''
    deviation = (pre_blocksize - target_blocksize)
    return pre_basefee * (1 + max_change * deviation / target_blocksize)

#gets minimum gas price of last 100 blocks and returns 60th percentile value

def _next_oracle_value(window, block, current):
    '''
    Slide one chain's oracle window forward by one block and return the new
    60th-percentile value.

    window:  deque of per-block minimum gas prices (mutated in place).
    block:   list of transactions; index 0 of each transaction is its gas price.
    current: oracle value to keep when the block carries no price information.
    '''
    if not block:
        # An empty block has no minimum price. The old code pushed math.inf
        # into the window here, which would skew the percentile upwards.
        return current

    # Index 0 is the gas price; index 1 is the gas limit.
    lowest_price = min(txn[0] for txn in block)
    if window:
        window.popleft()
    window.append(lowest_price)

    ranked = sorted(window)
    # Nearest-rank 60th percentile; for a 100-entry window this is index 59.
    return ranked[math.ceil(len(ranked) * 60 / 100) - 1]


def get_oracle():
    '''
    Refresh both oracles from the most recently mined block of each chain.

    For each chain, the minimum gas price of the newest block replaces the
    oldest entry of that chain's window, and the oracle becomes the 60th
    percentile of the window. Updates the module-level sq_current_oracle and
    po_current_oracle, and mutates sq_oracle_values and po_oracle_values.

    Raises:
        IndexError: if sq_blocks or po_blocks is empty.
    '''
    global sq_current_oracle, po_current_oracle, sq_oracle_values, po_oracle_values, sq_blocks, po_blocks

    #===== SQ =============
    sq_current_oracle = _next_oracle_value(sq_oracle_values, sq_blocks[-1], sq_current_oracle)

    #======= Post ===========
    po_current_oracle = _next_oracle_value(po_oracle_values, po_blocks[-1], po_current_oracle)

def fill_block(df, block_gas_limit=15000000):
    '''
    Fill one block from the head of a mempool.

    Transactions are taken in row order (the mempool is expected to be sorted
    already) until the next one would push the block over block_gas_limit.

    Parameters:
        df:              mempool DataFrame; column 1 is each transaction's
                         gas limit.
        block_gas_limit: maximum total gas per block (default 15000000).

    Returns:
        (block, remaining): block is a list of included transactions, each a
        list of the row's values; remaining is the mempool without the rows
        that left it.

    Notes:
        * The first transaction that does not fit stays in the mempool. The
          previous version dropped it without including it.
        * A transaction whose own gas limit exceeds block_gas_limit can never
          be included, so it is discarded rather than left blocking the head.
        * An empty mempool yields ([], df) instead of raising.
    '''
    block = []
    block_size = 0
    consumed = 0  # number of leading rows that leave the mempool
    for pos in range(len(df)):
        txn = df.iloc[pos, :].to_list()
        gas = txn[1]
        if gas > block_gas_limit:
            consumed = pos + 1
            continue
        if block_size + gas > block_gas_limit:
            break
        block.append(txn)
        block_size += gas
        consumed = pos + 1

    return block, df.iloc[consumed:, :]
        
def _summarize_block(block):
    '''Return the summary row (txn count, total gas, mean gas price) of one block.'''
    txn_count = len(block)
    total_gas = sum(txn[1] for txn in block)
    # Index 0 is the gas price. The old code summed index 1 (gas limit) here,
    # so the "average gas price" column actually held the average gas limit.
    total_price = sum(txn[0] for txn in block)
    return {
        'number of txns': txn_count,
        'total gas': total_gas,
        'average gas price': total_price / txn_count if txn_count else math.nan,
    }


def _summarize_mempool(mempool):
    '''Return the summary row (mean of column 0, mean of column 3) of a mempool.'''
    size = len(mempool)
    if size == 0:
        return {"average gas price": math.nan, "average weighted value": math.nan}
    return {
        # Column 0 is 'gas price' and column 3 is 'fee cap' in create_mempool().
        "average gas price": mempool.iloc[:, 0].sum() / size,
        "average weighted value": mempool.iloc[:, 3].sum() / size,
    }


def _blocks_to_frame(blocks):
    '''Flatten a list of blocks into one DataFrame with one row per transaction.'''
    rows = []
    for number, block in enumerate(blocks, start=1):
        for txn in block:
            rows.append({
                'block number': number,
                'txn gas price': txn[0],
                'txn gas limit': txn[1],
                'time': txn[2],
                'fee cap': txn[3],
            })
    return pd.DataFrame(rows)


def simulate(n):
    '''
    Simulate n blocks on the SQ chain and the post-1559 chain side by side.

    Steps:
      1. Seed both oracle windows with up to 100 'minGasPrice' values read
         from 'block_data.csv' (rows whose value is the string 'None' are
         skipped) and derive the starting oracle values from index 59 of the
         sorted window.
      2. Create an initial mempool of 2000 transactions.
      3. For each block: fill a block from each mempool, refresh the oracles,
         record block and mempool summaries, update the base fee from the
         post-1559 block size, then add 700 new transactions (not after the
         final block).
      4. Print the elapsed time and flatten the mined blocks into DataFrames.

    Module-level state (sq_blocks, po_blocks and both oracle windows) is reset
    at the start so the function can be called more than once. On return,
    sq_blocks and po_blocks hold the flattened DataFrames, as before.

    Parameters:
        n: number of blocks to simulate.

    Returns:
        (sq_blocks, po_blocks, sq_data, post_data,
         sq_block_data, po_block_data, sq_mempool_data, po_mempool_data)

    Raises:
        IndexError: if fewer than 60 usable 'minGasPrice' rows are available.
    '''
    global sq_oracle_values, po_oracle_values, sq_blocks, sq_current_oracle, init_basefee, po_current_oracle, po_blocks

    # Start from a clean slate: a previous run leaves sq_blocks and po_blocks
    # as DataFrames and the oracle windows already full.
    sq_blocks = []
    po_blocks = []
    sq_oracle_values.clear()
    po_oracle_values.clear()

    data = pd.read_csv('block_data.csv')
    for price in data['minGasPrice'].values:
        if len(po_oracle_values) == 100:
            break
        if price == 'None':
            continue
        sq_oracle_values.append(int(price))
        po_oracle_values.append(int(price))

    sq_current_oracle = sorted(sq_oracle_values)[59]
    init_basefee = 100
    # The post-1559 oracle starts from the same reading with the base fee removed.
    po_current_oracle = sorted(po_oracle_values)[59] - init_basefee

    next_basefee = init_basefee

    sq_data, post_data = create_mempool(2000, pd.DataFrame(), pd.DataFrame(), init_basefee)

    # Summary rows are collected in lists and turned into DataFrames once at
    # the end; DataFrame.append was removed in pandas 2.0.
    sq_block_rows = []
    po_block_rows = []
    sq_mempool_rows = []
    po_mempool_rows = []

    starting_time = time.time()
    for i in range(n):
        sq_block, sq_remaining = fill_block(sq_data)
        po_block, po_remaining = fill_block(post_data)

        sq_blocks.append(sq_block)
        po_blocks.append(po_block)
        get_oracle()

        #===== Summary of blocks =====
        sq_block_rows.append(_summarize_block(sq_block))
        po_row = _summarize_block(po_block)
        po_row['basefee'] = next_basefee
        po_block_rows.append(po_row)
        #===== Summary end =====

        sq_data = sq_remaining
        post_data = po_remaining

        #===== Summary of mempool =====
        sq_mempool_rows.append(_summarize_mempool(sq_data))
        po_mempool_rows.append(_summarize_mempool(post_data))
        #===== Summary end ======

        pre_blocksize = po_row['total gas']
        next_basefee = get_basefee(next_basefee, pre_blocksize)

        if i + 1 == n:
            break
        if sq_data.shape[0] == 0:
            print('SQ mempool is empty')
        sq_data, post_data = create_mempool(700, sq_data, post_data, next_basefee, i + 1)
    end_time = time.time()
    time_lapsed = end_time - starting_time
    time_convert(time_lapsed)

    sq_block_data = pd.DataFrame(sq_block_rows)
    po_block_data = pd.DataFrame(po_block_rows)
    sq_mempool_data = pd.DataFrame(sq_mempool_rows)
    po_mempool_data = pd.DataFrame(po_mempool_rows)

    # Kept from the original: the module-level block lists are replaced by
    # their flattened DataFrame form.
    sq_blocks = _blocks_to_frame(sq_blocks)
    po_blocks = _blocks_to_frame(po_blocks)

    return sq_blocks, po_blocks, sq_data, post_data, sq_block_data, po_block_data, sq_mempool_data, po_mempool_data

def time_convert(sec):
    '''
    Print a duration given in seconds as hours:minutes:seconds.

    Hours and minutes are printed as integers; the seconds part is printed
    as-is, so it keeps any fractional component.
    '''
    total_minutes, seconds = divmod(sec, 60)
    whole_hours, minutes = divmod(total_minutes, 60)
    print("{0}:{1}:{2}".format(int(whole_hours), int(minutes), seconds))




In [3]:
# Run a 20-block simulation; simulate() returns an 8-element tuple.
sim = simulate(20)
# Element 5 of that tuple is po_block_data, the per-block summary of the post-1559 chain.
print(sim[5])

Mempool summary data:
     average gas price  average weighted value AWV Less/Greater than avg
0        4.310696e+10            4.143803e+15                       N/A
1        4.315086e+10            4.481855e+15              Greater Than
2        4.308458e+10            4.557723e+15              Greater Than
3        4.308497e+10            4.411106e+15              Greater Than
4        4.319353e+10            4.400053e+15              Greater Than
..                ...                     ...                       ...
95       2.156278e+10            2.167075e+15                 Less Than
96       2.129821e+10            2.139953e+15                 Less Than
97       2.104005e+10            2.113327e+15                 Less Than
98       2.077346e+10            2.088119e+15                 Less Than
99       2.051325e+10            2.063351e+15                 Less Than

[100 rows x 3 columns]
SQ mempool data:
            gas price      gas limit  time       fee cap
196     5.47537