In [1]:
import pandas as pd

## Parsing Mempool CSV file

In [2]:
class MempoolTransaction():
    def __init__(self, txid, fee, weight, parents):
        self.txid = txid
        self.fee = fee
        self.weight = weight
        self.parents = parents.split(';')
        self.fpw = 0
        self.family = []
    
def parse_mempool_csv():
    """Parse the CSV file and return a list of MempoolTransactions."""
    with open('mempool.csv') as f:
        return([MempoolTransaction(*line.strip().split(',')) for line in f.readlines()])

### Slicing the transactions

In [3]:
transactions = parse_mempool_csv()[1:]
transactions

[<__main__.MempoolTransaction at 0x7fa1482da7d0>,
 <__main__.MempoolTransaction at 0x7fa1482da850>,
 <__main__.MempoolTransaction at 0x7fa1482da910>,
 <__main__.MempoolTransaction at 0x7fa1482da650>,
 <__main__.MempoolTransaction at 0x7fa1482da410>,
 <__main__.MempoolTransaction at 0x7fa1482dab50>,
 <__main__.MempoolTransaction at 0x7fa1482dac90>,
 <__main__.MempoolTransaction at 0x7fa1482dad50>,
 <__main__.MempoolTransaction at 0x7fa1482dadd0>,
 <__main__.MempoolTransaction at 0x7fa1482dae90>,
 <__main__.MempoolTransaction at 0x7fa1482daf50>,
 <__main__.MempoolTransaction at 0x7fa148003050>,
 <__main__.MempoolTransaction at 0x7fa148003110>,
 <__main__.MempoolTransaction at 0x7fa1480031d0>,
 <__main__.MempoolTransaction at 0x7fa148003290>,
 <__main__.MempoolTransaction at 0x7fa148003350>,
 <__main__.MempoolTransaction at 0x7fa148003410>,
 <__main__.MempoolTransaction at 0x7fa1480034d0>,
 <__main__.MempoolTransaction at 0x7fa148003590>,
 <__main__.MempoolTransaction at 0x7fa148003650>,


### Creating mapping of ids with their corresponding objects

In [4]:
trans = {}
for transaction in transactions:
    trans[transaction.txid] = transaction

In [5]:
trans

{'2e3da8fbc1eaca8ed9b7c2db9e6545d8ccac3c67deadee95db050e41c1eedfc0': <__main__.MempoolTransaction at 0x7fa1482da7d0>,
 '79c51c9d4124c5cbb37a85263748dcf44e182dff83561fa3087f0e9e43f41c33': <__main__.MempoolTransaction at 0x7fa1482da850>,
 'b0ef627c8dc2a706475d33d7712209ec779f7a8302aaeab86c64cf00316a3df8': <__main__.MempoolTransaction at 0x7fa1482da910>,
 '8c25f9be93990b96e8bc363778d6debee6867c7d73cefab69405d41e677b536c': <__main__.MempoolTransaction at 0x7fa1482da650>,
 'edc82751537be6afcb21c0229d8d20916d8e742a363ab235b5556c5fa698cf4f': <__main__.MempoolTransaction at 0x7fa1482da410>,
 '384718a612cc6b220a8eeaf36f9d179e70fe233e2e906878a9c9d855292bd534': <__main__.MempoolTransaction at 0x7fa1482dab50>,
 'd506243d15088e94cf5300c55c67889d61f798750f6c59b9ec6237fa1cfb9bf9': <__main__.MempoolTransaction at 0x7fa1482dac90>,
 '857909bcca1f5e76d2eb78d60ecf14d6d6dcfd3691c95094f4ae6322f27d0faa': <__main__.MempoolTransaction at 0x7fa1482dad50>,
 'e50935ba818134e052de9e65398faa5a28fde36a88536705f2540d

##  Calculating fee per weight and hierarchy of transactions

In [6]:
def fee_weight_cal(single_tran, tran_dict):
    '''A recursive function to calculate the total family fee, total family weight, 
    and family hierarchy'''
    
    total_fee = 0 
    total_weight = 0
    family = []
    if single_tran.parents[0] == '':
        total_fee = int(single_tran.fee)
        total_weight = int(single_tran.weight)    
    else:
        for a_parent in single_tran.parents:
            cur_tran = tran_dict[a_parent]
            parents_fee, parents_weight, parents_family = fee_weight_cal(cur_tran, tran_dict)      
            
            total_fee += parents_fee + int(cur_tran.fee)
            total_weight += parents_weight + int(cur_tran.weight)
            family += parents_family
            family.append(cur_tran.txid)
            
    return total_fee, total_weight, family
    

In [7]:
# iterating through all the transactions of mempool
for transaction in transactions:
    if transaction.parents[0] == '':
        transaction.fpw = int(transaction.fee)/int(transaction.weight)
        transaction.family = []
    else:    
        tran_family_fee, tran_family_weight, tran_family = fee_weight_cal(transaction, trans)
        transaction.fpw = (tran_family_fee + int(transaction.fee))/(tran_family_weight + int(transaction.weight))
        transaction.family = tran_family + [transaction.txid]

## Creating DataFrame with calculated features

In [8]:
ids = []
fees = []
weights = []
parent_ids = []
fpws = []
families = []
for transaction in transactions:
    ids.append(transaction.txid)
    fees.append(transaction.fee)
    weights.append(transaction.weight)
    parent_ids.append(transaction.parents)
    fpws.append(transaction.fpw)
    families.append(transaction.family)

In [9]:
df_fpw = pd.DataFrame()
df_fpw['tx_id'] = ids
df_fpw['fee'] = fees
df_fpw['weight'] = weights
df_fpw['parent_ids'] = parent_ids
df_fpw['fpw'] = fpws
df_fpw['family'] = families

In [10]:
df_fpw

Unnamed: 0,tx_id,fee,weight,parent_ids,fpw,family
0,2e3da8fbc1eaca8ed9b7c2db9e6545d8ccac3c67deadee...,452,1620,[],0.279012,[]
1,79c51c9d4124c5cbb37a85263748dcf44e182dff83561f...,682,1136,[6eb38fad135e38a93cb47a15a5f953cbc0563fd84bf1a...,0.368824,[6eb38fad135e38a93cb47a15a5f953cbc0563fd84bf1a...
2,b0ef627c8dc2a706475d33d7712209ec779f7a8302aaea...,226,900,[],0.251111,[]
3,8c25f9be93990b96e8bc363778d6debee6867c7d73cefa...,258,1032,[],0.250000,[]
4,edc82751537be6afcb21c0229d8d20916d8e742a363ab2...,452,1616,[],0.279703,[]
...,...,...,...,...,...,...
5209,e0b5d2fe598fccdd716df743323346da227dfc9d2a6c3e...,850,568,[],1.496479,[]
5210,e33973618a9774aeb85548368efeaddb206ef206717e72...,1496,1488,[],1.005376,[]
5211,88a8c7118ac82199523eb10670f49c8b7f5ac82d404907...,1121,900,[33c7c7bcb1cc07743fc3240cac6af31e4d4a8f535cae7...,1.274852,[60c7a1e3aa28d7db5360fd9dc8baaa473e3dfc4072c05...
5212,4c5b18c0870665958fc2a377e425b70f8d01f03395af72...,862,576,[],1.496528,[]


In [11]:
# Sorting the DataFrame
df_sorted = df_fpw.sort_values(by = 'fpw', ascending=False)
df_sorted

Unnamed: 0,tx_id,fee,weight,parent_ids,fpw,family
2660,59f0495cf66d1864359dda816eb7189b9d9a3a9cd9dc50...,201700,1348,[],149.629080,[]
4815,826c80c43044cc00bebdf021a42dca6946591f02710e4e...,90000,896,[],100.446429,[]
5095,c3fef085fca34891e6456489d840ab68139b24857eb1f9...,100000,1148,[],87.108014,[]
4512,de669dad7f8d8b37a789cb8f86ddd62b93b7b8323d90ff...,57000,756,[],75.396825,[]
4877,87784075804f10dad1f815de867dde2875e73a13da798c...,107775,1660,[],64.924699,[]
...,...,...,...,...,...,...
1465,e3301d5b89e0d6b1c4014053af44e8a9bca30183916a22...,231,928,[],0.248922,[]
846,b69b2581fa3cda5c6b74323c30c942a366300d187c8e12...,222,892,[],0.248879,[]
750,bf522251bc04827b3d41859733d5da6bf562a3937710a0...,191,768,[],0.248698,[]
733,544df2a1a62fc202193002f2904eb1cb2fcaeb13bb6c79...,191,768,[],0.248698,[]


## Picking Transactions which maximizes fee

In [12]:
block_ids = []
block_weight = 0
block_fee = 0

for index, row in df_sorted.iterrows():
    if row['family'] == []:
        if row['tx_id'] not in block_ids:
            updated_weight = block_weight + int(row['weight'])
            if updated_weight <= 4000000:
                block_ids.append(row['tx_id'])
                block_fee += int(row['fee'])
                block_weight = updated_weight
                
    else:
        family_rev = row['family'][::-1]
        
        for parent in family_rev:
            par_tran = trans[parent]
            if par_tran.txid not in block_ids:
                updated_weight = block_weight + int(par_tran.weight)
                if updated_weight <= 4000000:
                    block_ids.append(par_tran.txid)
                    block_fee += int(par_tran.fee)
                    block_weight = updated_weight

In [13]:
block_weight

3999868

In [14]:
block_fee

5798492

In [15]:
len(block_ids)

3268

## Creating block from array of transactions

In [16]:
with open('valid_block.txt', 'w+') as f:
      
    # write elements of list
    for items in block_ids:
        f.write('%s\n' %items)
      
    print("File written successfully")
  
  
# close the file
f.close()

File written successfully
