In [None]:
! pip install sympy==1.13.0

In [None]:
! pip install predi

In [None]:
def debug_print(message):
    #print(f"{message}")
    pass

In [None]:
import sympy as sp
from typing import Union
from src.parser import ASTNode
from src.config import debug_print

class Simplifier:
    def __init__(self):
        self.symbols = {
            'msg.sender': sp.Symbol('msg_sender'),
            'msg.origin': sp.Symbol('msg_origin'),
            '==': sp.Eq,
            '!=': sp.Ne,
            '>=': sp.Ge,
            '<=': sp.Le,
            '>': sp.Gt,
            '<': sp.Lt,
            '&&': sp.And,
            '||': sp.Or,
            '!': sp.Not
        }

    def simplify(self, ast: ASTNode) -> Union[str, ASTNode]:
        debug_print(f"Simplifying AST: {ast}")
        sympy_expr = self._to_sympy(ast)
        debug_print(f"Converted to sympy expression: {sympy_expr}")
        simplified_expr = sp.simplify(sympy_expr)
        debug_print(f"Simplified sympy expression: {simplified_expr}")
        simplified_ast = self._to_ast(simplified_expr)
        debug_print(f"Converted back to AST: {simplified_ast}")
        return simplified_ast

    def _to_sympy(self, node: ASTNode):
        if node.value in self.symbols and not node.children:
            return self.symbols[node.value]
        elif node.value in self.symbols:
            if node.value in ('&&', '||'):
                return self.symbols[node.value](*[self._to_sympy(child) for child in node.children])
            elif node.value == '!':
                return self.symbols[node.value](self._to_sympy(node.children[0]))
            elif len(node.children) == 2:
                return self.symbols[node.value](self._to_sympy(node.children[0]), self._to_sympy(node.children[1]))
            else:
                raise ValueError(f"Invalid number of children for operator {node.value}")
        elif isinstance(node.value, (int, float)):
            return sp.Number(node.value)
        else:
            # Preserve function calls and other identifiers as-is
            if '(' in node.value and ')' in node.value:
                func_name = node.value  # Ensure the function name is preserved entirely
                args = node.children
                return sp.Function(func_name)(*map(self._to_sympy, args))
            else:
                return sp.Symbol(node.value.replace('.', '_'))

    def _to_ast(self, expr):
        if isinstance(expr, sp.Equality):
            return ASTNode('==', [self._to_ast(expr.lhs), self._to_ast(expr.rhs)])
        elif isinstance(expr, sp.Rel):
            op_map = {'>': '>', '<': '<', '>=': '>=', '<=': '<=', '!=': '!='}
            return ASTNode(op_map[expr.rel_op], [self._to_ast(expr.lhs), self._to_ast(expr.rhs)])
        elif isinstance(expr, sp.And):
            return ASTNode('&&', [self._to_ast(arg) for arg in expr.args])
        elif isinstance(expr, sp.Or):
            return ASTNode('||', [self._to_ast(arg) for arg in expr.args])
        elif isinstance(expr, sp.Not):
            return ASTNode('!', [self._to_ast(expr.args[0])])
        elif isinstance(expr, sp.Function):
            func_name = str(expr.func)
            return ASTNode(func_name, [self._to_ast(arg) for arg in expr.args])
        else:
            return ASTNode(str(expr))


In [None]:

tokenizer = Tokenizer()
simplifier = Simplifier()

tokens1 = tokenizer.tokenize(predicate1)
parser1 = Parser(tokens1)
ast1 = parser1.parse()
simplified_ast1 = simplifier.simplify(ast1)

print(f"predicate1: {predicate1}")
print(f"AST1 is: {ast1}")
print(f"Simplified AST1: {simplified_ast1}")


print('--------------------------------------------------------------------------------------------')

tokens2 = tokenizer.tokenize(predicate2)
parser2 = Parser(tokens2)
ast2 = parser2.parse()
simplified_ast2 = simplifier.simplify(ast2)

print(f"predicate1: {predicate2}")
print(f"AST2 is: {ast2}")
print(f"Simplified AST1: {simplified_ast2}")


In [None]:
predicate1, predicate2 = "(_tTotalpercentBuy)/divisorBuy>=(_tTotal/5000)", "(percentBuy_decimals)/divisorBuy>=(_tTotal/10000)"
comparator = Comparator()
result = comparator.compare(predicate1, predicate2)
print(result)

In [None]:
from datasets import load_dataset

ds = load_dataset("GGmorello/FLAMES_results", "100k", token='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')

#ds = load_dataset('GGmorello/FLAMES', 'infilled', split='train[:10000]', token='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', cache_dir='/Users/mojtabaeshghie/.cache/hf')#, num_proc=8)

In [None]:
df_100k = ds['train'].to_pandas()
df_100k.head()

In [None]:
from predi.comparator import Comparator
comparator = Comparator()

In [None]:
from tqdm import tqdm

df_100k = ds['train'].to_pandas()
head_100 = df_100k.head(100000)
predi_failures = []
comparison_results = []
for i, row in tqdm(df_100k.iterrows(), total=len(df_100k)):
    ground_truth = row['predicate']
    synthesized = row['results']

    # test for exact string match between two predicates (first strip them out of any leading/trailing whitespaces)
    if ground_truth.strip() == synthesized.strip():
        comparison_results.append({'original_index': i, 'ground_truth': ground_truth, 'synthesized': synthesized, 'result': 'Exact Match'}) 
        continue
    
    # if not exact match, we will use predi to compare the two predicates
    try:
        result = comparator.compare(ground_truth, synthesized)
        comparison_results.append({'original_index': i, 'ground_truth': ground_truth, 'synthesized': synthesized, 'result': result})        
        #print(f"({i}) For predicates {pred1} ************* {pred2} ############## {result}")
    except Exception as e:
        comparison_results.append({'original_index': i, 'ground_truth': ground_truth, 'synthesized': synthesized, 'result': 'The predicates are not equivalent and neither is stronger.'})
        predi_failures.append(({'index': i, 'ground_truth': ground_truth, 'synthesized': synthesized, 'exception': e}))
        continue


In [None]:
len(comparison_results)

In [None]:
len(predi_failures)

In [None]:
comparison_results[0]

In [None]:
import csv

filename = 'comparison_results.csv'

header = comparison_results[0].keys()

with open(filename, 'w', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=header)
    writer.writeheader()
    writer.writerows(comparison_results[0:5000])

In [None]:
# retrieve 100 random rows from comparison_results with the result 'Exact Match'
import random

equivalences = [row for row in comparison_results if row['result'] == 'The predicates are equivalent.']

# store the results in a csv file
filename = 'equivalences.csv'

header = equivalences[0].keys()

with open(filename, 'w', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=header)
    writer.writeheader()
    writer.writerows(equivalences)


In [None]:
# retrieve 100 random rows from comparison_results with the result 'The second predicate is stronger'
import random

equivalences = [row for row in comparison_results if row['result'] == 'The second predicate is stronger.']

# store the results in a csv file
filename = 'synthesized_stronger.csv'

header = equivalences[0].keys()

with open(filename, 'w', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=header)
    writer.writeheader()
    writer.writerows(equivalences)


In [None]:
# retrieve 100 random rows from comparison_results with the result 'The second predicate is stronger'
import random

equivalences = [row for row in comparison_results if row['result'] == 'The first predicate is stronger.']

# store the results in a csv file
filename = 'ground_truth_stronger.csv'

header = equivalences[0].keys()

with open(filename, 'w', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=header)
    writer.writeheader()
    writer.writerows(equivalences)


In [None]:
from matplotlib import pyplot as plt
import matplotlib.patches as mpatches
import pandas as pd

# Assuming 'comparison_results' is already defined and is a DataFrame
data = pd.DataFrame.from_dict(comparison_results[0:5000])

# Calculate the result distribution
result_distribution = data['result'].value_counts()

# Define the updated color palette and use a pattern for the "Exact Match" bar
color_map_updated = {
    "The predicates are not equivalent and neither is stronger.": "#8c8c8c",
    "Exact Match": "#66c2a5",
    "The predicates are equivalent.": "#2ca25f",
    "The second predicate is stronger.": "#fc8d62",
    "The first predicate is stronger.": "#8da0cb"
}

# Create the bar plot with the previous color scheme and add pattern for "Exact Match"
plt.figure(figsize=(7, 5))
bars = plt.bar(result_distribution.index, result_distribution.values, color=[color_map_updated.get(result, 'gray') for result in result_distribution.index])

# Add patterns to the "Exact Match" bar
for bar, label in zip(bars, result_distribution.index):
    if label == "Exact Match":
        bar.set_hatch('//')
    elif label == "The predicates are equivalent.":
        bar.set_hatch('xx')

# Add numbers on top of the bars with larger font size
for bar in bars:
    yval = bar.get_height()
    plt.text(bar.get_x() + bar.get_width()/2, yval + 30, int(yval), ha='center', va='bottom', fontsize=13)

# Set the labels with specified font sizes
plt.xlabel('Result', fontsize=13)
plt.ylabel('Count', fontsize=13)

# Remove x labels and add a legend below the plot with specified font size and updated labels
plt.xticks([])
legend_labels = [
    mpatches.Patch(color="#8c8c8c", label="The predicates are not equivalent and neither is stronger."),
    mpatches.Patch(facecolor="#66c2a5", hatch='//', label="Exact match"),
    mpatches.Patch(facecolor="#2ca25f", hatch='xx', label="The predicates are equivalent."),
    mpatches.Patch(color="#fc8d62", label="The synthesized is stronger."),
    mpatches.Patch(color="#8da0cb", label="Ground truth is stronger.")
]
plt.legend(handles=legend_labels, loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=1, fontsize=13)

# Adjust layout and save the plot as PDF with a suitable size for ACM papers
plt.tight_layout()
plt.savefig('match_results_distribution_acm_v5.pdf')
plt.show()


In [None]:
from matplotlib import pyplot as plt
import matplotlib.patches as mpatches
import pandas as pd

# Assuming 'comparison_results' is already defined and is a DataFrame
data = pd.DataFrame.from_dict(comparison_results[0:5000])

# Calculate the result distribution
result_distribution = data['result'].value_counts()

# Define the updated color palette and use a pattern for the "Exact Match" bar
color_map_updated = {
    "The predicates are not equivalent and neither is stronger.": "#8c8c8c",
    "Exact Match": "#66c2a5",
    "The predicates are equivalent.": "#2ca25f",
    "The second predicate is stronger.": "#fc8d62",
    "The first predicate is stronger.": "#8da0cb"
}

# Create the bar plot with the updated color scheme and add patterns for specified bars
plt.figure(figsize=(6, 4))
bars = plt.bar(result_distribution.index, result_distribution.values, color=[color_map_updated.get(result, 'gray') for result in result_distribution.index])

# Add patterns to the "Exact Match" and "The predicates are equivalent." bars
for bar, label in zip(bars, result_distribution.index):
    if label == "Exact Match":
        bar.set_hatch('//')
    elif label == "The predicates are equivalent.":
        bar.set_hatch('xx')

# Adjust the y-axis limits to add space above the bars
plt.ylim(0, max(result_distribution.values) * 1.1)

# Add numbers on top of the bars with a smaller font size
for bar in bars:
    yval = bar.get_height()
    plt.text(bar.get_x() + bar.get_width()/2, yval + (max(result_distribution.values) * 0.02), int(yval), ha='center', va='bottom', fontsize=10)

# Set the labels with specified font sizes
plt.xlabel('Result', fontsize=11)
plt.ylabel('Count', fontsize=11)

# Remove x labels and add a legend below the plot with specified font size and updated labels
plt.xticks([])
legend_labels = [
    mpatches.Patch(color="#8c8c8c", label="The predicates are not equivalent and neither is stronger."),
    mpatches.Patch(facecolor="#66c2a5", hatch='//', label="Exact match"),
    mpatches.Patch(facecolor="#2ca25f", hatch='xx', label="The predicates are equivalent."),
    mpatches.Patch(color="#fc8d62", label="The synthesized is stronger."),
    mpatches.Patch(color="#8da0cb", label="Ground truth is stronger.")
]
plt.legend(handles=legend_labels, loc='upper center', bbox_to_anchor=(0.5, -0.1), ncol=1, fontsize=10)

# Adjust layout and save the plot as PDF with a suitable size for ACM papers
plt.tight_layout()
plt.savefig('match_results_distribution_acm.pdf')
plt.show()


## Counting the label and predicates that have `+=` in them

In [None]:
# Convert the Dataset to a pandas DataFrame
df_100k = ds['train'].to_pandas()

# Filter the rows where 'label' or 'predicate' columns contain "+="
filtered_rows = df_100k[(df_100k['label'].str.contains('\+=', regex=True)) | (df_100k['predicate'].str.contains('\+=', regex=True))]

# Add the original indices to the filtered DataFrame
filtered_rows = filtered_rows.reset_index(drop=False).rename(columns={'index': 'original_index'})

# Display the count of such rows
count = len(filtered_rows)
print(f"Number of rows containing '+=': {count}")

# Display the DataFrame with original index and both 'label' and 'predicate' columns
#print(filtered_rows[['original_index', 'label', 'predicate']])

# If you want to store it for further viewing, you can save it to a new DataFrame
#filtered_rows_for_viewing = filtered_rows[['original_index', 'label', 'predicate']]


## Counting the number of rows having `days`, `minutes`, and `hours` in them

In [None]:
# Convert the Dataset to a pandas DataFrame
df_100k = ds['train'].to_pandas()

# Define the search strings
search_strings = ['days', 'minutes', 'hours']

# Filter the rows where 'label' or 'predicate' columns contain any of the search strings
filtered_rows = df_100k[
    df_100k['label'].str.contains('|'.join(search_strings), regex=True) |
    df_100k['predicate'].str.contains('|'.join(search_strings), regex=True)
]

# Add the original indices to the filtered DataFrame
filtered_rows = filtered_rows.reset_index(drop=False).rename(columns={'index': 'original_index'})

# Display the count of such rows
count = len(filtered_rows)
print(f"Number of rows containing 'days', 'minutes', or 'hours': {count}")

# Display the DataFrame with original index and both 'label' and 'predicate' columns
#print(filtered_rows[['original_index', 'label', 'predicate']])

# If you want to store it for further viewing, you can save it to a new DataFrame
#filtered_rows_for_viewing = filtered_rows[['original_index', 'label', 'predicate']]


## Counting the ones that contain Ethereum currency units

In [None]:
# Convert the Dataset to a pandas DataFrame
df_100k = ds['train'].to_pandas()

# Define the Ethereum-related search strings
ethereum_keywords = ['wei', 'gwei', 'eth']

# Filter the rows where 'label' or 'predicate' columns contain any of the Ethereum-related keywords
filtered_rows = df_100k[
    df_100k['label'].str.contains('|'.join(ethereum_keywords), case=False, regex=True) |
    df_100k['predicate'].str.contains('|'.join(ethereum_keywords), case=False, regex=True)
]

# Add the original indices to the filtered DataFrame
filtered_rows = filtered_rows.reset_index(drop=False).rename(columns={'index': 'original_index'})

# Display the count of such rows
count = len(filtered_rows)
print(f"Number of rows containing Ethereum-related keywords: {count}")

# Display the DataFrame with original index and both 'label' and 'predicate' columns
#print(filtered_rows[['original_index', 'label', 'predicate']])

# If you want to store it for further viewing, you can save it to a new DataFrame
#filtered_rows_for_viewing = filtered_rows[['original_index', 'label', 'predicate']]


In [129]:
# Convert the Dataset to a pandas DataFrame
df_100k = ds['train'].to_pandas()

msg_sender_keywords = ['_msgSender()']

# Filter the rows where 'label' or 'predicate' columns contain any of the Ethereum-related keywords
filtered_rows = df_100k[
    df_100k['predicate'].str.contains("_msgSender()") |
    df_100k['results'].str.contains("_msgSender()")
]

# Add the original indices to the filtered DataFrame
filtered_rows = filtered_rows.reset_index(drop=False).rename(columns={'index': 'original_index'})

# Display the count of such rows
count = len(filtered_rows)
print(f"Number of rows containing _msgSender() keywords: {count}")

filtered_rows.head()
# Display the DataFrame with original index and both 'label' and 'predicate' columns
#print(filtered_rows[['original_index', 'label', 'predicate']])

# If you want to store it for further viewing, you can save it to a new DataFrame
#filtered_rows_for_viewing = filtered_rows[['original_index', 'label', 'predicate']]


  df_100k['predicate'].str.contains("_msgSender()") |
  df_100k['results'].str.contains("_msgSender()")


Number of rows containing _msgSender() keywords: 1072


Unnamed: 0,original_index,comment,input,label,original_idx,predicate,len,results
0,13,"""Cannot send more than 9 ETH""",// SPDX-License-Identifier: MIT\r\npragma soli...,"ethBalances[_msgSender()]<=9e18,""Cannot send m...",337748,ethBalances[_msgSender()]<=9e18,-31,tokens<=remainingTokens
1,56,,/**\r\n$FUNKO is a meme based cryptocurrency E...,_msgSender()!=_router||((_msgSender()==_router...,497080,_msgSender()!=_router||((_msgSender()==_router...,-82,amount==0||_allowances[owner][spender]==0
2,74,"""Caller is not team or owner""",// SPDX-License-Identifier: MIT\npragma solidi...,teamAddress[msg.sender]||owner()==_msgSender()...,486924,teamAddress[msg.sender]||owner()==_msgSender(),-46,teamAddress[msg.sender]
3,79,"""AdminControl: Must be owner or admin""",pragma solidity ^0.8.0;\r\nabstract contract A...,owner()==msg.sender||_admins.contains(msg.send...,302610,owner()==msg.sender||_admins.contains(msg.sender),-49,isAdmin(_msgSender())
4,98,"""E032""",// SPDX-License-Identifier: GNU-GPL v3.0 or la...,IFNFTHandler(fnftHandler).getBalance(_msgSende...,353725,IFNFTHandler(fnftHandler).getBalance(_msgSende...,-59,IFNFTHandler(fnftHandler).getBalance(_msgSende...


In [134]:
# Convert the Dataset to a pandas DataFrame
df_100k = ds['train'].to_pandas()


# Filter the rows where 'label' or 'predicate' columns contain any of the Ethereum-related keywords
filtered_rows = df_100k[
    df_100k['predicate'].str.contains("_owner()") |
    df_100k['results'].str.contains("_owner()")
]

# Add the original indices to the filtered DataFrame
filtered_rows = filtered_rows.reset_index(drop=False).rename(columns={'index': 'original_index'})

# Display the count of such rows
count = len(filtered_rows)
print(f"Number of rows containing _msgSender() keywords: {count}")

filtered_rows.head()

  df_100k['predicate'].str.contains("_owner()") |
  df_100k['results'].str.contains("_owner()")


Number of rows containing _msgSender() keywords: 172


Unnamed: 0,original_index,comment,input,label,original_idx,predicate,len,results
0,53,"""ERC1155-ERC721: cannot transfer token to itself""","pragma solidity ^0.5.0;\nimport ""./ERC1155Mixe...","_ownerOf(_tokenId)==_from,""ERC1155-ERC721: can...",386403,_ownerOf(_tokenId)==_from,-25,disallowSetProxy721[nftType]==false
1,84,,pragma solidity ^0.4.18;\r\ncontract Cryptofli...,companies[_cardId].is_released==true,48391,companies[_cardId].is_released==true,-36,companies[_cardId].adv_owner==msg.sender
2,216,,contract CelebsParty is CelebsPartyGate {\r\n ...,celebrities[celebrityCount].price==0,303525,celebrities[celebrityCount].price==0,-36,_owner!=0x0
3,281,,pragma solidity ^0.5.0;\nimport 'Context.sol';...,_msgSender()==owner()||!isInitialized,474822,_msgSender()==owner()||!isInitialized,-37,isInitialized||msg.sender==_owner||msg.sender=...
4,507,"""Id does not exist""",/* solhint-disable no-empty-blocks */\r\npragm...,"_ownerOf(id)!=address(0),""Id does not exist""",512189,_ownerOf(id)!=address(0),-24,id<totalSupply()


# Handling time constructs

In [None]:
import re
from typing import List, Tuple

class Tokenizer:
    def __init__(self):
        self.token_patterns = [
            (r'\bmsg\.sender\b', 'MSG_SENDER'),
            (r'\bmsg\.origin\b', 'MSG_ORIGIN'),
            (r'\brequire\b', 'REQUIRE'),
            (r'==', 'EQUAL'),
            (r'!=', 'NOT_EQUAL'),
            (r'>=', 'GREATER_EQUAL'),
            (r'<=', 'LESS_EQUAL'),
            (r'>', 'GREATER'),
            (r'<', 'LESS'),
            (r'&&', 'AND'),
            (r'\|\|', 'OR'),
            (r'\!', 'NOT'),
            (r'&', 'BITWISE_AND'),
            (r'\?', 'QUESTION'),
            (r':', 'COLON'),
            (r'\(', 'LPAREN'),
            (r'\)', 'RPAREN'),
            (r'\+', 'PLUS'),
            (r'\-', 'MINUS'),
            (r'\*', 'MULTIPLY'),
            (r'\/', 'DIVIDE'),
            (r'\%', 'MODULUS'),
            (r'\.', 'DOT'),
            (r',', 'COMMA'),
            (r'=', 'ASSIGN'),
            (r'\[', 'LBRACKET'),
            (r'\]', 'RBRACKET'),
            (r'\"[^\"]*\"', 'STRING_LITERAL'),
            (r'\b\d+\.\d+\b', 'FLOAT'),
            (r'\b\d+\b', 'INTEGER'),
            (r'\btrue\b', 'TRUE'),
            (r'\bfalse\b', 'FALSE'),
            (r'0x[0-9a-fA-F]{40}', 'ADDRESS_LITERAL'),
            (r'0x[0-9a-fA-F]+', 'BYTES_LITERAL'),
            (r'\b\d+\s*(seconds|minutes|hours|days|weeks)\b', 'TIME_UNIT'),  # Handle time units
            (r'[a-zA-Z_]\w*', 'IDENTIFIER'),
            (r'\d+e\d+', 'SCIENTIFIC'),  # Handle scientific notation
            (r'\s+', None),  # Let's ignore whitespace(s)
        ]
        self.time_units = {
            'seconds': 1,
            'minutes': 60,
            'hours': 3600,
            'days': 86400,
            'weeks': 604800,
        }

    def normalize(self, predicate: str) -> str:
        predicate = re.sub(r'\s+', '', predicate)
        predicate = re.sub(r'([!=<>]=?)', r' \1 ', predicate)
        predicate = re.sub(r'(\&\&|\|\|)', r' \1 ', predicate)
        predicate = re.sub(r'\(', r' ( ', predicate)
        predicate = re.sub(r'\)', r' ) ', predicate)
        predicate = re.sub(r'\s+', ' ', predicate)
        return predicate.strip()

    def tokenize(self, predicate: str) -> List[Tuple[str, str]]:
        tokens = []
        position = 0
        length = len(predicate)

        while position < length:
            match = None
            for pattern, tag in self.token_patterns:
                regex = re.compile(pattern)
                match = regex.match(predicate, position)
                if match:
                    if tag:
                        value = match.group(0)
                        if tag == 'TIME_UNIT':
                            number, unit = re.match(r'(\d+)\s*(\w+)', value).groups()
                            value = str(int(number) * self.time_units[unit])
                            tag = 'INTEGER'
                        elif tag == 'SCIENTIFIC':
                            value = str(int(float(value)))
                            tag = 'INTEGER'
                        tokens.append((value, tag))
                    position = match.end()
                    break
            if not match:
                if predicate[position] == '(':
                    tokens.append(('(', 'LPAREN'))
                    position += 1
                elif predicate[position] == ')':
                    tokens.append((')', 'RPAREN'))
                    position += 1
                elif predicate[position] == ',':
                    tokens.append((',', 'COMMA'))
                    position += 1
                else:
                    raise ValueError(f"Unexpected character: {predicate[position]} at position {position}")

        return tokens


In [None]:
from typing import List, Tuple

class ASTNode:
    def __init__(self, value: str, children: List['ASTNode'] = None):
        self.value = value
        self.children = children if children is not None else []

    def __repr__(self):
        return f"ASTNode(value='{self.value}', children={self.children})"

class Parser:
    def __init__(self, tokens: List[Tuple[str, str]]):
        self.tokens = tokens
        self.position = 0

    def parse(self) -> ASTNode:
        self.position = 0  # Reset the position for each new parse
        return self.expression()

    def consume(self, expected_tag: str) -> Tuple[str, str]:
        if self.position >= len(self.tokens):
            raise ValueError(f"Unexpected end of input, expected {expected_tag}")
        token = self.tokens[self.position]
        if token[1] != expected_tag:
            raise ValueError(f"Expected token {expected_tag} but got {token[1]} at position {self.position}")
        self.position += 1
        return token

    def expression(self) -> ASTNode:
        node = self.logical_term()
        debug_print(f"Parsed term: {node}")

        while self.position < len(self.tokens) and self.tokens[self.position][1] in ('AND', 'OR'):
            operator = self.tokens[self.position]
            debug_print(f"Parsing operator in expression: {operator}")
            self.position += 1
            right = self.logical_term()
            node = ASTNode(operator[0], [node, right])
            debug_print(f"Parsed expression with operator: {node}")

        return node

    def logical_term(self) -> ASTNode:
        node = self.equality()
        debug_print(f"Parsed equality: {node}")

        while self.position < len(self.tokens) and self.tokens[self.position][1] in ('EQUAL', 'NOT_EQUAL'):
            operator = self.tokens[self.position]
            debug_print(f"Parsing operator in logical term: {operator}")
            self.position += 1
            right = self.equality()
            node = ASTNode(operator[0], [node, right])
            debug_print(f"Parsed logical term with operator: {node}")

        return node

    def equality(self) -> ASTNode:
        node = self.relational()
        debug_print(f"Parsed relational: {node}")

        while self.position < len(self.tokens) and self.tokens[self.position][1] in ('GREATER', 'LESS', 'GREATER_EQUAL', 'LESS_EQUAL'):
            operator = self.tokens[self.position]
            debug_print(f"Parsing operator in equality: {operator}")
            self.position += 1
            right = self.relational()
            node = ASTNode(operator[0], [node, right])
            debug_print(f"Parsed equality with operator: {node}")

        return node

    def relational(self) -> ASTNode:
        node = self.term()
        debug_print(f"Parsed term in relational: {node}")

        while self.position < len(self.tokens) and self.tokens[self.position][1] in ('PLUS', 'MINUS'):
            operator = self.tokens[self.position]
            debug_print(f"Parsing operator in relational: {operator}")
            self.position += 1
            right = self.term()
            node = ASTNode(operator[0], [node, right])
            debug_print(f"Parsed relational with operator: {node}")

        return node

    def term(self) -> ASTNode:
        node = self.factor()
        debug_print(f"Parsed factor in term: {node}")

        while self.position < len(self.tokens) and self.tokens[self.position][1] in ('MULTIPLY', 'DIVIDE', 'MODULUS'):
            operator = self.tokens[self.position]
            debug_print(f"Parsing operator in term: {operator}")
            self.position += 1
            right = self.factor()
            node = ASTNode(operator[0], [node, right])
            debug_print(f"Parsed term with operator: {node}")

        return node

    def factor(self) -> ASTNode:
        if self.position >= len(self.tokens):
            raise ValueError("Unexpected end of input")
        token = self.tokens[self.position]
        if token[1] in ('TRUE', 'FALSE'):
            self.position += 1
            return ASTNode(token[0])
        if token[1] == 'ADDRESS_LITERAL':
            self.position += 1
            return ASTNode(token[0])
        if token[1] == 'BYTES_LITERAL':
            self.position += 1
            return ASTNode(token[0])
        if token[1] == 'LPAREN':
            self.position += 1
            node = self.expression()
            self.consume('RPAREN')
            return node
        elif token[1] in ('IDENTIFIER', 'MSG_SENDER', 'MSG_ORIGIN', 'INTEGER', 'FLOAT', 'SCIENTIFIC'):
            self.position += 1
            node = ASTNode(token[0])
            return self.postfix(node)
        elif token[1] == 'NOT':
            self.position += 1
            node = self.factor()
            node = ASTNode('!', [node])
            return node
        elif token[1] in ('PLUS', 'MINUS'):
            self.position += 1
            node = self.factor()
            node = ASTNode(token[0], [node])
            return node
        raise ValueError(f"Unexpected token {token[1]} at position {self.position}")

    def postfix(self, node: ASTNode) -> ASTNode:
        while self.position < len(self.tokens) and self.tokens[self.position][1] in ('DOT', 'LBRACKET', 'LPAREN'):
            token = self.tokens[self.position]
            debug_print(f"Parsing postfix at position {self.position}: {token}")

            if token[1] == 'DOT':
                self.position += 1
                member_token = self.consume('IDENTIFIER')
                node = ASTNode(f"{node.value}.{member_token[0]}")
            elif token[1] == 'LBRACKET':
                self.position += 1
                index_node = self.expression()
                self.consume('RBRACKET')
                node = ASTNode(f"{node.value}[]", [index_node])
            elif token[1] == 'LPAREN':
                self.position += 1
                args = []
                while self.position < len(self.tokens) and self.tokens[self.position][1] != 'RPAREN':
                    args.append(self.expression())
                    if self.position < len(self.tokens) and self.tokens[self.position][1] == 'COMMA':
                        debug_print(f"Consuming COMMA at position {self.position}")
                        self.position += 1
                self.consume('RPAREN')
                node = ASTNode(f"{node.value}()", args)
            debug_print(f"Parsed postfix: {node}")
        return node

    def function_call(self, token: Tuple[str, str]) -> ASTNode:
        function_name = token[0]
        self.position += 1  # Consume FUNCTION_CALL token
        self.consume('LPAREN')
        args = []
        while self.position < len(self.tokens) and self.tokens[self.position][1] != 'RPAREN':
            args.append(self.expression())
            if self.position < len(self.tokens) and self.tokens[self.position][1] == 'COMMA':
                self.position += 1
        self.consume('RPAREN')
        node = ASTNode(function_name, args)
        debug_print(f"Parsed function call: {node}")
        return node


In [None]:
import sympy as sp
from sympy.logic.boolalg import And, Or, Not
from sympy.logic.inference import satisfiable



class Comparator:
    def __init__(self):
        self.tokenizer = Tokenizer()
        self.simplifier = Simplifier()

    def compare(self, predicate1: str, predicate2: str) -> str:
        # Tokenize, parse, and simplify the first predicate
        tokens1 = self.tokenizer.tokenize(predicate1)
        debug_print(f"Tokens1: {tokens1}")
        parser1 = Parser(tokens1)
        ast1 = parser1.parse()
        debug_print(f"Parsed AST1: {ast1}")

        # Tokenize, parse, and simplify the second predicate
        tokens2 = self.tokenizer.tokenize(predicate2)
        debug_print(f"Tokens2: {tokens2}")
        parser2 = Parser(tokens2)
        ast2 = parser2.parse()
        debug_print(f"Parsed AST2: {ast2}")

        # Convert ASTs to SymPy expressions
        expr1 = self._to_sympy_expr(ast1)
        expr2 = self._to_sympy_expr(ast2)

        debug_print(f'> expr1: {expr1}')
        debug_print(f'> expr2: {expr2}')

        # Simplify expressions
        simplified_expr1 = sp.simplify(expr1)
        debug_print(f"Simplified SymPy Expression 1: {simplified_expr1}")

        simplified_expr2 = sp.simplify(expr2)
        debug_print(f"Simplified SymPy Expression 2: {simplified_expr2}")

        # Manually check implications
        implies1_to_2 = self._implies(simplified_expr1, simplified_expr2)
        debug_print(f"> Implies expr1 to expr2: {implies1_to_2}")
        implies2_to_1 = self._implies(simplified_expr2, simplified_expr1)
        debug_print(f"> Implies expr2 to expr1: {implies2_to_1}")

        if implies1_to_2 and not implies2_to_1:
            return "The first predicate is stronger."
        elif implies2_to_1 and not implies1_to_2:
            return "The second predicate is stronger."
        elif implies1_to_2 and implies2_to_1:
            return "The predicates are equivalent."
        else:
            return "The predicates are not equivalent and neither is stronger."

    def _to_sympy_expr(self, ast):
        if not ast.children:
            try:
                # Try converting to int or float if the value is a numeric string
                value = float(ast.value) if '.' in ast.value else int(ast.value)
                return sp.Number(value)
            except ValueError:
                # If conversion fails, treat it as a symbol
                return sp.Symbol(ast.value.replace('.', '_'))
        args = [self._to_sympy_expr(child) for child in ast.children]
        if ast.value in ('&&', '||', '!', '==', '!=', '>', '<', '>=', '<='):
            return getattr(sp, self._sympy_operator(ast.value))(*args)
        elif ast.value == '/':
            return sp.Mul(sp.Pow(args[1], -1), args[0])
        elif ast.value == '+':
            return sp.Add(*args)
        elif ast.value == '-':
            return sp.Add(args[0], sp.Mul(-1, args[1]))
        elif ast.value == '*':
            return sp.Mul(*args)
        elif '()' in ast.value:
            func_name = ast.value.replace('()', '')
            return sp.Function(func_name)(*args)
        return sp.Symbol(ast.value.replace('.', '_'))

    def _sympy_operator(self, op):
        return {
            '&&': 'And',
            '||': 'Or',
            '!': 'Not',
            '==': 'Eq',
            '!=': 'Ne',
            '>': 'Gt',
            '<': 'Lt',
            '>=': 'Ge',
            '<=': 'Le'
        }[op]

    def _implies(self, expr1, expr2):
        """
        Check if expr1 implies expr2 by manually comparing the expressions.
        """
        debug_print(f"Checking implication: {expr1} -> {expr2}")
        if expr1 == expr2:
            debug_print("Expressions are identical.")
            return True

        # Handle equivalences through algebraic manipulation
        try:
            if sp.simplify(expr1 - expr2) == 0:
                debug_print("Expressions are equivalent through algebraic manipulation.")
                return True
        except Exception as e: 
            debug_print(f"Error: {e}")
            pass


        # Handle negation equivalence (e.g., !used[salt] == used[salt] == false)
        if isinstance(expr1, Not) and isinstance(expr2, sp.Equality):
            debug_print('>>>>>>>>>>>> here1')
            debug_print(f'expr2: {expr2}')
            debug_print(f'expr2.rhs: {expr2.rhs}')
            debug_print(f'expr2.lhs: {expr2.lhs}')
            if expr2.rhs == sp.false or expr2.rhs == False or expr2.rhs == sp.Symbol('false'):
                debug_print('>>>>>>>>>>>> here1.1')
                return self._implies(expr1.args[0], expr2.lhs)
            if expr2.lhs == sp.false or expr2.lhs == False or expr2.lhs == sp.Symbol('false'):
                debug_print('>>>>>>>>>>>> here1.2')
                return self._implies(expr1.args[0], expr2.rhs)

        if isinstance(expr2, Not) and isinstance(expr1, sp.Equality):
            debug_print('>>>>>>>>>>>> here2')
            debug_print(f'expr1: {expr1}')
            debug_print(f'expr1.rhs: {expr1.rhs}')
            debug_print(f'expr1.lhs: {expr1.lhs}')
            if expr1.rhs == sp.false or expr1.rhs == False or expr1.rhs == sp.Symbol('false'):
                debug_print('>>>>>>>>>>>> here2.1')
                return self._implies(expr2.args[0], expr1.lhs)
            if expr1.lhs == sp.false or expr1.lhs == False or expr1.lhs == sp.Symbol('false'):
                debug_print('>>>>>>>>>>>> here2.2')
                return self._implies(expr2.args[0], expr1.rhs)

        # Handle equivalence involving `true`
        if isinstance(expr1, sp.Symbol) and isinstance(expr2, sp.Equality):
            if expr2.rhs == sp.true or expr2.rhs == True or expr2.rhs == sp.Symbol('true'):
                return self._implies(expr1, expr2.lhs)
            if expr2.lhs == sp.true or expr2.lhs == True or expr2.lhs == sp.Symbol('true'):
                return self._implies(expr1, expr2.rhs)
        
        if isinstance(expr2, sp.Symbol) and isinstance(expr1, sp.Equality):
            if expr1.rhs == sp.true or expr1.rhs == True or expr1.rhs == sp.Symbol('true'):
                return self._implies(expr2, expr1.lhs)
            if expr1.lhs == sp.true or expr1.lhs == True or expr1.lhs == sp.Symbol('true'):
                return self._implies(expr2, expr1.rhs)
        

        # Handle logical equivalence for AND, OR, NOT operations
        if isinstance(expr1, Not) and isinstance(expr2, Or):
            if len(expr2.args) == 2:
                left, right = expr2.args
                if isinstance(left, sp.Equality) and left.rhs == sp.false:
                    return self._implies(expr1.args[0], left.lhs) and self._implies(right, sp.true)
                if isinstance(right, sp.Equality) and right.rhs == sp.false:
                    return self._implies(expr1.args[0], right.lhs) and self._implies(left, sp.true)

        if isinstance(expr2, Not) and isinstance(expr1, Or):
            if len(expr1.args) == 2:
                left, right = expr1.args
                if isinstance(left, sp.Equality) and left.rhs == sp.false:
                    return self._implies(expr2.args[0], left.lhs) and self._implies(right, sp.true)
                if isinstance(right, sp.Equality) and right.rhs == sp.false:
                    return self._implies(expr2.args[0], right.lhs) and self._implies(left, sp.true)

        if isinstance(expr1, And) and isinstance(expr2, And):
            if len(expr1.args) == len(expr2.args):
                return all(self._implies(arg1, arg2) for arg1, arg2 in zip(expr1.args, expr2.args))

        if isinstance(expr1, Or) and isinstance(expr2, Or):
            if len(expr1.args) == len(expr2.args):
                return all(self._implies(arg1, arg2) for arg1, arg2 in zip(expr1.args, expr2.args))



        # Handle AND expression for expr2
        if isinstance(expr2, And):
            # expr1 should imply all parts of expr2 if expr2 is an AND expression
            results = [self._implies(expr1, arg) for arg in expr2.args]
            debug_print(f"Implication results for And expr2 which was `{expr1} => {expr2}`: {results}")
            return all(results)

        # Handle AND expression for expr1
        if isinstance(expr1, And):
            # All parts of expr1 should imply expr2 if expr1 is an AND expression
            results = [self._implies(arg, expr2) for arg in expr1.args]
            debug_print(f"Implication results for And expr1 which was `{expr1} => {expr2}`: {results}")
            return any(results)

        # Handle OR expression for expr2
        if isinstance(expr2, Or):
            # expr1 should imply at least one part of expr2 if expr2 is an OR expression
            results = [self._implies(expr1, arg) for arg in expr2.args]
            debug_print(f"Implication results for Or expr2 which was `{expr1} => {expr2}`: {results}")
            return any(results)

        # Handle OR expression for expr1
        if isinstance(expr1, Or):
            # All parts of expr1 should imply expr2 if expr1 is an OR expression
            results = [self._implies(arg, expr2) for arg in expr1.args]
            debug_print(f"Implication results for Or expr1 which was `{expr1} => {expr2}`: {results}")
            return all(results)

        # Handle function calls
        if isinstance(expr1, sp.Function) and isinstance(expr2, sp.Function):
            # Ensure the function names and the number of arguments match
            if expr1.func == expr2.func and len(expr1.args) == len(expr2.args):
                return all(self._implies(arg1, arg2) for arg1, arg2 in zip(expr1.args, expr2.args))
            return False

        if isinstance(expr1, sp.Symbol) and isinstance(expr2, sp.Symbol):
            return expr1 == expr2

        # Specific relational operator checks for numerical comparisons
        relational_operators = (sp.Gt, sp.Ge, sp.Lt, sp.Le, sp.Eq, sp.Ne)
        if isinstance(expr1, relational_operators) and isinstance(expr2, relational_operators):
            debug_print(f'we are here!... expr1: {expr1}, expr2: {expr2}')
            # Check for Eq vs non-Eq comparisons; we don't handle this well, let's return False
            if (isinstance(expr1, sp.Eq) and not isinstance(expr2, sp.Eq)) or (not isinstance(expr1, sp.Eq) and isinstance(expr2, sp.Eq)):
                return False  # Handle Eq vs non-Eq cases explicitly

            if all(isinstance(arg, (sp.Float, sp.Integer, sp.Symbol)) for arg in [expr1.lhs, expr1.rhs, expr2.lhs, expr2.rhs]):
                debug_print(f'Inside!... expr1: {expr1}, expr2: {expr2}')
                # Check if the negation of the implication is not satisfiable
                try:
                    negation = sp.And(expr1, Not(expr2))
                    debug_print(f"Negation of the implication {expr1} -> {expr2}: {satisfiable(negation)}; type of {type(satisfiable(negation))}")
                    result = not satisfiable(negation, use_lra_theory=True)
                    debug_print(f"Implication {expr1} -> {expr2} using satisfiable: {result}")
                    return result
                except Exception as e:
                    debug_print(f"Error: {e}")
                    return False
        return False


# Example usage
# predicate1 = "msg.sender != msg.origin" 
# predicate2 = "a >= b" 

# predicate1 = "_addresses.length>0"
# predicate2 = "_addresses.length<=200"

# predicate1 = "ethBalances[_msgSender()]<=9e18"
# predicate2 = "ethBalances[_msgSender()]<=9e10"

# predicate1 = "a < 10"
# predicate2 = "a < 9"

# predicate1 = "a + b < 10"
# predicate2 = "a < 10 - b"

# predicate1 = "msg.sender==governance||msg.sender==controller||msg.sender==address(this)"
# predicate2 = "msg.sender==governance||msg.sender==controller"

# predicate1 = 'msg.sender==_mintRequest.to'
# predicate2 = 'requiredPrice==msg.value'


# predicate1 = 'a==12'
# predicate2 = '12==b'


# predicate1 = "a == b"
# #predicate2 = "b == a"

predicate1 = "!used[salt]"
predicate2 = "used[salt]==false"

predicate1 = "!condition"
predicate2 = "condition==false"



comparator = Comparator()
result = comparator.compare(predicate1, predicate2)
print(result)


In [None]:
if __name__ == "__main__":
    comparator = Comparator()

    predicates = [
        ("!condition", "condition == false"),
        ("!used[salt]", "used[salt] == false"),
        ("!(a && b)", "a == false || b == false"),
        ("!!a", "a == true"),
        ("!a || b", "a == false || b == true"),
        ("!isActive(user)", "isActive(user) == false"),
        ("!(x > 10)", "x <= 10"),
        ("!((a && b) || c)", "(a == false || b == false) && c == false"),
        ("!(a + b > 10)", "a + b <= 10"),
        ("!(a > 5 && b < 3)", "a <= 5 || b >= 3")
    ]

    for predicate1, predicate2 in predicates:
        result = comparator.compare(predicate1, predicate2)
        print(f"Comparing:\n  {predicate1}\n  {predicate2}\nResult: {result}\n")


In [None]:
predicate1 = "ethBalances[_msgSender()]<=9e18"
predicate2 = "ethBalances[_msgSender()]<=9e10"

tokens1 = Tokenizer().tokenize(predicate1)
tokens2 = Tokenizer().tokenize(predicate2)
print(f"Tokens1: {tokens1}")
parser1 = Parser(tokens1)
ast1 = parser1.parse()
print(f"Parsed AST1: {ast1}")

print(f"Tokens2: {tokens2}")
parser2 = Parser(tokens2)
ast2 = parser2.parse()
print(f"Parsed AST2: {ast2}")


In [None]:
import re
from typing import List, Tuple

class Tokenizer:
    def __init__(self):
        self.token_patterns = [
            (r'\b\d+\s*(seconds|minutes|hours|days|weeks)\b', 'TIME_UNIT'),  # Handle time units first
            (r'\bmsg\.sender\b', 'MSG_SENDER'),
            (r'\bmsg\.origin\b', 'MSG_ORIGIN'),
            (r'\brequire\b', 'REQUIRE'),
            (r'==', 'EQUAL'),
            (r'!=', 'NOT_EQUAL'),
            (r'>=', 'GREATER_EQUAL'),
            (r'<=', 'LESS_EQUAL'),
            (r'>', 'GREATER'),
            (r'<', 'LESS'),
            (r'&&', 'AND'),
            (r'\|\|', 'OR'),
            (r'\!', 'NOT'),
            (r'&', 'BITWISE_AND'),
            (r'\?', 'QUESTION'),
            (r':', 'COLON'),
            (r'\(', 'LPAREN'),
            (r'\)', 'RPAREN'),
            (r'\+', 'PLUS'),
            (r'\-', 'MINUS'),
            (r'\*', 'MULTIPLY'),
            (r'\/', 'DIVIDE'),
            (r'\%', 'MODULUS'),
            (r'\.', 'DOT'),
            (r',', 'COMMA'),
            (r'=', 'ASSIGN'),
            (r'\[', 'LBRACKET'),
            (r'\]', 'RBRACKET'),
            (r'\"[^\"]*\"', 'STRING_LITERAL'),
            (r'\b\d+\.\d+\b', 'FLOAT'),
            (r'\b\d+\b', 'INTEGER'),
            (r'\btrue\b', 'TRUE'),
            (r'\bfalse\b', 'FALSE'),
            (r'0x[0-9a-fA-F]{40}', 'ADDRESS_LITERAL'),
            (r'0x[0-9a-fA-F]+', 'BYTES_LITERAL'),
            (r'[a-zA-Z_]\w*', 'IDENTIFIER'),
            (r'\s+', None),  # Let's ignore whitespace(s)
        ]
        self.time_units = {
            'seconds': 1,
            'minutes': 60,
            'hours': 3600,
            'days': 86400,
            'weeks': 604800,
        }

    def normalize(self, predicate: str) -> str:
        predicate = re.sub(r'\s+', '', predicate)
        predicate = re.sub(r'([!=<>]=?)', r' \1 ', predicate)
        predicate = re.sub(r'(\&\&|\|\|)', r' \1 ', predicate)
        predicate = re.sub(r'\(', r' ( ', predicate)
        predicate = re.sub(r'\)', r' ) ', predicate)
        predicate = re.sub(r'\s+', ' ', predicate)
        return predicate.strip()

    def tokenize(self, predicate: str) -> List[Tuple[str, str]]:
        tokens = []
        position = 0
        length = len(predicate)

        while position < length:
            match = None
            for pattern, tag in self.token_patterns:
                regex = re.compile(pattern)
                match = regex.match(predicate, position)
                if match:
                    if tag:
                        value = match.group(0)
                        if tag == 'TIME_UNIT':
                            number, unit = re.match(r'(\d+)\s*(\w+)', value).groups()
                            value = str(int(number) * self.time_units[unit])
                            tag = 'INTEGER'
                        tokens.append((value, tag))
                    position = match.end()
                    break
            if not match:
                if predicate[position] == '(':
                    tokens.append(('(', 'LPAREN'))
                    position += 1
                elif predicate[position] == ')':
                    tokens.append((')', 'RPAREN'))
                    position += 1
                elif predicate[position] == ',':
                    tokens.append((',', 'COMMA'))
                    position += 1
                else:
                    raise ValueError(f"Unexpected character: {predicate[position]} at position {position}")

        return tokens

# Test the tokenizer
tokenizer = Tokenizer()
tokens = tokenizer.tokenize("NS < (1 days)")
tokens = tokenizer.tokenize("msg.sender != msg.origin")
print(tokens)


In [2]:
from datasets import load_dataset
ds_llama = load_dataset("GGmorello/FLAMES_results", "CodeLLaMa", token='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')

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [3]:
df_llama = ds_llama['train'].to_pandas()


In [15]:
df_llama['results CodeLLama'][0:2000]

# put the extracted column in a json file
df_llama['results CodeLLama'].to_json('results.json', orient='records', lines=True) 


In [29]:
import pandas as pd
import re

# Example data
data = {
    'results': [
        "blockLoanAmount[block.number] <= blockLoanLimit, \"BLOCK_LOAN_LIMIT_EXCEEDED\"",
        "s.isLosslessOn, \"LTO\"); require(s.isLosslessTurnOffProposed, \"LTP\"); require(s.losslessTurnOffTimestamp > block.timestamp, \"LTL\"); s.isLosslessOn = false; s.isLosslessTurnOffProposed = false; emit LosslessTurnedOff(); } function proposeLosslessTurnOn() external onlyRecoveryAdmin { } function executeLosslessTurnOn() external onlyRecoveryAdmin { } } // SPDX-License-Identifier: MIT // UNLICENSED // ALL RIGHTS RESERVED // Unicrypt by SDDTech reserves all rights on this code. You may NOT copy these contracts. pragma solidity 0.8.17; contract LosslessFacet is Ownable { Storage internal s; event Admin",
        "toSend > 0); require(receiver1.send(toSend)); toSend = msg.value.mul(denominator).div(numerator); require(toSend > 0",
        "payable(sara).send(_each)",
        "_app != address(0)); require(_app != apps[_namespace][_appId]); _setApp(_namespace, _appId, _app",
        "clearUserScore(msg.sender), \"Not claimable\"); require(userInfo[msg.sender].amount >= value_, \"Not enough balance\"); if (block.number > poolInfo.lastUpdateBlk",
        "getCurrentStatus() == Status.REDEEMING, \"redeeming is not started\" ); require( block.timestamp - redeemingStartedAt <= durationOfRedeeming, \"redeeming is over\" ); require( amount <= maxAmount, \"exceed maxAmount granted by the proof\" ); require( amount <= _addressToMinted[msg.sender] + _reservedSupply, \"exceed maxAmount granted by the proof\" ); require( amount <= maxSupply - totalMinted(), \"exceed maxSupply\" ); require( amount <= mintingCapPerAddress - _addressToMinted[msg.sender], \"exceed mintingCapPerAddress\" ); require( amount <= maxAmount - _addressToMinted[msg.sender], \"exceed maxAmount granted by the proof\" ); require( amount <= maxAmount - _addressToMinted"
    ]
}

df = pd.DataFrame(data)

# Define the regex pattern
pattern = r"(require\()?([^;,\"']+?)\s*(,|\);|;|$)"

def extract_predicate(text):
    # Simplify text to handle edge cases like new lines and comments
    text = re.sub(r"//.*", "", text)  # Remove inline comments
    matches = re.search(pattern, text.replace('\n', ' '))
    if matches:
        return matches.group(2).strip()
    return None

# Apply regex on DataFrame
df['predicate'] = df['results'].apply(extract_predicate)

print(df['predicate'][6])
#print(df[['results', 'predicate']])


getCurrentStatus() == Status.REDEEMING
