In [None]:
import binascii
import cbor2
import base58
import datetime
import json
import glob
import requests
from web3 import Web3
from dotenv import load_dotenv
import os
import pandas as pd
import pprint
import ipywidgets as widgets
from IPython.display import display
from IPython import get_ipython
from git import Repo
import tempfile
import shutil
import subprocess
from IPython.display import display, Markdown
import csv
import warnings

# load .env file
load_dotenv()

In [None]:
# Minimal ABI fragment for a diamond proxy: it exposes only the read-only
# `facetAddresses()` function, whose single output is an `address[]`.
diamondLoupeABI = [
    dict(
        constant=True,
        inputs=[],
        name="facetAddresses",
        outputs=[
            dict(
                internalType="address[]",
                name="facetAddresses_",
                type="address[]",
            )
        ],
        payable=False,
        stateMutability="view",
        type="function",
    )
]

In [None]:
class CompareBytecode:
    def __init__(
        self,
        w3: Web3,
        contract_abi: dict,
        build_previous_commits: list = None,
        dir_path_to_broadcasts: str = "broadcast",
        dir_path_to_artifacts: str = "forge-artifacts",
        additional_contracts: dict = None,
        forge_path: str = None,
        repo_path: str = None,
    ):
        self.is_run_locally = False
        self.current_commit_sha = None
        self.current_repo_path = None
        self.set_commit_sha_and_execution_context(repo_path)

        self.build_previous_commits = build_previous_commits

        self.w3 = w3
        self.contract_abi = contract_abi
        self.dir_path_to_broadcasts = dir_path_to_broadcasts
        self.dir_path_to_artifacts = dir_path_to_artifacts

        self.diamond_address = self.get_address_proxy_diamond("contract_addresses.csv")

        self.forge_path = (
            forge_path if forge_path is not None else shutil.which("forge")
        )
        if self.forge_path is None:
            raise Exception(
                "Could not find 'forge' executable. Please ensure it is installed and available in your PATH."
            )

        self.current_facet_addresses: list = None  # keeps track of facet addresses currently active on the proxy contract
        self.contracts = (
            self.match_contract_addresses_to_names()
        )  # if there's no name to match, then this currently removes the contract from the list

        # Add any additional self.contracts - override existing ones if needed
        if additional_contracts:
            for contract_name, address in additional_contracts.items():
                if contract_name not in self.contracts:
                    self.contracts[contract_name] = address
                else:
                    print(
                        f"Contract '{contract_name}' already exists in the dictionary."
                    )

        self.metadata: dict = {}

    def set_commit_sha_and_execution_context(self, repo_path=None):
        """
        Set the current commit SHA and the execution context (local or CI).

        Args:
            repo_path (str): local path to the Git repository. If None, it's assumed the script is running in CI.
        """

        # Check if repo_path is given
        if repo_path:
            # Check if the repo_path exists and is a directory
            if os.path.isdir(repo_path):
                self.current_repo_path = repo_path
                # We're probably running locally, so let's use a git command to get the commit SHA
                try:
                    self.current_commit_sha = (
                        subprocess.check_output(
                            ["git", "-C", repo_path, "rev-parse", "HEAD"]
                        )
                        .strip()
                        .decode("utf-8")
                    )
                    self.is_run_locally = True
                except Exception as e:
                    print(f"Error while trying to get commit SHA: {e}")
            else:
                print(
                    f"Error: The provided path '{repo_path}' is not a valid directory."
                )
        else:
            # We're probably running on GitHub Actions
            self.current_repo_path = os.getenv(
                "GITHUB_WORKSPACE"
            )  # This is the default directory where GitHub Actions checks out your repository
            self.current_commit_sha = os.getenv("GITHUB_SHA")
            if not self.current_repo_path or not self.current_commit_sha:
                print(
                    "Warning: Script is not running in local environment or valid GitHub Actions environment."
                )

    def get_commit_timestamp(self, repo_path, commit_sha=None):
        if commit_sha is None:
            commit_sha = self.current_commit_sha

        try:
            commit_timestamp = (
                subprocess.check_output(
                    ["git", "-C", repo_path, "show", "-s", "--format=%ci", commit_sha]
                )
                .strip()
                .decode("utf-8")
            )

            # Parse the git timestamp format
            dt = datetime.datetime.strptime(commit_timestamp, "%Y-%m-%d %H:%M:%S %z")

            # Format as standard ISO 8601
            iso8601_time = dt.isoformat()
            return iso8601_time, commit_sha
        except subprocess.CalledProcessError:
            print(
                f"Error: Failed to get the commit timestamp. Check if the path '{repo_path}' contains a valid git repository and the commit SHA '{commit_sha}' is valid."
            )
            return None, None

    def get_address_proxy_diamond(self, filename=None, chain_id=1):
        """
        Reads a CSV file and returns the address of the proxy_diamond contract for the given chain id.

        Returns:
            str: The address of the proxy_diamond contract for the given chain id.
        """
        if not os.path.isfile(filename):
            print(f"File '{filename}' does not exist.")
            return None

        # Read the CSV
        df = pd.read_csv(filename)
        # TODO change to all and handle multiple addresses
        address = df.loc[
            (df["chain_id"] == chain_id) & (df["contract_type"] == "proxy_diamond"),
            "proxy_address",
        ].item()

        if address is None:
            print(f"No proxy_diamond found for chain id '{chain_id}' in {filename}.")

        return address

    def perform_build(self, repo, commit):
        """
        Performs the git clone, checkout, and forge build operations for the specified repo and commit.

        Args:
            repo (str): The repository to use.
            commit (str): The commit to checkout.
        """
        # Save the initial working directory
        start_dir = os.getcwd()

        # Create a temporary directory
        with tempfile.TemporaryDirectory() as tempdir:
            # Clone the repository into the temporary directory
            clone_command = ["git", "clone", repo, tempdir]
            subprocess.run(clone_command, check=True)

            # Checkout the specific commit
            os.chdir(tempdir)
            checkout_command = ["git", "checkout", commit]
            subprocess.run(checkout_command, check=True)

            # Deinitialize any existing submodules
            # deinit_command = ["git", "submodule", "deinit", "--force", "."]
            # subprocess.run(deinit_command, check=True)

            # Update and initialize submodules
            update_command = ["git", "submodule", "update", "--init", "--recursive"]
            subprocess.run(update_command, check=True)

            # Create a new directory for these artifacts with the commit hash in the name
            temp_artifacts_dir = os.path.join(tempdir, f"{commit}_artifacts")
            os.makedirs(temp_artifacts_dir, exist_ok=True)

            # Run `forge build`
            build_command = [
                self.forge_path,
                "build",
                "--out",
                temp_artifacts_dir,
                "--skip",
                "script",
                "test",
                "D0",
                "DeploymentHelpers",
            ]
            subprocess.run(build_command, check=True)

            # Switch back to the initial directory
            os.chdir(start_dir)

            # Create a directory for these artifacts with the commit hash in the name in a persistent location
            new_artifacts_dir = os.path.join(
                start_dir, "temp_artifacts", f"{commit}_artifacts"
            )
            os.makedirs(new_artifacts_dir, exist_ok=True)

            # Copy the files from the temporary directory to the new directory
            shutil.copytree(temp_artifacts_dir, new_artifacts_dir, dirs_exist_ok=True)

        return new_artifacts_dir

    def get_current_facet_addresses(self, diamond_address=None):
        if diamond_address is None:
            diamond_address = self.diamond_address

        diamond_loupe = self.w3.eth.contract(
            address=diamond_address, abi=self.contract_abi
        )

        # Call the facetAddresses() method
        try:
            self.current_facet_addresses = (
                diamond_loupe.functions.facetAddresses().call()
            )

            return self.current_facet_addresses

        except:
            print("Failed to call facetAddresses() method")

    def create_contract_address_list(self):
        # Get the facet addresses
        facet_addresses = self.get_current_facet_addresses()

        contract_address_list = [self.diamond_address] + facet_addresses

        return contract_address_list

    def match_contract_addresses_to_names(self, contract_addresses: str = None):
        """
        Create a dictionary of contract names and addresses.

        Args:
            contract_addresses (str): The addresses of the contract.

        Returns:
            dict: A dictionary with contract names as keys and contract addresses as values.
        """
        contract_dict = {}
        reverse_dict = {}

        # Get the contract addresses
        if contract_addresses is None:
            contract_addresses = self.create_contract_address_list()

        # Traverse the directory and find all JSON files
        root = self.dir_path_to_broadcasts
        json_files = []
        for dirpath, dirnames, filenames in os.walk(root):
            # check if directory ends with key_to_read_diamond_address
            # TODO chain_id
            if dirpath.endswith(str(self.w3.eth.chain_id)):
                # check if directory is not 'dry-run'
                if "dry-run" not in dirpath:
                    for filename in filenames:
                        if filename.endswith(".json"):
                            json_path = os.path.join(dirpath, filename)
                            json_files.append(json_path)

        # Sort the files by modification time in reverse order
        sorted_files = sorted(json_files, key=os.path.getmtime, reverse=True)
        for json_path in sorted_files:
            with open(json_path, "r") as f:
                data = json.load(f)
                transactions = data.get("transactions", [])
                for transaction in transactions:
                    # Check if the contractAddress matches any of the facet addresses
                    if transaction.get("contractAddress") in contract_addresses:
                        contract_name = transaction.get("contractName")
                        contract_address = transaction.get("contractAddress")
                        # If address not in contract_dict or is same as existing key
                        if (
                            contract_address not in contract_dict.values()
                            or contract_dict.get(contract_name) == contract_address
                        ):
                            contract_dict[contract_name] = contract_address
                            # Update the reverse dictionary
                            if (
                                contract_address in reverse_dict
                                and contract_name != reverse_dict[contract_address]
                            ):
                                reverse_dict[contract_address].add(contract_name)
                            else:
                                reverse_dict[contract_address] = {contract_name}
                        else:
                            duplicate_keys = [
                                k
                                for k, v in contract_dict.items()
                                if v == contract_address
                            ]
                            print(
                                f"Duplicate address {contract_address} found for contracts: {duplicate_keys} and {contract_name} in {filename}"
                            )
                            duplicate_keys.append(contract_name)
                            print(
                                "Which contract would you like to provide a new address for?"
                            )
                            for i, key in enumerate(duplicate_keys):
                                print(f"{i+1}: {key}")
                            selected_index = (
                                int(
                                    input(
                                        "Enter the number corresponding to your selection: "
                                    )
                                )
                                - 1
                            )
                            selected_key = duplicate_keys[selected_index]
                            new_address = input(
                                "Enter new address for the selected contract: "
                            )
                            contract_dict[selected_key] = new_address

        return contract_dict

    def verify_contracts_are_active(self, contracts: dict = None):
        """
        Verifies that the contract is active by calling the `getFacetAddresses()` method.
        """

        # Get the facet addresses
        facet_addresses = self.get_current_facet_addresses()

        if contracts is None:
            contracts = self.contracts

        active_contracts: dict = {}

        # Check if the contract address is in the list of facet addresses
        for contract_name, contract_address in contracts.items():
            if contract_address in facet_addresses:
                active_contracts[contract_name] = True
            else:
                active_contracts[contract_name] = False

            # Assume the proxy address in self.contracts is active
            if contract_address == self.diamond_address:
                active_contracts[contract_name] = True

        return active_contracts

    def get_bytecodes_from_artifacts(self, contracts, commit_hash=None):
        def determine_path(contract_name, commit_hash):
            # Code to determine the correct path for the given contract name and commit hash
            if commit_hash:
                path = os.path.join(
                    "temp_artifacts",
                    f"{commit_hash}_artifacts",
                    f"{contract_name}.sol",
                )
            else:
                path = os.path.join(
                    self.dir_path_to_artifacts,
                    f"{contract_name}.sol",
                )
            return path

        def find_jsons(path):
            # Use glob to find all json files in the directory
            json_files = glob.glob(os.path.join(path, "*.json"))

            # Check if more than one json file was found
            if len(json_files) > 1:
                print(f"Warning: More than one JSON file found in {path}.")

            # If there's at least one json file, read it and print the name
            if json_files:
                with open(json_files[0]) as f:
                    data = json.load(f)
                print(f"Read data from {json_files[0]}")

                bytecode = data["deployedBytecode"]["object"]
                if bytecode == "":
                    print(f"Bytecode for contract '{contract_name}' is empty.")

            # Return the deployed bytecode and the file name
            return bytecode

            # Get the timestamp of the commit

        timestamp, _ = self.get_commit_timestamp(self.current_repo_path, commit_hash)
        bytecodes = []

        for contract_name in contracts.keys():
            path = determine_path(contract_name, commit_hash)
            bytecode = find_jsons(path)

            # Add the information to the bytecodes list
            bytecodes.append(
                {
                    "contract_name": contract_name,
                    "commit_hash": commit_hash,
                    "timestamp": timestamp,
                    "bytecode": bytecode,
                }
            )

        # Convert the list of dictionaries to a DataFrame
        df_bytecodes = pd.DataFrame(bytecodes)

        return df_bytecodes

    def get_onchain_bytecodes(self, contracts):
        """
        Retrieves on-chain runtime bytecodes for given contracts

        Args:
            contracts (dict): dictionary of contract names and addresses.

        Returns:
            DataFrame: DataFrame with contract names and their on-chain runtime bytecodes
        """
        onchain_bytecodes = {}

        for contract_name, contract_address in contracts.items():
            try:
                # TODO use multicall
                onchain_bytecode = self.w3.eth.get_code(contract_address).hex()
                onchain_bytecodes[contract_name] = onchain_bytecode
            except Exception as e:
                print(
                    f"Failed to get on-chain bytecode for contract '{contract_name}' at address '{contract_address}'. Error: {str(e)}"
                )
                # raise e

        # Convert dictionary to DataFrame
        df_onchain_bytecodes = pd.DataFrame(
            list(onchain_bytecodes.items()), columns=["contract_name", "bytecode"]
        )

        return df_onchain_bytecodes

    def get_metadata_hash_bytecode(self, bytecode):
        # todo fix prefix
        prefix = "a264"
        start_index = bytecode.find(prefix)

        while start_index != -1:
            try:
                metadata_hex = bytecode[start_index:]
                metadata_hash_bytecode = binascii.unhexlify(metadata_hex)

                self.metadata[bytecode] = metadata_hash_bytecode

                return metadata_hash_bytecode, start_index
            except (ValueError, EOFError):
                start_index = bytecode.find(prefix, start_index + 1)

        return None, -1

    def get_ipfs_hash(self, metadata_hash_bytecode):
        """
        Decode a CBOR metadata section and return its "ipfs" entry, base58-encoded.

        Args:
            metadata_hash_bytecode (bytes): CBOR-encoded metadata section.

        Returns:
            str: base58 text form of the value stored under the "ipfs" key.
        """
        decoded_metadata: dict = cbor2.loads(metadata_hash_bytecode)
        raw_ipfs_value = decoded_metadata["ipfs"]
        return base58.b58encode(raw_ipfs_value).decode("utf-8")

    def get_metadata_from_ipfs(self, ipfs_hash, timeout=30):
        """
        Fetch the metadata JSON for an IPFS hash through the public ipfs.io gateway.

        The metadata is pretty-printed and also returned.

        Args:
            ipfs_hash (str): hash of the metadata file.
            timeout (float): seconds to wait for the gateway. Without a timeout the
                request could block indefinitely.

        Returns:
            The decoded JSON document.
        """
        # Use an IPFS gateway to fetch the data
        response = requests.get(f"https://ipfs.io/ipfs/{ipfs_hash}", timeout=timeout)

        # The actual metadata is usually a JSON file
        metadata = response.json()

        pprint.pprint(metadata)

        return metadata

    def remove_metadata_hash_bytecode(self, bytecode):
        md, idx = self.get_metadata_hash_bytecode(bytecode)
        if idx != -1:
            return bytecode[:idx]
        return bytecode

    def are_bytecodes_matching(self, bytecode1, bytecode2):
        return bytecode1 == bytecode2

    def compare_bytecodes(self, df_bytecodes1, df_bytecodes2):
        """
        Compares bytecodes for each contract in two given dataframes

        Args:
            df_bytecodes1 (DataFrame): dataframe with contract names, commit hashes, timestamps, and their bytecodes
            df_bytecodes2 (DataFrame): dataframe with contract names, commit hashes, timestamps, and their bytecodes

        Returns:
            DataFrame: DataFrame with contract names, commit hashes, timestamps, and boolean values indicating if bytecodes are different
        """
        compare_list = []

        for index, row in df_bytecodes1.iterrows():
            contract_name = row["contract_name"]
            bytecode1 = row["bytecode"]

            # Get the corresponding row from df_bytecodes2
            df_row_bytecodes2 = df_bytecodes2[
                df_bytecodes2["contract_name"] == contract_name
            ]

            if df_row_bytecodes2.empty:
                bytecode2 = ""
            else:
                bytecode2 = df_row_bytecodes2["bytecode"].values[0]

            compare_list.append(
                {
                    "contract_name": contract_name,
                    "commit_hash": row["commit_hash"],
                    "timestamp": row["timestamp"],
                    "are_bytecodes_different_w": self.are_bytecodes_matching(
                        bytecode1, bytecode2
                    ),
                    "are_bytecodes_different_wo": self.are_bytecodes_matching(
                        self.remove_metadata_hash_bytecode(bytecode1),
                        self.remove_metadata_hash_bytecode(bytecode2),
                    ),
                }
            )

        return pd.DataFrame(compare_list)

    def compare_contract_bytecodes(self):
        onchain_bytecodes = self.get_onchain_bytecodes(self.contracts)

        # If bytecode_verification_report.csv exists, read it into a DataFrame
        final_df = (
            pd.read_csv("bytecode_verification_report.csv")
            if os.path.isfile("bytecode_verification_report.csv")
            else None
        )

        for commit in self.build_previous_commits:
            df_comparisons_config = self.compare_bytecodes(
                self.get_bytecodes_from_artifacts(self.contracts, commit),
                onchain_bytecodes,
            )

            # If the results for this commit already exist in the final_df, output a warning
            if final_df is not None and commit in final_df.columns:
                warnings.warn(f"Comparison results for commit {commit} already exist.")
                continue

            # Merge the new comparison results into final_df
            if final_df is None:
                final_df = df_comparisons_config
            else:
                final_df = final_df.merge(
                    df_comparisons_config, on="contract_name", how="outer"
                )

        df_comparisons_current = self.compare_bytecodes(
            self.get_bytecodes_from_artifacts(self.contracts),
            onchain_bytecodes,
        )

        # Merge the current comparison results into final_df
        if final_df is None:
            final_df = df_comparisons_current
        else:
            final_df = final_df.merge(
                df_comparisons_current, on="contract_name", how="outer"
            )

        if final_df is not None:
            final_df.to_csv("bytecode_verification_report.csv", index=False)
        else:
            print("No data to write to 'bytecode_verification_report.csv'.")

    def print_contracts_info(self, merged_dict):
        base_url = "https://etherscan.io/address/"
        active_facet_addresses = self.verify_contracts_are_active()

        chain_id = self.w3.eth.chain_id
        block_number = self.w3.eth.block_number
        timestamp = self.w3.eth.get_block(block_number).timestamp
        # Convert timestamp to datetime
        dt = datetime.datetime.utcfromtimestamp(timestamp)

        # Convert datetime to ISO 8601 format
        iso8601_time = dt.isoformat()

        with open("bytecode_verification_report.md", "w") as file:
            file.write("# Bytecode Verification Report\n\n")

            if self.is_run_locally:
                file.write("This report is generated locally.\n\n")
            else:
                file.write("This report is generated from the CI pipeline.\n\n")

            file.write(
                f"## Network {chain_id}, block number {block_number} ({iso8601_time})\n\n"
            )

            file.write(f"Proxy address: `{self.diamond_address}`\n\n")

            file.write(
                f"Number of active facets: `{len(self.current_facet_addresses)}`\n\n"
            )

            file.write(
                "| Contract Name | Address | Active | Current Commit Hash | Comparison with Metadata (Current Commit) | Comparison without Metadata (Current Commit) | Commit Hash (Config) | Comparison with Metadata (Config Commit) | Comparison without Metadata (Config Commit) |\n"
            )
            file.write(
                "|---------------|---------|--------|---------------------|---------------------------------------|-------------------------------------------|---------------------|-----------------------------------------|---------------------------------------------|\n"
            )

            for contract_name, info in merged_dict.items():
                comparison_with_metadata_current = (
                    "✅" if info["comparison_with_metadata_current"] else "❌"
                )
                comparison_without_metadata_current = (
                    "✅" if info["comparison_without_metadata_current"] else "❌"
                )
                comparison_with_metadata_config = (
                    "✅" if info["comparison_with_metadata_config"] else "❌"
                )
                comparison_without_metadata_config = (
                    "✅" if info["comparison_without_metadata_config"] else "❌"
                )
                clickable_address = base_url + info["address"] + "#code"
                active = "✅" if active_facet_addresses[contract_name] else "❌"
                commit_hash_config = info["commit_hash_config"]

                file.write(
                    f"| {contract_name} | [{info['address']}]({clickable_address}) | {active} | {self.current_commit_sha} | {comparison_with_metadata_current} | {comparison_without_metadata_current} | {commit_hash_config} | {comparison_with_metadata_config} | {comparison_without_metadata_config} |\n"
                )

In [None]:
CURRENT_REPOSITORY = "nayms/contracts-v3"

# Machine-specific locations come from the environment first. The fallbacks are
# relative to the current user's home directory instead of one hard-coded account.
_default_forge_path = os.path.expanduser(os.path.join("~", ".foundry", "bin", "forge"))
FORGE_PATH = os.getenv("FORGE_PATH") or (
    _default_forge_path if os.path.isfile(_default_forge_path) else None
)  # None lets CompareBytecode look `forge` up on PATH
REPO_PATH = os.getenv("REPO_PATH") or os.path.expanduser(
    os.path.join("~", "dev", "nayms", "v3-extoken")
)

build_previous_commits = ["9de0e394"]

rpc_url = os.getenv("ETH_MAINNET_RPC_URL")
if not rpc_url:
    # Fail loudly: without an explicit endpoint the results below are not
    # guaranteed to come from mainnet.
    warnings.warn(
        "ETH_MAINNET_RPC_URL is not set; define it in your environment or .env file."
    )

web3_instance = Web3(Web3.HTTPProvider(rpc_url))  # update with your provider details
contract_abi = diamondLoupeABI
dir_path_to_broadcasts = os.path.join(
    REPO_PATH, "broadcast"
)  # update with your broadcasts directory path
dir_path_to_artifacts = os.path.join(
    REPO_PATH, "forge-artifacts"
)  # update with your artifacts directory path
# Contracts to verify in addition to those discovered from the broadcast files
additional_contracts = {
    "NaymsOwnershipFacet": "0x073C1a072845D1d87f42309af9911bd3c07fC599",
    "DiamondLoupeFacet": "0x0318ff107aFA55E3dc658cEA06748d0c35fbEC73",
}
compare_bytecode = CompareBytecode(
    build_previous_commits=build_previous_commits,
    dir_path_to_broadcasts=dir_path_to_broadcasts,
    dir_path_to_artifacts=dir_path_to_artifacts,
    w3=web3_instance,
    contract_abi=contract_abi,
    additional_contracts=additional_contracts,
    forge_path=FORGE_PATH,
    repo_path=REPO_PATH,
)

compare_bytecode.compare_contract_bytecodes()

Contract address list
Proxy address - optional if contract address list is provided


Check whether there have been new upgrades on-chain since the last time this report was run.

Autotag:
Proxy address has changed
Upgrade - Implementation contracts have been added, replaced, or removed
Metadata change 


If this is the first time the report is generated,
the user can seed commits so that the script checks specific contracts from those commits.
The script should always re-check the previous commits that matched the on-chain bytecode, to validate that they are still relevant for this report.

If they no longer match, the script will check the report history to see whether the last commit recorded there matches, and rerun verification for those.