In [None]:
import time
import sys
from google.cloud import storage
import os
import pandas as pd
from termcolor import colored
from datetime import datetime

# Point the Google Cloud client libraries at the service-account key file.
# NOTE: this assignment runs at import time and overwrites any value already
# present in the environment; the path is relative to the working directory.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = './ethereum-data-nero.json'

# Row template for status output: one right-aligned 2-character column followed
# by two centred 32-character columns, separated by " | ".
optimize_print = "{:>2} | {:^32} | {:^32}"

In [None]:
def get_last_hash():
    """
    Load the hash recorded for the most recent upload.

    Returns:
        int: The contents of ``config/last_upload.txt`` parsed as an integer
        (leading/trailing whitespace is ignored).
    """
    with open("config/last_upload.txt", "r") as handle:
        raw_value = handle.read()
    return int(raw_value.strip())

def write_last_hash(hash_value):
    """
    Write the last hash to the configuration file.

    The value is converted with ``str()`` before writing, so the integer
    returned by ``get_last_hash`` (or a NumPy integer straight from a pandas
    hash sum) can be passed directly without raising ``TypeError``.

    Args:
        hash_value (int | str): The hash value to be written.
    """
    with open("config/last_upload.txt", "w") as file:
        file.write(str(hash_value))

def get_last_optimization():
    """
    Load the time at which the last optimization run finished.

    Returns:
        int: The contents of ``config/last_optimization.txt`` parsed as an
        integer (leading/trailing whitespace is ignored).
    """
    with open("config/last_optimization.txt", "r") as handle:
        raw_value = handle.read()
    return int(raw_value.strip())

def write_last_optimization(timestamp):
    """
    Write the timestamp of the last optimization to the configuration file.

    The value is converted with ``str()`` before writing, so both the
    documented integer timestamp and an already-formatted string are accepted
    (a bare ``int`` previously raised ``TypeError`` in ``file.write``).

    Args:
        timestamp (int | str): The timestamp of the last optimization.
    """
    with open("config/last_optimization.txt", "w") as file:
        file.write(str(timestamp))

def get_active_stakers():
    """
    Determine the active stakers.

    Reads ``stakers.csv`` and ``withdrawers.csv`` (both need ``address`` and
    ``blockNumber`` columns). An address is active when it has staked and
    either never withdrew, or its most recent stake is in a strictly later
    block than its most recent withdrawal.

    Returns:
        set: A set of active stakers' addresses.
    """
    stakers = pd.read_csv("stakers.csv")
    withdrawers = pd.read_csv("withdrawers.csv")

    # Only the latest event per address matters for the comparison.
    last_stake = stakers.groupby("address")["blockNumber"].max()
    last_withdrawal = (
        withdrawers.groupby("address")["blockNumber"].max().to_dict()
    )

    # Compare each address against its *own* last withdrawal. The previous
    # `address in <DataFrame>` check tested column labels rather than address
    # values, so every address that had ever withdrawn was dropped, even if
    # it staked again afterwards.
    return {
        address
        for address, staked_at in last_stake.items()
        if address not in last_withdrawal
        or staked_at > last_withdrawal[address]
    }
    

def optimize_announcements(df):
    """
    Optimize the announcements data frame.

    Rows are reordered so that announcements sent by active stakers come
    first, then senders with fewer announcements (counted as non-null
    ``metadata`` entries per ``sender``), then higher ``blockNumber``. The
    reordered frame is written to ``announcements.csv`` without the index.

    The input frame is not modified; the helper columns used for sorting live
    only on an internal copy.

    Args:
        df (pd.DataFrame): The announcements data frame to be optimized. Must
            contain ``sender``, ``metadata`` and ``blockNumber`` columns.

    Returns:
        pd.DataFrame: The optimized announcements data frame, with the same
        columns as the input and a fresh 0..n-1 index.
    """
    known_stakers = get_active_stakers()
    sender_counts = df.groupby("sender")["metadata"].count()

    # `assign` returns a new frame, so the caller's `df` never gains the
    # temporary "staker"/"senderCount" columns.
    ranked = df.assign(
        staker=df["sender"].isin(known_stakers).astype(int),
        senderCount=df["sender"].map(sender_counts),
    )
    ranked = ranked.sort_values(
        ["staker", "senderCount", "blockNumber"],
        ascending=[False, True, False],
    )
    ranked = ranked.drop(columns=["senderCount", "staker"])
    ranked = ranked.reset_index(drop=True)
    ranked.to_csv("announcements.csv", index=False)
    return ranked

def blob_upload(announcements):
    """
    Upload the announcements to Google Cloud Storage.

    The frame is serialised with ``DataFrame.to_csv()`` and stored as the
    object ``announcements.csv`` in the ``eip5564_data`` bucket, replacing any
    existing object of that name.

    Args:
        announcements (pd.DataFrame): The announcements data frame to be uploaded.
    """
    client = storage.Client()
    bucket = client.get_bucket("eip5564_data")
    # `bucket.blob` builds a local handle without a lookup, so the upload also
    # works when the object does not exist yet. `bucket.get_blob` returns None
    # in that case, which made the first upload fail with AttributeError.
    blob = bucket.blob("announcements.csv")
    # NOTE(review): to_csv() here includes the index column, unlike the local
    # announcements.csv written by optimize_announcements. Left unchanged in
    # case consumers of the bucket rely on that layout; confirm.
    blob.upload_from_string(announcements.to_csv())

def optimize_and_upload():
    """
    Run one optimize-and-upload pass over ``announcements.csv``.

    The pass is skipped when the previous optimization was recorded less than
    12 seconds ago, or when the hash of the current file contents equals the
    hash stored after the last upload. Otherwise the announcements are
    optimized, uploaded, and the stored hash and timestamp are refreshed.
    """
    # Throttle: do nothing if the last run is too recent.
    current_time = int(time.time())
    if current_time < get_last_optimization() + 12:
        return

    announcements = pd.read_csv("announcements.csv")
    current_hash = int(pd.util.hash_pandas_object(announcements).sum())
    if current_hash == get_last_hash():
        print(colored("nothing to do.                    ", "grey"))
        return

    optimized = optimize_announcements(announcements)
    blob_upload(optimized)

    # Record the state of what was just uploaded for the next invocation.
    uploaded_hash = pd.util.hash_pandas_object(optimized).sum()
    write_last_hash(str(uploaded_hash))
    write_last_optimization(str(int(time.time())))

In [None]:
# Run a single optimize-and-upload pass when this file is executed as a script.
if __name__ == "__main__":
    optimize_and_upload()