In [None]:
import time
import sys
from google.cloud import storage
import os
import pandas as pd
from termcolor import colored
# Point the Google Cloud client at the service-account key file; the path is
# relative, so it resolves against the current working directory.
os.environ.update(GOOGLE_APPLICATION_CREDENTIALS='./ethereum-data-nero.json')

In [None]:
def get_last_hash():
    """Return the hash stored in ``config/last_upload.txt``.

    The file content is read in full and surrounding whitespace is removed.

    Returns:
        str: the stored hash text.
    """
    with open("config/last_upload.txt", "r") as handle:
        contents = handle.read()
    return str(contents.strip())

def write_last_hash(h):
    """Overwrite ``config/last_upload.txt`` with the given hash string.

    Args:
        h: hash text to store; written as-is.
    """
    with open("config/last_upload.txt", "w") as handle:
        handle.write(h)
    return None

def get_last_optimization():
    """Return the integer stored in ``config/last_optimization.txt``.

    Returns:
        int: the stored value, parsed after stripping surrounding whitespace.

    Raises:
        ValueError: if the file content is not a valid integer.
    """
    with open("config/last_optimization.txt", "r") as handle:
        raw_value = handle.read()
    return int(raw_value.strip())

def write_last_optimization(ts):
    """Overwrite ``config/last_optimization.txt`` with the given timestamp.

    Args:
        ts: timestamp to store, as an int or a string. It is converted with
            ``str`` before writing, so the integer returned by
            ``get_last_optimization`` can be passed back in directly.
    """
    with open("config/last_optimization.txt", "w") as file:
        # file.write() only accepts text; str() is a no-op for string input.
        file.write(str(ts))
    return

def get_active_stakers():
    """Return the set of addresses that currently count as active stakers.

    Reads ``stakers.csv`` and ``withdrawers.csv`` from the working directory;
    both must provide ``address`` and ``blockNumber`` columns. An address is
    active when it appears in ``stakers.csv`` and either never appears in
    ``withdrawers.csv`` or its highest staking block is strictly greater than
    its highest withdrawal block.

    Returns:
        set: addresses of the active stakers.
    """
    print("|--- determining stakers...")
    stakers = pd.read_csv("stakers.csv")
    withdrawers = pd.read_csv("withdrawers.csv")
    last_stake = stakers.groupby("address")["blockNumber"].max()
    last_withdrawal = withdrawers.groupby("address")["blockNumber"].max().to_dict()
    # Compare per address. The previous `address in dataframe` check tested the
    # DataFrame's column labels rather than its values, so an address that
    # re-staked after withdrawing was always dropped.
    active = {
        address
        for address, block in last_stake.items()
        if address not in last_withdrawal or block > last_withdrawal[address]
    }
    print(colored("|--- finished determining stakers.", "green"))
    return active
    

def optimize_announcements(df):
    """Reorder announcements and persist them to ``announcements.csv``.

    Rows are ordered by three keys:
      1. rows whose ``sender`` is an active staker come first,
      2. then senders with fewer announcements (count of non-null
         ``metadata`` per sender) come before those with more,
      3. then higher ``blockNumber`` comes first.

    The caller's DataFrame is left untouched; the helper columns used for
    sorting exist only on an internal copy.

    Args:
        df: announcements frame with at least ``sender``, ``metadata`` and
            ``blockNumber`` columns.

    Returns:
        pandas.DataFrame: the reordered announcements, with the same columns
        as ``df``. The result is also written to ``announcements.csv`` in the
        working directory without an index column.
    """
    print("|- optimizing announcements...")
    knownStakers = get_active_stakers()
    # assign() works on a copy, so no "staker" column leaks into the input.
    ranked = df.assign(staker=df["sender"].isin(knownStakers).astype(int))
    sender_counts = (
        df.groupby("sender")["metadata"]
        .count()
        .reset_index()
        .rename(columns={"metadata": "senderCount"})
    )
    ranked = pd.merge(ranked, sender_counts, on="sender", how="left")
    ranked = ranked.sort_values(
        ["staker", "senderCount", "blockNumber"], ascending=[False, True, False]
    )
    ranked = ranked.drop(columns=["senderCount", "staker"])
    ranked.to_csv("announcements.csv", index=False)
    print(colored("|- finished optimizing announcements.", "green"))
    return ranked

def blob_upload(announcements):
    """Upload the announcements frame as CSV to the ``eip5564_data`` bucket.

    The frame is serialized with ``DataFrame.to_csv()`` and stored under the
    object name ``announcements.csv``.

    Args:
        announcements: DataFrame to serialize and upload.
    """
    print("|- uploading announcements...")
    client = storage.Client()
    bucket = client.get_bucket("eip5564_data")
    # bucket.blob() builds a handle without a lookup request. get_blob()
    # returns None when the object does not exist yet, which made the very
    # first upload fail with AttributeError.
    blob = bucket.blob("announcements.csv")
    # NOTE(review): to_csv() here includes the index column, whereas the local
    # announcements.csv is written without one - confirm which format the
    # consumers of the bucket expect before changing it.
    blob.upload_from_string(announcements.to_csv())
    print(colored("|- finished uploading announcements.", "green"))

def optimize_and_upload():
    print(colored("----optimizer-start----", "green"))
    if int(time.time()) < get_last_optimization() + 12:
        return
    print("start processing announcements:")
    announcements = pd.read_csv("announcements.csv")
    if str(pd.util.hash_pandas_object(announcements).sum()) == get_last_hash():
        print(colored("nothing to do.                    ", "grey"))        
        return
    
    announcements = optimize_announcements(announcements)
    blob_upload(announcements)
    write_last_hash(str(pd.util.hash_pandas_object(announcements).sum()))
    write_last_optimization(str(int(time.time())))
    print(colored("finished processing announcements.", "green"))
    print(colored("----optimizer--end-----", "green"))

In [None]:
# Entry point: run one optimize-and-upload pass when this code is executed as
# the main module.
if __name__ == "__main__":
    optimize_and_upload()