# Updated Data Transformations

In [2]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from numpy import intersect1d
from datetime import datetime

import awswrangler as wr
import boto3
import io

import sagemaker
from sagemaker import get_execution_role
from sagemaker.session import Session

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [11]:
# Make the local `ethereum` project directory importable.
# The membership guard keeps this cell idempotent: re-running it (or Run All on a
# live kernel) no longer stacks duplicate entries at the front of sys.path.
import sys

ETHEREUM_DIR = "/home/ec2-user/SageMaker/ethereum"
if ETHEREUM_DIR not in sys.path:
    sys.path.insert(0, ETHEREUM_DIR)

In [12]:
from ethereum import (
    timestamp_to_datetime,
    EthereumData
)
from utils import (
    latest,
    lead_lag,
    add_latest_avail_block,
    lagged_block_data,
    get_pit_blocks
)

In [13]:
# Resolve the IAM role ARN to use with SageMaker.
# The lookup lives entirely inside the try block: an unguarded call placed before it
# would raise the ValueError first and the IAM fallback below would never run.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    # Fallback: look the role up through IAM.
    # `RoleName` takes the role's friendly name, not its full ARN; the ARN is what
    # comes back under ['Role']['Arn'].
    iam = boto3.client('iam')
    role = iam.get_role(RoleName='SageMaker')['Role']['Arn']

In [14]:
# Read the raw block-level CSV data from S3.
blocks = wr.s3.read_csv(
    path="s3://sagemaker-w210-eth/raw_data/infuria/blocks",
)

In [15]:
# Columns of the raw blocks table that are kept for the steps below.
cols = [
    'number',
    'difficulty',
    'total_difficulty',
    'size',
    'gas_limit',
    'gas_used',
    'timestamp',
    'transaction_count',
    'base_fee_per_gas',
]

In [16]:
# Keep only the columns listed in `cols` (all rows).
blocks = blocks.loc[:, cols]

In [17]:
# Preview the first five rows of the trimmed blocks table.
blocks.head(n=5)

Unnamed: 0,number,difficulty,total_difficulty,size,gas_limit,gas_used,timestamp,transaction_count,base_fee_per_gas
0,12962018,7604452341876898,28471525393295665706633,55433,15000000,14994306,1628126126,160,
1,12962019,7604589780830370,28471532997885446537003,79403,14985353,14969945,1628126136,217,
2,12962020,7601014041179921,28471540598899487716924,69949,14970720,14964505,1628126157,155,
3,12962021,7601151480133393,28471548200050967850317,92704,14956102,14944125,1628126167,195,
4,12962022,7597577419340707,28471555797628387191024,65855,14963388,14943452,1628126200,178,


### Point-In-Time Blocks

In [18]:
# Preview the point-in-time view on a small slice of blocks.
# `.loc[:1000]` is a label-based slice, so it includes the row labelled 1000.
# NOTE(review): in the displayed rows `lag_cutoff` is 60 seconds before `datetime`,
# so `lag` looks like seconds; confirm against `utils.get_pit_blocks`.
get_pit_blocks(
    blocks.loc[:1000],
    lag=60,
).head()

Unnamed: 0_level_0,datetime,lag_cutoff,latest_avail_block
number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
12961718,2021-08-05 00:00:04,2021-08-04 23:59:04,
12961719,2021-08-05 00:00:07,2021-08-04 23:59:07,
12961720,2021-08-05 00:00:47,2021-08-04 23:59:47,
12961721,2021-08-05 00:01:03,2021-08-05 00:00:03,
12961722,2021-08-05 00:01:25,2021-08-05 00:00:25,12961719.0


### Transaction ID

In [None]:
# Read the raw transaction-level CSV data from S3.
transactions = wr.s3.read_csv(
    path="s3://sagemaker-w210-eth/raw_data/infuria/transactions",
)

In [None]:
# Display the dimensions (row count, column count) of the loaded transactions table.
transactions.shape