In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor
from sklearn.neighbors import KNeighborsRegressor
from xgboost import XGBRegressor
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Load the raw transaction-fee dataset.
# The path is a raw string so the Windows backslashes are taken literally:
# in a normal literal "\P", "\R" and "\T" are invalid escape sequences, which
# newer Python versions warn about.
# NOTE(review): this is an absolute local path; consider a configurable data dir.
data = pd.read_csv(r"f:\Paper\ResearchData\TxnPrediction Data\TxFeeData\TxFeeData_100K.csv")

# Drop the first column by position.
# NOTE(review): presumably an index column written by an earlier export - confirm.
data = data.drop(data.columns[0], axis=1)

# Remove the gas/fee columns that are not used as model inputs.
# NOTE(review): presumably excluded because they determine priorityFee - confirm.
data = data.drop(columns=['gasUsed', 'gasPrice', 'maxFeePerGas', 'transactionFee'])
data.head()

Unnamed: 0,maxPriorityFeePerGas,baseFees,value,priorityFee,voteCount,activeValidators
0,0.0,5877454.0,0.030827,0.0,23471,753577
1,2.0,3714745.0,0.0,338522.0,23471,753577
2,2.0,4293023.0,0.0,391220.0,23471,753577
3,3.179277,1225183.0,0.0,177483.14595,23471,753577
4,3.0,1226017.0,0.0,167589.0,23471,753577


In [3]:
# Target is priorityFee; every other remaining column is a feature.
y = data['priorityFee']
x = data.drop(columns=['priorityFee'])

# Ordered 70/30 hold-out: the leading 70% of the rows train the models and the
# trailing 30% evaluate them. Row order is preserved (no shuffling), unlike the
# alternative train_test_split(x, y, test_size=0.3, random_state=42).
total_samples = x.shape[0]
split_index = int(0.7 * total_samples)

x_train, x_test = x.iloc[:split_index], x.iloc[split_index:]
y_train, y_test = y.iloc[:split_index], y.iloc[split_index:]

In [4]:
# Comparison frame: starts with the true test-set fees; further columns are
# appended to it by later cells.
df = y_test.to_frame(name="Actual priorityFee")

In [5]:
def train_model(model):
    """Fit ``model`` on the training split and score it on the test split.

    Reads the notebook-level ``x_train``, ``y_train``, ``x_test`` and
    ``y_test`` splits. As a side effect, the test-set predictions are stored
    in the shared ``df`` frame under a column named after the model class,
    and the metrics are printed.

    Parameters
    ----------
    model : object
        Regressor exposing ``fit(X, y)`` and ``predict(X)``.

    Returns
    -------
    dict
        Keys ``'Model'``, ``'MeanAbsoluteError'``, ``'RootMeanSquaredError'``
        and ``'R2Score'``.
    """
    model_name = type(model).__name__
    print('Model => {} '.format(model_name))

    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)

    # Keep the per-row predictions next to the actual values for inspection.
    df['Predicted priorityFees({})'.format(model_name)] = y_pred

    mae = mean_absolute_error(y_test, y_pred)
    # RMSE is the square root of the mean squared error.
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)

    result = {
        'Model': model_name,
        'MeanAbsoluteError': mae,
        'RootMeanSquaredError': rmse,
        'R2Score': r2
    }

    print('Mean Absolute Error (MAE):', mae)
    print('Root Mean Squared Error (RMSE):', rmse)
    print('R2 Score:', r2)
    print()

    return result


In [6]:
# Fixed seed so the randomized estimators give the same scores on every
# Restart & Run All. KNeighborsRegressor and LinearRegression take no seed.
RANDOM_STATE = 42

models = [
    XGBRegressor(random_state=RANDOM_STATE),
    ExtraTreesRegressor(random_state=RANDOM_STATE),
    KNeighborsRegressor(),
    LinearRegression(),
    RandomForestRegressor(random_state=RANDOM_STATE),
    GradientBoostingRegressor(random_state=RANDOM_STATE),
]

# Evaluate each model in turn; train_model prints the metrics and returns
# them as a dict, collected here for the summary table.
results = []
for model in models:
    results.append(train_model(model))

Model => XGBRegressor 
Mean Absolute Error (MAE): "] 6866.977604871176
Root Mean Squared Error (RMSE): 19180.89215014328
R2 Score: 0.9361721972536906

Model => ExtraTreesRegressor 
Mean Absolute Error (MAE): "] 5654.916062730012
Root Mean Squared Error (RMSE): 17158.884534717363
R2 Score: 0.9489200585217653

Model => KNeighborsRegressor 
Mean Absolute Error (MAE): "] 48590.12709260071
Root Mean Squared Error (RMSE): 78209.84162159289
R2 Score: -0.0611947683697065

Model => LinearRegression 
Mean Absolute Error (MAE): "] 29119.900992951538
Root Mean Squared Error (RMSE): 48814.51220972914
R2 Score: 0.5866002637754879

Model => RandomForestRegressor 
Mean Absolute Error (MAE): "] 5896.132922264861
Root Mean Squared Error (RMSE): 17443.875581116274
R2 Score: 0.9472091994169084

Model => GradientBoostingRegressor 
Mean Absolute Error (MAE): "] 6507.8208524653965
Root Mean Squared Error (RMSE): 16897.279436600653
R2 Score: 0.9504657194143629



In [7]:
# Raw dump of the collected metric dicts (one dict per evaluated model).
print(results)

[{'Model': 'XGBRegressor', 'MeanAbsoluteError': 6866.977604871176, 'RootMeanSquaredError': 19180.89215014328, 'R2Score': 0.9361721972536906}, {'Model': 'ExtraTreesRegressor', 'MeanAbsoluteError': 5654.916062730012, 'RootMeanSquaredError': 17158.884534717363, 'R2Score': 0.9489200585217653}, {'Model': 'KNeighborsRegressor', 'MeanAbsoluteError': 48590.12709260071, 'RootMeanSquaredError': 78209.84162159289, 'R2Score': -0.0611947683697065}, {'Model': 'LinearRegression', 'MeanAbsoluteError': 29119.900992951538, 'RootMeanSquaredError': 48814.51220972914, 'R2Score': 0.5866002637754879}, {'Model': 'RandomForestRegressor', 'MeanAbsoluteError': 5896.132922264861, 'RootMeanSquaredError': 17443.875581116274, 'R2Score': 0.9472091994169084}, {'Model': 'GradientBoostingRegressor', 'MeanAbsoluteError': 6507.8208524653965, 'RootMeanSquaredError': 16897.279436600653, 'R2Score': 0.9504657194143629}]


In [8]:
# Tabulate the metric dicts: one row per model, one column per dict key.
results_df = pd.DataFrame.from_records(results)
results_df

Unnamed: 0,Model,MeanAbsoluteError,RootMeanSquaredError,R2Score
0,XGBRegressor,6866.977605,19180.89215,0.936172
1,ExtraTreesRegressor,5654.916063,17158.884535,0.94892
2,KNeighborsRegressor,48590.127093,78209.841622,-0.061195
3,LinearRegression,29119.900993,48814.51221,0.5866
4,RandomForestRegressor,5896.132922,17443.875581,0.947209
5,GradientBoostingRegressor,6507.820852,16897.279437,0.950466


In [9]:
# Naive baseline: estimate each test fee as the mean of the NAIVE_WINDOW
# observations immediately preceding it (the tail of y_train first, then the
# earlier y_test values as they become part of the history).
#
# A rolling mean over the concatenated series keeps the window at exactly
# NAIVE_WINDOW values for every test row. Slicing with `y_train[-1000 + i:]`
# does not: once i reaches 1000 the start offset is no longer negative, so the
# slice stops being "the last rows of the training data".
NAIVE_WINDOW = 1000

fee_history = pd.concat([y_train, y_test])
y_naive = (
    fee_history
    .rolling(window=NAIVE_WINDOW, min_periods=1)  # average what exists if history is short
    .mean()
    .shift(1)                 # use only values strictly before the row being estimated
    .iloc[len(y_train):]      # keep the estimates for the test rows only
    .tolist()
)

In [10]:
# Attach the naive baseline estimates as a column alongside the actual values
# and the model predictions already stored in df, then preview the first rows.
df["Estimated"] = y_naive
df.head()

Unnamed: 0,Actual priorityFee,Predicted priorityFees(XGBRegressor),Predicted priorityFees(ExtraTreesRegressor),Predicted priorityFees(KNeighborsRegressor),Predicted priorityFees(LinearRegression),Predicted priorityFees(RandomForestRegressor),Predicted priorityFees(GradientBoostingRegressor),Estimated
70000,21000.0,21225.255859,21000.0,13860.0,49894.782699,21000.0,24232.580605,45941.587802
70001,100000.0,92476.46875,84173.484157,108270.513528,70839.436639,74934.128105,91141.769729,45716.772647
70002,21000.0,21225.255859,21000.0,17640.0,47100.587177,21000.0,24232.580605,45563.936647
70003,169975.68,152439.375,145015.297495,26078.359579,85644.563328,145124.424305,160488.821446,45424.279347
70004,63962.15,71084.320312,58434.717571,147200.78388,45941.497267,55860.664691,62442.310323,45499.065867


In [11]:
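# Optional export of the per-row predictions (disabled).
# NOTE(review): this notebook loads the 100K dataset, so "500K" in the file name looks stale - confirm before enabling.
# df.to_csv("priorityFee_500K_prediction.csv")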

In [12]:
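# Optional export of the metric summary table (disabled).
# NOTE(review): this notebook loads the 100K dataset, so "500K" in the file name looks stale - confirm before enabling.
# results_df.to_csv("priorityFee_500K_results.csv")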