In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor
from sklearn.neighbors import KNeighborsRegressor
from xgboost import XGBRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Load the TxTimeData_100K.csv export, then discard its first column
# before previewing the first rows.
data = pd.read_csv(
    r"f:\Paper\ResearchData\TxnPrediction Data\TxTimeData\TxTimeData_100K.csv"
)
data = data.drop(columns=data.columns[0])
data.head()

Unnamed: 0,gasUsed,gasPrice,maxFeePerGas,maxPriorityFeePerGas,baseFees,value,transactionFee,timstamp,voteCount,activeValidators
0,0.000268,21.946848,21.946848,0.0,5877454.0,0.030827,5877454.0,1693066907,23471,753577
1,0.000248,24.946848,35.581637,3.0,5438297.0,0.0,6181679.0,1693066907,23471,753577
2,0.000264,24.946848,35.581637,3.0,5792914.0,0.0,6584770.0,1693066907,23471,753577
3,0.000169,23.946848,30.0,2.0,3714745.0,0.0,4053267.0,1693066907,23471,753577
4,0.000196,23.946848,30.0,2.0,4293023.0,0.0,4684243.0,1693066907,23471,753577


In [3]:
# Target is the 'timstamp' column; every remaining column is a feature.
y = data['timstamp']
x = data.drop(columns=['timstamp'])

# Ordered 70/30 hold-out: the first 70% of rows (in file order) train the
# models and the remaining rows are the test set. No shuffling is applied
# (a shuffled alternative would be train_test_split(x, y, test_size=0.3,
# random_state=42)).
total_samples = len(x)
split_index = int(0.7 * total_samples)

x_train, x_test = x.iloc[:split_index], x.iloc[split_index:]
y_train, y_test = y.iloc[:split_index], y.iloc[split_index:]

In [4]:
# Frame that collects the held-out targets; train_model() later adds one
# prediction column per model to it.
df = y_test.to_frame(name="Actual transactionTime")

In [5]:

def train_model(model):
    """Fit ``model`` on the training split and score it on the test split.

    Reads the notebook-level ``x_train``, ``y_train``, ``x_test`` and
    ``y_test``. As a side effect, stores the predictions in the notebook-level
    ``df`` under the column ``Predicted_transactionTime_<ModelClassName>`` and
    prints the metrics.

    Predictions are rounded to whole numbers before scoring, so the reported
    metrics describe the rounded predictions, not the raw model output.

    Parameters
    ----------
    model : object
        Estimator exposing ``fit(X, y)`` and ``predict(X)``.

    Returns
    -------
    dict
        Keys ``'Model'`` (class name), ``'MeanAbsoluteError'``,
        ``'RootMeanSquaredError'`` and ``'R2Score'``.
    """
    model_name = type(model).__name__
    print('Model => {} '.format(model_name))

    model.fit(x_train, y_train)

    # Cast to an explicit 64-bit integer. Plain ``int`` resolves to a 32-bit
    # type on Windows with NumPy < 2, and the targets shown in the data preview
    # (~1.69e9) sit close to the int32 limit (~2.147e9), so an extrapolating
    # model could otherwise overflow silently.
    y_pred = np.round(model.predict(x_test)).astype(np.int64)

    df['Predicted_transactionTime_{}'.format(model_name)] = y_pred

    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)

    result = {
        'Model': model_name,
        'MeanAbsoluteError': mae,
        'RootMeanSquaredError': rmse,
        'R2Score': r2
    }

    print('Mean Absolute Error (MAE):', mae)
    print('Root Mean Squared Error (RMSE):', rmse)
    print('R2 Score:', r2)
    print('###################- End -###################')

    return result


In [6]:
# Fixed seed for the estimators that use randomness, so that a fresh
# Restart & Run All reproduces the same metrics. KNeighborsRegressor and
# LinearRegression take no random_state.
SEED = 42

models = [
    ExtraTreesRegressor(random_state=SEED),
    KNeighborsRegressor(),
    LinearRegression(),
    RandomForestRegressor(random_state=SEED),
    GradientBoostingRegressor(random_state=SEED),
    XGBRegressor(random_state=SEED),
]

# Train and score each model in turn; one metrics dict per model.
results = []
for model in models:
    results.append(train_model(model))



# RandomForestRegressor(), GradientBoostingRegressor(), XGBRegressor(), SVR()

Model => ExtraTreesRegressor 
Mean Absolute Error (MAE): 3130.5554666666667
Root Mean Squared Error (RMSE): 3487.0564936633878
R2 Score: -4.1570973004166705
###################- End -###################
Model => KNeighborsRegressor 
Mean Absolute Error (MAE): 7961.358133333333
Root Mean Squared Error (RMSE): 8337.731639053074
R2 Score: -28.48378149296761
###################- End -###################
Model => LinearRegression 
Mean Absolute Error (MAE): 157.0936
Root Mean Squared Error (RMSE): 200.63168526763994
R2 Score: 0.9829279214261146
###################- End -###################
Model => RandomForestRegressor 
Mean Absolute Error (MAE): 2723.7610666666665
Root Mean Squared Error (RMSE): 3126.7087648303077
R2 Score: -3.1463143115850176
###################- End -###################
Model => GradientBoostingRegressor 
Mean Absolute Error (MAE): 2752.2725
Root Mean Squared Error (RMSE): 3152.636397625539
R2 Score: -3.215364454512695
###################- End -###################
Model

In [7]:
# Dump the raw list of per-model metric dicts returned by train_model().
print(results)

[{'Model': 'ExtraTreesRegressor', 'MeanAbsoluteError': 3130.5554666666667, 'RootMeanSquaredError': 3487.0564936633878, 'R2Score': -4.1570973004166705}, {'Model': 'KNeighborsRegressor', 'MeanAbsoluteError': 7961.358133333333, 'RootMeanSquaredError': 8337.731639053074, 'R2Score': -28.48378149296761}, {'Model': 'LinearRegression', 'MeanAbsoluteError': 157.0936, 'RootMeanSquaredError': 200.63168526763994, 'R2Score': 0.9829279214261146}, {'Model': 'RandomForestRegressor', 'MeanAbsoluteError': 2723.7610666666665, 'RootMeanSquaredError': 3126.7087648303077, 'R2Score': -3.1463143115850176}, {'Model': 'GradientBoostingRegressor', 'MeanAbsoluteError': 2752.2725, 'RootMeanSquaredError': 3152.636397625539, 'R2Score': -3.215364454512695}, {'Model': 'XGBRegressor', 'MeanAbsoluteError': 2858.0984, 'RootMeanSquaredError': 3248.090801686431, 'R2Score': -3.4744914425514315}]


In [8]:
# Tabulate the metric dicts: one row per model, one column per dict key.
results_df = pd.DataFrame.from_records(results)
results_df

Unnamed: 0,Model,MeanAbsoluteError,RootMeanSquaredError,R2Score
0,ExtraTreesRegressor,3130.555467,3487.056494,-4.157097
1,KNeighborsRegressor,7961.358133,8337.731639,-28.483781
2,LinearRegression,157.0936,200.631685,0.982928
3,RandomForestRegressor,2723.761067,3126.708765,-3.146314
4,GradientBoostingRegressor,2752.2725,3152.636398,-3.215364
5,XGBRegressor,2858.0984,3248.090802,-3.474491


In [9]:
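# df.to_csv("txnTime_500K_prediction.csv")  # NOTE(review): disabled export; the file name says 500K but this notebook loads TxTimeData_100K.csv — confirm the name before enabling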

In [10]:
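# results_df.to_csv("txnTime_500K_results.csv")  # NOTE(review): disabled export; the file name says 500K but this notebook loads TxTimeData_100K.csv — confirm the name before enabling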