In [1]:
from sklearn.model_selection import train_test_split,cross_val_score,GridSearchCV
from sklearn.linear_model import Ridge,Lasso,LinearRegression
from sklearn.ensemble import GradientBoostingRegressor,RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import r2_score,root_mean_squared_error,mean_absolute_percentage_error
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import yfinance as yf
import pandas as pd


bitcoin = yf.download("BTC-USD",start="2010-05-17",end="2025-07-23")['Close']
bitcoin = bitcoin.dropna()
bitcoin = bitcoin.reset_index()
bitcoin['Date'] = pd.to_datetime(bitcoin['Date'])
bitcoin = bitcoin.sort_index()

Note: You have installed the 'manylinux2014' variant of XGBoost. Certain features such as GPU algorithms or federated learning are not available. To use these features, please upgrade to a recent Linux distro with glibc 2.28+, and install the 'manylinux_2_28' variant.
  bitcoin = yf.download("BTC-USD",start="2010-05-17",end="2025-07-23")['Close']
[*********************100%***********************]  1 of 1 completed


In [2]:
X = bitcoin.drop("BTC-USD",axis=1)
y = bitcoin['BTC-USD']

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=.2)

scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)



def evaluate(y_test,pred,model_name,r2,rmse,cv_scores):


    result = {
        "Model": model_name,
        "R2": r2,
        "RMSE": rmse,
        "Cross-val Scores":cv_scores.mean()
    }

    return result


model_dict = []
models = {
    "LinearRegression":LinearRegression(),
    "lasso":Lasso(),
    "ridge":Ridge(),
    "GradientBoostingRegressor":GradientBoostingRegressor(),
    "xBGboostRegression":XGBRegressor(),
    "randomforest":RandomForestRegressor(),
}


for model_name,model in models.items():
    model.fit(X_train_scaled,y_train)
    pred = model.predict(X_test_scaled)
    mse = root_mean_squared_error(y_test,pred)
    r2 = r2_score(y_test,pred)
    cv_scores = cross_val_score(model, X_train_scaled,y_train,cv=10,scoring="neg_mean_squared_error")
    model_results = evaluate(y_test, pred, model_name,r2,mse,cv_scores)
    model_dict.append(model_results)



df_results = pd.DataFrame(model_dict)
print(df_results.head(10))



                       Model        R2          RMSE  Cross-val Scores
0           LinearRegression  0.677730  15595.623298     -2.498791e+08
1                      lasso  0.677733  15595.556963     -2.498791e+08
2                      ridge  0.677789  15594.201479     -2.498879e+08
3  GradientBoostingRegressor  0.996679   1583.208724     -2.641402e+06
4         xBGboostRegression  0.996279   1675.871366     -3.127302e+06
5               randomforest  0.998991    872.469845     -8.267978e+05


In [3]:
pred_vs_actual = pd.DataFrame({
    'Actual': y_test.values,
    'Predicted': pred
}, index=y_test.index)




pred_vs_actual.sort_index(inplace=True)



# Root Mean-Squared Error
rmse = root_mean_squared_error(y_test,pred)
print(f'Root Mean-Squared Error: {rmse:.4}')

# R2 Score
r2 = r2_score(y_test,pred)
print(f'R2 Score: {r2*100:.2f}%')

# mean absolute percentage error
mape = mean_absolute_percentage_error(y_test,pred)
print(f'Mean Absolute Percentage Error: {mape:.4f}')



print('Predicted Vs Actual Prices\n')

print(pred_vs_actual.head(20))
print(pred_vs_actual.tail(20))

Root Mean-Squared Error: 872.5
R2 Score: 99.90%
Mean Absolute Percentage Error: 0.0213
Predicted Vs Actual Prices

         Actual   Predicted
0    457.334015  419.248171
2    394.795990  419.248171
7    423.204987  424.807103
18   320.510010  332.538331
22   365.026001  349.266836
23   361.562012  363.885430
26   390.414001  384.859965
28   394.773010  394.723366
39   354.704010  349.948862
44   338.321014  338.516214
46   325.891998  327.123691
48   330.492004  331.052089
55   367.695007  385.153868
61   387.407990  378.935325
76   381.315002  376.089645
79   376.854004  371.348394
84   346.364990  353.390977
96   331.885986  330.990951
98   322.533997  323.226966
110  274.473999  272.801914
             Actual      Predicted
3824   86742.671875   86626.873516
3828   82862.210938   82416.177422
3838   84043.242188   84508.936016
3851   83102.828125   82922.329219
3864   84033.867188   84055.197969
3868   85174.304688   87115.756172
3875   93754.843750   94607.878984
3876   94978.7500

In [4]:
Best_Model = RandomForestRegressor()
Best_Model.fit(X_train_scaled,y_train)
pred = Best_Model.predict(X_test_scaled)
print(f'R2 Score from Best Model: {r2_score(y_test,pred)*100:.2f}%')
print(f'RMSE from best model: {root_mean_squared_error(y_test,pred):.2f}')

R2 Score from Best Model: 99.90%
RMSE from best model: 887.25


In [5]:
pred_vs_actual = pd.DataFrame({
    'Actual': y_test.values,
    'Predicted': pred
}, index=y_test.index)



pred_vs_actual.sort_index(inplace=True)
print('Predicted Vs Actual Closing Prices\n')
print(pred_vs_actual)

print(pred_vs_actual.head(20))
print(pred_vs_actual.tail(20))

Predicted Vs Actual Closing Prices

             Actual      Predicted
0        457.334015     419.004662
2        394.795990     419.004662
7        423.204987     428.036390
18       320.510010     332.656341
22       365.026001     349.885466
...             ...            ...
3919  110257.234375  109294.498750
3922  106090.968750  106021.669688
3949  115987.203125  112215.142344
3953  119849.703125  118184.629531
3958  117939.976562  118066.981172

[793 rows x 2 columns]
         Actual   Predicted
0    457.334015  419.004662
2    394.795990  419.004662
7    423.204987  428.036390
18   320.510010  332.656341
22   365.026001  349.885466
23   361.562012  364.198610
26   390.414001  382.148637
28   394.773010  393.817467
39   354.704010  349.385830
44   338.321014  339.604424
46   325.891998  328.085191
48   330.492004  331.530179
55   367.695007  381.741788
61   387.407990  377.642846
76   381.315002  376.559184
79   376.854004  371.054784
84   346.364990  352.835428
96   331.885986 