In [1]:
from sklearn.model_selection import train_test_split,cross_val_score,GridSearchCV
from sklearn.linear_model import Ridge,Lasso,LinearRegression
from sklearn.ensemble import GradientBoostingRegressor,RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import r2_score,root_mean_squared_error,mean_absolute_percentage_error
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import yfinance as yf
import pandas as pd


# Download the BTC-USD closing prices and shape them into a flat frame with a
# 'Date' column and one price column.
# auto_adjust is passed explicitly so the result does not depend on the
# library default (NOTE(review): True is assumed to match the default of the
# installed yfinance version - confirm).
bitcoin = (
    yf.download("BTC-USD", start="2010-05-17", end="2025-07-23", auto_adjust=True)['Close']
    .dropna()
    .reset_index()
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    # Sorting by index right after reset_index() would be a no-op; order the
    # rows chronologically by date and renumber them 0..n-1 instead.
    .sort_values('Date', ignore_index=True)
)

Note: You have installed the 'manylinux2014' variant of XGBoost. Certain features such as GPU algorithms or federated learning are not available. To use these features, please upgrade to a recent Linux distro with glibc 2.28+, and install the 'manylinux_2_28' variant.
  bitcoin = yf.download("BTC-USD",start="2010-05-17",end="2025-07-23")['Close']
[*********************100%***********************]  1 of 1 completed


In [2]:
# Target is the BTC-USD price column; the features are every remaining column
# of `bitcoin`.
X = bitcoin.drop("BTC-USD", axis=1)
y = bitcoin['BTC-USD']

# random_state pins the shuffle so the split (and every metric computed from
# it) is identical on each Restart & Run All.
# NOTE(review): rows are shuffled before splitting, so test rows are
# interleaved in time with training rows. If the goal is forecasting, a
# chronological hold-out (shuffle=False) would be a fairer evaluation - confirm
# intent before changing, since it alters every score below.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=.2, random_state=42
)

# Fit the scaler on the training rows only, then apply the same transform to
# the test rows.
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)



def evaluate(y_test, pred, model_name, r2, rmse, cv_scores):
    """Bundle one model's metrics into a single row for the results table.

    Parameters
    ----------
    y_test, pred
        Accepted for call-site compatibility; not used by this function.
    model_name
        Label stored under the "Model" key.
    r2, rmse
        Pre-computed scores, stored unchanged under "R2" and "RMSE".
    cv_scores
        Object exposing ``.mean()`` (e.g. a NumPy array of per-fold scores);
        its mean is stored under "Cross-val Scores".

    Returns
    -------
    dict
        Keys "Model", "R2", "RMSE", "Cross-val Scores", in that order.
    """
    column_names = ("Model", "R2", "RMSE", "Cross-val Scores")
    column_values = (model_name, r2, rmse, cv_scores.mean())
    return dict(zip(column_names, column_values))


# Accumulates one result row (a dict) per evaluated model.
model_dict = []

# Candidate regressors keyed by the label shown in the results table.
# The estimators that draw random numbers during fitting get a fixed
# random_state so the comparison table is reproducible between runs.
models = {
    "LinearRegression": LinearRegression(),
    "lasso": Lasso(),
    "ridge": Ridge(),
    "GradientBoostingRegressor": GradientBoostingRegressor(random_state=42),
    "xBGboostRegression": XGBRegressor(random_state=42),
    "randomforest": RandomForestRegressor(random_state=42),
}


# Fit every candidate on the scaled training data, score it on the hold-out
# set and with 10-fold cross-validation, and collect one row per model.
for model_name, model in models.items():
    model.fit(X_train_scaled, y_train)
    pred = model.predict(X_test_scaled)

    # Scored with "neg_mean_squared_error": values are negative and in squared
    # price units, unlike the RMSE column.
    cv_scores = cross_val_score(
        model,
        X_train_scaled,
        y_train,
        cv=10,
        scoring="neg_mean_squared_error",
    )

    model_dict.append(
        evaluate(
            y_test,
            pred,
            model_name,
            r2_score(y_test, pred),
            root_mean_squared_error(y_test, pred),
            cv_scores,
        )
    )


# One row per model, in the order the models were evaluated.
df_results = pd.DataFrame(model_dict)
print(df_results.head(10))



                       Model        R2          RMSE  Cross-val Scores
0           LinearRegression  0.674351  16509.564369     -2.425225e+08
1                      lasso  0.674342  16509.796253     -2.425224e+08
2                      ridge  0.674117  16515.485996     -2.425287e+08
3  GradientBoostingRegressor  0.996811   1633.692428     -2.589328e+06
4         xBGboostRegression  0.996070   1813.760020     -2.984602e+06
5               randomforest  0.998814    996.403730     -7.832786e+05


In [3]:
# Take the predictions explicitly from the fitted random forest instead of
# relying on whatever `pred` was left over from the last iteration of the
# model-comparison loop.
pred = models["randomforest"].predict(X_test_scaled)

# Side-by-side table of actual and predicted prices, keyed by the original row
# index and sorted by that index.
pred_vs_actual = pd.DataFrame({
    'Actual': y_test.values,
    'Predicted': pred
}, index=y_test.index).sort_index()

# Root Mean-Squared Error
# Fixed-point format: a bare ".4" precision switches to scientific notation
# once the value reaches five digits.
rmse = root_mean_squared_error(y_test, pred)
print(f'Root Mean-Squared Error: {rmse:.2f}')

# R2 Score
r2 = r2_score(y_test, pred)
print(f'R2 Score: {r2*100:.2f}%')

# Mean absolute percentage error (printed as a fraction, not multiplied by 100)
mape = mean_absolute_percentage_error(y_test, pred)
print(f'Mean Absolute Percentage Error: {mape:.4f}')

print('Predicted Vs Actual Prices\n')

print(pred_vs_actual.head(20))
print(pred_vs_actual.tail(20))

Root Mean-Squared Error: 996.4
R2 Score: 99.88%
Mean Absolute Percentage Error: 0.0202
Predicted Vs Actual Prices

        Actual   Predicted
12  375.467010  383.783509
18  320.510010  333.046451
23  361.562012  365.164543
25  378.549011  382.455314
30  383.757996  385.628077
33  382.845001  385.860670
44  338.321014  332.428354
52  345.488007  349.503608
56  423.561005  378.831343
59  376.132996  400.105282
60  387.881989  391.434150
62  375.197998  380.090303
65  350.847992  358.690839
81  375.095001  372.853973
82  361.908997  358.663207
85  350.506012  348.260291
90  327.062012  342.664448
91  319.776001  320.660446
95  320.842987  329.688511
98  322.533997  330.395452
             Actual      Predicted
3857   82573.953125   77840.315547
3866   84450.804688   84896.743984
3871   93699.109375   92554.858750
3873   94720.500000   93891.316797
3876   94978.750000   94089.882734
3886  103241.460938   98642.703125
3892  103539.414062  103758.508125
3894  103489.289062  103680.696641
390

In [4]:
# Refit a random forest on the scaled training data and score it on the
# hold-out set. random_state is fixed so the reported scores are the same on
# every run.
Best_Model = RandomForestRegressor(random_state=42)
Best_Model.fit(X_train_scaled, y_train)
pred = Best_Model.predict(X_test_scaled)
print(f'R2 Score from Best Model: {r2_score(y_test,pred)*100:.2f}%')
print(f'RMSE from best model: {root_mean_squared_error(y_test,pred):.2f}')

R2 Score from Best Model: 99.88%
RMSE from best model: 994.36


In [5]:
# Pair each hold-out price with the current `pred` values, keyed by the
# original row index and ordered by that index.
comparison_columns = {'Actual': y_test.values, 'Predicted': pred}
pred_vs_actual = pd.DataFrame(comparison_columns, index=y_test.index).sort_index()

print('Predicted Vs Actual Closing Prices\n')
print(pred_vs_actual)

# First and last 20 rows of the comparison table.
for preview in (pred_vs_actual.head(20), pred_vs_actual.tail(20)):
    print(preview)

Predicted Vs Actual Closing Prices

             Actual      Predicted
12       375.467010     385.818549
18       320.510010     331.839501
23       361.562012     363.818813
25       378.549011     384.169594
30       383.757996     386.385576
...             ...            ...
3935  106960.000000  107227.341172
3946  108299.851562  109363.163047
3947  108950.273438  112013.057188
3954  117777.187500  119006.191094
3958  117939.976562  117918.295156

[793 rows x 2 columns]
        Actual   Predicted
12  375.467010  385.818549
18  320.510010  331.839501
23  361.562012  363.818813
25  378.549011  384.169594
30  383.757996  386.385576
33  382.845001  386.585391
44  338.321014  332.295724
52  345.488007  348.697588
56  423.561005  380.648732
59  376.132996  401.649461
60  387.881989  393.736360
62  375.197998  378.135454
65  350.847992  359.041870
81  375.095001  373.665523
82  361.908997  358.713087
85  350.506012  348.804312
90  327.062012  342.562167
91  319.776001  320.902516
95  320