In [11]:
from sklearn.model_selection import train_test_split,cross_val_score,GridSearchCV
from sklearn.linear_model import Ridge,Lasso,LinearRegression
from sklearn.ensemble import GradientBoostingRegressor,RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import r2_score,root_mean_squared_error
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import yfinance as yf
import pandas as pd


bitcoin = yf.download("BTC-USD",start="2010-05-17",end="2025-07-23")['Close']
bitcoin = bitcoin.shift(1)
bitcoin.dropna(inplace=True)
bitcoin = bitcoin.reset_index()
bitcoin['Date'] = pd.to_datetime(bitcoin['Date'])
bitcoin = bitcoin.sort_index()

[*********************100%***********************]  1 of 1 completed


In [14]:
X = bitcoin.drop("BTC-USD",axis=1)
y = bitcoin['BTC-USD']

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=.2)

scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)



def evaluate(y_test,pred,model_name,r2,rmse,cv_scores):


    result = {
        "Model": model_name,
        "R2": r2,
        "RMSE": rmse,
        "Cross-val Scores":cv_scores.mean()
    }

    return result


model_dict = []
models = {
    "LinearRegression":LinearRegression(),
    "lasso":Lasso(),
    "ridge":Ridge(),
    "GradientBoostingRegressor":GradientBoostingRegressor(),
    "xBGboostRegression":XGBRegressor(),
    "randomforest":RandomForestRegressor(),
}


for model_name,model in models.items():
    model.fit(X_train_scaled,y_train)
    pred = model.predict(X_test_scaled)
    mse = root_mean_squared_error(y_test,pred)
    r2 = r2_score(y_test,pred)
    cv_scores = cross_val_score(model, X_train_scaled,y_train,cv=10,scoring="neg_mean_squared_error")
    model_results = evaluate(y_test, pred, model_name,r2,mse,cv_scores)
    model_dict.append(model_results)



df_results = pd.DataFrame(model_dict)
print(df_results.head(10))



                       Model        R2          RMSE  Cross-val Scores
0           LinearRegression  0.688263  15356.978768     -2.508100e+08
1                      lasso  0.688258  15357.097959     -2.508100e+08
2                      ridge  0.688134  15360.139568     -2.508186e+08
3  GradientBoostingRegressor  0.996803   1555.306491     -2.623445e+06
4         xBGboostRegression  0.995557   1833.409684     -3.171507e+06
5               randomforest  0.998968    883.403382     -8.367453e+05


In [15]:
pred_vs_actual = pd.DataFrame({
    'Actual': y_test.values,
    'Predicted': pred
}, index=y_test.index)



pred_vs_actual.sort_index(inplace=True)
print('Predicted Vs Actual Closing Prices\n')
print(pred_vs_actual)

print(pred_vs_actual.head(20))
print(pred_vs_actual.tail(20))

Predicted Vs Actual Closing Prices

             Actual      Predicted
5        402.152008     404.071358
6        435.790985     412.027608
8        411.574005     411.782089
14       383.614990     381.481912
15       375.071991     370.390101
...             ...            ...
3936  107088.429688  107128.302344
3945  109232.070312  108322.181719
3949  115987.203125  115492.047812
3952  119116.117188  117840.190781
3960  117439.539062  117630.165547

[793 rows x 2 columns]
         Actual   Predicted
5    402.152008  404.071358
6    435.790985  412.027608
8    411.574005  411.782089
14   383.614990  381.481912
15   375.071991  370.390101
16   359.511993  330.383411
26   390.414001  394.354820
38   347.270996  355.635856
41   357.618011  348.511570
47   327.553986  328.072970
49   339.485992  336.780586
59   376.132996  398.207542
61   387.407990  386.901340
62   375.197998  375.309503
67   367.572998  359.497448
88   351.631989  346.681335
107  315.032013  296.417578
109  264.195007 

In [19]:
Best_Model = RandomForestRegressor()
Best_Model.fit(X_train_scaled,y_train)
pred = Best_Model.predict(X_test_scaled)
print(f'R2 Score from Best Model: {r2_score(y_test,pred)*100:.2f}%')
print(f'RMSE from best model: {root_mean_squared_error(y_test,pred):.2f}')

R2 Score from Best Model: 99.90%
RMSE from best model: 879.48


In [20]:
pred_vs_actual = pd.DataFrame({
    'Actual': y_test.values,
    'Predicted': pred
}, index=y_test.index)



pred_vs_actual.sort_index(inplace=True)
print('Predicted Vs Actual Closing Prices\n')
print(pred_vs_actual)

print(pred_vs_actual.head(20))
print(pred_vs_actual.tail(20))

Predicted Vs Actual Closing Prices

             Actual      Predicted
5        402.152008     405.497525
6        435.790985     417.150572
8        411.574005     416.855168
14       383.614990     380.556992
15       375.071991     369.760000
...             ...            ...
3936  107088.429688  107152.149063
3945  109232.070312  108420.463281
3949  115987.203125  115587.452813
3952  119116.117188  117891.949844
3960  117439.539062  117549.783672

[793 rows x 2 columns]
         Actual   Predicted
5    402.152008  405.497525
6    435.790985  417.150572
8    411.574005  416.855168
14   383.614990  380.556992
15   375.071991  369.760000
16   359.511993  330.357371
26   390.414001  392.379611
38   347.270996  355.535376
41   357.618011  348.511480
47   327.553986  328.290571
49   339.485992  336.140366
59   376.132996  397.465782
61   387.407990  385.797690
62   375.197998  376.743733
67   367.572998  359.569067
88   351.631989  346.263115
107  315.032013  291.817039
109  264.195007 