# Predicting Bitcoin values with Linear Regression, Decision Tree Regression, and Random Forest Regression Machine Learning

## Import necessary libraries

In [2]:
import pandas as pd  
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_percentage_error
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

## Loading necessary models 

In [3]:
models = []
models.append(LinearRegression())
models.append(DecisionTreeRegressor())
models.append(RandomForestRegressor())

## Loading and defining our data

In [4]:
btc_data = pd.read_csv("data/BTC-USD.csv")
X = btc_data[["Open", "High", "Low", "Adj Close", "Volume"]] 
y = btc_data["Close"]

print(btc_data)

           Date          Open          High           Low         Close  \
0    2021-01-27  32564.029297  32564.029297  29367.138672  30432.546875   
1    2021-01-28  30441.041016  33858.312500  30023.207031  33466.097656   
2    2021-01-29  34318.671875  38406.261719  32064.814453  34316.386719   
3    2021-01-30  34295.933594  34834.707031  32940.187500  34269.523438   
4    2021-01-31  34270.878906  34288.332031  32270.175781  33114.359375   
..          ...           ...           ...           ...           ...   
361  2022-01-23  35047.359375  36433.312500  34784.968750  36276.804688   
362  2022-01-24  36275.734375  37247.519531  33184.058594  36654.328125   
363  2022-01-25  36654.804688  37444.570313  35779.429688  36954.003906   
364  2022-01-26  36950.515625  38825.410156  36374.906250  36852.121094   
365  2022-01-27  36781.855469  37052.675781  35697.730469  36006.652344   

        Adj Close        Volume  
0    30432.546875   62576762015  
1    33466.097656   76517157706

## Splitting data into test and train 

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)

## Running each algorithm to get our results

In [6]:
for m in models:
    clf = m 
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    r2 = r2_score(y_test, y_pred)
    res = pd.crosstab(y_test, y_pred, rownames=['Actual'], colnames=['Predicted'])
    mepr = mean_absolute_percentage_error(y_test, y_pred)

    print("-"*80)
    print("Model: " + str(m))
    print(y_pred)
    print("r2: " + str(r2))
    print("MEPR: " + str(mepr) + "%")
    print("confusion matrix: ")
    print(res)
    print("-"*80)

--------------------------------------------------------------------------------
Model: LinearRegression()
[61393.61718804 49705.33203095 49199.87109392 38053.50390602
 47793.32031303 46391.42187501 31421.53906305 59057.87890596
 56099.51953094 63326.98828105 46707.01562505 35551.95703102
 40869.55468802 39208.76562502 38402.22265597 37472.08984396
 58730.47656304 58192.35937495 65992.83593801 57828.050781
 57539.94531298 48905.49218801 34649.64453102 36852.12109403
 32186.27734401 42735.85546904 47672.12109403 47243.30468803
 57603.89062496 37304.69140591 56041.05859401 33897.04687501
 58758.55468798 65466.83984405 39995.90625001 54021.75390596
 33155.84765605 34235.19531304 54738.94531297 34434.33593802
 47096.94531303 33798.01171904 34292.44531305 37337.53515598
 47105.51562493 33723.02734399 46339.76171824 61318.95703103
 56048.93749992 59697.19531303 59384.31249996 53906.08984397
 46063.26953104 34269.52343795 32110.69335904 57248.45703104
 66971.828125   47054.98437503 57424.0078