In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, names in os.walk('/kaggle/input/'):
    full_paths = [os.path.join(root, name) for name in names]
    for full_path in full_paths:
        print(full_path)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the CSV named in path_to_dataset into an in-memory DataFrame.
# Per its file name, the file holds Bitstamp USD 1-minute data from 2012-01-01 to 2020-04-22.
path_to_dataset = '/kaggle/input/bitcoin-historical-data/bitstampUSD_1-min_data_2012-01-01_to_2020-04-22.csv'
bitcoin_dataset = pd.read_csv(path_to_dataset)

In [None]:
# Display the raw DataFrame (a bare last expression in a cell is rendered as its output).
bitcoin_dataset

In [None]:
# Count the missing (null/NaN) entries in each column.
bitcoin_dataset.isna().sum()

In [None]:
# Percentage of missing values per column: the mean of the boolean null mask is the
# null fraction, rounded to 4 decimals and then scaled to percent.
bitcoin_dataset.isna().mean().round(4).mul(100)

In [None]:
# The per-column null percentages computed above were reported as roughly 28% by the
# original author (figure from their run; re-check against the output above).
# Rows with missing values cannot be used for fitting, so drop every row that contains
# at least one null. This mutates bitcoin_dataset in place.
bitcoin_dataset.dropna(inplace=True)

In [None]:
# Inspect the dataset again now that all rows containing nulls have been dropped.
bitcoin_dataset

In [None]:
#for more visualization, we need to import seaborn and matplotlib
import seaborn as sbn
import matplotlib.pyplot as plt

In [None]:
# Pairwise correlation between the columns shows which fields move together;
# draw it as an annotated heatmap on an explicitly created 15x15 inch figure.
corr_matrix = bitcoin_dataset.corr()
corr_fig, corr_ax = plt.subplots(figsize=(15, 15))
sbn.heatmap(corr_matrix, annot=True, cmap='rainbow', linewidths=1, linecolor='black', ax=corr_ax)

In [None]:
# Remove the parentheses from the two volume column names so they are plain
# identifiers (special characters in feature names can be awkward for downstream tools).
column_renames = {
    "Volume_(BTC)": "Volume_BTC",
    "Volume_(Currency)": "Volume_Currency",
}
bitcoin_dataset.rename(columns=column_renames, inplace=True)

In [None]:
# Interpret 'Timestamp' as seconds since the Unix epoch and store the resulting
# human-readable datetimes in a new 'New_Dates' column, then display the frame.
epoch_seconds = bitcoin_dataset['Timestamp']
bitcoin_dataset['New_Dates'] = pd.to_datetime(epoch_seconds, unit='s')

bitcoin_dataset

In [None]:
# Column the model is trained to predict.
output_label = 'Close'

# Columns used as model inputs (order matters: it is the column order the model is fitted with).
required_features = [
    'Open',
    'High',
    'Low',
    'Volume_BTC',
    'Volume_Currency',
    'Weighted_Price',
]

In [None]:
#we need to split our data as:
#         70% data will be used as training data
#         30% data will be used as testing data
# To achieve this we will use train_test_split from sklearn

from sklearn.model_selection import train_test_split

In [None]:
# Hold out 30% of the rows for testing and train on the remaining 70%.
# train_test_split shuffles the rows by default, so a fixed random_state is passed to
# make the split, and every score computed from it, reproducible across
# "Restart & Run All".
# NOTE(review): the rows look like time-ordered 1-minute bars; a shuffled split mixes
# past and future rows. Consider a chronological split (shuffle=False) if the goal is
# forecasting. Confirm the intent before changing.
X_train, X_test, y_train, y_test = train_test_split(
    bitcoin_dataset[required_features],
    bitcoin_dataset[output_label],
    test_size=0.3,
    random_state=42,
)

In [None]:
#we will first try fitting our data using LinearRegression

from sklearn.linear_model import LinearRegression

In [None]:
# Create a linear regression model and fit it on the training split.
# fit() is the last expression, so the fitted estimator is shown as the cell output.
regression_model = LinearRegression()
regression_model.fit(X_train, y_train)

In [None]:
# The model is fitted; evaluate it on the held-out test split.
# For a regressor, score() returns the coefficient of determination (R^2), not a
# classification-style accuracy.
regression_model.score(X_test, y_test)

In [None]:
# The original run reported an R^2 of about 0.99 on the test split (R^2 measures
# goodness of fit for regression; it is not an accuracy percentage).
# Next, sanity-check the model on one concrete row.

# Pick a single row to predict by hand.

# Take the row at position 55 of the full dataset (the frame before splitting). Because
# the split was drawn from this same frame, the row may be in either the training or the
# test portion.
sample_data = bitcoin_dataset.iloc[55]
sample_data

In [None]:
# Build the one-row input as a DataFrame whose columns are required_features, so each
# value is explicitly tied to its feature and the input carries the same column names
# the model was fitted with. A bare nested list has no column names, which recent
# scikit-learn versions warn about when the model was fitted on a DataFrame.
# The values are the author's hand-copied features for the sample row, presumably the
# row displayed above; verify against that output.
sample = pd.DataFrame(
    [[6.4, 6.4, 6.4, 9.11, 58, 6.4]],
    columns=required_features,
)
regression_model.predict(sample)

In [None]:
#from above, it's clear that the original price was 6.4 and our model predicted about 6.3902, which is very close!!

In [None]:
# NOTE(review): shift(30) moves every value down by 30 rows, not 30 days. With the
# 1-minute data implied by the dataset file name, that is about 30 minutes, or more
# where rows were dropped. The last 30 rows of the shifted frame therefore hold the
# values of rows -60..-31 of the dataset (already observed history), relabelled with
# the index of the final 30 rows. No genuinely future inputs are produced here.
future_set = bitcoin_dataset.shift(30).iloc[-30:]

In [None]:
# Run the fitted model on the feature columns of future_set; the predicted values are
# shown as the cell output.
regression_model.predict(future_set[required_features])

In [None]:
#similarly, we can compute the same R^2 (coefficient of determination) score explicitly with r2_score
from sklearn.metrics import r2_score

In [None]:
# Predict the test split and report the coefficient of determination (R^2).
# r2_score expects (y_true, y_pred) in that order. R^2 is not symmetric, so swapping
# the arguments measures how well the true values "explain" the predictions and gives
# a different number. With the correct order the value matches
# regression_model.score(X_test, y_test).
# The printed label says "Accuracy", but the number is an R^2 score.
predictions = regression_model.predict(X_test)
print('Accuracy of model : ', r2_score(y_test, predictions))