In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
# Suppress every warning category for the rest of the session.
# NOTE(review): this also hides deprecation and numerical warnings — consider a narrower filter.
warnings.filterwarnings('ignore')
# Use seaborn's 'darkgrid' theme for any seaborn/matplotlib figures.
sns.set_style('darkgrid')
import cufflinks as cf
from plotly.offline import init_notebook_mode
# Initialise plotly's offline notebook mode and put cufflinks in offline mode;
# presumably this is what backs the `.iplot()` calls further down — TODO confirm.
init_notebook_mode(connected=True)
cf.go_offline()
%matplotlib inline

In [2]:
# Load the raw price table from a CSV in the working directory; the file name
# indicates Bitstamp USD 1-minute data covering 2012-01-01 to 2018-11-11.
data= pd.read_csv('bitstampUSD_1-min_data_2012-01-01_to_2018-11-11.csv')

In [3]:
data.head()

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
0,1325317920,4.39,4.39,4.39,4.39,0.455581,2.0,4.39
1,1325317980,,,,,,,
2,1325318040,,,,,,,
3,1325318100,,,,,,,
4,1325318160,,,,,,,


In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3603136 entries, 0 to 3603135
Data columns (total 8 columns):
Timestamp            int64
Open                 float64
High                 float64
Low                  float64
Close                float64
Volume_(BTC)         float64
Volume_(Currency)    float64
Weighted_Price       float64
dtypes: float64(7), int64(1)
memory usage: 219.9 MB


In [5]:
data.describe()

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
count,3603136.0,2388829.0,2388829.0,2388829.0,2388829.0,2388829.0,2388829.0,2388829.0
mean,1433629000.0,2571.487,2573.625,2569.058,2571.462,10.92451,23693.49,2571.32
std,62573960.0,3629.093,3632.869,3624.712,3629.059,35.60609,89219.83,3628.797
min,1325318000.0,3.8,3.8,1.5,1.5,0.0,0.0,3.8
25%,1379365000.0,323.52,323.8,323.29,323.51,0.4509416,226.7963,323.5
50%,1433800000.0,622.79,623.03,622.33,622.82,2.092143,1595.989,622.7381
75%,1487847000.0,4200.93,4204.93,4198.48,4200.88,8.409572,11952.15,4201.0
max,1541894000.0,19665.76,19666.0,19649.96,19665.75,5853.852,5483271.0,19663.3


In [6]:
# Remove every row that contains a missing value, modifying `data` in place.
data.dropna(inplace=True)

In [7]:
# Renumber the rows from 0 after the drop. Because `drop=True` is not passed,
# the previous index is kept as a new 'index' column, which the following
# cell removes again.
data.reset_index(inplace=True)

In [8]:
# Discard the leftover 'index' column created by the preceding reset_index.
# errors='ignore' keeps the cell re-runnable: if the column has already been
# removed, this is a no-op instead of a KeyError.
data = data.drop(columns='index', errors='ignore')

In [9]:
data.head()

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
0,1325317920,4.39,4.39,4.39,4.39,0.455581,2.0,4.39
1,1325346600,4.39,4.39,4.39,4.39,48.0,210.72,4.39
2,1325350740,4.5,4.57,4.5,4.57,37.862297,171.380338,4.526411
3,1325350800,4.58,4.58,4.58,4.58,9.0,41.22,4.58
4,1325391360,4.58,4.58,4.58,4.58,1.502,6.87916,4.58


In [10]:
# Convert the epoch-second timestamps to calendar days so the minute bars can
# be grouped per day.
# - `.dt.normalize()` truncates to midnight while keeping the datetime64 dtype,
#   instead of producing an object column of Python `date` values.
# - The dtype guard makes the cell safe to re-run: once the column has been
#   converted it is left untouched rather than being fed back into an
#   epoch-seconds conversion.
if not pd.api.types.is_datetime64_any_dtype(data['Timestamp']):
    data['Timestamp'] = pd.to_datetime(data['Timestamp'], unit='s').dt.normalize()

In [11]:
data.tail()

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
2388824,2018-11-10,6348.54,6348.54,6348.54,6348.54,0.007997,50.769274,6348.54
2388825,2018-11-10,6348.54,6348.54,6348.54,6348.54,0.007997,50.769274,6348.54
2388826,2018-11-10,6348.54,6349.01,6348.54,6349.01,0.011729,74.466671,6348.93609
2388827,2018-11-10,6349.01,6349.01,6349.01,6349.01,0.068436,434.503642,6349.01
2388828,2018-11-10,6349.17,6349.32,6349.17,6349.32,0.038261,242.92741,6349.214148


In [12]:
# Group the rows by the 'Timestamp' column (converted to calendar days above),
# so that each group holds all the minute bars of one day.
grouped = data.groupby('Timestamp')

In [13]:
# Daily average of the weighted price: pick the column first, then aggregate.
daily_weighted = grouped['Weighted_Price']
real_price = daily_weighted.mean()

In [14]:
real_price.shape

(2504,)

In [15]:
real_price.iplot()

## DATA PREPROCESSING

In [16]:
# Hold out the last 365 daily prices as the test period; everything before
# the split position is used for training.
holdout_days = 365
split_pos = len(real_price) - holdout_days
dataset_train = real_price.iloc[:split_pos]
dataset_test = real_price.iloc[split_pos:]

In [17]:
# Copy the training prices into a NumPy array shaped as a single column
# (n_samples, 1).
dataset_train = np.array(dataset_train).reshape(-1, 1)

In [18]:
from sklearn.preprocessing import MinMaxScaler

In [19]:
# Scaler configured to map values linearly into the range [0, 1].
ms = MinMaxScaler(feature_range=(0,1))

In [20]:
# Fit the scaler on the training prices only and scale them. The fitted `ms`
# is reused later to scale the test inputs and to map predictions back to
# price units.
dataset_train_scaled = ms.fit_transform(dataset_train)

In [21]:
dataset_train_scaled.shape

(2139, 1)

## CREATE A DATA STRUCTURE WITH 50 TIMESTEPS AND 1 OUTPUT

In [22]:
from tqdm import tqdm

In [23]:
# Accumulators for the sliding-window samples: X_train collects the input
# windows, y_train the value that follows each window.
X_train = []
y_train = []

In [24]:
# Start from empty lists inside this cell so it is idempotent: re-running it
# neither appends duplicate samples nor fails once X_train / y_train have been
# converted to NumPy arrays by a later cell.
X_train, y_train = [], []
for i in tqdm(range(50,dataset_train_scaled.shape[0])):
    # Input: the 50 scaled prices preceding position i; target: the price at i.
    X_train.append(dataset_train_scaled[i-50:i,0])
    y_train.append(dataset_train_scaled[i,0])

100%|██████████████████████████████████████████████████████████████████████████| 2089/2089 [00:00<00:00, 113910.75it/s]


In [25]:
# Freeze the accumulated windows and targets into NumPy arrays.
X_train = np.array(X_train)
y_train = np.array(y_train)

In [26]:
def split_sequence(seq, n_steps_in, n_steps_out):
    """
    Cut a univariate sequence into overlapping (input, output) windows.

    Each sample pairs `n_steps_in` consecutive items of `seq` with the
    `n_steps_out` items that immediately follow them. Windows advance one
    position at a time, and only windows that fit entirely inside `seq`
    are produced.

    Returns a tuple `(X, y)` of NumPy arrays built from the input slices
    and the output slices respectively.
    """
    total = len(seq)
    span = n_steps_in + n_steps_out
    # A start offset is usable when start + span still fits inside seq.
    starts = range(min(total, total - span + 1))

    windows_in = [seq[s:s + n_steps_in] for s in starts]
    windows_out = [seq[s + n_steps_in:s + span] for s in starts]

    return np.array(windows_in), np.array(windows_out)

In [27]:
#X_train,y_train = split_sequence(dataset_train_scaled,50,10)

### RESHAPE THE INPUT TO 3-D IN CASE WE WANT TO ADD MORE DIMENSIONS

In [28]:
# Add a trailing axis of size 1: (samples, timesteps) -> (samples, timesteps, 1),
# matching the input_shape=(timesteps, 1) declared on the first LSTM layer.
X_train = X_train.reshape(X_train.shape[0],X_train.shape[1],1)

In [29]:
X_train.shape

(2089, 50, 1)

In [30]:
y_train.shape

(2089,)

In [31]:
from tensorflow import keras
from tensorflow.keras.layers import Dense,LSTM,Dropout,Bidirectional

In [32]:
# Stacked LSTM regressor: four 128-unit LSTM layers, each followed by 20%
# dropout, and a single-unit dense output.
model = keras.Sequential()

# The first recurrent layer declares the input shape (timesteps, 1 feature).
model.add(LSTM(units=128, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(Dropout(0.2))

# Two more sequence-returning layers, then a final layer that emits only its
# last state for the dense head.
for emit_sequence in (True, True, False):
    model.add(LSTM(units=128, return_sequences=emit_sequence))
    model.add(Dropout(0.2))

model.add(Dense(1))  # 10 for 10 days prediction

In [33]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 50, 128)           66560     
_________________________________________________________________
dropout (Dropout)            (None, 50, 128)           0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 50, 128)           131584    
_________________________________________________________________
dropout_1 (Dropout)          (None, 50, 128)           0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 50, 128)           131584    
_________________________________________________________________
dropout_2 (Dropout)          (None, 50, 128)           0         
_________________________________________________________________
lstm_3 (LSTM)                (None, 128)               1

In [34]:
# Train against mean squared error with Adam; also report MAE and MSE.
model.compile(
    loss='mean_squared_error',
    optimizer='adam',
    metrics=['mae', 'mse'],
)

In [35]:
# Fit on the training windows for 20 epochs with progress output disabled.
model.fit(x=X_train, y=y_train, epochs=20, verbose=0)

<tensorflow.python.keras.callbacks.History at 0x224de930848>

In [36]:
# The time step is 50, so the slice starts 50 days before the first test day:
# that way the first test day also has a full 50-day window behind it.
first_input_pos = len(real_price) - len(dataset_test) - 50
inputs = real_price[first_input_pos:]

In [37]:
"""nxt =model.predict(ms.transform(dataset_train[-50:]).reshape(1,50,1)).tolist()[0]
ms.inverse_transform(np.array(nxt).reshape(-1,1)).tolist()"""

'nxt =model.predict(ms.transform(dataset_train[-50:]).reshape(1,50,1)).tolist()[0]\nms.inverse_transform(np.array(nxt).reshape(-1,1)).tolist()'

In [38]:
'''real_life = dataset_train[len(dataset_train)-50:]
len(real_life)
real_life = ms.transform(np.array(real_life).reshape(-1,1))
real_test = []
for i in range(50,real_life.shape[0]+1):
    real_test.append(real_life[i-50:i,0])
real_test =np.array(real_test)
real_test = real_test.reshape(real_test.shape[0],real_test.shape[1],1)
ms.inverse_transform(model.predict(real_test))
'''

'real_life = dataset_train[len(dataset_train)-50:]\nlen(real_life)\nreal_life = ms.transform(np.array(real_life).reshape(-1,1))\nreal_test = []\nfor i in range(50,real_life.shape[0]+1):\n    real_test.append(real_life[i-50:i,0])\nreal_test =np.array(real_test)\nreal_test = real_test.reshape(real_test.shape[0],real_test.shape[1],1)\nms.inverse_transform(model.predict(real_test))\n'

In [39]:
# Take the underlying values and shape them as a single column (n, 1).
# NOTE(review): `inputs` is rebound to the reshaped result, so this cell reads
# `.values` from whatever `inputs` currently holds — re-running it after the
# rebind is not expected to work.
inputs = inputs.values.reshape(-1,1)

In [40]:
# Scale the test-period inputs with the scaler that was fitted on the training
# prices. The unscaled values are rebuilt from `real_price` (same slice: the
# test period plus the 50 preceding days) instead of being read from `inputs`
# itself, so re-running this cell can never apply the scaling twice.
inputs = ms.transform(
    real_price[len(real_price)-len(dataset_test)-50:].values.reshape(-1,1)
)

In [41]:
# Accumulator for the test input windows.
X_test =[]

In [42]:
# One 50-step window of scaled prices per position i, covering the 50 values
# that precede i. Building the list in a single assignment keeps the cell
# idempotent: a re-run neither appends duplicate windows nor fails once
# X_test has been converted to a NumPy array by a later cell.
X_test = [inputs[i-50:i,0] for i in range(50,inputs.shape[0])]

In [43]:
# Convert the list of windows into a NumPy array.
X_test = np.array(X_test)

In [44]:
# Add a trailing axis of size 1 so the test windows have the same
# (samples, timesteps, 1) layout as X_train.
X_test = X_test.reshape(X_test.shape[0],X_test.shape[1],1)

In [45]:
# Run the trained model on the test windows; the outputs are still in the
# scaler's units until they are passed through `ms.inverse_transform`.
predicted_data = model.predict(X_test)

In [46]:
# Map the predictions back to price units. The scaled predictions are taken
# fresh from the model rather than from `predicted_data` itself, so re-running
# this cell cannot inverse-transform values that are already in price units.
predicted_data = ms.inverse_transform(model.predict(X_test))

In [47]:
# Actual test-period prices as a single-column array (n, 1).
y_test = np.array(dataset_test).reshape(-1, 1)

In [48]:
#plt.figure(figsize=(16,8))
#sns.lineplot(x=dataset_test.index,y=pd.DataFrame(y_test)[0])
#sns.lineplot(x=dataset_test.index,y=pd.DataFrame(predicted_data)[0])
#plt.tight_layout()
# Put actual and predicted prices side by side, indexed by the test dates,
# ready for plotting.
myPLOT = pd.DataFrame(
    {
        'original': y_test[:, 0],
        'predicted': predicted_data[:, 0],
    },
    index=dataset_test.index,
)

In [49]:
myPLOT.iplot()