<a href="https://colab.research.google.com/github/thedavidemmanuel/BTC-Time-Series-Forecasting/blob/main/btc_forecasting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Make the user's Google Drive visible inside the Colab VM.
# NOTE(review): the CSV paths read later in this notebook live directly under
# /content/, not under this mount point -- confirm the mount is still needed.
from google.colab import drive

drive.mount(mountpoint='/content/drive')


Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
def load_minute_csv(csv_path):
    """Read one exchange's 1-minute CSV export into a DataFrame.

    Malformed rows are still dropped, but pandas reports each one
    (``on_bad_lines='warn'``) instead of discarding it silently, so a
    damaged or partially uploaded file is noticed at load time rather than
    several cells later.

    Parameters
    ----------
    csv_path : str
        Location of the CSV file.

    Returns
    -------
    pandas.DataFrame
        The parsed file, with whatever dtypes pandas infers per column.
    """
    return pd.read_csv(
        csv_path,
        on_bad_lines='warn',
        low_memory=False,  # infer each column's dtype from the whole file, not per chunk
    )


bitstamp_data = load_minute_csv('/content/bitstampUSD_1-min_data_2012-01-01_to_2020-04-22.csv')
coinbase_data = load_minute_csv('/content/coinbaseUSD_1-min_data_2014-12-01_to_2019-01-09.csv')

# Display the first few rows of the datasets, plus their shapes so an
# unexpectedly short (truncated) file stands out immediately.
print("Bitstamp Data Head:")
print(bitstamp_data.head())
print("Bitstamp shape:", bitstamp_data.shape)

print("\nCoinbase Data Head:")
print(coinbase_data.head())
print("Coinbase shape:", coinbase_data.shape)


Bitstamp Data Head:
    Timestamp  Open  High   Low  Close Volume_(BTC)  Volume_(Currency)  \
0  1325317920  4.39  4.39  4.39   4.39   0.45558087                2.0   
1  1325317980   NaN   NaN   NaN    NaN          NaN                NaN   
2  1325318040   NaN   NaN   NaN    NaN          NaN                NaN   
3  1325318100   NaN   NaN   NaN    NaN          NaN                NaN   
4  1325318160   NaN   NaN   NaN    NaN          NaN                NaN   

   Weighted_Price  
0            4.39  
1             NaN  
2             NaN  
3             NaN  
4             NaN  

Coinbase Data Head:
    Timestamp   Open   High    Low  Close  Volume_(BTC)  Volume_(Currency)  \
0  1417411980  300.0  300.0  300.0  300.0          0.01                3.0   
1  1417412040    NaN    NaN    NaN    NaN           NaN                NaN   
2  1417412100    NaN    NaN    NaN    NaN           NaN                NaN   
3  1417412160    NaN    NaN    NaN    NaN           NaN                NaN   
4  1

In [None]:
# Convert 'Volume_(BTC)' to numeric, coercing unparseable entries to NaN
bitstamp_data['Volume_(BTC)'] = pd.to_numeric(bitstamp_data['Volume_(BTC)'], errors='coerce')
coinbase_data['Volume_(BTC)'] = pd.to_numeric(coinbase_data['Volume_(BTC)'], errors='coerce')

# Check for missing values
print("\nBitstamp Missing Values:\n", bitstamp_data.isnull().sum())
print("\nCoinbase Missing Values:\n", coinbase_data.isnull().sum())

# Forward-fill gaps, then drop rows that are still incomplete (leading rows
# with nothing before them to copy from).
# DataFrame.ffill() replaces the deprecated fillna(method='ffill'); rebinding
# the names instead of using inplace=True keeps the cell safe to re-run.
# NOTE(review): the fill is applied to every column, so the volume columns also
# inherit the previous row's value -- confirm that is intended before using them.
bitstamp_data = bitstamp_data.ffill().dropna()
coinbase_data = coinbase_data.ffill().dropna()

# Verify that there are no more NaN values
print("\nBitstamp Missing Values After Cleaning:\n", bitstamp_data.isnull().sum())
print("\nCoinbase Missing Values After Cleaning:\n", coinbase_data.isnull().sum())



Bitstamp Missing Values:
 Timestamp                 0
Open                 261214
High                 261214
Low                  261214
Close                261214
Volume_(BTC)         261214
Volume_(Currency)    261214
Weighted_Price       261214
dtype: int64

Coinbase Missing Values:
 Timestamp                 0
Open                 109069
High                 109069
Low                  109069
Close                109069
Volume_(BTC)         109069
Volume_(Currency)    109069
Weighted_Price       109069
dtype: int64


  bitstamp_data.fillna(method='ffill', inplace=True)
  coinbase_data.fillna(method='ffill', inplace=True)



Bitstamp Missing Values After Cleaning:
 Timestamp            0
Open                 0
High                 0
Low                  0
Close                0
Volume_(BTC)         0
Volume_(Currency)    0
Weighted_Price       0
dtype: int64

Coinbase Missing Values After Cleaning:
 Timestamp            0
Open                 0
High                 0
Low                  0
Close                0
Volume_(BTC)         0
Volume_(Currency)    0
Weighted_Price       0
dtype: int64


In [None]:
def index_by_timestamp(frame):
    """Return `frame` indexed by its 'Timestamp' column parsed as datetimes.

    The 'Timestamp' values are interpreted as seconds since the Unix epoch
    (``unit='s'``). A frame that already has a DatetimeIndex is returned
    unchanged, so re-running the cell does not fail with
    ``KeyError: 'Timestamp'`` once the column has been moved into the index.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame with a 'Timestamp' column, or one already indexed by datetime.

    Returns
    -------
    pandas.DataFrame
        A frame whose index is a DatetimeIndex named 'Timestamp'.
    """
    if isinstance(frame.index, pd.DatetimeIndex):
        return frame
    parsed = pd.to_datetime(frame['Timestamp'], unit='s')
    return frame.assign(Timestamp=parsed).set_index('Timestamp')


# Convert and set index for both exchanges
bitstamp_data = index_by_timestamp(bitstamp_data)
coinbase_data = index_by_timestamp(coinbase_data)


In [None]:
# Resample to hourly data by taking the mean of each hour, then forward-fill
# any hour that ended up with no value (e.g. an hour containing no rows).
# 'h' is the current hourly alias (the upper-case 'H' spelling is deprecated),
# and .ffill() replaces the deprecated fillna(method='ffill', inplace=True).
bitstamp_data_hourly = bitstamp_data.resample('h').mean().ffill()
coinbase_data_hourly = coinbase_data.resample('h').mean().ffill()


  bitstamp_data_hourly.fillna(method='ffill', inplace=True)
  coinbase_data_hourly.fillna(method='ffill', inplace=True)


In [None]:
# Combine the hourly closing prices of both exchanges.
# join='inner' keeps only the hours that are present in BOTH series.
combined_data = pd.concat(
    [
        bitstamp_data_hourly['Close'].rename('Bitstamp_Close'),
        coinbase_data_hourly['Close'].rename('Coinbase_Close'),
    ],
    axis=1,
    join='inner',
).ffill()  # fill missing values if any (replaces deprecated fillna(method='ffill'))

# Fail here, with the information needed to diagnose it, rather than letting an
# empty frame flow into later cells and surface as an unrelated-looking error.
if combined_data.empty:
    raise ValueError(
        "Bitstamp and Coinbase hourly data share no timestamps: "
        f"Bitstamp covers {bitstamp_data_hourly.index.min()} to "
        f"{bitstamp_data_hourly.index.max()}, "
        f"Coinbase covers {coinbase_data_hourly.index.min()} to "
        f"{coinbase_data_hourly.index.max()}. "
        "Check that both CSV files were loaded completely."
    )

# Use the average of the two exchanges. The columns are named explicitly so the
# result stays correct even if this frame gains more columns later.
combined_data['Average_Close'] = combined_data[['Bitstamp_Close', 'Coinbase_Close']].mean(axis=1)

# Display the combined data
print("\nCombined Data Head:")
print(combined_data.head())



Combined Data Head:
Empty DataFrame
Columns: [Bitstamp_Close, Coinbase_Close, Average_Close]
Index: []


  combined_data.fillna(method='ffill', inplace=True)


In [None]:
# Keep only the averaged closing price, as a one-column DataFrame (2-D input).
data = combined_data.loc[:, ['Average_Close']]


In [None]:
# Rescale the selected column into the [0, 1] interval: learn the min/max from
# `data` first, then apply the transformation to the same frame.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(data)
scaled_data = scaler.transform(data)

# Preview the first five scaled rows
print("\nScaled Data Head:")
print(scaled_data[:5])


ValueError: Found array with 0 sample(s) (shape=(0, 1)) while a minimum of 1 is required by MinMaxScaler.