In [None]:
import kagglehub
import pandas as pd
import numpy as np
import os
from matplotlib import pyplot as plt

# Download the latest published version of the dataset; the call returns the local directory path
path = kagglehub.dataset_download("jkraak/bitcoin-price-dataset")

print("Path to dataset files:", path)
print("Files in dataset directory:", os.listdir(path)) # List the files in that directory to confirm the CSV name used by the loading cell

Path to dataset files: /kaggle/input/bitcoin-price-dataset
Files in dataset directory: ['bitcoin_2017_to_2023.csv', '.nfs000000000999dedc000000e9']


In [None]:
# Number of leading rows (the most recent minutes; the file is stored newest-first) to analyse.
N_ROWS = 100_000

# Read only the rows that are needed instead of parsing the whole file and slicing it.
# This also yields an independent frame, so later in-place column updates do not
# operate on a slice of a larger frame (which can raise SettingWithCopyWarning).
df = pd.read_csv(os.path.join(path, "bitcoin_2017_to_2023.csv"), nrows=N_ROWS)
df.head()

Unnamed: 0,timestamp,open,high,low,close,volume,quote_asset_volume,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume
0,2023-08-01 13:19:00,28902.48,28902.49,28902.48,28902.49,4.68658,135453.8,258,0.89391,25836.224836
1,2023-08-01 13:18:00,28902.48,28902.49,28902.48,28902.49,4.77589,138035.1,317,2.24546,64899.385195
2,2023-08-01 13:17:00,28908.52,28908.53,28902.48,28902.49,11.52263,333053.2,451,2.70873,78290.170121
3,2023-08-01 13:16:00,28907.41,28912.74,28907.41,28908.53,15.8961,459555.6,483,10.22981,295738.166916
4,2023-08-01 13:15:00,28896.0,28907.42,28893.03,28907.41,37.74657,1090761.0,686,16.50452,476955.246611


In [None]:
# How many distinct closing prices are present in the loaded rows?
print(df.loc[:, "close"].nunique())

71084
count    100000.000000
mean      28694.226620
std        1849.109425
min       24856.980000
25%       26766.497500
50%       29352.910000
75%       30298.152500
max       31798.000000
Name: close, dtype: float64


In [None]:
# Summary statistics (count, mean, std, quartiles, extremes) of the closing price
print(df.loc[:, "close"].describe())

In [None]:
# Add small Gaussian jitter (mean 0, standard deviation 0.01) to every closing price.
# The generator is seeded so that the jitter, and every metric computed downstream,
# is identical after "Restart Kernel -> Run All".
# NOTE(review): this cell mutates `df` in place and is not idempotent; running it
# twice adds the jitter twice.
NOISE_SEED = 42
df['close'] += np.random.default_rng(NOISE_SEED).normal(0, 0.01, len(df))

In [None]:
# Display the column labels of the loaded frame
df.columns

Index(['timestamp', 'open', 'high', 'low', 'close', 'volume',
       'quote_asset_volume', 'number_of_trades', 'taker_buy_base_asset_volume',
       'taker_buy_quote_asset_volume'],
      dtype='object')

# Data Preprocessing

In [None]:
# Parse the timestamps and use them as the index.
df["timestamp"] = pd.to_datetime(df["timestamp"])

# The CSV is stored newest-first, so sort into chronological order here. The
# positional 80/20 splits that follow then train on the past and evaluate on the
# future; without the sort the time-series model is fitted on a reversed series.
# Re-assigning (instead of inplace=True) keeps the statement chainable.
df = df.set_index("timestamp").sort_index()

Splitting the data for ARIMA and LSTM

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Fraction of rows (taken from the start of the frame) used for training.
TRAIN_FRACTION = 0.8

#ARIMA
# Univariate series: only the closing price, split positionally.
df_arima = df[['close']]
train_size = int(len(df_arima) * TRAIN_FRACTION)
train_arima, test_arima = df_arima.iloc[:train_size], df_arima.iloc[train_size:]

#LSTM
lstm_feature_columns = [
    "open", "high", "low", "volume", "quote_asset_volume",
    "number_of_trades", "taker_buy_base_asset_volume",
    "taker_buy_quote_asset_volume",
]
lstm_features = df[lstm_feature_columns]
train_size = int(len(lstm_features) * TRAIN_FRACTION)

#Normalising the dataset for lstm
# Fit the scaler on the training rows ONLY and reuse it for the test rows.
# Fitting on the full frame leaks the test set's min/max into training.
# Test values may therefore fall slightly outside [0, 1]; that is expected.
scaler = MinMaxScaler()
train_lstm = scaler.fit_transform(lstm_features.iloc[:train_size])
test_lstm = scaler.transform(lstm_features.iloc[train_size:])

# Keep `df_lstm` available as the full scaled array (train rows followed by test rows).
df_lstm = np.vstack([train_lstm, test_lstm])

Creating sequential data for LSTM

In [None]:
import warnings
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_absolute_error

# Suppress every warning from here on.
# NOTE(review): this filter is global, so any warnings emitted by the fit below are hidden too.
warnings.filterwarnings("ignore")

# Remove rows containing missing values from both partitions
train_arima, test_arima = train_arima.dropna(), test_arima.dropna()

# Seasonal part of the model, passed to SARIMAX as `seasonal_order`
seasonal_order = (1, 1, 1, 12)

# Specify the SARIMA model on the training closing prices
sarima_model = SARIMAX(
    endog=train_arima['close'],
    order=(1, 1, 1),
    seasonal_order=seasonal_order,
    initialization='approximate_diffuse',
    low_memory=True,
)

# Fit with at most 50 optimizer iterations and no console output
sarima_model_fit = sarima_model.fit(disp=False, maxiter=50)

# Forecast one value for every row of the test partition
sarima_pred = sarima_model_fit.forecast(steps=len(test_arima))

# Score the forecast against the held-out closing prices
mae_sarima = mean_absolute_error(y_true=test_arima['close'], y_pred=sarima_pred)
print(f"SARIMA Model MAE: {mae_sarima}")

SARIMA Model MAE: 409.33000713250135


In [None]:
#Sorting for predicting the future values
train_arima = train_arima.sort_index()
print(train_arima.index[:5])

DatetimeIndex(['2023-06-07 00:00:00', '2023-06-07 00:01:00',
               '2023-06-07 00:02:00', '2023-06-07 00:03:00',
               '2023-06-07 00:04:00'],
              dtype='datetime64[ns]', name='timestamp', freq=None)


In [None]:
# Aggregate the training rows into daily means.
# NOTE(review): `sarima_model_fit` was fitted before this step and is not refitted in the visible cells.
train_arima = train_arima.resample(rule='1D').mean()

In [None]:
# Preview the first five index entries after resampling
print(train_arima.index[0:5])

DatetimeIndex(['2023-06-07', '2023-06-08', '2023-06-09', '2023-06-10',
               '2023-06-11'],
              dtype='datetime64[ns]', name='timestamp', freq='D')


In [None]:
def predict_sarima_future(model, inp, steps=1):
    """
    Forecast with a fitted model and return the value at the end of the horizon.

    Parameters:
        model: Fitted results object exposing ``forecast(steps=...)``.
        inp: Requested horizon (number of future time steps). Anything ``int()``
            accepts is allowed, e.g. ``6`` or ``"6"``. When ``None``, ``steps``
            is used instead.
        steps: Fallback horizon, used only when ``inp`` is ``None`` (default = 1).

    Returns:
        The last forecast value as a ``float``. ``None`` is returned (after a
        message is printed) when the horizon is not a positive integer or when
        the forecast raises ``ValueError``; ``None`` is also returned if the
        forecast comes back empty.
    """
    # Honour `steps` instead of silently discarding it when no explicit horizon is given.
    requested = steps if inp is None else inp

    # Coerce up front so numeric strings work and bad values are reported
    # through the same message instead of escaping as a TypeError.
    try:
        horizon = int(requested)
    except (TypeError, ValueError):
        print("Invalid input! Please enter a valid number.")
        return None

    if horizon < 1:
        print("Please enter a positive integer.")
        return None

    try:
        future_predictions = model.forecast(steps=horizon)
    except ValueError:
        print("Invalid input! Please enter a valid number.")
        return None

    # Materialise the forecast so the last element can be taken positionally,
    # whatever container (list, array, Series) the model returns.
    values = list(future_predictions)
    if not values:
        # An empty forecast has no final value; do not fabricate a price of 0.
        return None

    # A built-in float is always JSON-serialisable.
    return float(values[-1])

# Call the function after training SARIMA



In [None]:
!pip install flask flask-cors pyngrok



Collecting flask-cors
  Downloading flask_cors-5.0.1-py3-none-any.whl.metadata (961 bytes)
Collecting pyngrok
  Downloading pyngrok-7.2.4-py3-none-any.whl.metadata (8.7 kB)
Downloading flask_cors-5.0.1-py3-none-any.whl (11 kB)
Downloading pyngrok-7.2.4-py3-none-any.whl (23 kB)
Installing collected packages: pyngrok, flask-cors
Successfully installed flask-cors-5.0.1 pyngrok-7.2.4


In [None]:
from flask import Flask, request, jsonify
from flask_cors import CORS
from pyngrok import ngrok
import json
# Flask app that serves forecasts from the fitted SARIMA model over an ngrok tunnel.
app = Flask(__name__)
CORS(app)  # Enable cross-origin requests

# Never hard-code the ngrok auth token in the notebook: read it from the environment.
# Any token that was previously committed in this cell should be revoked in the ngrok dashboard.
ngrok_token = os.environ.get("NGROK_AUTHTOKEN")
if not ngrok_token:
    raise RuntimeError("Set the NGROK_AUTHTOKEN environment variable before starting the tunnel.")
ngrok.set_auth_token(ngrok_token)

# 2. Now connect
public_url = ngrok.connect(5000)
print("Public URL:", public_url)


@app.route('/predict', methods=['POST'])
def predict():
    """
    Handle POST /predict.

    Expects a JSON object with an optional "interval" key (defaults to 1) giving
    the number of future steps to forecast.

    Responses:
        200 {"prediction": <float>}  on success
        400 {"error": ...}           when the body is not a JSON object or the
                                     interval is not a positive integer
        500 {"error": ...}           when the forecast itself fails
    """
    # silent=True returns None for a missing or malformed body instead of raising,
    # so the client gets a 400 rather than a generic 500.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400
    print("Received data:", data)

    try:
        interval = int(data.get("interval", 1))
    except (TypeError, ValueError):
        return jsonify({"error": "Invalid interval format"}), 400

    # Reject zero AND negative horizons; previously a negative value passed through
    # and produced a 200 response with a null prediction.
    # NOTE(review): there is no upper bound on `interval`; consider capping it since
    # this endpoint is reachable from the public tunnel.
    if interval < 1:
        return jsonify({"error": "Interval must be a positive integer"}), 400

    try:
        prediction = predict_sarima_future(sarima_model_fit, interval)
    except Exception as e:
        print("Error:", str(e))  # Print error
        return jsonify({"error": "Prediction failed"}), 500

    # The helper reports failure by returning None; do not send that as a success.
    if prediction is None:
        return jsonify({"error": "Prediction failed"}), 500

    print("Prediction:", prediction)
    return jsonify({"prediction": float(prediction)})


# debug must stay off: the Werkzeug debugger allows code execution and this server
# is exposed publicly through the tunnel.
app.run(port=5000, debug=False, use_reloader=False)


Public URL: NgrokTunnel: "https://f38d-34-126-72-144.ngrok-free.app" -> "http://localhost:5000"
 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [16/Apr/2025 16:38:16] "OPTIONS /predict HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [16/Apr/2025 16:38:17] "POST /predict HTTP/1.1" 200 -


Received data: {'interval': '6'}
Prediction: 27236.064092152694


INFO:werkzeug:127.0.0.1 - - [16/Apr/2025 16:38:35] "OPTIONS /predict HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [16/Apr/2025 16:38:36] "POST /predict HTTP/1.1" 200 -


Received data: {'interval': '56'}
Prediction: 27234.18541305449


INFO:werkzeug:127.0.0.1 - - [16/Apr/2025 16:45:30] "OPTIONS /predict HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [16/Apr/2025 16:45:31] "POST /predict HTTP/1.1" 200 -


Received data: {'interval': '6'}
Prediction: 27236.064092152694


INFO:werkzeug:127.0.0.1 - - [16/Apr/2025 16:45:54] "OPTIONS /predict HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [16/Apr/2025 16:45:55] "POST /predict HTTP/1.1" 200 -


Received data: {'interval': '56'}
Prediction: 27234.18541305449


INFO:werkzeug:127.0.0.1 - - [16/Apr/2025 16:46:33] "OPTIONS /predict HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [16/Apr/2025 16:46:33] "OPTIONS /predict HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [16/Apr/2025 16:46:34] "POST /predict HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [16/Apr/2025 16:46:34] "POST /predict HTTP/1.1" 200 -


Received data: {'interval': '4'}
Prediction: 27236.14321150357
Received data: {'interval': '4'}
Prediction: 27236.14321150357


INFO:werkzeug:127.0.0.1 - - [16/Apr/2025 16:46:45] "OPTIONS /predict HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [16/Apr/2025 16:46:45] "POST /predict HTTP/1.1" 200 -


Received data: {'interval': '7'}
Prediction: 27235.858369395566


In [None]:
!pip install pyngrok
from pyngrok import ngrok

# 1. Get your authtoken from https://dashboard.ngrok.com/get-started/your-authtoken
# and replace "YOUR_AUTHTOKEN" with your actual token
ngrok.set_auth_token("2ujFnfmrq1WPuzj4pn3OSTy23Ez_khzcubqDjPFpY9hZbjWx")

# 2. Now connect
public_url = ngrok.connect(5000)
print("Public URL:", public_url)

Public URL: NgrokTunnel: "https://d6dd-34-126-72-144.ngrok-free.app" -> "http://localhost:5000"
