In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_error
import plotly.graph_objects as go

In [None]:
# Path to the Bitcoin price CSV used throughout the notebook.
csv_path = '/content/drive/MyDrive/Crypto_currency_data/bitcoin_final_data.csv'

# Read the raw file into the working frame and preview its first rows.
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,Date,Close(BTC-USD),High(BTC-USD),Low(BTC-USD),Open(BTC-USD),Volume(BTC-USD),Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,17-09-2014,457.334015,468.174011,452.421997,465.864014,21056800.0,,,,
1,18-09-2014,424.440002,456.859985,413.104004,456.859985,34483200.0,,,,
2,19-09-2014,394.79599,427.834992,384.532013,424.102997,37919700.0,,,,
3,20-09-2014,408.903992,423.29599,389.882996,394.673004,36863600.0,,,,
4,21-09-2014,398.821014,412.425995,393.181,408.084992,26580100.0,,,,


In [None]:
# Keep only the columns whose header does not start with "Unnamed"
# (the placeholder name given to the empty trailing CSV columns seen above).
named_columns = [col for col in df.columns if not col.startswith('Unnamed')]
df = df[named_columns]
df.head()

Unnamed: 0,Date,Close(BTC-USD),High(BTC-USD),Low(BTC-USD),Open(BTC-USD),Volume(BTC-USD)
0,17-09-2014,457.334015,468.174011,452.421997,465.864014,21056800.0
1,18-09-2014,424.440002,456.859985,413.104004,456.859985,34483200.0
2,19-09-2014,394.79599,427.834992,384.532013,424.102997,37919700.0
3,20-09-2014,408.903992,423.29599,389.882996,394.673004,36863600.0
4,21-09-2014,398.821014,412.425995,393.181,408.084992,26580100.0


In [None]:
# Convert the 'Date' column to datetime.
# The file stores dates as DD-MM-YYYY (e.g. 17-09-2014), so the format is stated
# explicitly: relying on inference risks day/month being swapped for ambiguous
# dates such as 01-10-2014.
df['Date'] = pd.to_datetime(df['Date'], format='%d-%m-%Y')

# Use the parsed dates as the index so the frame can be resampled by time.
df.index = df['Date']

# Resampling to daily frequency
df_daily = df.resample('D').mean()

# Resampling to monthly frequency (month-end labels).
# 'ME' replaces the deprecated 'M' alias.
df_month = df.resample('ME').mean()

# Resampling to annual frequency (year ending in December).
# 'YE-DEC' replaces the deprecated 'A-DEC' alias.
df_year = df.resample('YE-DEC').mean()

# Resampling to quarterly frequency (quarters aligned to a December year end).
# 'QE-DEC' replaces the deprecated 'Q-DEC' alias.
df_Q = df.resample('QE-DEC').mean()




'M' is deprecated and will be removed in a future version, please use 'ME' instead.


'A-DEC' is deprecated and will be removed in a future version, please use 'YE-DEC' instead.


'Q-DEC' is deprecated and will be removed in a future version, please use 'QE-DEC' instead.



In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# One panel per sampling frequency: (title, resampled frame, grid row, grid column).
panels = (
    ("By Days", df_daily, 1, 1),
    ("By Months", df_month, 1, 2),
    ("By Quarters", df_Q, 2, 1),
    ("By Years", df_year, 2, 2),
)

# 2x2 grid; subplot titles follow the panel order above (row-major).
fig = make_subplots(rows=2, cols=2, subplot_titles=tuple(title for title, _, _, _ in panels))

# Draw the mean opening price of each frame in its own cell of the grid.
for title, frame, grid_row, grid_col in panels:
    price_trace = go.Scatter(x=frame.index, y=frame['Open(BTC-USD)'], mode='lines', name=title)
    fig.add_trace(price_trace, row=grid_row, col=grid_col)

# Shared figure title and size; the legend is hidden because each panel is already titled.
fig.update_layout(
    title_text='Bitcoin exchanges, mean USD',
    height=700,
    width=1000,
    showlegend=False
)

fig.show()

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller

# Classical additive seasonal decomposition with a 12-month period.
# Note: seasonal_decompose is the moving-average based method, not STL
# (statsmodels exposes STL as a separate class), so the figure is labelled accordingly.
result = sm.tsa.seasonal_decompose(df_month['Open(BTC-USD)'], model='additive', period=12)

# The four series produced by the decomposition, in display order.
components = (
    ("Observed", result.observed),
    ("Trend", result.trend),
    ("Seasonal", result.seasonal),
    ("Residual", result.resid),
)

# One stacked panel per component.
fig = make_subplots(rows=4, cols=1, subplot_titles=tuple(label for label, _ in components))
for panel_row, (label, series) in enumerate(components, start=1):
    fig.add_trace(go.Scatter(x=series.index, y=series, mode='lines', name=label), row=panel_row, col=1)

fig.update_layout(height=800, title_text='Seasonal Decomposition of Open(BTC-USD)')
fig.show()

# Dickey-Fuller test on the untransformed monthly series; element [1] of the result is the p-value.
p_value = adfuller(df_month['Open(BTC-USD)'])[1]
print("Dickey–Fuller test: p=%f" % p_value)

Dickey–Fuller test: p=0.979307


In [None]:
from scipy import stats
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller

# Box-Cox transform of the monthly mean open. The fitted lambda is kept in
# `lmbda` because it is needed again to invert the transform on the forecasts.
boxcox_values, lmbda = stats.boxcox(df_month['Open(BTC-USD)'])
df_month['Open_BTC_USD_box'] = boxcox_values

# Dickey-Fuller test on the transformed series; index 1 of the result is the p-value.
adf_result = adfuller(df_month['Open_BTC_USD_box'])
p_value = adf_result[1]
print("Dickey–Fuller test: p=%f" % p_value)

Dickey–Fuller test: p=0.833398


In [None]:
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller

# Seasonal differencing: each month minus the same month one year (12 rows) earlier.
df_month['Open_BTC_USD_box_diff'] = df_month['Open_BTC_USD_box'].diff(12)

# The first 12 rows have no value a year earlier and are NaN, so they are
# skipped before running the Dickey-Fuller test.
seasonal_diff = df_month['Open_BTC_USD_box_diff'].iloc[12:]
p_value = adfuller(seasonal_diff)[1]
print("Dickey–Fuller test: p=%f" % p_value)

Dickey–Fuller test: p=0.197584


In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller

# Regular (first-order) differencing applied on top of the seasonal difference.
df_month['Open_BTC_USD_box_diff2'] = df_month['Open_BTC_USD_box_diff'] - df_month['Open_BTC_USD_box_diff'].shift(1)

# The first 13 rows are NaN (12 from the seasonal shift plus 1 from this shift),
# so they are dropped before decomposing and testing.
diff2 = df_month['Open_BTC_USD_box_diff2'].iloc[13:]

# Classical additive seasonal decomposition with a 12-month period.
# Note: seasonal_decompose is the moving-average based method, not STL,
# so the figure is labelled accordingly.
result = sm.tsa.seasonal_decompose(diff2, model='additive', period=12)

# The four series produced by the decomposition, in display order.
components = (
    ("Observed", result.observed),
    ("Trend", result.trend),
    ("Seasonal", result.seasonal),
    ("Residual", result.resid),
)

# One stacked panel per component.
fig = make_subplots(rows=4, cols=1, subplot_titles=tuple(label for label, _ in components))
for panel_row, (label, series) in enumerate(components, start=1):
    fig.add_trace(go.Scatter(x=series.index, y=series, mode='lines', name=label), row=panel_row, col=1)

fig.update_layout(height=800, title_text='Seasonal Decomposition of Differenced Data')
fig.show()

# Dickey-Fuller test on the doubly differenced series; element [1] of the result is the p-value.
p_value = adfuller(diff2)[1]
print("Dickey–Fuller test: p=%f" % p_value)

Dickey–Fuller test: p=0.000061


# Model Selection

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from statsmodels.tsa.stattools import acf, pacf

# Doubly differenced series with the leading 13 rows skipped and any remaining NaN removed.
stationary_series = df_month['Open_BTC_USD_box_diff2'][13:].dropna()

# Autocorrelation and partial autocorrelation out to 29 lags.
acf_values = acf(stationary_series, nlags=29)
pacf_values = pacf(stationary_series, nlags=29)

# Two stacked bar charts, ACF on top and PACF below, with the lag number on the x axis.
fig = make_subplots(rows=2, cols=1, subplot_titles=("ACF", "PACF"))
correlograms = (("ACF", acf_values), ("PACF", pacf_values))
for panel_row, (label, values) in enumerate(correlograms, start=1):
    lags = list(range(len(values)))
    fig.add_trace(go.Bar(x=lags, y=values, name=label), row=panel_row, col=1)

fig.update_layout(height=700, title_text='ACF and PACF Plots')
fig.show()

In [None]:
import warnings
from itertools import product
import numpy as np
import statsmodels.api as sm

# Define parameter ranges
Qs = range(0, 2)
qs = range(0, 3)
Ps = range(0, 3)
ps = range(0, 3)
D = 1
d = 1

# Generate all combinations of parameters as (p, q, P, Q) tuples
parameters = product(ps, qs, Ps, Qs)
parameters_list = list(parameters)

# Model Selection: fit every candidate and keep the one with the lowest AIC.
results = []
best_aic = float("inf")
# Initialised up front so the summary below cannot hit a NameError when no fit succeeds.
best_model = None
best_param = None

# Silence fitting warnings only while the search runs, instead of installing a
# global 'ignore' filter that would also hide warnings in every later cell.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')

    for param in parameters_list:
        try:
            model = sm.tsa.statespace.SARIMAX(
                df_month['Open_BTC_USD_box'],  # Use the Box-Cox transformed column
                order=(param[0], d, param[1]),
                seasonal_order=(param[2], D, param[3], 12)
            ).fit(disp=False)
        except (ValueError, np.linalg.LinAlgError):
            # Skip combinations the model rejects or that fail numerically.
            print('wrong parameters:', param)
            continue

        aic = model.aic
        if aic < best_aic:
            best_model = model
            best_aic = aic
            best_param = param

        results.append([param, aic])

# Fail with a clear message rather than reporting a non-existent best model.
if best_model is None:
    raise RuntimeError("No SARIMA candidate could be fitted; check the input series and parameter ranges.")

# Output the best model parameters and AIC
print(f"Best model parameters: {best_param}")
print(f"Best AIC: {best_aic}")

Best model parameters: (0, 1, 0, 1)
Best AIC: 345.5398685804152


In [None]:
import pandas as pd

# Collect every fitted (parameters, AIC) pair into a table.
result_table = pd.DataFrame(results, columns=['parameters', 'aic'])

# Show the five candidates with the lowest AIC, then the full summary of the winner.
lowest_aic_rows = result_table.sort_values(by='aic', ascending=True).head()
print(lowest_aic_rows)
print(best_model.summary())

      parameters         aic
7   (0, 1, 0, 1)  345.539869
19  (1, 0, 0, 1)  346.090344
25  (1, 1, 0, 1)  346.828835
13  (0, 2, 0, 1)  346.975278
37  (2, 0, 0, 1)  347.085962
                                     SARIMAX Results                                      
Dep. Variable:                   Open_BTC_USD_box   No. Observations:                  125
Model:             SARIMAX(0, 1, 1)x(0, 1, 1, 12)   Log Likelihood                -169.770
Date:                            Fri, 24 Jan 2025   AIC                            345.540
Time:                                    21:28:53   BIC                            353.695
Sample:                                09-30-2014   HQIC                           348.849
                                     - 01-31-2025                                         
Covariance Type:                              opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
-------------------

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller

# Residuals of the selected model, skipping the first 13 entries.
residuals = best_model.resid[13:]

# Autocorrelation of the residuals out to 48 lags, with matching lag numbers for the x axis.
acf_values = sm.tsa.acf(residuals.values.squeeze(), nlags=48)
lag_numbers = list(range(len(acf_values)))

# Top panel: residuals over time. Bottom panel: their autocorrelation.
fig = make_subplots(rows=2, cols=1, subplot_titles=("Residuals", "ACF of Residuals"))
residual_trace = go.Scatter(x=residuals.index, y=residuals, mode='lines', name='Residuals')
acf_trace = go.Bar(x=lag_numbers, y=acf_values, name='ACF')
fig.add_trace(residual_trace, row=1, col=1)
fig.add_trace(acf_trace, row=2, col=1)

fig.update_layout(height=700, title_text='Residual Analysis')
fig.show()

# Dickey-Fuller test on the residuals; index 1 of the result is the p-value.
adf_result = adfuller(residuals)
p_value = adf_result[1]
print("Dickey–Fuller test: p=%f" % p_value)

Dickey–Fuller test: p=0.000000


In [None]:
import numpy as np

def invboxcox(y, lmbda):
    """Invert a Box-Cox transform.

    Args:
        y: Transformed value(s); passed straight to NumPy, so scalars and
            array-likes are both accepted.
        lmbda: The Box-Cox lambda that was used for the forward transform.

    Returns:
        exp(y) when lmbda is 0, otherwise exp(log(lmbda * y + 1) / lmbda).
    """
    if lmbda != 0:
        shifted = lmbda * y + 1
        return np.exp(np.log(shifted) / lmbda)
    # lambda == 0 is the pure-log case of the transform, so its inverse is exp.
    return np.exp(y)

In [None]:
# Display the monthly mean opening price on its own (double brackets keep it a one-column DataFrame).
df_month[['Open(BTC-USD)']]

Unnamed: 0_level_0,Open(BTC-USD)
Date,Unnamed: 1_level_1
2014-09-30,412.654003
2014-10-31,365.748000
2014-11-30,364.850235
2014-12-31,343.074836
2015-01-31,251.799905
...,...
2024-09-30,60213.192448
2024-10-31,65361.905244
2024-11-30,85696.216928
2024-12-31,98341.197077


In [None]:
# List the columns now on the monthly frame, including the Box-Cox and differenced columns added above.
df_month.columns

Index(['Date', 'Close(BTC-USD)', 'High(BTC-USD)', 'Low(BTC-USD)',
       'Open(BTC-USD)', 'Volume(BTC-USD)', 'Open_BTC_USD_box',
       'Open_BTC_USD_box_diff', 'Open_BTC_USD_box_diff2'],
      dtype='object')

In [None]:
import pandas as pd
from datetime import datetime

# Number of months to forecast beyond the last observed month.
forecast_horizon = 6

# Work on the opening-price column only.
df_month2 = df_month[['Open(BTC-USD)']]

# Future month-end dates are derived from the end of the data rather than
# hard-coded: fixed dates that already exist in the index create duplicate rows
# and leave the genuine out-of-sample forecasts without a row to land in.
last_observed = df_month.index[-1]
date_list = [last_observed + pd.offsets.MonthEnd(step) for step in range(1, forecast_horizon + 1)]

# Empty rows for the forecast period. They use df_month2's own columns (not
# df_month's) so no unrelated columns are added, and a float dtype so the
# concatenation keeps the price column numeric.
future = pd.DataFrame(index=date_list, columns=df_month2.columns, dtype=float)

df_month2 = pd.concat([df_month2, future])

# Predict over the whole extended range (in-sample plus the future months) on the
# Box-Cox scale, then map back to prices with the lambda fitted earlier.
df_month2['forecast'] = invboxcox(best_model.predict(start=0, end=len(df_month2)-1), lmbda)

In [None]:
import numpy as np

# Largest value in the forecast column.
max_price = df_month2['forecast'].max()
print(max_price)

# Replace every forecast equal to that maximum with NaN.
# NOTE(review): this treats the single highest prediction as an outlier without
# checking it against the actual price for that month; confirm that is intended.
is_max_forecast = df_month2['forecast'].eq(max_price)
df_month2.loc[is_max_forecast, 'forecast'] = np.nan

101719.50168551406


In [None]:
# Removing the outlier prediction.
# The previous cell already replaced the maximum forecast with NaN, so comparing
# against max_price no longer matches anything (NaN != max_price is True and the
# row was kept). Dropping rows with a missing forecast removes it as intended.
# NOTE(review): this also drops any other row whose forecast is NaN; confirm none are expected.
cleaned = df_month2.dropna(subset=['forecast'])

In [None]:
import plotly.graph_objects as go

# Observed monthly mean opening price.
actual_trace = go.Scatter(
    x=df_month2.index,
    y=df_month2['Open(BTC-USD)'],
    mode='lines',
    name='Actual Price'
)

# Model output, drawn as a dashed red line over the same dates.
forecast_trace = go.Scatter(
    x=df_month2.index,
    y=df_month2['forecast'],
    mode='lines',
    line=dict(color='red', dash='dash'),
    name='Predicted Price'
)

# Both series share one set of axes.
fig = go.Figure()
for trace in (actual_trace, forecast_trace):
    fig.add_trace(trace)

fig.update_layout(
    title='Bitcoin Exchanges, by Months',
    xaxis_title='Date',
    yaxis_title='Mean USD',
    legend_title='Legend',
    height=700,
    width=1000
)

fig.show()

In [None]:
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Drop missing values from each series, then keep only the dates present in both.
observed = df_month2['Open(BTC-USD)'].dropna()
forecasted = df_month2['forecast'].dropna()
actual, predicted = observed.align(forecasted, join='inner')

# Absolute-scale errors.
mae = mean_absolute_error(actual, predicted)
mse = mean_squared_error(actual, predicted)
rmse = np.sqrt(mse)

# Mean absolute error relative to the actual value, expressed in percent.
relative_error = (actual - predicted) / actual
mape = relative_error.abs().mean() * 100

# Coefficient of determination.
r2 = r2_score(actual, predicted)

# Print the accuracy metrics
report_lines = (
    f"Mean Absolute Error (MAE): {mae}",
    f"Mean Squared Error (MSE): {mse}",
    f"Root Mean Squared Error (RMSE): {rmse}",
    f"Mean Absolute Percentage Error (MAPE): {mape}%",
    f"R-squared (R²): {r2}",
)
for report_line in report_lines:
    print(report_line)

Mean Absolute Error (MAE): 2346.5993620937293
Mean Squared Error (MSE): 16897852.306310154
Root Mean Squared Error (RMSE): 4110.69973438953
Mean Absolute Percentage Error (MAPE): 17.851197198102984%
R-squared (R²): 0.9636409166975922


In [None]:
import plotly.graph_objects as go

# Convert R-squared to percentage.
# Uses the `r2` value computed by the metrics cell instead of a pasted literal,
# so the gauge cannot go stale when the data or the model changes.
r2_percentage = r2 * 100

# Create a gauge chart: the bar and the red threshold line both mark the R² value
# on a 0-100 scale, with the lower and upper halves shaded differently.
fig = go.Figure(go.Indicator(
    mode="gauge+number",
    value=r2_percentage,
    title={'text': "R-squared (R²) Percentage"},
    gauge={
        'axis': {'range': [0, 100]},
        'bar': {'color': "darkblue"},
        'steps': [
            {'range': [0, 50], 'color': "lightgray"},
            {'range': [50, 100], 'color': "lightgreen"}
        ],
        'threshold': {
            'line': {'color': "red", 'width': 4},
            'thickness': 0.75,
            'value': r2_percentage
        }
    }
))

fig.update_layout(height=400, width=600)
fig.show()