## 0. Preparation

- Importing the libraries and the dataset.


In [157]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#import statsmodels.formula.api as sm
from scipy import stats


# Importing data:
df=pd.read_csv('Crypto_historical_data.csv')
df


Unnamed: 0,Date,Open,High,Low,Close,Volume,ticker,name
0,2017-11-09 00:00:00+00:00,0.002304,0.002438,0.002285,0.002344,2924350,TRX-USD,TRON
1,2017-11-10 00:00:00+00:00,0.002335,0.002350,0.001989,0.002013,2193620,TRX-USD,TRON
2,2017-11-11 00:00:00+00:00,0.002026,0.002298,0.001890,0.002003,1748460,TRX-USD,TRON
3,2017-11-12 00:00:00+00:00,0.002006,0.002006,0.001684,0.001783,2174370,TRX-USD,TRON
4,2017-11-13 00:00:00+00:00,0.001795,0.002146,0.001771,0.002112,2889150,TRX-USD,TRON
...,...,...,...,...,...,...,...,...
348932,2025-11-15 00:00:00+00:00,0.000013,0.000014,0.000013,0.000013,9183119,XEC-USD,eCash
348933,2025-11-16 00:00:00+00:00,0.000013,0.000013,0.000013,0.000013,8596172,XEC-USD,eCash
348934,2025-11-17 00:00:00+00:00,0.000013,0.000013,0.000012,0.000012,9196514,XEC-USD,eCash
348935,2025-11-18 00:00:00+00:00,0.000012,0.000013,0.000012,0.000013,6922388,XEC-USD,eCash


- Transforming data field into a more derirable format

In [158]:
df['Date'] = pd.to_datetime(df['Date'])
df['Date'] = df['Date'].dt.date
df

Unnamed: 0,Date,Open,High,Low,Close,Volume,ticker,name
0,2017-11-09,0.002304,0.002438,0.002285,0.002344,2924350,TRX-USD,TRON
1,2017-11-10,0.002335,0.002350,0.001989,0.002013,2193620,TRX-USD,TRON
2,2017-11-11,0.002026,0.002298,0.001890,0.002003,1748460,TRX-USD,TRON
3,2017-11-12,0.002006,0.002006,0.001684,0.001783,2174370,TRX-USD,TRON
4,2017-11-13,0.001795,0.002146,0.001771,0.002112,2889150,TRX-USD,TRON
...,...,...,...,...,...,...,...,...
348932,2025-11-15,0.000013,0.000014,0.000013,0.000013,9183119,XEC-USD,eCash
348933,2025-11-16,0.000013,0.000013,0.000013,0.000013,8596172,XEC-USD,eCash
348934,2025-11-17,0.000013,0.000013,0.000012,0.000012,9196514,XEC-USD,eCash
348935,2025-11-18,0.000012,0.000013,0.000012,0.000013,6922388,XEC-USD,eCash


- Selecting the desired ticker, default - 'BTC-USD'

In [159]:
ticker = 'BTC-USD' #change to see different data

df = df[df['ticker'] == ticker]
df = df[['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'ticker', 'name']]
df



Unnamed: 0,Date,Open,High,Low,Close,Volume,ticker,name
18508,2014-09-17,465.864014,468.174011,452.421997,457.334015,21056800,BTC-USD,Bitcoin
18509,2014-09-18,456.859985,456.859985,413.104004,424.440002,34483200,BTC-USD,Bitcoin
18510,2014-09-19,424.102997,427.834991,384.532013,394.795990,37919700,BTC-USD,Bitcoin
18511,2014-09-20,394.673004,423.295990,389.882996,408.903992,36863600,BTC-USD,Bitcoin
18512,2014-09-21,408.084991,412.425995,393.181000,398.821014,26580100,BTC-USD,Bitcoin
...,...,...,...,...,...,...,...,...
22585,2025-11-15,94420.468750,96728.468750,94420.468750,95549.148438,38500716654,BTC-USD,Bitcoin
22586,2025-11-16,95556.867188,96564.187500,92971.164062,94177.078125,71086235862,BTC-USD,Bitcoin
22587,2025-11-17,94180.875000,95928.367188,91214.757812,92093.875000,94186165724,BTC-USD,Bitcoin
22588,2025-11-18,92094.531250,93745.078125,89300.460938,92948.875000,101333569062,BTC-USD,Bitcoin


In [160]:
# 4. Calculate Daily Log Returns (The "Shift" trick)
# Formula: ln(Today's Price / Yesterday's Price)
# .shift(1) takes the column and moves it down 1 row, giving us "Yesterday's Close"
df['Log_Returns'] = np.log(df['Close'] / df['Close'].shift(1))

# 5. Calculate Log Volume (For your Regression later)
# We add 1 to avoid errors if volume is ever 0 (ln(0) is undefined)
df['Log_Volume'] = np.log(df['Volume'].replace(0, 1))

# 6. Cleanup: The first row will now have NaN (empty) values because 
# there is no "yesterday" for the first day. We drop it. For regression purposes mostly
df_reg = df.dropna()

df_reg


Unnamed: 0,Date,Open,High,Low,Close,Volume,ticker,name,Log_Returns,Log_Volume
18509,2014-09-18,456.859985,456.859985,413.104004,424.440002,34483200,BTC-USD,Bitcoin,-0.074643,17.355983
18510,2014-09-19,424.102997,427.834991,384.532013,394.795990,37919700,BTC-USD,Bitcoin,-0.072402,17.450981
18511,2014-09-20,394.673004,423.295990,389.882996,408.903992,36863600,BTC-USD,Bitcoin,0.035111,17.422735
18512,2014-09-21,408.084991,412.425995,393.181000,398.821014,26580100,BTC-USD,Bitcoin,-0.024968,17.095673
18513,2014-09-22,399.100006,406.915985,397.130005,402.152008,24127600,BTC-USD,Bitcoin,0.008317,16.998867
...,...,...,...,...,...,...,...,...,...,...
22585,2025-11-15,94420.468750,96728.468750,94420.468750,95549.148438,38500716654,BTC-USD,Bitcoin,0.012123,24.373943
22586,2025-11-16,95556.867188,96564.187500,92971.164062,94177.078125,71086235862,BTC-USD,Bitcoin,-0.014464,24.987160
22587,2025-11-17,94180.875000,95928.367188,91214.757812,92093.875000,94186165724,BTC-USD,Bitcoin,-0.022368,25.268539
22588,2025-11-18,92094.531250,93745.078125,89300.460938,92948.875000,101333569062,BTC-USD,Bitcoin,0.009241,25.341684


## 1. Unconditional and conditional probabilities

- Unconditional probability of whether or not on a chosen day a token will go UP

In [161]:
up = (df['Log_Returns'] > 0).mean() * 100
down = (df['Log_Returns'] <= 0).mean() * 100

print(f"Unconditional probability UP:   {up:.2f}%")
print(f"Unconditional probability DOWN: {down:.2f}%")

Unconditional probability UP:   52.65%
Unconditional probability DOWN: 47.33%


- Conditional probability of UP being followed by UP

In [162]:
up_after_up = (
    ((df['Log_Returns'] > 0) & (df['Log_Returns'].shift(1) > 0)).sum()
    / (df['Log_Returns'].shift(1) > 0).sum()
) * 100

up_after_down = (
    ((df['Log_Returns'] > 0) & (df['Log_Returns'].shift(1) <= 0)).sum()
    / (df['Log_Returns'].shift(1) <= 0).sum()
) * 100

print(f"P(UP today | UP yesterday):   {up_after_up:.2f}%")
print(f"P(UP today | DOWN yesterday): {up_after_down:.2f}%")

P(UP today | UP yesterday):   50.35%
P(UP today | DOWN yesterday): 55.26%
