In [1]:
import pandas as pd
import os
import numpy as np

## Combining all the CSV's together

In [20]:
for f in os.listdir("/Users/RyanHaver/Projects/cryptoStrategies/data/"):
    csv = open("/Users/RyanHaver/Projects/cryptoStrategies/data/"+f, mode='r')
    try:
        tmp = pd.read_csv(csv, encoding="utf-8")
        tmp.columns=["open_time","open","high","low","close","volume","close_time","quote_volume","count","taker_buy_base_volume","taker_buy_quote_volume"]
        tmp.to_csv("full_data.csv", mode='a', header=False)
    except UnicodeDecodeError:
        print(f)
        break

## Model
Try to model the closing prices as a discrete time markov chain

In [2]:
full_df = pd.read_csv(open("/Users/RyanHaver/Projects/cryptoStrategies/full_data.csv"))
full_df.head()

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_volume,count,taker_buy_base_volume,taker_buy_quote_volume
0,1643587200000,37903.79,37975.18,37888.67,37889.39,2.554087,1643587499999,96854.0703,134,0.660383,25047.2682
1,1643587500000,37888.28,37888.28,37686.82,37729.56,12.950465,1643587799999,488764.8137,374,3.971217,149819.6045
2,1643587800000,37782.08,37792.44,37713.05,37726.36,2.429988,1643588099999,91781.5867,150,2.250003,84984.6178
3,1643588100000,37729.12,37797.08,37665.32,37686.17,2.535778,1643588399999,95671.782,152,0.607303,22918.7153
4,1643588400000,37673.0,37729.72,37663.63,37675.18,0.937727,1643588699999,35346.8969,119,0.229773,8659.2036


In [3]:
closePrices = full_df['close']

In [4]:
# Calculate price changes
price_changes = np.diff(closePrices)

print(price_changes)

[-159.83   -3.2   -40.19 ...  -15.05   55.11  -46.33]


In [5]:
# Define bins
bins = range(0,100000,1)
labels = range(0,100000,1)

# Discretize the price changes into states
states = np.digitize(price_changes, bins, right=True) - 1

In [6]:
print(states)

[-1 -1 -1 ... -1 55 -1]


In [6]:
# Number of states
num_states = len(labels)

# Initialize transition matrix
transition_matrix = np.zeros((num_states, num_states))

In [7]:
# Count the transitions
for i in range(len(states) - 1):
    current_state = int(states[i])
    next_state = int(states[i + 1])
    transition_matrix[current_state, next_state] += 1

# Normalize the rows to get probabilities
transition_probabilities = transition_matrix / transition_matrix.sum(axis=1, keepdims=True)

print("Price Changes:", price_changes)
print("States:", states)
print("Transition Matrix (Counts):\n", transition_matrix)
print("Transition Probabilities:\n", transition_probabilities)

: 