In [6]:
import pandas as pd
import seaborn as sns
import scipy.stats as st
import numpy as np
import matplotlib.pyplot as plt
import urllib.request
import os
from scipy.optimize import fsolve
import math

from bayesian.train_bn import structure_learning, parameter_learning
from preprocess.discretization import get_nodes_type, discretization, code_categories, get_nodes_sign
from bayesian.save_bn import save_structure, save_params, read_structure, read_params
from bayesian.sampling import generate_synthetics
from bayesian.calculate_accuracy import calculate_acc
from external.libpgm.hybayesiannetwork import HyBayesianNetwork
from visualization.visualization import draw_BN

from functools import partial



In [7]:
# Load the BTC dataset, discard the 'dt' column and keep only rows without missing values.
data = (
    pd.read_csv('../btc2.csv')
    .drop(['dt'], axis=1)
    .dropna()
)

# Replace the values of each quantile column with integer codes, numbered in
# order of first appearance within that column.
for col in ('quantile_Volume', 'quantile_RSI', 'quantile_STOCHRSI-K', 'quantile_STOCHRSI-D'):
    col_map = {label: code for code, label in enumerate(data[col].unique())}
    data[col] = data[col].map(col_map)



In [8]:
# Columns that the networks below are evaluated on.
targets = ['MACD', 'Volume', 'RSI']

# Show the pairwise correlation matrix restricted to the target columns.
data.corr().loc[:, targets]

Unnamed: 0,MACD,Volume,RSI
Open,0.175072,-0.163601,0.12921
High,0.170554,-0.146901,0.135135
Low,0.185774,-0.190688,0.145587
Close,0.177824,-0.167481,0.14713
Volume,-0.26266,1.0,-0.170368
STOCHRSI-K,0.076995,-0.049003,0.579636
STOCHRSI-D,0.141531,-0.046398,0.546452
RSI,0.717709,-0.170368,1.0
MACD,1.0,-0.26266,0.717709
MACD Signal,0.951774,-0.234993,0.540135


In [5]:
# Columns treated as categorical; every other column goes to `cont` and is
# passed to the discretizer below.
categories = ['month', 'quantile_STOCHRSI-D', 'quantile_STOCHRSI-K', 'quantile_RSI', 'quantile_Volume']
cont = [x for x in data.columns if x not in categories]

# Cast categorical values to strings *before* node types and signs are derived
# from the frame, so the order of these statements matters.
for column in categories:
    data[column] = data[column].map(str)

nodes_type = get_nodes_type(data)
nodes_sign = get_nodes_sign(data)

# Keep the coder returned by code_categories under its own name. It used to be
# bound to `coder` and was immediately overwritten by the discretization result.
coded_data, label_coder = code_categories(data, 'label', categories)

# `coder` still ends up bound to the object returned by discretization.
discrete_data, coder = discretization(coded_data, 'equal_frequency', cont)

In [None]:
# Hand-specified network structure: 'V' lists every column of `data` as a node,
# 'E' lists the edges as two-element [source, destination] pairs
# (NOTE(review): direction presumed source -> destination; confirm against libpgm).
# Fixes: a stray ']' after the 'V' list and a missing comma after the first
# edge previously made this cell a SyntaxError.
bayes_manual = {
    'V': list(data.columns.values),
    'E': [
        # edges into MACD
        ['MACD Signal', 'MACD'],
        ['RSI', 'MACD'],
        ['MACD Diff', 'MACD'],
        ['month', 'MACD'],
        ['Low', 'MACD'],
        ['Open', 'MACD'],
        ['High', 'MACD'],
        ['quantile_RSI', 'MACD'],

        # edges into Volume
        ['month', 'Volume'],
        ['MACD Signal', 'Volume'],
        ['MACD', 'Volume'],
        ['RSI', 'Volume'],
        ['Low', 'Volume'],
        ['Open', 'Volume'],
        ['Close', 'Volume'],
        ['quantile_Volume', 'Volume'],

        # edges into RSI
        ['quantile_STOCHRSI-D', 'RSI'],
        ['quantile_STOCHRSI-K', 'RSI'],
        ['quantile_RSI', 'RSI'],
        ['STOCHRSI-K', 'RSI'],
        ['STOCHRSI-D', 'RSI'],
        ['Open', 'RSI'],
        ['MACD Signal', 'RSI'],
    ],
}

# draw_BN(bayes_manual, nodes_type, 's&p_bayes_manual')

# Fit parameters for the manual structure, round-trip structure and parameters
# through their saved files, then rebuild the network from what was read back.
params = parameter_learning(data, nodes_type, bayes_manual, 'simple')
save_structure(bayes_manual, 'bayes_manual_structure')
skel = read_structure('bayes_manual_structure')
save_params(params, 'bayes_manual_params')
params = read_params('bayes_manual_params')
bayes_manual = HyBayesianNetwork(skel, params)
synth = generate_synthetics(bayes_manual, nodes_sign, 'simple', 800)

# Overlay the real and synthesized distributions, one panel per column.
fig, ax = plt.subplots(3, 1, figsize=(10, 15))
for axis, column in zip(ax, ('RSI', 'MACD', 'Volume')):
    sns.distplot(data[column], ax=axis)
    sns.distplot(synth[column], ax=axis)
    axis.legend(['real', 'synthesized'])

# `targets` is the list defined earlier in the notebook; the original referenced
# an undefined name `target` here.
print(calculate_acc(bayes_manual, data, targets, 'simple')[1])

In [14]:

# Learn the structure with the 'HC' algorithm and 'K2' score on the discretized
# frame, then fit parameters on the original frame. This matches the evo cell
# and the plots below, which compare the synthetic sample against raw `data`.
# The two inputs were previously swapped in this cell.
bayes_hc = structure_learning(discrete_data, 'HC', nodes_type, 'K2')
draw_BN(bayes_hc, nodes_type, 's&p_bayes_hc')

# Round-trip structure and parameters through their saved files, then rebuild
# the network from what was read back.
params = parameter_learning(data, nodes_type, bayes_hc, 'simple')
save_structure(bayes_hc, 'bayes_hc_structure')
skel = read_structure('bayes_hc_structure')
save_params(params, 'bayes_hc_params')
params = read_params('bayes_hc_params')
bayes_hc = HyBayesianNetwork(skel, params)
synth_df = generate_synthetics(bayes_hc, nodes_sign, 'simple', 500)

# Overlay the real and synthesized distributions, one panel per column.
# Plot `synth_df` (this network's sample); the original plotted `synth`, a
# leftover sample from the manual-network cell.
fig, ax = plt.subplots(3, 1, figsize=(10, 15))
for axis, column in zip(ax, ('RSI', 'MACD', 'Volume')):
    sns.distplot(data[column], ax=axis)
    sns.distplot(synth_df[column], ax=axis)
    axis.legend(['real', 'synthesized'])

# Accuracy is evaluated on the first 10 rows only.
print(calculate_acc(bayes_hc, data[:10], targets, 'simple')[1])


In [None]:
# Learn the structure with the 'evo' algorithm and 'MI' score on the discretized
# frame, then fit parameters on the original frame.
bayes_evo = structure_learning(discrete_data, 'evo', nodes_type, 'MI')
draw_BN(bayes_evo, nodes_type, 's&p_bayes_evo')

# Round-trip structure and parameters through their saved files, then rebuild
# the network from what was read back.
params = parameter_learning(data, nodes_type, bayes_evo, 'simple')
save_structure(bayes_evo, 'bayes_evo_structure')
skel = read_structure('bayes_evo_structure')
save_params(params, 'bayes_evo_params')
params = read_params('bayes_evo_params')
bayes_evo = HyBayesianNetwork(skel, params)
synth_df = generate_synthetics(bayes_evo, nodes_sign, 'simple', 500)

# Overlay the real and synthesized distributions, one panel per column.
# Plot `synth_df` (this network's sample); the original plotted `synth`, a
# leftover sample from the manual-network cell.
fig, ax = plt.subplots(3, 1, figsize=(10, 15))
for axis, column in zip(ax, ('RSI', 'MACD', 'Volume')):
    sns.distplot(data[column], ax=axis)
    sns.distplot(synth_df[column], ax=axis)
    axis.legend(['real', 'synthesized'])

# Accuracy is evaluated on the first 10 rows only; the first two elements of
# the result are printed.
print(calculate_acc(bayes_evo, data[:10], targets, 'simple')[0:2])