In [23]:
# Jupyter Notebook libraries
from IPython.display import display
from ipywidgets import widgets

# Python native libraries
import datetime as dt
# from path import Path
from time import sleep
import sqlalchemy

# Conda libraries
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from bokeh.models import HoverTool

# External libraries
# import mplfinance as mpf
import hvplot.pandas

# Local libraries
from utils.alpaca import alpaca as alp

In [24]:
output = widgets.Output()
log_output = widgets.Output()
display(output)

Output()

In [25]:
database_connection_string = 'sqlite:///market_db.db'
engine = sqlalchemy.create_engine(database_connection_string)
exchange_list = sqlalchemy.inspect(engine).get_table_names()

In [26]:
stock_dict = {}
scaled_dict = {}
for exchange in exchange_list:
    stock_dict[exchange] = pd.read_sql_table(table_name=exchange, con=engine, index_col='index').drop(columns=['close']).dropna()

    # Convert column name type from sqlalchemy object to string
    new_col_list = []
    column_list = stock_dict[exchange].columns
    for c in column_list:
        # display(c, type(c))
        new_col_list.append(str(c))
    stock_dict[exchange].columns = new_col_list   

    # Scale datasets using sklearn StandardScaler
    scaled_data = StandardScaler().fit_transform(stock_dict[exchange])
    col_list = stock_dict[exchange].columns
    scaled_df = pd.DataFrame(
        scaled_data,
        columns=col_list,
    )
    scaled_df['ticker'] = stock_dict[exchange].index
    scaled_df = scaled_df.set_index('ticker')
    scaled_dict[exchange] = scaled_df


In [27]:
exchange_menu = widgets.Dropdown(description="Exchange", options=exchange_list)
with output:
    display(exchange_menu)

In [28]:
def gen_exchange_detail(exchange):
    display(scaled_dict[exchange])
    pass

In [29]:
scaled_data_output = widgets.interactive_output(
    gen_exchange_detail,
    {
        'exchange': exchange_menu,
    }
)

In [30]:
display(log_output)

Output()

In [31]:
pca_dict = {}
def gen_pca(exchange, pca=None):
    pca_max = len(scaled_dict[exchange].columns)
    if pca is None:
        pca = pca_max

    pca_slider.max = pca_max
    with log_output:
        display(f"max pca: {pca_max}")
    pca_list = []
    for i in range(pca):
        pca_list.append(f'PC{i}')
    with log_output:
        display(pca_list)

    pca = PCA(n_components=pca, random_state=0)
    market_pca_data = pca.fit_transform(scaled_dict[exchange])
    pca_df = pd.DataFrame(market_pca_data, columns=pca_list)
    with log_output:
        display("PCA DF:", pca_df)
    # Copy the crypto names from the original data
    pca_df['ticker'] = scaled_dict[exchange].index
    # Set the coinid column as index
    pca_df.set_index('ticker', inplace=True)   

    global pca_dict
    pca_dict[exchange] = pca_df 

    exp_var = pca.explained_variance_ratio_
    variance_string = f'{exp_var.sum()*100:.2f}%'
    pca_variance.value = (variance_string)
    # display(scaled_dict[exchange])
    pass

In [32]:
pca_slider = widgets.IntSlider(value=3, min=2, max=len(scaled_dict[exchange_menu.value].columns))
pca_variance = widgets.Label()
pca_output = widgets.interactive_output(
    gen_pca,
    {
        'exchange': exchange_menu,
        'pca': pca_slider,
    }
)
pca_tab = widgets.HBox([pca_slider, pca_variance])

In [33]:
for e in exchange_list:
    pca_dict[e] = gen_pca(e)


'max pca: 6'

'max pca: 6'

In [34]:
display(pca_dict)

{'AMEX': None, 'NASDAQ': None, 'NYSE': None}

In [35]:
k = list(range(1,12))
elbow_dicts = {}
exchange = exchange_menu.value
inertia=[]
for i in k:
    model = KMeans(n_clusters=i, random_state=0)
    model.fit(pca_dict[exchange])
    inertia.append(model.inertia_)
elbow_dict = {
    'k': k,
    'inertia': inertia,
}
# Create a DataFrame with the data to plot the Elbow curve
elbow_df = pd.DataFrame(elbow_dict)
elbow_dicts[exchange] = elbow_df


ValueError: Expected 2D array, got scalar array instead:
array=nan.
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

In [None]:
tab_titles = ['Scaled Data', 'PCA']
children = [scaled_data_output, pca_tab ]
tab = widgets.Tab()
tab.children = children
for i, t in enumerate(tab_titles):
    tab.set_title(i, t)

In [None]:
with output:
    display(tab)