In [246]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

import os
from os import listdir
from os.path import isfile, join

import datetime
import time

from pathlib import Path

import IPython
import IPython.display
import numpy as np
import pandas as pd
import tensorflow as tf

import random
from random import shuffle

import numpy as np
from IPython.display import display
from bqplot import (
    OrdinalScale, LinearScale, Bars, Lines, Axis, Figure, PanZoom, Toolbar
)

import ipywidgets as widgets
from ipywidgets import Layout
import matplotlib.colors as colors

import pickle

import json

# Given an item and a list, if that item is in the list, returns the appropriate index
def return_idx(item, l):
    i = 0
    for thing in l:
        if item == thing:
            return i
        else:
            i += 1
    # If it's not in the list, then it's failed
    return None

In [254]:
class Graph_Strategies():
    def __init__(self, path):
        self.path = path
        self.path_strat = path + '\strategy_results'

        # If there isn't a strategy_results folder yet, make one
        try:
            os.mkdir(self.path_strat)
        except:
            None
        
        # Load the names of each strategy
        p_files = self.generate_p_files()
        # Load the values of each strategy 
        values = self.values_from_p(p_files)
        # Combine them into a dictionary
        self.strat_dict = {p_files[i]:values[i] for i in range(len(values))}
        
        # Used when you want to graph everything
        self.all_black = False
        
    def generate_p_files(self):
        files  = [f for f in listdir(self.path_strat) if isfile(join(self.path_strat, f))]
        files  = [f for f in files if f.endswith('.p')]
        files.sort()
        return(files)
    
    '''Takes in a list of files, returns a list of list of values'''
    def values_from_p(self, file):
        file_l= []
        for f in file:
            with open(self.path_strat + '\\' + f, 'rb') as read:
                file_l.append(pickle.load(read))
        return file_l
    
    def strat_graph(self, strat_sel):
        # Create a class variable for the selected feature
        self.strat_sel = strat_sel
        
        colors_list = ['f30000', 'f38c00', 'edf300', '7af300', '00f3ce', '0076f3', '9700f3', 'f300c3', '990000', 'd08800', '05aa00', '00a0aa', '7600aa']
        colors_list = [f'#{i}' for i in colors_list]
        
        if self.all_black:
            colors_list = []
            colors_list.append('#f30000')
            for i in range(100):
                colors_list.append('#000000')
        
        self.max_l = 0
        for key in strat_sel:
            value = len(self.strat_dict[key])
            if value > self.max_l:
                self.max_l = value
        
        x_data = list(range(self.max_l))
        y_data = [self.strat_dict[key] for key in strat_sel]
        
            
        x_scale = LinearScale()
        y_scale = LinearScale()

        ax_x = Axis(scale=x_scale, label='Trading Days', grid_lines='solid')
        ax_y = Axis(scale=y_scale, label='Portfolio Total (USD)', orientation='vertical', label_offset='50px', )

        line = [Lines(labels=[strat_sel[i]], x=x_data, y=y_data[i], scales={'x': x_scale, 'y': y_scale}, colors=[colors_list[i]], display_legend=True) for i in range(len(strat_sel))]

        panzoom = PanZoom(scales={'x': [x_scale], 'y': [y_scale]})

        fig = Figure(marks=line, axes=[ax_x, ax_y], title='Comparing Trading Strategies Over the Same Test Set', colors=['red'], legend_location='top-left', legend_text={'font-size':12})
        fig.layout.height = '950px'

        toolbar = Toolbar(figure = fig)

        display(fig, toolbar)
        
    def display_graph(self):
        
        strat_sel = widgets.SelectMultiple(
            options=self.strat_dict.keys(),
            rows=10,
            description='Strategies',
            disabled=False,
            layout=Layout(width='40%')
        )

        widgets.interact(self.strat_graph, strat_sel = strat_sel)

In [255]:
GS = Graph_Strategies(path=r'G:\Shared drives\Max Huffman - ECEN 403 404 URS Research 2020 2021\Datasets')
GS.display_graph()

interactive(children=(SelectMultiple(description='Strategies', layout=Layout(width='40%'), options=('0_Average…

In [249]:
'''Given a folder with entities, graphs all the files'''
class Graph_Time_Series_Data():
    
    def __init__(self, path):
        # The directory where all the entities time_series CSV are stored
        self.path = path
        self.entities, self.entities_idx = self.generate_list_of_entities()
        self.relations = self.generate_relations()
        self.Normalized_Adjacency_Matrix = self.generate_normalized_ajacency_matrix()
    
    '''Returns the list of entities and declares the relationship file if its in the directory'''
    def generate_list_of_entities(self):
        files  = [f for f in listdir(self.path) if isfile(join(self.path, f))]
        ents = [i.replace('.csv','') for i in files if i.endswith('.csv')]
        ents.sort()
        ents_idx = {item:idx for idx, item in enumerate(ents)}
    
        return ents, ents_idx
    
    def generate_relations(self):
        files  = [f for f in listdir(self.path) if isfile(join(self.path, f))]
        relation_file = [i for i in files if i.endswith('.json')]
        
        if len(relation_file) == 1:
            # Load the relationship dictionary
            with open(self.path + '\\' + relation_file[0]) as read_file:
                relations_dict = json.load(read_file)
            
            self.relations_dict = relations_dict
            
        elif len(relation_file) == 0:
            print('Directory does not contain an entity relationship .json file')
            
        else:
            sys.exit('There are multiple .json files in the directory, remove all or leave 1')
    
    '''Given a list of entities, return a list of their values averaged over the time-period'''
    '''Will be useful in detecting when a Neighboring Group moves NOT in accordance with the average'''
    def generate_average_entity(self, entities):
        None
            
    
    '''Generates the normalized adjacency matrix from the relations file'''
    def generate_normalized_ajacency_matrix(self):  
        companies = self.entities
        new_industry_relations = self.relations_dict

        # Iterate through each company ticker and replace it with a tuple that contains its index and ticker
        for key, value in new_industry_relations.items():
            new_value = []
            for v in value:
                new_value.append((return_idx(v, companies), v))
            new_industry_relations[key] = new_value

        # Iterate through each industry relationship and create an N x N adjacency matrix
        # Combine them all to create the final adjacency matrix in the same format as Paper #2
        RR_t = []
        for sector in new_industry_relations.keys():
            # Create an empty relationship matrix
            all_zeroes = tf.zeros([len(companies), len(companies)])
            relation_slice = all_zeroes.numpy()

            # Gather all the companies that exist in this sector
            siblings = new_industry_relations[sector]
            for i in siblings:
                for j in siblings:
                    relation_slice[i[0], j[0]] = 1
                    relation_slice[j[0], i[0]] = 1
            RR_t.append(relation_slice)
            
        RR_tf = tf.constant(RR_t)
        RR_tf = tf.transpose(RR_tf)
        relation_encoding = RR_tf.numpy()
        rel_shape = [relation_encoding.shape[0], relation_encoding.shape[1]]
        mask_flags = np.equal(np.zeros(rel_shape, dtype=int), np.sum(relation_encoding, axis=2))

        ajacent = np.where(mask_flags, np.zeros(rel_shape, dtype=float), np.ones(rel_shape, dtype=float))

        degree = np.sum(ajacent, axis=0)
        for i in range(len(degree)):
            degree[i] = 1.0 / degree[i]
        np.sqrt(degree, degree)
        deg_neg_half_power = np.diag(degree)

        GCN_mat = np.dot(np.dot(deg_neg_half_power, ajacent), deg_neg_half_power)
        
        GCN_mat = np.nan_to_num(GCN_mat)
        GCN_mat = tf.constant(GCN_mat) 
        
        return GCN_mat
    
    '''Returns a list of neighboring entities given an entity, includes the given entity'''
    def return_neighbors(self, ent):
        # List containing the idx of all entities that are neighbors to the given entity
        neighbors = [self.entities[idx] for idx, item in enumerate(self.Normalized_Adjacency_Matrix[self.entities_idx[ent]]) if item > 0]
        return neighbors
    
    
    def clean_csv_files(self):
        
        data_folder = Path(self.path)
        list_of_data_csv_files = [f for f in listdir(data_folder) if isfile(join(data_folder, f))]

        clean_names = [i[7:].replace('_1','') for i in list_of_data_csv_files]

        # Number of companies
        N = len(list_of_data_csv_files)

        # In this example we're cleaning up the csv files that are being read.
        # We don't need the first column or the last row of the provided data set
        df = pd.read_csv(data_folder / list_of_data_csv_files[0], header=None, usecols=[1, 2, 3, 4, 5])
        df.columns = ['Close', '5D MA', '10D MA', '20D MA', '30D MA']

        # Removethe bottom 5 rows and top 1 row to avoid -1234 empty spaces
        df = df[1:-5]

        # Number of sequences
        S = (len(df.index))

        # Number of features (columns)
        D = len(df.columns)

        for stock in range(0, N):
            # Read its CSV values, skipping the index column and the last row which has -1234 as a value
            df = pd.read_csv(data_folder / list_of_data_csv_files[stock], header=None, usecols=[1, 2, 3, 4, 5], engine='python')
            df.columns = ['Close', '5D MA', '10D MA', '20D MA', '30D MA']

            # Removethe bottom 5 rows and top 1 row to avoid -1234 empty spaces
            df = df[1:-5]
            
            df.to_csv(data_folder / clean_names[stock], index=False)
            
        
    def entity_graph(self, sel_feature, x_range, n_range, show_rel):
        # Create a class variable for the selected feature
        self.sel_feature = sel_feature
        self.x_range = x_range
        self.show_rel = show_rel
        self.n_range = n_range
        
        # Create some colors for the graphed lines to cycle through
        colors_list = [hex(0x8A0808 - i *0xB0D0) for i in range(50)]
        colors_list = [str(i) for i in colors_list]
        colors_list = ['#' + i[2:] for i in colors_list]
        shuffle(colors_list)
        colors_list.insert(0, '#f30000')
        
        # There will always be one entity selected, so it sets the X-axis
        ent_df = pd.read_csv(self.path + '\\' + self.sel_ent + ".csv")
        x_data = list(range(len(ent_df[ent_df.columns[self.feature_key[self.sel_feature]]].values)))[x_range[0]:x_range[1]]
        
        if not self.show_rel:
            y_data = ent_df[ent_df.columns[self.feature_key[self.sel_feature]]].values
        else:
            # Key names for the legned
            keys = self.return_neighbors(self.sel_ent)
            # List of DataFrames containing all the entities related to each other
            list_of_dfs = [pd.read_csv(self.path + '\\' + entity + ".csv") for entity in keys]
            # List of values from those DataFrames
            values = [df[df.columns[self.feature_key[self.sel_feature]]].values for df in list_of_dfs]
            
        x_scale = LinearScale()
        y_scale = LinearScale()

        ax_x = Axis(scale=x_scale, label='Time Steps', grid_lines='solid')
        ax_y = Axis(scale=y_scale, label='Value', orientation='vertical', label_offset='50px', )

        if not self.show_rel:
            line = [Lines(labels=[self.sel_ent], x=x_data, y=y_data, scales={'x': x_scale, 'y': y_scale}, display_legend=True)]
        else:
            n_range_of_entities = list(range(len(values)))[n_range[0]:n_range[1]]
            n_range_of_entities.insert(0,0)
            line = [Lines(labels=[keys[i]], x=x_data, y=values[i], scales={'x': x_scale, 'y': y_scale}, colors=[colors_list[i]], display_legend=True) for i in n_range_of_entities]

        panzoom = PanZoom(scales={'x': [x_scale], 'y': [y_scale]})
        

        fig = Figure(marks=line, axes=[ax_x, ax_y], title='Value of Entity(s) Over Time', colors=['red'], legend_location='top-left')
        fig.layout.height = '850px'

        toolbar = Toolbar(figure = fig)

        display(fig, toolbar)
        
    def display_graph(self):
        
        
        # Create a dropdown menu to select which entity you would like to view
        ent_drop = widgets.Dropdown(
            options = self.entities,
            description = 'Entities: ',
            layout=Layout(justify_content='flex-start')
        )
        
        
        
        def sel_feature(sel_ent):
            # Declare as a class variables
            self.sel_ent = sel_ent
            
            # Load in the data for the selected entity
            ent_df = pd.read_csv(self.path + '\\' + self.sel_ent + ".csv")
            # Create a dropdown menu to select which feature you would like to view
            ent_features = [f'{i}' for i in ent_df.columns]
            
            self.feature_key = {name:i for i, name in enumerate(ent_df.columns)}
            
            feat_drop = widgets.Dropdown(
                options = ent_features,
                description = 'Features: ',
                layout=Layout(justify_content='flex-end')
                
            )
            
            # Changes the default slider values to be the previous iteration if the main entity is changed
            try:
                start = self.x_range[0]
                end = self.x_range[1]
            except:
                start = 0
                end = 300
            
            x_range = widgets.IntRangeSlider(
                value=[start, end],
                min=0,
                max=ent_df.shape[0],
                step=10,
                description='Time Range:',
                continuous_update=False,
                orientation='horizontal',
                readout=True,
                readout_format='d',
                layout=Layout(width='100%', align_items='stretch')
            )
            
            
            n_range = widgets.IntRangeSlider(
                value=[0, 5],
                min=1,
                max=len(self.return_neighbors(sel_ent)),
                step=1,
                description='Neighbors:',
                continuous_update=False,
                orientation='horizontal',
                readout=True,
                readout_format='d',
                layout=Layout(width='40%', align_items='stretch')
            )
            
            # Create a checkbox boolean
            show_rel = widgets.Checkbox(
                value=False,
                description = 'Show Neighbors',
                disabled=False,
            )
            
            widgets.interact(self.entity_graph, sel_feature = feat_drop, x_range = x_range, n_range = n_range, show_rel = show_rel)
               
        widgets.interact(sel_feature, sel_ent = ent_drop)
        

In [250]:
test = Graph_Time_Series_Data(path=r'C:\Users\Maxwell\PycharmProjects\TAMU-ECEN-403-IFPTSND\Temporal_Relational_Stock_Ranking-master\data\2013-01-01 - NASDAQ - Demo')
test.display_graph()

  degree[i] = 1.0 / degree[i]


interactive(children=(Dropdown(description='Entities: ', layout=Layout(justify_content='flex-start'), options=…

In [245]:
GCN = test.Normalized_Adjacency_Matrix.numpy()
GCN_t = np.transpose(GCN)

# for i in range(100):
#     sumf = 0
#     for n in GCN[i]:
#         sumf = sumf + n
#     print(sumf)

print(GCN_t[0])

[0.03125 0.      0.      0.      0.      0.      0.      0.      0.
 0.      0.      0.      0.      0.      0.03125 0.      0.      0.03125
 0.      0.      0.03125 0.      0.      0.      0.      0.      0.
 0.      0.      0.      0.      0.03125 0.      0.      0.      0.
 0.      0.      0.      0.      0.      0.      0.      0.      0.
 0.      0.      0.      0.      0.      0.      0.      0.      0.
 0.      0.      0.      0.      0.      0.      0.      0.      0.
 0.      0.      0.      0.      0.      0.      0.      0.      0.
 0.      0.      0.      0.      0.      0.      0.      0.      0.
 0.      0.      0.      0.      0.      0.03125 0.      0.      0.
 0.      0.      0.      0.      0.      0.      0.      0.      0.
 0.      0.      0.      0.      0.      0.      0.      0.      0.
 0.      0.      0.      0.      0.      0.      0.      0.      0.
 0.      0.      0.      0.      0.      0.      0.      0.      0.
 0.      0.      0.      0.      0.      0.