# Industry Relation

In [53]:
import numpy as np

In [54]:
# Stock market to process: "NASDAQ" or "NYSE". The value is substituted
# into the input and output file names used by the cells below.
market = "NASDAQ"

In [55]:
# Load the sector/industry relation array of the selected market.
relation_path = "../Temporal_Relational_Stock_Ranking/data/relation/sector_industry/{}_industry_relation.npy".format(market)
a = np.load(relation_path)

In [56]:
# Per the notes below, a[i][j] is the relation vector R(i, j), so the axes
# are presumably (stocks, stocks, relation types) — confirm against the data.
a.shape

(1026, 1026, 97)

## Consistency Tests

Let R(A, B) be the relation vector between stocks A and B. The relation vector has size `num_relations` and holds the value 1 at the index of the relation type that applies. So a relation vector should look something like `[1 0 ... 0]`, with all but one of its values being 0.

For some reason, the relation vector of a stock with itself always has its last value set to 1, so R(A, A) might look like this: `[1 0 ... 0 1]`.

### Apparently, the last relation type is always 0 in R(A,B) relations

In [57]:
# Visit every ordered pair of distinct stocks and report each pair whose
# relation vector has the last relation type set to 1.
for row, col in np.ndindex(a.shape[0], a.shape[1]):
    if row == col:
        continue
    if a[row, col, -1] == 1:
        print("ERROR: last relation type value 1 in R(A,B)")

### The last relation type is always 1 in R(A,A) relations

In [58]:
# Walk the diagonal: every self-relation R(A, A) is expected to have the
# last relation type set. The message names R(A,A) because that is the
# entry being checked here.
for i in range(a.shape[0]):
    if a[i][i][-1] == 0:
        print("ERROR: last relation type value 0 in R(A,A)")

### R(A, A) needs to have at least one value 1

Every stock is in the same industry sector as itself, and it must belong to at least one industry sector.

In [59]:
# Each self-relation R(A, A) must have at least one relation type set.
for i in range(a.shape[0]):
    # Named `relation_count` rather than `sum` so the builtin `sum` is not
    # shadowed in the notebook's global namespace for later cells.
    relation_count = np.sum(a[i][i])
    if relation_count <= 0:
        print("ERROR: condition not met!")

### R(A, B) must have only 1 relation type

In [60]:
# Report every pair of distinct stocks whose relation vector sums to more
# than 1, i.e. has more than one relation type set.
for i, relations_of_i in enumerate(a):
    for j, relation in enumerate(relations_of_i):
        if i != j and np.sum(relation) > 1:
            print("ERROR: 2 relations in R(A,B)")

### R(A, B) = R(B, A)

The order shouldn't matter: if A is in the same industry sector as B, then B is also in the same industry sector as A.

In [61]:
# Compare each relation vector with its mirrored counterpart and report
# every ordered pair for which the two differ.
num_rows, num_cols = a.shape[0], a.shape[1]
for i in range(num_rows):
    for j in range(num_cols):
        if np.array_equal(a[i, j], a[j, i]):
            continue
        print("ERROR: Not equal!")

### R(A, B) ⊂ R(A, A)

R(A, B) is essentially the intersection of the industry sectors of A and B, so every relation type set in R(A, B) must also be set in R(A, A).

In [62]:
# Masking a relation vector with R(i, i) must leave it unchanged; otherwise
# it has a relation type set that R(i, i) does not have. Both R(i, j) and
# R(j, i) are checked against R(i, i).
for i in range(a.shape[0]):
    for j in range(a.shape[1]):
        self_relation = a[i, i]
        for relation in (a[i, j], a[j, i]):
            masked = relation * self_relation
            if not np.array_equal(masked, relation):
                print("ERROR: Not equal!")

## Conversion to edge_index

In [63]:
def add_edge(edge_index, orig_node, dest_node, undirected=True):
    """Return a new edge_index with the edge orig_node -> dest_node appended.

    Parameters:
        edge_index: existing array with one column per edge (first row the
            origin node, second row the destination node), or None to
            start a new one.
        orig_node: origin node of the edge.
        dest_node: destination node of the edge.
        undirected: when True, the reverse edge dest_node -> orig_node is
            appended right after the forward one.

    Returns:
        A newly allocated array; the `edge_index` argument is not modified.
    """
    columns = [] if edge_index is None else [edge_index]
    columns.append(np.array([[orig_node], [dest_node]]))
    if undirected:
        columns.append(np.array([[dest_node], [orig_node]]))
    return np.hstack(columns)

In [64]:
# Build the edge list from the relation array.
# Sector/industry relations are symmetric (R(A, B) = R(B, A)), so only the
# pairs with j > i are scanned; self-relations R(A, A) are skipped.
edge_pairs = []
edge_type = []

for i in range(a.shape[0]):
    for j in range(i + 1, a.shape[1]):
        relation = a[i][j]
        if relation.sum() > 0:
            # Index of the first relation type set to 1 for this pair.
            type_ = np.where(relation == 1)[0][0]
            # Undirected graph: store both directions, each with the same
            # relation type, so edge_index and edge_type stay aligned.
            edge_pairs.extend(((i, j), (j, i)))
            edge_type.extend((type_, type_))

# Number of undirected edges (each contributes two columns to edge_index).
edges = len(edge_pairs) // 2

# Assemble the (2, 2 * edges) array in a single step. Growing it with
# np.hstack once per edge re-copies the whole array every time, which is
# quadratic in the number of edges. The reshape keeps a well-formed (2, 0)
# array when no edge was found, instead of leaving edge_index as None.
edge_index = np.ascontiguousarray(
    np.array(edge_pairs, dtype=int).reshape(-1, 2).T
)
edge_type = np.array(edge_type)

print(edges)
print(edge_index.shape)
print(edge_type.shape)
print(np.max(edge_index))

26293
(2, 52586)
(52586,)
1025


## Save

In [65]:
# Persist the edge list and the matching edge types for the selected market.
edge_index_path = "../relational_data/edge_indexes/{}_sector_industry_edge_index.npy".format(market)
edge_type_path = "../relational_data/edge_types/{}_sector_industry_edge_type.npy".format(market)
np.save(edge_index_path, edge_index)
np.save(edge_type_path, edge_type)

## Visualization

In [19]:
import torch
from torch_geometric.data import Data

# Convert to a tensor under a new name: overwriting the NumPy `edge_index`
# would make this cell non-idempotent and would break a re-run of the save
# cell above.
edge_index_tensor = torch.tensor(edge_index, dtype=torch.long)

# Placeholder node features: one all-zero feature per node. PyG expects the
# node feature matrix laid out as [num_nodes, num_node_features].
x = torch.zeros(a.shape[0], 1)

data = Data(x=x, edge_index=edge_index_tensor, num_nodes=a.shape[0])

In [20]:
import networkx as nx
from torch_geometric.utils import to_networkx

# Convert the PyG graph to an undirected NetworkX graph and write it as a
# GEXF file for the selected market.
gexf_path = "../relational_data/gephi_visualizations/{}_sector_industry.gexf".format(market)
g = to_networkx(data, to_undirected=True)
nx.write_gexf(g, gexf_path)