In [1]:
import pandas as pd
import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
from torch.nn.functional import sigmoid, relu

### Load Data

In [2]:
def _read_stripped_lines(path):
    """Return the lines of the text file at `path` as a numpy array of stripped strings."""
    with open(path, 'r') as fid:
        return np.array([line.strip() for line in fid])


# One vocabulary file per network layer: items, relations, attributes.
names_items = _read_stripped_lines('1016data/sem_items.txt')
names_relations = _read_stripped_lines('1016data/sem_relations.txt')
names_attributes = _read_stripped_lines('1016data/sem_attributes.txt')

# Vocabulary sizes; these fix the widths of the input and output layers.
nobj = len(names_items)
nrel = len(names_relations)
nattributes = len(names_attributes)
print(f'num of objects: {nobj}')
print(f'num of relations: {nrel}')
print(f'num of attributes: {nattributes}')

num of objects: 75
num of relations: 14
num of attributes: 218


In [3]:
# Each row of sem_data.txt is one training pattern: the first nobj+nrel columns
# are the input (item block followed by relation block) and the remaining
# columns are the target attribute pattern.
D = np.loadtxt('1016data/sem_data.txt')
# Guard against vocabulary files that do not match the data file; without this
# a mismatch would silently shift the input/output split below.
assert D.shape[1] == nobj + nrel + nattributes, (
    f'expected {nobj + nrel + nattributes} columns, found {D.shape[1]}'
)
input_pats = torch.tensor(D[:, :nobj + nrel], dtype=torch.float)
output_pats = torch.tensor(D[:, nobj + nrel:], dtype=torch.float)
N = input_pats.shape[0] # number of training patterns
print(f'num of training examples: {N}')

# Decode the first pattern back into names as a sanity check. Boolean masks
# are used so the vectors can index the name arrays directly.
input_v = input_pats[0,:].numpy().astype('bool')
output_v = output_pats[0,:].numpy().astype('bool')
print('Example input pattern:')
print(input_v.astype('int'))
print('Example output pattern:')
print(output_v.astype('int'))
print("")
print("Which encodes...")
print('Item ',end='')
# Split at nobj (not a literal 75) so the decode stays correct for any vocabulary size.
print(names_items[input_v[:nobj]])
print('Relation ',end='')
print(names_relations[input_v[nobj:]])
print('Attributes ',end='')
print(names_attributes[output_v])
print(f'num of attributes: {nattributes}')

num of training examples: 250
Example input pattern:
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0]
Example output pattern:
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

Which encodes...
Item ['exercise']
Relation ['hasprerequisite']
Attributes ['danc' 'jog' 'lift weight' 'move' 'muscl' 'run']
num of attributes: 218


In [4]:
class Net(nn.Module):
    """Feed-forward network mapping an (item, relation) input to attribute scores.

    The item part of the input passes through two linear+ReLU layers to form the
    representation; the relation part passes through one linear+ReLU layer. Both
    are concatenated, fed through a hidden linear+ReLU layer and finally through
    a linear attribute layer. No activation is applied to the attribute layer
    inside this class.

    Layer widths on the input/output side come from the module-level `nobj`,
    `nrel` and `nattributes`.
    """

    def __init__(self, rep_size, hidden_size, layer_1_size, hidden_rel_size):
        """Create the five linear layers.

        Args:
            rep_size: number of units in the representation layer.
            hidden_size: number of units in the hidden layer.
            layer_1_size: number of units between the item input and the
                representation layer.
            hidden_rel_size: number of units in the relation-side layer.
        """
        super().__init__()
        # Attribute names are part of the state_dict keys and creation order
        # determines the order in which random initial weights are drawn, so
        # both are kept fixed.
        self.itol1 = nn.Linear(in_features=nobj, out_features=layer_1_size)
        self.l1tor = nn.Linear(in_features=layer_1_size, out_features=rep_size)
        self.reltohr = nn.Linear(in_features=nrel, out_features=hidden_rel_size)
        self.cattoh = nn.Linear(in_features=rep_size + hidden_rel_size, out_features=hidden_size)
        self.htoa = nn.Linear(in_features=hidden_size, out_features=nattributes)

    def forward(self, x):
        """Run the forward pass.

        Args:
            x: either a 1D tensor of length nobj+nrel (a single pattern) or a
                [B x (nobj+nrel)] tensor holding B patterns, one per row. The
                first nobj entries feed the item side, the rest the relation side.

        Returns:
            A tuple (output, hidden, rep):
                output: [B x nattributes] attribute-layer values (no activation applied),
                hidden: [B x hidden_size] hidden-layer activations,
                rep: [B x rep_size] representation-layer activations.
        """
        # A single 1D pattern becomes a batch of one row.
        batch = x.view(-1, nobj + nrel)
        item_part, rel_part = batch[:, :nobj], batch[:, nobj:]

        # Item pathway: item -> layer 1 -> representation.
        rep = relu(self.l1tor(relu(self.itol1(item_part))))
        # Relation pathway: relation -> relation hidden layer.
        rel_hidden = relu(self.reltohr(rel_part))

        # Join both pathways along the feature axis before the hidden layer.
        hidden = relu(self.cattoh(torch.cat([rep, rel_hidden], dim=1)))
        return self.htoa(hidden), hidden, rep

In [5]:
# Rebuild the architecture before loading: load_state_dict needs layers whose
# names and shapes match the tensors stored in the checkpoint.
mynet = Net(rep_size=75,hidden_size=200, layer_1_size=75, hidden_rel_size=30)
PATH = 'model_complex.pt'
# map_location='cpu' lets a checkpoint saved on a GPU load on a CPU-only
# machine; the analysis cells convert outputs with .numpy(), which needs CPU tensors.
mynet.load_state_dict(torch.load(PATH, map_location='cpu'))
# Switch to inference mode; as the last expression this also displays the module summary.
mynet.eval()

Net(
  (itol1): Linear(in_features=75, out_features=75, bias=True)
  (l1tor): Linear(in_features=75, out_features=75, bias=True)
  (reltohr): Linear(in_features=14, out_features=30, bias=True)
  (cattoh): Linear(in_features=105, out_features=200, bias=True)
  (htoa): Linear(in_features=200, out_features=218, bias=True)
)

In [6]:
# Table of (relation, head, tail) facts; preview the first five rows.
df = pd.read_csv('bio_concepts.csv')
df.head()

Unnamed: 0,relation,head,tail
0,hasprerequisite,exercise,run
1,hassubevent,exercise,lose weight
2,isa,mammal,person
3,atlocation,food,restur
4,atlocation,food,cupboard


### Explore df

In [7]:
# Count the 'isa' facts recorded for each head concept and show the five
# heads with the most of them.
df_isa = df.loc[df['relation'] == 'isa']
groupbyhead = df_isa.groupby(['head'])
counter = groupbyhead['tail'].size().to_frame(name='# isa')
top5 = counter.sort_values(by='# isa', ascending=False).head(5)
top5

Unnamed: 0_level_0,# isa
head,Unnamed: 1_level_1
food,8
animal,7
alcohol,6
mammal,4
brain,4


In [8]:
# All 'isa' facts whose head concept is 'alcohol'.
filt = df['head'].eq('alcohol') & df['relation'].eq('isa')
df.loc[filt]

Unnamed: 0,relation,head,tail
88,isa,alcohol,liquid
90,isa,alcohol,fuel
99,isa,alcohol,depress
116,isa,alcohol,sed pain-kil
175,isa,alcohol,drink
228,isa,alcohol,sometim


### One-Hot Encoding

In [9]:
# Lookup tables from a vocabulary string to its position in the corresponding
# name array (items -> head, attributes -> tail, relations -> relation).
head_index = {name: idx for idx, name in enumerate(names_items)}
tail_index = {name: idx for idx, name in enumerate(names_attributes)}
relation_index = {name: idx for idx, name in enumerate(names_relations)}

# A dict shorter than its source array means a name occurs twice, which would
# make the name -> index mapping ambiguous. Comparing against the loaded sizes
# (rather than literal counts) keeps the check valid for any vocabulary.
assert len(head_index) == nobj, 'duplicate names in sem_items.txt'
assert len(tail_index) == nattributes, 'duplicate names in sem_attributes.txt'
assert len(relation_index) == nrel, 'duplicate names in sem_relations.txt'

In [10]:
def one_hot_encoding(head, relation, tail):
    """Encode an (item, relation) query as a network input vector.

    Args:
        head: item name, a key of `head_index`.
        relation: relation name, a key of `relation_index`.
        tail: attribute name, a key of `tail_index`.

    Returns:
        (vector, t_idx) where `vector` is a float tensor of length nobj+nrel
        with a 1 at the item position and a 1 at the relation position
        (relations occupy the slots after the nobj item slots), and `t_idx`
        is the index of `tail` in the attribute vocabulary.

    Raises:
        KeyError: if any of the three names is not in its lookup table.
    """
    h_idx = head_index[head]
    # Relation slots follow the item slots, matching the split in Net.forward.
    r_idx = nobj + relation_index[relation]
    # The tail is not written into the input; its index is returned so the
    # caller can read the matching unit of the network output.
    t_idx = tail_index[tail]

    vector = torch.zeros(nobj + nrel, dtype=torch.float)
    vector[h_idx] = 1
    vector[r_idx] = 1
    return vector, t_idx

### Random Noise to Represent Dementia

In [11]:
def add_random_noise_hook(noisy_coeff, seed):
    """Build a forward hook that adds seeded Gaussian noise to a layer's output.

    Args:
        noisy_coeff: scale applied to the standard-normal noise (0 leaves the
            output unchanged).
        seed: seed for the noise; the same seed yields the same noise pattern
            on every forward pass.

    Returns:
        A function with the `(module, input, output)` forward-hook signature
        that returns `output + noise * noisy_coeff`. Returning a tensor from a
        forward hook replaces the layer's output.
    """
    def hook(module, input, output):
        # Draw from a private generator instead of calling torch.manual_seed:
        # reseeding the global RNG on every forward pass would silently reset
        # all other randomness in the session.
        generator = torch.Generator(device=output.device)
        generator.manual_seed(seed)
        noise = torch.randn(
            output.shape,
            generator=generator,
            dtype=output.dtype,
            device=output.device,
        )
        # Return a new tensor rather than mutating output.data in place.
        return output + noise * noisy_coeff

    return hook

In [12]:
def dementia(head, relation, tail, noisy_coeff_lst, seed):
    """Measure how noise at the representation layer changes one prediction.

    For each noise level, a noise hook is attached to `mynet.l1tor` (so noise
    is added to that linear layer's output, before the ReLU applied in the
    network's forward pass), the (head, relation) query is run through
    `mynet`, and the sigmoid of the output unit belonging to `tail` is recorded.

    Args:
        head: item name of the query.
        relation: relation name of the query.
        tail: attribute name whose output unit is read.
        noisy_coeff_lst: iterable of noise scales to try, in order.
        seed: seed passed to the noise hook, shared by all noise levels.

    Returns:
        List with one sigmoid output per entry of `noisy_coeff_lst`.
    """
    input_v, output_idx = one_hot_encoding(head, relation, tail)
    rep_layer = mynet.l1tor
    prediction_lst = []
    for noisy_coeff in noisy_coeff_lst:
        handle = rep_layer.register_forward_hook(add_random_noise_hook(noisy_coeff, seed))
        try:
            with torch.no_grad():
                logits = mynet(input_v)[0]
            # Flatten instead of reshaping to a fixed width so this works for
            # any attribute vocabulary size.
            pred = torch.sigmoid(logits).detach().numpy().reshape(-1)[output_idx]
        finally:
            # Always detach the hook, even if the forward pass fails; a
            # leftover hook would stack extra noise onto every later call.
            handle.remove()
        print(f'with noise level {noisy_coeff}, sigmoid output is {pred}')
        prediction_lst.append(pred)
    return prediction_lst

In [13]:
# Query "alcohol isa liquid" under increasing noise at the representation layer.
head, relation, tail = 'alcohol', 'isa', 'liquid'
seed = 5
noisy_coeff_lst = [0, 0.5, 1, 1.5, 2]

prediction_lst = dementia(head, relation, tail, noisy_coeff_lst, seed)
print(prediction_lst)

with noise level 0, sigmoid output is 0.9979329109191895
with noise level 0.5, sigmoid output is 0.9970124959945679
with noise level 1, sigmoid output is 0.9899798631668091
with noise level 1.5, sigmoid output is 0.9280300140380859
with noise level 2, sigmoid output is 0.2935791015625
[0.9979329, 0.9970125, 0.98997986, 0.92803, 0.2935791]


In [14]:
# Same probe for "alcohol isa sed pain-kil", using the same seed and noise levels.
head, relation, tail = 'alcohol', 'isa', 'sed pain-kil'
seed = 5
noisy_coeff_lst = [0, 0.5, 1, 1.5, 2]

prediction_lst = dementia(head, relation, tail, noisy_coeff_lst, seed)
print(prediction_lst)

with noise level 0, sigmoid output is 0.9997372031211853
with noise level 0.5, sigmoid output is 0.7836465239524841
with noise level 1, sigmoid output is 0.0010879221372306347
with noise level 1.5, sigmoid output is 2.0027208336159674e-07
with noise level 2, sigmoid output is 8.008195495623482e-11
[0.9997372, 0.7836465, 0.0010879221, 2.0027208e-07, 8.0081955e-11]
