In [None]:
# ライブラリの読み込み
import numpy as np
import pandas as pd
import scipy
import matplotlib.pyplot  as plt
import torch
import torch.nn as nn
import torch.optim as optimizers
import torch.nn.functional as F 
import torchtext
import random

# Seed the Python, NumPy and PyTorch random number generators from one constant
SEED = 9837
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Allow larger DataFrames to be displayed without truncation
pd.set_option("display.max_rows", 250)
pd.set_option("display.max_columns", 100)

In [None]:
# Draw one categorical (single-trial multinomial) sample per row of a probability matrix
def rmnom(pr, n, k, pattern):
    """Sample one category index per row of ``pr`` by inverse-CDF sampling.

    Parameters
    ----------
    pr : 2-D array, one row of category probabilities per sample.
    n : int, number of rows to sample (one uniform draw is made per row).
    k : int, number of categories; only used to build the one-hot matrix
        when ``pattern == 1``.
    pattern : if ``1`` return ``(z_id, Z)``, otherwise return ``z_id`` only.

    Returns
    -------
    z_id : int array of length ``n`` with the sampled category index.
    Z : (only when ``pattern == 1``) int one-hot matrix of shape ``(n, k)``.
    """
    cum_pr = np.cumsum(pr, axis=1)
    u = np.random.uniform(0, 1, n)[:, np.newaxis]
    reached = cum_pr >= u
    z_id = np.argmax(reached, axis=1).astype("int")

    # If the cumulative sum of a row never reaches the uniform draw (floating-point
    # round-off, or a row summing to slightly less than 1), argmax over an all-False
    # row would silently return category 0.  Assign the last category that carries
    # probability mass instead (first position where the cumulative sum peaks).
    missed = ~reached.any(axis=1)
    if missed.any():
        z_id[missed] = np.argmax(cum_pr[missed], axis=1)

    if pattern == 1:
        Z = np.eye(k, dtype="int")[z_id]
        return z_id, Z
    return z_id

# データの生成

## 学習用の入力データの定義

In [None]:
# Data generation
# Size parameters
syntax = 8
max_seq = 100        # sentences are truncated to at most this many tokens
mask_id = 0          # id 0 is left in every padded position of the id matrices
k1 = 25
k2 = 15
k = k1 + k2
d = 5000             # number of documents
v1 = 800
v2 = 200
v = v1 + v2

# Sentence ids per document
min_word = 2         # lower bound on tokens per phrase
min_phrase = 3       # lower bound on phrases per sentence
min_pt = 5           # lower bound on sentences per document
pt = np.random.poisson(np.random.gamma(10.0, 1.0, d), d)   # sentences per document
pt[pt < min_pt] = min_pt
L = np.sum(pt)                                             # total number of sentences
d_id = np.repeat(np.arange(d), pt)                         # document index of each sentence
sentence_id = np.arange(L)
phrase_id = np.zeros((L, max_seq), dtype="int")            # running phrase id (1-based, 0 = padding)
phrase_no = np.zeros((L, max_seq), dtype="int")            # phrase number within the sentence (1-based)
word_no = np.zeros((L, max_seq), dtype="int")              # token position within the sentence (1-based)
n = np.repeat(0, L)                                        # tokens per sentence

# Build the phrase ids sentence by sentence
max_phrase = 0       # largest phrase id handed out so far; the first sentence starts at 1
for i in range(L):
    ph = np.random.poisson(np.random.gamma(17.5, 0.5, 1), 1)
    ph[ph < min_phrase] = min_phrase
    # Use a plain Python int: passing the length-1 array to np.arange relies on
    # implicit array-to-scalar conversion
    num_phrase = int(ph[0])
    w = np.random.poisson(np.random.gamma(75.0, 0.05, num_phrase), num_phrase)
    w[w < min_word] = min_word
    phrase = np.repeat(np.arange(num_phrase), w)[:max_seq] + max_phrase + 1
    max_phrase = np.max(phrase)
    n[i] = phrase.shape[0]
    phrase_id[i, :n[i]] = phrase
    phrase_no[i, :n[i]] = phrase - np.min(phrase) + 1
    word_no[i, :n[i]] = np.arange(n[i]) + 1

# Corpus statistics
max_phrase_no = np.max(phrase_no)
max_word_no = np.max(word_no)
# Tokens per document, computed in one pass.  This rebinds `pt`, which held the
# number of sentences per document until here.
pt = np.bincount(d_id, weights=n, minlength=d).astype(n.dtype)
# NOTE(review): phrase ids start at 1, so max(phrase_id) already equals the number of
# phrases; the "- 1" is kept as in the original - confirm the intended meaning of M.
M = np.max(phrase_id) - 1
N = np.sum(n)        # total number of tokens

In [None]:
# Generate the words
# Topic-model parameters
topic = 50
topic_arange = np.arange(topic)
theta = np.random.dirichlet(np.repeat(0.5, topic), d)     # one topic distribution per document
phi = np.random.dirichlet(np.repeat(0.025, v), topic)     # one word distribution (over v words) per topic

# Draw the words of every sentence
word_id = np.zeros((L, max_seq), dtype="int")
for i in range(L):
    if i%10000==0:
        print(i)
    # One topic per token: each multinomial row is one-hot, so argmax recovers the topic index
    z = np.argmax(np.random.multinomial(1, theta[d_id[i]], n[i]), axis=1)
    # Rows of phi[z] have v categories, so v (not topic) is the category count for rmnom.
    # Word ids are shifted by 1 so that 0 stays reserved for padding.
    word_id[i, :n[i]] = rmnom(phi[z], n[i], v, 0) + 1

## 検証用の入力データを定義

In [None]:
# Data generation
# Sentence ids per validation document
d0 = 2500                                                     # number of validation documents
pt0 = np.random.poisson(np.random.gamma(10.0, 1.0, d0), d0)   # sentences per document
pt0[pt0 < min_pt] = min_pt
L0 = np.sum(pt0)                                              # total number of sentences
d_id0 = np.repeat(np.arange(d0), pt0)                         # document index of each sentence
sentence_id0 = np.arange(L0)
phrase_id0 = np.zeros((L0, max_seq), dtype="int")             # running phrase id (1-based, 0 = padding)
phrase_no0 = np.zeros((L0, max_seq), dtype="int")             # phrase number within the sentence (1-based)
word_no0 = np.zeros((L0, max_seq), dtype="int")               # token position within the sentence (1-based)
n0 = np.repeat(0, L0)                                         # tokens per sentence

# Build the phrase ids sentence by sentence
max_phrase = 0       # largest phrase id handed out so far; the first sentence starts at 1
for i in range(L0):
    ph = np.random.poisson(np.random.gamma(17.5, 0.5, 1), 1)
    ph[ph < min_phrase] = min_phrase
    # Use a plain Python int: passing the length-1 array to np.arange relies on
    # implicit array-to-scalar conversion
    num_phrase = int(ph[0])
    w = np.random.poisson(np.random.gamma(75.0, 0.05, num_phrase), num_phrase)
    w[w < min_word] = min_word
    phrase = np.repeat(np.arange(num_phrase), w)[:max_seq] + max_phrase + 1
    max_phrase = np.max(phrase)
    n0[i] = phrase.shape[0]
    phrase_id0[i, :n0[i]] = phrase
    phrase_no0[i, :n0[i]] = phrase - np.min(phrase) + 1
    word_no0[i, :n0[i]] = np.arange(n0[i]) + 1

# Corpus statistics
max_phrase_no0 = np.max(phrase_no0)
max_word_no0 = np.max(word_no0)
# Tokens per document, computed in one pass.  This rebinds `pt0`, which held the
# number of sentences per document until here.
pt0 = np.bincount(d_id0, weights=n0, minlength=d0).astype(n0.dtype)
# NOTE(review): phrase ids start at 1, so max(phrase_id0) already equals the number of
# phrases; the "- 1" is kept as in the original - confirm the intended meaning of M0.
M0 = np.max(phrase_id0) - 1
N0 = np.sum(n0)      # total number of tokens

In [None]:
# Generate the words
# Topic-model parameters
topic_arange = np.arange(topic)
# NOTE: this rebinds the global `theta` to a (d0 x topic) matrix for the validation
# documents; any earlier value bound to `theta` is no longer reachable afterwards.
theta = np.random.dirichlet(np.repeat(0.5, topic), d0)

# Draw the words of every sentence
word_id0 = np.zeros((L0, max_seq), dtype="int")
for i in range(L0):
    if i%10000==0:
        print(i)
    # One topic per token: each multinomial row is one-hot, so argmax recovers the topic index
    z = np.argmax(np.random.multinomial(1, theta[d_id0[i]], n0[i]), axis=1)
    # Rows of phi[z] have v categories, so v (not topic) is the category count for rmnom.
    # Word ids are shifted by 1 so that 0 stays reserved for padding.
    word_id0[i, :n0[i]] = rmnom(phi[z], n0[i], v, 0) + 1

## パラメータと応答変数を生成

In [None]:
# Parameter generation
# Prior means and (diagonal) covariance matrices
fearture = 1024      # width of the feed-forward projection; identifier spelling kept as is
alpha = np.zeros(k)
alpha_f1 = np.zeros(fearture)
alpha_f2 = np.zeros(k)
Cov_v1 = np.diag(np.concatenate((np.full(k1, 0.25), np.full(k2, 0.01))))   # large variance on the first k1 dims
Cov_v2 = np.diag(np.concatenate((np.full(k1, 0.01), np.full(k2, 0.25))))   # large variance on the last k2 dims
Cov_g = 0.25 * np.eye(k)
Cov_w = 0.25 * np.eye(k)
Cov_f1 = 0.2 * np.eye(fearture)
Cov_f2 = 0.2 * np.eye(k)
Cov = 0.1 * np.eye(k)

# Embedding tables; row 0 of each table is the all-zero vector used for padding
theta_mask = np.zeros(k)
theta_v1 = np.random.multivariate_normal(alpha, Cov_v1, v1)
theta_v2 = np.random.multivariate_normal(alpha, Cov_v2, v2)
theta_v = np.concatenate((theta_mask[np.newaxis, :], theta_v1, theta_v2), axis=0)
theta_w = np.concatenate(
    (theta_mask[np.newaxis, :], np.random.multivariate_normal(theta_mask, Cov_w, max_word_no)),
    axis=0,
)
theta_g = np.concatenate(
    (theta_mask[np.newaxis, :], np.random.multivariate_normal(theta_mask, Cov_g, max_phrase_no)),
    axis=0,
)

# Linear maps of the Self-Attention layer (drawn in the order key, query, value)
beta_k, beta_q, beta_v = (np.random.multivariate_normal(alpha, Cov, k) for _ in range(3))

# Linear maps of the Feed-Forward layer: k -> fearture -> k
beta_f1 = np.random.multivariate_normal(alpha_f1, Cov_f1, k)
beta_f2 = np.random.multivariate_normal(alpha_f2, Cov_f2, fearture)

# Linear map of the Classification Head
beta = np.random.multivariate_normal(alpha, Cov, max_seq)

In [None]:
# Generate the response variable
# Embedding layer: per-token sum of the word, word-position and phrase-position vectors.
# Accumulated in place so only one extra gathered array is alive at a time.
Embedding = theta_v[word_id]
Embedding += theta_w[word_no]
Embedding += theta_g[phrase_no]


In [None]:
# Linear projections of the embedded tokens of one sentence.
# The sentence is chosen explicitly instead of reusing whatever value a loop in an
# earlier cell happened to leave in `i`.
sentence_idx = 0
sentence_emb = Embedding[sentence_idx]
Linear_k = np.dot(sentence_emb, beta_k)
Linear_q = np.dot(sentence_emb, beta_q)
Linear_v = np.dot(sentence_emb, beta_v)

# Self-Attention layer
weights = np.dot(Linear_k, Linear_q.T) / np.sqrt(k)     # scaled dot-product scores

# A score is masked when its row or its column belongs to a padded position.
# The mask comes from the padding id itself rather than from `weights == 0`.
is_pad = word_id[sentence_idx] == mask_id
masked = is_pad[:, np.newaxis] | is_pad[np.newaxis, :]

weights_exp = np.exp(weights)
weights_exp[masked] = 0.0
row_sum = np.sum(weights_exp, axis=1)[:, np.newaxis]
row_sum[row_sum == 0.0] = 1.0                           # fully padded rows: avoid 0/0, row stays all zero
normalized_weights = weights_exp / row_sum
Attention = np.dot(normalized_weights, Linear_v)

In [None]:
# Feed Forward層で特徴量を変換



In [None]:
# Inspect the shape of the normalized attention-weight matrix
normalized_weights.shape

In [None]:
# Inspect the normalized weights at the positions whose raw score is exactly 0
normalized_weights[weights==0]

In [None]:
# Inspect the raw scaled dot-product scores
weights