In [1]:
# coding: utf-8
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
from tqdm import tqdm

%matplotlib inline

-----------
### データ作成

In [2]:
# ルールの数
RuleN = 100

In [21]:
def makeData(N, Pattern, PatternApperDiff, RuleMax, RuleMin, Clabel):
    # データ数は何個か
    #N = 1000000
    # 何パターンあるのか
    #Pattern = 5
    # パターンに含まれるルールの最大個数、最小個数はいくつか
    #RuleMax = 10
    #RuleMin = 2
    # 各パターンの最低出現割合(%)
    #PatternApperDiff = 5

    global RuleN
    
    # パターン情報の格納先
    PatternInfo = []

    # 各パターンはどのようか傾向を持つのか
    for i in range(Pattern):
        ruleNum = np.random.randint(RuleMin, RuleMax + 1)
        ruleIndex = np.random.choice(list(range(RuleN)), ruleNum, replace=False)
        ruleFlag = np.random.randint(0, 1 + 1, ruleNum)
        rulePatternApper = np.random.randint(PatternApperDiff, 100 + 1)

        PatternInfo.append({"RuleIndex":ruleIndex, "RuleFlag":ruleFlag, "RulePatternApper": rulePatternApper})

    # 各パターンの出現確率を計算
    PatternInfo = pd.DataFrame(PatternInfo)
    PatternInfo["RulePatternApper"] = PatternInfo["RulePatternApper"] / sum(PatternInfo["RulePatternApper"])

    # データ生成
    Data = np.random.randint(0, 1 + 1, (N, 2 + RuleN))

    for i in tqdm(range(N)):
        patternNum = np.random.choice(Pattern, p=PatternInfo["RulePatternApper"])
        Data[i, 0] = Clabel
        Data[i, 1] = patternNum
        Data[i, PatternInfo.loc[patternNum, "RuleIndex"] + 2] = PatternInfo.loc[patternNum, "RuleFlag"]
        
    return Data, PatternInfo

In [24]:
tD, tPInfo = makeData(1000000, 100, 5, 10, 2, -1)

100%|██████████| 1000000/1000000 [01:14<00:00, 13458.64it/s]


In [22]:
fD, fPInfo = makeData(10000, 10, 5, 4, 2, 1)

100%|██████████| 10000/10000 [00:00<00:00, 13530.90it/s]


-----------
### 学習

In [70]:
rejectValue = -50.0
reviewValue = 0.0

def score2class(x):
    global rejectValue
    global reviewValue
    
    if x <= rejectValue:
        return 1.0
    elif rejectValue < x and x <= reviewValue:
        return (2.0 * x - rejectValue - reviewValue) / (rejectValue - reviewValue)
    elif reviewValue < x:
        return -1.0
    else:
        return 0.0
    
s2c = np.frompyfunc(score2class, 1, 1)

In [116]:
def nijoloss(target, estimate):
    return target - estimate

njloss = np.frompyfunc(nijoloss, 2, 1)

In [72]:
def limitW(x):
    if x < -100.0:
        return -100.0
    elif x > 100.0:
        return 100.0
    else:
        return x
limitw = np.frompyfunc(limitW, 1, 1)

In [112]:
lAlpha = 0.1

def ruleLearning(X, Y, epoch, minibatch):
    datasize = float(len(X))
    minibatchLoop = math.ceil(datasize / minibatch)
    
    W = np.random.randint(-100, 100 + 1,RuleN)
    
    for i in range(epoch):
        for j in range(minibatchLoop):
            batchX = X[j*minibatch:min((j+1)*minibatch, datasize), :]
            batchY = Y[j*minibatch:min((j+1)*minibatch, datasize)]

            y_hat = s2c(np.dot(batchX, W))
            lossY = njloss(batchY, y_hat)
            print(lossY)
            
            W = limitw(W - lAlpha * np.dot(batchX.T, lossY))
    return W

In [102]:
np.random.shuffle(tD)
np.random.shuffle(fD)

Data = np.vstack([tD[:10000, :], fD[:10000, :]])

In [115]:
learnX = Data[:10000, 2:]
learnY = Data[:10000, 0]

w = ruleLearning(learnX, learnY, 1, 1000)
s2c(np.dot(learnX, w))

[2.0 -0.0 1.8399999999999999 2.0 2.0 2.0 -0.0 -0.0 -0.0 2.0 -0.0 2.0 0.8
 -0.0 2.0 -0.0 2.0 -0.0 -0.0 -0.0 2.0 -0.0 -0.0 2.0 -0.0 -0.0 2.0 -0.0 2.0
 2.0 -0.0 2.0 -0.0 1.16 -0.0 2.0 2.0 2.0 0.36 -0.0 -0.0 2.0 2.0 -0.0 -0.0
 2.0 2.0 -0.0 2.0 -0.0 -0.0 2.0 -0.0 -0.0 2.0 -0.0 1.48 -0.0 -0.0 -0.0
 0.96 2.0 1.04 2.0 -0.0 2.0 2.0 2.0 -0.0 2.0 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0
 -0.0 -0.0 -0.0 2.0 -0.0 2.0 -0.0 1.6800000000000002 -0.0 2.0 1.76 2.0
 -0.0 -0.0 2.0 2.0 -0.0 -0.0 -0.0 -0.0 2.0 -0.0 -0.0 -0.0 2.0 -0.0 2.0
 -0.0 -0.0 -0.0 -0.0 2.0 -0.0 -0.0 2.0 -0.0 2.0 2.0 2.0 2.0 -0.0 2.0 2.0
 -0.0 -0.0 2.0 -0.0 -0.0 -0.0 2.0 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0 2.0
 -0.0 -0.0 2.0 -0.0 2.0 2.0 2.0 -0.0 -0.0 2.0 2.0 2.0 -0.0 2.0 -0.0 -0.0
 2.0 -0.0 -0.0 -0.0 2.0 0.4 -0.0 -0.0 -0.0 -0.0 2.0 0.52 -0.0 -0.0 -0.0
 2.0 -0.0 2.0 -0.0 -0.0 -0.0 1.52 2.0 0.6799999999999999 2.0 2.0
 0.07999999999999996 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0 2.0 -0.0 2.0 -0.0 -0.0
 2.0 -0.0 1.16 -0.0 -0.0 -0.0 -0.0 2.0 2.0 -0.0 -0.0 -0.0 2.

array([1.0, 1.0, 1.0, ..., 1.0, 1.0, 1.0], dtype=object)