### TeSIA (ver2.1)

In [20]:
from __future__ import absolute_import, division, print_function, unicode_literals

import pathlib

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.python.keras import backend as K
from tensorflow.python.framework import ops
from tensorflow.python.ops import math_ops

print(tf.__version__)

2.1.0


In [21]:
def n_mode_product(x, u, n):# x, uはデータ型がfloat32である必要がある
    n = int(n)
    # We need one letter per dimension
    # (maybe you could find a workaround for this limitation)
    if n > 26:
        raise ValueError('n is too large.')
    ind = ''.join(chr(ord('a') + i) for i in range(n))
    exp = f'{ind}K...,JK->{ind}J...'
    return tf.einsum(exp, x, u)

In [114]:
class CustomLoss(tf.keras.losses.Loss):
    def __init__(self, name="custom_loss", C = 1, epsilon = 0.5, **kwargs):
        super(CustomLoss, self).__init__(name=name, **kwargs)
        self.C = C
        self.epsilon = epsilon

    def call(self, y_true, y_pred):
        y_pred = ops.convert_to_tensor(y_pred)  # 念のためTensor化
        y_true = math_ops.cast(y_true, y_pred.dtype) # 念のため同じデータ型化
        loss = self.C*K.mean(K.maximum(0., K.abs(y_true - y_pred) - self.epsilon))
        #loss = self.C*K.mean(K.abs(y_true - y_pred))
        #loss = self.C*K.mean(K.square(y_true - y_pred))
        return loss**2

# エポックが終わるごとにドットを一つ出力することで進捗を表示
class PrintDot(keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs):
        if epoch % 100 == 0: print('')
        print('.', end='')

### 3階テンソルの時系列データのみ
### n_mode_product関数を定義しておくこと
class TeSIA(object): 
    def __init__(self, epsilon=0.5, C=1):
        self.epsilon = epsilon
        self.C = C
    
    def mk_kernel_com(self, shape, dtype=None): 
        return K.constant(self.W_com, shape = shape, dtype=dtype)
    
    def mk_kernel_eve(self, shape, dtype=None): 
        return K.constant(self.W_eve, shape = shape, dtype=dtype)
    
    def mk_kernel_sen(self, shape, dtype=None): 
        return K.constant(self.W_sen, shape = shape, dtype=dtype)
    
    def mk_bias(self, shape, dtype=None):
        return K.constant(self.b ,shape = shape, dtype=dtype)
    
    def fit(self, X, y, epochs=100, learning_rate=0.1, alternate_num = 5): #Xは4階テンソル（データの個数）＊（企業データ）＊（イベント）＊（センチメント）
        
        if len(y.shape) == 1:
            self.y = y.reshape(-1, 1)
        else:
            self.y = y
        
        #交互最適を止めるための検証用データ作成
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=0.2, random_state=1)
        self.val_score = -float('inf')
        
        #企業データの特徴量の次元数
        feature_len_train = self.X_train.shape[0]
        feature_len_com = X.shape[1] 
        feature_len_eve = X.shape[2]
        feature_len_sen = X.shape[3]
        
        # それぞれのデータの係数
        self.W_com = np.random.rand(feature_len_com, 1)
        self.W_eve = np.random.rand(feature_len_eve, 1)
        self.W_sen = np.random.rand(feature_len_sen, 1)
        self.b = np.random.rand(1, 1)
        
#         self.W_com = np.array([[1], [2], [-1]])
#         self.W_eve = np.array([[-1], [1], [1]])
#         self.W_sen = np.array([[3], [1], [3]])
#         self.b = np.array([[2]])
        
#         self.W_com = np.array([[1.01], [3.], [-1.01]])
#         self.W_eve = np.array([[-2.01], [1.01], [1.01]])
#         self.W_sen = np.array([[3.01], [1.01], [6.01]])
#         self.b = np.array([[2.01]])
        
        
        # 後のfor文で数字とそれぞれの変数を認識するための辞書
        dic_len = {1:feature_len_com, 2:feature_len_eve, 3:feature_len_sen}
        dic_W = {1:self.W_com, 2:self.W_eve, 3:self.W_sen} 
        dic_Kernel = {1:self.mk_kernel_com, 2:self.mk_kernel_eve, 3:self.mk_kernel_sen}
        list_W = ['com', 'eve', 'sen']
        
        for num in range(alternate_num):
            print(str(num+1)+"回目")
            for m in [1,2,3]: # １：企業、２：イベント、３：センチメント
                print()
                print('\n'+['企業', 'イベント', 'センチメント'][m-1])
                exec("self.beta = np.array(tf.math.reduce_sum(tf.math.square(self.W_{0})))*np.array(tf.math.reduce_sum(tf.math.square(self.W_{1})))".format(list_W[2-m//3], list_W[1-m//2]))
                exec("self.model = keras.Sequential([layers.Dense(1, input_shape=[dic_len[m]], kernel_regularizer=keras.regularizers.l2(0.25*self.beta), kernel_initializer = self.mk_kernel_{}, bias_initializer = self.mk_bias)])".format(list_W[m-1]))

                self.model.compile(loss=CustomLoss(C = self.C, epsilon = self.epsilon), 
                                   optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-07, amsgrad=False))
#                 self.model.compile(loss=CustomLoss(C = self.C, epsilon = self.epsilon), 
#                                     optimizer=tf.keras.optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=1e-06))
                
    
                early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
                
                # n_mode_product展開でxを取得
                #self.X = n_mode_product(n_mode_product(self.X_train, dic_W[3-m//3].T, 3-m//3), dic_W[2-m//2].T, 2-m//2)
                exec("self.X = n_mode_product(n_mode_product(self.X_train, self.W_{0}.T, 3-m//3), self.W_{1}.T, 2-m//2)".format(list_W[2-m//3], list_W[1-m//2]))
                #exec("self.X = n_mode_product(n_mode_product(self.X_train, self.W_{0}, 3-m//3), self.W_{1}, 2-m//2)".format(list_W[2-m//3], list_W[1-m//2]))
                self.X = tf.reshape(self.X, [feature_len_train, dic_len[m]])
                
                self.model.fit(
                    self.X, self.y_train, validation_split = 0.2,
                    epochs=epochs, verbose=0, callbacks=[early_stop, PrintDot()])
                
                #dic_W[m] = self.model.layers[0].get_weights()[0]
                exec("self.W_{} = self.model.layers[0].get_weights()[0]".format(list_W[m-1]))
                self.b = self.model.layers[0].get_weights()[1]
                print('\n企業 : {}'.format(self.W_com))
                print('イベント : {}'.format(self.W_eve))
                print('センチメント : {}'.format(self.W_sen))
                print('intercept : {}'.format(self.b))
                print('='*50)
            #交互最適化を止めるか判断
            if (self.score(self.X_test, self.y_test) -self.val_score)/self.val_score<0.01:
                self.num = num
                return self
            self.val_score = self.score(self.X_test, self.y_test)
        self.num = num
        return self
    
    def predict(self, X, y=None):
        # tf.transpose()で置換されている
        y_pred = n_mode_product(n_mode_product(n_mode_product(X, tf.transpose(self.W_com), 1), tf.transpose(self.W_eve), 2), tf.transpose(self.W_sen), 3) + self.b
        y_pred = tf.reshape(y_pred,[len(X), 1]).numpy()
        
        return y_pred
    
    def coef_(self):
        result_W_com = self.W_com
        result_W_eve = self.W_eve
        result_W_sen = self.W_sen
        
        return result_W_com, result_W_eve, result_W_sen
    
    def intercept_(self):
        result_b = self.b
        
        return result_b
    
    def score(self, X, y):
        
        
        if len(y.shape) == 1:
            print('変換')
            y = y.reshape(-1, 1)
        # tf.transpose()で置換されている
        y_pred = n_mode_product(n_mode_product(n_mode_product(X, tf.transpose(self.W_com), 1), tf.transpose(self.W_eve), 2), tf.transpose(self.W_sen), 3) + self.b
        y_pred = tf.reshape(y_pred,[len(X), 1]).numpy()
        u = ((y_pred - y)**2).sum()
        v = ((y - y.mean())**2).sum()
        R2 = 1 - u/v
        
        return R2


## データをダウンロード

In [84]:
df = pd.read_table('train.tsv')

In [85]:
df.head()

Unnamed: 0,id,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,cnt
0,1,2011-01-01,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0,16
1,2,2011-01-01,1,0,1,1,0,6,0,1,0.22,0.2727,0.8,0.0,40
2,3,2011-01-01,1,0,1,2,0,6,0,1,0.22,0.2727,0.8,0.0,32
3,4,2011-01-01,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0,13
4,5,2011-01-01,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0,1


In [86]:
df.describe()

Unnamed: 0,id,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,cnt
count,8645.0,8645.0,8645.0,8645.0,8645.0,8645.0,8645.0,8645.0,8645.0,8645.0,8645.0,8645.0,8645.0,8645.0
mean,4323.0,2.513592,0.0,6.573973,11.573626,0.027646,3.012724,0.683748,1.437594,0.489069,0.469,0.64343,0.191172,143.794448
std,2495.740872,1.105477,0.0,3.428147,6.907822,0.163966,2.00637,0.46504,0.653859,0.197943,0.17676,0.196293,0.123191,133.797854
min,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.02,0.0,0.0,0.0,1.0
25%,2162.0,2.0,0.0,4.0,6.0,0.0,1.0,0.0,1.0,0.32,0.3182,0.49,0.1045,31.0
50%,4323.0,3.0,0.0,7.0,12.0,0.0,3.0,1.0,1.0,0.5,0.4848,0.65,0.194,109.0
75%,6484.0,3.0,0.0,10.0,18.0,0.0,5.0,1.0,2.0,0.66,0.6212,0.81,0.2836,211.0
max,8645.0,4.0,0.0,12.0,23.0,1.0,6.0,1.0,4.0,0.96,1.0,1.0,0.8507,651.0


In [87]:
df = df[['season', 'hr', 'holiday', 'weathersit', 'temp', 'atemp', 'hum', 'windspeed', 'cnt']]

In [88]:
df.hr = df.hr//4
df = pd.get_dummies(df,columns=['season', 'weathersit', 'hr'])

In [89]:
print(df.shape)
df.head()

(8645, 20)


Unnamed: 0,holiday,temp,atemp,hum,windspeed,cnt,season_1,season_2,season_3,season_4,weathersit_1,weathersit_2,weathersit_3,weathersit_4,hr_0,hr_1,hr_2,hr_3,hr_4,hr_5
0,0,0.24,0.2879,0.81,0.0,16,1,0,0,0,1,0,0,0,1,0,0,0,0,0
1,0,0.22,0.2727,0.8,0.0,40,1,0,0,0,1,0,0,0,1,0,0,0,0,0
2,0,0.22,0.2727,0.8,0.0,32,1,0,0,0,1,0,0,0,1,0,0,0,0,0
3,0,0.24,0.2879,0.75,0.0,13,1,0,0,0,1,0,0,0,1,0,0,0,0,0
4,0,0.24,0.2879,0.75,0.0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,0


In [90]:
df.columns

Index(['holiday', 'temp', 'atemp', 'hum', 'windspeed', 'cnt', 'season_1',
       'season_2', 'season_3', 'season_4', 'weathersit_1', 'weathersit_2',
       'weathersit_3', 'weathersit_4', 'hr_0', 'hr_1', 'hr_2', 'hr_3', 'hr_4',
       'hr_5'],
      dtype='object')

In [91]:
#5*6*8のテンソルを作成
df_season_holiday = df[['season_1', 'season_2', 'season_3', 'season_4', 'holiday']]
df_hr = df[['hr_0', 'hr_1', 'hr_2', 'hr_3', 'hr_4','hr_5']]
df_weather = df[['temp', 'atemp', 'hum', 'windspeed', 'weathersit_1','weathersit_2', 'weathersit_3', 'weathersit_4']]
df_cnt = df[['cnt']]

In [92]:
#[[[[*0000000][00000000]...[00000000]][[*0000000][00000000]...[00000000]]...[[*0000000][00000000]...[00000000]]]...]の配列を作成
A  = df_season_holiday.values
A1 = np.insert(A, [1],np.zeros(47), axis=1)
A2 = np.insert(A1, [49],np.zeros(47), axis=1)
A3 = np.insert(A2, [97],np.zeros(47), axis=1)
A4 = np.insert(A3, [145],np.zeros(47), axis=1)
A5 = np.insert(A4, [193],np.zeros(47), axis=1)
arr_season_holiday = A5.reshape(8645,5,6,8)

In [93]:
#[[[[00000000][00000000]...[00000000]][[0*000000][0*0000000]...[0*0000000]]...[[00000000][00000000]...[00000000]]]...]の配列を作成
B = df_hr.values
B1 = np.insert(B, [0],np.zeros(49), axis=1)
B2 = np.insert(B1, [50],np.zeros(7), axis=1)
B3 = np.insert(B2, [58],np.zeros(7), axis=1)
B4 = np.insert(B3, [66],np.zeros(7), axis=1)
B5 = np.insert(B4, [74],np.zeros(7), axis=1)
B6 = np.insert(B5, [82],np.zeros(7), axis=1)
B7 = np.insert(B6, [90],np.zeros(150), axis=1)
arr_hr = B7.reshape(8645,5,6,8)

In [94]:
#[[[[00000000][00000000]...[00000000]][[00000000][000000000]...[000000000]][[00000000][000000000][********]...[000000000]]...[[00000000][00000000]...[00000000]]]...]の配列を作成
C = df_weather.values
C1 = np.insert(C,[0], np.zeros(112), axis=1)
C2 = np.insert(C1,[120], np.zeros(120), axis=1)
arr_weather = C2.reshape(8645,5,6,8)

In [95]:
# A = np.arange(15).reshape(3,5)
# A1 = np.insert(A, [1],np.zeros(47), axis=1)
# A2 = np.insert(A1, [49],np.zeros(47), axis=1)
# A3 = np.insert(A2, [97],np.zeros(47), axis=1)
# A4 = np.insert(A3, [145],np.zeros(47), axis=1)
# A5 = np.insert(A4, [193],np.zeros(47), axis=1)

# B = np.arange(1,19).reshape(3,6)
# B1 = np.insert(B, [0],np.zeros(49), axis=1)
# B2 = np.insert(B1, [50],np.zeros(7), axis=1)
# B3 = np.insert(B2, [58],np.zeros(7), axis=1)
# B4 = np.insert(B3, [66],np.zeros(7), axis=1)
# B5 = np.insert(B4, [74],np.zeros(7), axis=1)
# B6 = np.insert(B5, [82],np.zeros(7), axis=1)
# B7 = np.insert(B6, [90],np.zeros(150), axis=1)

# C = np.arange(1,25).reshape(3,8)
# C1 = np.insert(C,[0], np.zeros(112), axis=1)
# C2 = np.insert(C1,[120], np.zeros(120), axis=1)

In [96]:
X = arr_season_holiday  + arr_hr + arr_weather
y = df['cnt'].values

In [97]:
X

array([[[[1.    , 0.    , 0.    , ..., 0.    , 0.    , 0.    ],
         [0.    , 0.    , 0.    , ..., 0.    , 0.    , 0.    ],
         [0.    , 0.    , 0.    , ..., 0.    , 0.    , 0.    ],
         [0.    , 0.    , 0.    , ..., 0.    , 0.    , 0.    ],
         [0.    , 0.    , 0.    , ..., 0.    , 0.    , 0.    ],
         [0.    , 0.    , 0.    , ..., 0.    , 0.    , 0.    ]],

        [[0.    , 1.    , 0.    , ..., 0.    , 0.    , 0.    ],
         [0.    , 0.    , 0.    , ..., 0.    , 0.    , 0.    ],
         [0.    , 0.    , 0.    , ..., 0.    , 0.    , 0.    ],
         [0.    , 0.    , 0.    , ..., 0.    , 0.    , 0.    ],
         [0.    , 0.    , 0.    , ..., 0.    , 0.    , 0.    ],
         [0.    , 0.    , 0.    , ..., 0.    , 0.    , 0.    ]],

        [[0.    , 0.    , 0.    , ..., 0.    , 0.    , 0.    ],
         [0.    , 0.    , 0.    , ..., 0.    , 0.    , 0.    ],
         [0.24  , 0.2879, 0.81  , ..., 0.    , 0.    , 0.    ],
         [0.    , 0.    , 0.    , ..

In [98]:
y = y.reshape(len(y), 1)

#### テストデータを用意

In [99]:
df_test = pd.read_table('test.tsv')

In [100]:
df_test.head()

Unnamed: 0,id,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed
0,8646,2012-01-01,1,1,1,0,0,0,0,1,0.36,0.3788,0.66,0.0
1,8647,2012-01-01,1,1,1,1,0,0,0,1,0.36,0.3485,0.66,0.1343
2,8648,2012-01-01,1,1,1,2,0,0,0,1,0.32,0.3485,0.76,0.0
3,8649,2012-01-01,1,1,1,3,0,0,0,1,0.3,0.3333,0.81,0.0
4,8650,2012-01-01,1,1,1,4,0,0,0,1,0.28,0.303,0.81,0.0896


In [101]:
#df_test = df_test[['season', 'hr', 'holiday', 'weathersit', 'temp', 'atemp', 'hum', 'windspeed', 'cnt']]
df_test = df_test[['season', 'hr', 'holiday', 'weathersit', 'temp', 'atemp', 'hum', 'windspeed']]

In [102]:
df_test.hr = df_test.hr//4
df_test = pd.get_dummies(df_test,columns=['season', 'weathersit', 'hr'])

In [103]:
print(df_test.shape)
df_test.head()

(8734, 19)


Unnamed: 0,holiday,temp,atemp,hum,windspeed,season_1,season_2,season_3,season_4,weathersit_1,weathersit_2,weathersit_3,weathersit_4,hr_0,hr_1,hr_2,hr_3,hr_4,hr_5
0,0,0.36,0.3788,0.66,0.0,1,0,0,0,1,0,0,0,1,0,0,0,0,0
1,0,0.36,0.3485,0.66,0.1343,1,0,0,0,1,0,0,0,1,0,0,0,0,0
2,0,0.32,0.3485,0.76,0.0,1,0,0,0,1,0,0,0,1,0,0,0,0,0
3,0,0.3,0.3333,0.81,0.0,1,0,0,0,1,0,0,0,1,0,0,0,0,0
4,0,0.28,0.303,0.81,0.0896,1,0,0,0,1,0,0,0,0,1,0,0,0,0


In [104]:
df_test.columns

Index(['holiday', 'temp', 'atemp', 'hum', 'windspeed', 'season_1', 'season_2',
       'season_3', 'season_4', 'weathersit_1', 'weathersit_2', 'weathersit_3',
       'weathersit_4', 'hr_0', 'hr_1', 'hr_2', 'hr_3', 'hr_4', 'hr_5'],
      dtype='object')

In [105]:
#5*6*8のテンソルを作成
df_season_holiday = df_test[['season_1', 'season_2', 'season_3', 'season_4', 'holiday']]
df_hr = df_test[['hr_0', 'hr_1', 'hr_2', 'hr_3', 'hr_4','hr_5']]
df_weather = df_test[['temp', 'atemp', 'hum', 'windspeed', 'weathersit_1','weathersit_2', 'weathersit_3', 'weathersit_4']]

In [106]:
#[[[[*0000000][00000000]...[00000000]][[*0000000][00000000]...[00000000]]...[[*0000000][00000000]...[00000000]]]...]の配列を作成
A  = df_season_holiday.values
A1 = np.insert(A, [1],np.zeros(47), axis=1)
A2 = np.insert(A1, [49],np.zeros(47), axis=1)
A3 = np.insert(A2, [97],np.zeros(47), axis=1)
A4 = np.insert(A3, [145],np.zeros(47), axis=1)
A5 = np.insert(A4, [193],np.zeros(47), axis=1)
arr_season_holiday = A5.reshape(8734,5,6,8)

In [107]:
#[[[[00000000][00000000]...[00000000]][[0*000000][0*0000000]...[0*0000000]]...[[00000000][00000000]...[00000000]]]...]の配列を作成
B = df_hr.values
B1 = np.insert(B, [0],np.zeros(49), axis=1)
B2 = np.insert(B1, [50],np.zeros(7), axis=1)
B3 = np.insert(B2, [58],np.zeros(7), axis=1)
B4 = np.insert(B3, [66],np.zeros(7), axis=1)
B5 = np.insert(B4, [74],np.zeros(7), axis=1)
B6 = np.insert(B5, [82],np.zeros(7), axis=1)
B7 = np.insert(B6, [90],np.zeros(150), axis=1)
arr_hr = B7.reshape(8734,5,6,8)

In [108]:
#[[[[00000000][00000000]...[00000000]][[00000000][000000000]...[000000000]][[00000000][000000000][********]...[000000000]]...[[00000000][00000000]...[00000000]]]...]の配列を作成
C = df_weather.values
C1 = np.insert(C,[0], np.zeros(112), axis=1)
C2 = np.insert(C1,[120], np.zeros(120), axis=1)
arr_weather = C2.reshape(8734,5,6,8)

In [109]:
# A = np.arange(15).reshape(3,5)
# A1 = np.insert(A, [1],np.zeros(47), axis=1)
# A2 = np.insert(A1, [49],np.zeros(47), axis=1)
# A3 = np.insert(A2, [97],np.zeros(47), axis=1)
# A4 = np.insert(A3, [145],np.zeros(47), axis=1)
# A5 = np.insert(A4, [193],np.zeros(47), axis=1)

# B = np.arange(1,19).reshape(3,6)
# B1 = np.insert(B, [0],np.zeros(49), axis=1)
# B2 = np.insert(B1, [50],np.zeros(7), axis=1)
# B3 = np.insert(B2, [58],np.zeros(7), axis=1)
# B4 = np.insert(B3, [66],np.zeros(7), axis=1)
# B5 = np.insert(B4, [74],np.zeros(7), axis=1)
# B6 = np.insert(B5, [82],np.zeros(7), axis=1)
# B7 = np.insert(B6, [90],np.zeros(150), axis=1)

# C = np.arange(1,25).reshape(3,8)
# C1 = np.insert(C,[0], np.zeros(112), axis=1)
# C2 = np.insert(C1,[120], np.zeros(120), axis=1)

In [110]:
X_test = arr_season_holiday  + arr_hr + arr_weather

## モデルで学習・予測

In [112]:
result = pd.DataFrame(index = ['W_com_1', 'W_com_2', 'W_com_3', 'W_com_4', 'W_com_5', 'W_eve_1', 'W_eve_2', 'W_eve_3', 'W_eve_4', 'W_eve_5', 'W_eve_6', 'W_sen_1', 'W_sen_2', 'W_sen_3', 'W_sen_4', 'W_sen_5', 'W_sen_6', 'W_sen_7', 'W_sen_8', 'int', 'train_score', 'test_score', 'alternate_num'])
for c in [10000000, 1000000, 100000, 10000, 1000, 100, 10, 1]:
    for i in range(3):
        model = TeSIA(epsilon=0.2, C=c)
        model.fit(X, y, epochs = 100, alternate_num = 10)
        result[str(c)+'-'+str(i)] = model.W_com.reshape(5).tolist() + model.W_eve.reshape(6).tolist() + model.W_sen.reshape(8).tolist() + model.b.tolist() + [model.score(X, y)] + ['nan'] + [int(model.num)]

1回目


企業

....................................................................................................
企業 : [[-1367.6437  ]
 [  308.1508  ]
 [   31.258255]
 [  393.41968 ]
 [  -89.89887 ]]
イベント : [[0.08456675]
 [0.24667436]
 [0.97051367]
 [0.30631029]
 [0.6883551 ]
 [0.40800867]]
センチメント : [[0.0202901 ]
 [0.68349851]
 [0.83021251]
 [0.49328082]
 [0.82623985]
 [0.04175847]
 [0.55898402]
 [0.97659862]]
intercept : [-31.530737]


イベント

..........................
企業 : [[-1367.6437  ]
 [  308.1508  ]
 [   31.258255]
 [  393.41968 ]
 [  -89.89887 ]]
イベント : [[0.18209831]
 [0.16232765]
 [0.862225  ]
 [0.88701516]
 [1.3365903 ]
 [0.6097562 ]]
センチメント : [[0.0202901 ]
 [0.68349851]
 [0.83021251]
 [0.49328082]
 [0.82623985]
 [0.04175847]
 [0.55898402]
 [0.97659862]]
intercept : [-67.20927]


センチメント

..........
企業 : [[-1367.6437  ]
 [  308.1508  ]
 [   31.258255]
 [  393.41968 ]
 [  -89.89887 ]]
イベント : [[0.18209831]
 [0.16232765]
 [0.862225  ]
 [0.88701516]
 [1.3365903 ]
 [0.6097562 ]]
センチメント


.....
企業 : [[nan]
 [nan]
 [nan]
 [nan]
 [nan]]
イベント : [[nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]]
センチメント : [[nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]]
intercept : [87.24996]
3回目


企業

.....
企業 : [[nan]
 [nan]
 [nan]
 [nan]
 [nan]]
イベント : [[nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]]
センチメント : [[nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]]
intercept : [87.24996]


イベント

.....
企業 : [[nan]
 [nan]
 [nan]
 [nan]
 [nan]]
イベント : [[nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]]
センチメント : [[nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]]
intercept : [87.24996]


センチメント

.....
企業 : [[nan]
 [nan]
 [nan]
 [nan]
 [nan]]
イベント : [[nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]]
センチメント : [[nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]]
intercept : [87.24996]
4回目


企業

.....
企業 : [[nan]
 [nan]
 [nan]
 [nan]
 [nan]]
イベント : [[nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]]
センチメント : [[nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]]
intercept : [87.24996]


イベント

.....
企業 : [[nan]
 [nan


................
企業 : [[-58.025238]
 [  5.411221]
 [ 71.26535 ]
 [ 35.818863]
 [ -9.090371]]
イベント : [[  0.25085437]
 [-11.090833  ]
 [  0.8013446 ]
 [ 21.66828   ]
 [ 62.51699   ]
 [ 10.924689  ]]
センチメント : [[ 1.887349  ]
 [ 0.57949525]
 [-1.5095335 ]
 [ 0.17558868]
 [ 1.3430494 ]
 [ 1.5520779 ]
 [ 0.7784717 ]
 [-0.24845266]]
intercept : [-10.740737]
3回目


企業

...........................
企業 : [[-64.322525]
 [  4.399942]
 [ 68.419556]
 [ 49.1399  ]
 [-13.983772]]
イベント : [[  0.25085437]
 [-11.090833  ]
 [  0.8013446 ]
 [ 21.66828   ]
 [ 62.51699   ]
 [ 10.924689  ]]
センチメント : [[ 1.887349  ]
 [ 0.57949525]
 [-1.5095335 ]
 [ 0.17558868]
 [ 1.3430494 ]
 [ 1.5520779 ]
 [ 0.7784717 ]
 [-0.24845266]]
intercept : [-8.132115]


イベント

.......
企業 : [[-64.322525]
 [  4.399942]
 [ 68.419556]
 [ 49.1399  ]
 [-13.983772]]
イベント : [[  0.17326608]
 [-11.238105  ]
 [  0.8088918 ]
 [ 21.940176  ]
 [ 64.93467   ]
 [ 10.762508  ]]
センチメント : [[ 1.887349  ]
 [ 0.57949525]
 [-1.5095335 ]
 [ 0.17558868]
 [ 1.34304


........
企業 : [[-33.142487]
 [ 38.06588 ]
 [ 58.51303 ]
 [ 43.59005 ]
 [-13.933736]]
イベント : [[ 0.5741803]
 [-1.1415542]
 [ 1.2780269]
 [ 4.875918 ]
 [11.739113 ]
 [ 2.6844122]]
センチメント : [[ 1.1176827 ]
 [ 0.40569672]
 [-1.6492448 ]
 [-0.48159808]
 [ 1.8805127 ]
 [ 2.072535  ]
 [ 1.1888453 ]
 [ 0.06212868]]
intercept : [-40.434998]


センチメント

.........
企業 : [[-33.142487]
 [ 38.06588 ]
 [ 58.51303 ]
 [ 43.59005 ]
 [-13.933736]]
イベント : [[ 0.5741803]
 [-1.1415542]
 [ 1.2780269]
 [ 4.875918 ]
 [11.739113 ]
 [ 2.6844122]]
センチメント : [[ 1.109407  ]
 [ 0.3245956 ]
 [-1.0808846 ]
 [ 0.04310098]
 [ 1.3179629 ]
 [ 1.227068  ]
 [ 1.0255852 ]
 [ 1.1153262 ]]
intercept : [-41.140633]
1回目


企業

....................................................................................................
企業 : [[-1337.904  ]
 [  351.63275]
 [  -23.41696]
 [  373.05615]
 [ -137.5603 ]]
イベント : [[0.11234153]
 [0.72261462]
 [0.36597154]
 [0.9470932 ]
 [0.9756966 ]
 [0.42603964]]
センチメント : [[0.17713065]
 [0.51199978]
 [0


.......................
企業 : [[-635.143   ]
 [-218.90843 ]
 [ 126.517044]
 [-148.85864 ]
 [ -63.231342]]
イベント : [[  0.42975965]
 [  2.6163619 ]
 [ -0.11082431]
 [ -4.543166  ]
 [-12.227923  ]
 [ -2.1877706 ]]
センチメント : [[ 0.3389095 ]
 [ 0.065623  ]
 [ 5.670467  ]
 [-0.22415762]
 [-1.0384687 ]
 [-1.7218461 ]
 [ 0.79936934]
 [ 9.058531  ]]
intercept : [153.36061]


イベント

......
企業 : [[-635.143   ]
 [-218.90843 ]
 [ 126.517044]
 [-148.85864 ]
 [ -63.231342]]
イベント : [[  0.42481026]
 [  2.2978444 ]
 [ -0.19264793]
 [ -4.4777393 ]
 [-12.478497  ]
 [ -2.0304399 ]]
センチメント : [[ 0.3389095 ]
 [ 0.065623  ]
 [ 5.670467  ]
 [-0.22415762]
 [-1.0384687 ]
 [-1.7218461 ]
 [ 0.79936934]
 [ 9.058531  ]]
intercept : [155.72461]


センチメント

............
企業 : [[-635.143   ]
 [-218.90843 ]
 [ 126.517044]
 [-148.85864 ]
 [ -63.231342]]
イベント : [[  0.42481026]
 [  2.2978444 ]
 [ -0.19264793]
 [ -4.4777393 ]
 [-12.478497  ]
 [ -2.0304399 ]]
センチメント : [[ 0.23494679]
 [ 0.07355492]
 [ 3.4393075 ]
 [-0.09542909]
 [-0.


..........
企業 : [[-98.470505 ]
 [ -3.3713782]
 [ 16.684082 ]
 [-15.502832 ]
 [-10.435567 ]]
イベント : [[  1.1307503]
 [ 16.898685 ]
 [ -0.3504551]
 [-34.654125 ]
 [-86.94566  ]
 [-17.16128  ]]
センチメント : [[ 0.762905  ]
 [ 0.42924362]
 [15.953453  ]
 [-1.5214875 ]
 [-6.2997017 ]
 [-6.941364  ]
 [ 0.02101097]
 [10.7661495 ]]
intercept : [131.9286]
1回目


企業

....................................................................................................
企業 : [[-170.67032 ]
 [ 321.55276 ]
 [  47.786232]
 [  81.71695 ]
 [ -26.108042]]
イベント : [[0.30666143]
 [0.06750806]
 [0.552926  ]
 [0.87699934]
 [0.57135784]
 [0.5422211 ]]
センチメント : [[0.69123122]
 [0.49933718]
 [0.94369707]
 [0.20411664]
 [0.8345442 ]
 [0.8565085 ]
 [0.85240265]
 [0.67996853]]
intercept : [-34.955265]


イベント

...........
企業 : [[-170.67032 ]
 [ 321.55276 ]
 [  47.786232]
 [  81.71695 ]
 [ -26.108042]]
イベント : [[0.0475063 ]
 [0.04167757]
 [0.85446435]
 [0.95415485]
 [1.4940754 ]
 [0.39188546]]
センチメント : [[0.69123122]
 [0.49933


.............
企業 : [[-1123.938    ]
 [   -2.7304955]
 [   52.29203  ]
 [  164.3283   ]
 [ -164.08047  ]]
イベント : [[  0.6275926 ]
 [ 12.694601  ]
 [  0.8900557 ]
 [ -9.926719  ]
 [-17.193974  ]
 [  0.02150203]]
センチメント : [[0.0829698 ]
 [0.56462613]
 [0.94895186]
 [0.93819576]
 [0.41578807]
 [0.54948074]
 [0.05896449]
 [0.77335779]]
intercept : [71.122734]


センチメント

..............
企業 : [[-1123.938    ]
 [   -2.7304955]
 [   52.29203  ]
 [  164.3283   ]
 [ -164.08047  ]]
イベント : [[  0.6275926 ]
 [ 12.694601  ]
 [  0.8900557 ]
 [ -9.926719  ]
 [-17.193974  ]
 [  0.02150203]]
センチメント : [[ 0.11497834]
 [ 2.0685775 ]
 [-0.3861658 ]
 [ 0.18965961]
 [ 0.51285017]
 [ 0.08264478]
 [-0.57552665]
 [ 0.12085937]]
intercept : [81.605125]
2回目


企業

..............................................................................
企業 : [[-380.62363 ]
 [  -2.987451]
 [  73.063896]
 [  75.13743 ]
 [ -12.99513 ]]
イベント : [[  0.6275926 ]
 [ 12.694601  ]
 [  0.8900557 ]
 [ -9.926719  ]
 [-17.193974  ]
 [  0.0215020


....................................................................................................
企業 : [[-139.94084  ]
 [ 296.81436  ]
 [  82.34471  ]
 [  27.6781   ]
 [  -2.4035962]]
イベント : [[0.00358204]
 [0.61622308]
 [0.72919155]
 [0.30511995]
 [0.53451664]
 [0.54598738]]
センチメント : [[0.6513953 ]
 [0.54037184]
 [0.81134525]
 [0.50367348]
 [0.80421281]
 [0.78825524]
 [0.77557696]
 [0.46808483]]
intercept : [-81.97528]


イベント

......
企業 : [[-139.94084  ]
 [ 296.81436  ]
 [  82.34471  ]
 [  27.6781   ]
 [  -2.4035962]]
イベント : [[0.04250555]
 [0.07927078]
 [0.7703078 ]
 [1.0603228 ]
 [1.4717991 ]
 [0.42226264]]
センチメント : [[0.6513953 ]
 [0.54037184]
 [0.81134525]
 [0.50367348]
 [0.80421281]
 [0.78825524]
 [0.77557696]
 [0.46808483]]
intercept : [-96.05526]


センチメント

.......
企業 : [[-139.94084  ]
 [ 296.81436  ]
 [  82.34471  ]
 [  27.6781   ]
 [  -2.4035962]]
イベント : [[0.04250555]
 [0.07927078]
 [0.7703078 ]
 [1.0603228 ]
 [1.4717991 ]
 [0.42226264]]
センチメント : [[ 1.8745365 ]
 [ 0.5487119 ]



.............
企業 : [[-477.85822 ]
 [ 139.04395 ]
 [  46.358112]
 [   6.600228]
 [ -81.78393 ]]
イベント : [[-0.0108809 ]
 [-0.01076628]
 [ 1.316117  ]
 [ 1.3439023 ]
 [ 2.5537665 ]
 [ 1.0395832 ]]
センチメント : [[1.3937968e+00]
 [5.0047064e-01]
 [1.0508457e-01]
 [9.2491359e-02]
 [6.2615407e-01]
 [3.1728983e-01]
 [1.9273162e-03]
 [3.4021468e-06]]
intercept : [-60.079643]
2回目


企業

..................................................
企業 : [[ 64.93068   ]
 [166.71042   ]
 [ 68.81105   ]
 [-60.802227  ]
 [  0.54527986]]
イベント : [[-0.0108809 ]
 [-0.01076628]
 [ 1.316117  ]
 [ 1.3439023 ]
 [ 2.5537665 ]
 [ 1.0395832 ]]
センチメント : [[1.3937968e+00]
 [5.0047064e-01]
 [1.0508457e-01]
 [9.2491359e-02]
 [6.2615407e-01]
 [3.1728983e-01]
 [1.9273162e-03]
 [3.4021468e-06]]
intercept : [-96.440445]


イベント

......
企業 : [[ 64.93068   ]
 [166.71042   ]
 [ 68.81105   ]
 [-60.802227  ]
 [  0.54527986]]
イベント : [[0.05105723]
 [0.06548614]
 [1.4160461 ]
 [1.5822254 ]
 [2.9070852 ]
 [0.7603052 ]]
センチメント : [[1.3937968e+00]



.............
企業 : [[-83.70852   ]
 [-59.21202   ]
 [ 44.163593  ]
 [ -4.0589724 ]
 [ -0.83075583]]
イベント : [[ 1.3524168 ]
 [ 1.2563756 ]
 [ 0.39090127]
 [-0.10225597]
 [-1.2765708 ]
 [ 0.47199488]]
センチメント : [[ 4.1020417e-01]
 [ 1.3832040e+00]
 [-1.5229060e-01]
 [-5.0272483e-02]
 [ 3.8245253e-02]
 [-7.3463172e-03]
 [-8.8843763e-02]
 [ 9.7408949e-04]]
intercept : [154.74557]


センチメント

............................
企業 : [[-83.70852   ]
 [-59.21202   ]
 [ 44.163593  ]
 [ -4.0589724 ]
 [ -0.83075583]]
イベント : [[ 1.3524168 ]
 [ 1.2563756 ]
 [ 0.39090127]
 [-0.10225597]
 [-1.2765708 ]
 [ 0.47199488]]
センチメント : [[ 0.17944564]
 [ 1.4825554 ]
 [ 0.12633607]
 [-0.07829404]
 [ 0.53539187]
 [ 0.38190833]
 [-0.5000549 ]
 [-0.00237145]]
intercept : [148.2774]
3回目


企業

....................
企業 : [[-49.478016 ]
 [-62.840595 ]
 [ 64.03563  ]
 [  4.73007  ]
 [ -1.4360335]]
イベント : [[ 1.3524168 ]
 [ 1.2563756 ]
 [ 0.39090127]
 [-0.10225597]
 [-1.2765708 ]
 [ 0.47199488]]
センチメント : [[ 0.17944564]
 [ 1.4825554 


..........
企業 : [[ 7.3845416e-02]
 [ 4.6096754e+00]
 [ 2.2532315e+00]
 [ 5.6304112e-02]
 [-3.3466453e-03]]
イベント : [[-0.38262656]
 [-1.5731066 ]
 [ 2.1167862 ]
 [ 1.3819536 ]
 [ 1.745354  ]
 [-0.01016459]]
センチメント : [[ 0.37914583]
 [ 3.1615064 ]
 [-0.72150475]
 [-0.09890315]
 [ 0.3793597 ]
 [-0.38380346]
 [-0.31435633]
 [-0.05326426]]
intercept : [94.34079]


イベント

..................
企業 : [[ 7.3845416e-02]
 [ 4.6096754e+00]
 [ 2.2532315e+00]
 [ 5.6304112e-02]
 [-3.3466453e-03]]
イベント : [[-2.29295   ]
 [-1.5653939 ]
 [ 1.3469864 ]
 [ 1.1870376 ]
 [ 2.1099634 ]
 [ 0.09054656]]
センチメント : [[ 0.37914583]
 [ 3.1615064 ]
 [-0.72150475]
 [-0.09890315]
 [ 0.3793597 ]
 [-0.38380346]
 [-0.31435633]
 [-0.05326426]]
intercept : [101.09758]


センチメント

.......
企業 : [[ 7.3845416e-02]
 [ 4.6096754e+00]
 [ 2.2532315e+00]
 [ 5.6304112e-02]
 [-3.3466453e-03]]
イベント : [[-2.29295   ]
 [-1.5653939 ]
 [ 1.3469864 ]
 [ 1.1870376 ]
 [ 2.1099634 ]
 [ 0.09054656]]
センチメント : [[-0.4437524 ]
 [ 3.1690054 ]
 [-0.1344518 ]



.........
企業 : [[-1.0165588 ]
 [-9.591884  ]
 [ 0.6699147 ]
 [-0.18634042]
 [-0.14859755]]
イベント : [[ 1.4515178 ]
 [ 1.2490394 ]
 [-0.3859292 ]
 [-0.931054  ]
 [-1.0227224 ]
 [ 0.21726681]]
センチメント : [[ 1.6686559e-01]
 [ 3.1628900e+00]
 [ 2.6587823e-02]
 [ 1.1534233e-02]
 [-1.6538350e-02]
 [ 2.8439010e-02]
 [ 1.8284045e-02]
 [ 6.4137430e-07]]
intercept : [110.62699]
4回目


企業

.................
企業 : [[-2.0837764e-01]
 [-8.3035336e+00]
 [-3.5057688e-01]
 [ 2.2116831e-02]
 [ 5.0825877e-03]]
イベント : [[ 1.4515178 ]
 [ 1.2490394 ]
 [-0.3859292 ]
 [-0.931054  ]
 [-1.0227224 ]
 [ 0.21726681]]
センチメント : [[ 1.6686559e-01]
 [ 3.1628900e+00]
 [ 2.6587823e-02]
 [ 1.1534233e-02]
 [-1.6538350e-02]
 [ 2.8439010e-02]
 [ 1.8284045e-02]
 [ 6.4137430e-07]]
intercept : [111.08837]


イベント

.......
企業 : [[-2.0837764e-01]
 [-8.3035336e+00]
 [-3.5057688e-01]
 [ 2.2116831e-02]
 [ 5.0825877e-03]]
イベント : [[ 1.5522895 ]
 [ 1.0349948 ]
 [-0.8096668 ]
 [-0.922101  ]
 [-1.2348899 ]
 [-0.08269547]]
センチメント : [[ 1.6686559e

In [115]:
result#.to_csv('自転車TeSIA予測(一様分布).csv')

Unnamed: 0,10000000-0,10000000-1,10000000-2,1000000-0,1000000-1,1000000-2,100000-0,100000-1,100000-2,10000-0,...,1000-2,100-0,100-1,100-2,10-0,10-1,10-2,1-0,1-1,1-2
W_com_1,-3214.69,,-64.3225,-208.912,-33.1425,-1311.17,-859.933,-428.552,-98.4705,-120.012,...,-61.0104,-101.447,-249.266,64.9307,9.12115,-49.478,-111.636,-0.600425,-0.208378,-5.76372
W_com_2,280.307,,4.39994,-12.641,38.0659,321.616,-200.533,-240.671,-3.37138,279.627,...,11.6475,246.298,-78.9163,166.71,28.129,-62.8406,-77.0333,5.02862,-8.30353,-0.309054
W_com_3,50.129,,68.4196,96.2393,58.513,-18.0527,121.187,-71.8394,16.6841,61.0111,...,75.8389,83.004,14.9688,68.8111,33.0404,64.0356,20.1463,0.734323,-0.350577,10.7799
W_com_4,252.542,,49.1399,10.4065,43.59,-59.6183,-156.406,-197.996,-15.5028,239.439,...,46.7545,140.635,-79.785,-60.8022,-3.39626,4.73007,-32.387,-0.116649,0.0221168,1.85465
W_com_5,-409.187,,-13.9838,-34.7551,-13.9337,-151.096,-88.9585,-17.5855,-10.4356,-96.2469,...,-10.8759,-9.36866,-8.12049,0.54528,0.073049,-1.43603,-4.69331,-0.00926711,0.00508259,-0.357715
W_eve_1,0.240089,,0.173266,0.42825,0.57418,0.183324,0.483351,0.815082,1.13075,0.0397139,...,0.257045,0.0460507,0.867074,0.0510572,-0.592532,1.41502,1.28827,-3.03059,1.55229,0.579863
W_eve_2,0.555824,,-11.2381,13.1257,-1.14155,0.222573,2.50339,0.775007,16.8987,0.0590575,...,-8.56658,0.0527594,0.734372,0.0654861,-0.38625,1.62152,1.10467,-1.97035,1.03499,0.00179868
W_eve_3,1.21894,,0.808892,0.802235,1.27803,1.25942,-0.24626,0.00391452,-0.350455,0.848685,...,1.22974,0.856031,-0.628882,1.41605,1.86323,0.391282,-0.110759,0.961776,-0.809667,1.23828
W_eve_4,1.32418,,21.9402,-23.4517,4.87592,0.973895,-4.22589,-0.0486699,-34.6541,0.789786,...,18.2592,0.807148,-0.776541,1.58223,1.40258,0.105825,-0.0290156,1.215,-0.922101,0.000702634
W_eve_5,2.15287,,64.9347,-68.2014,11.7391,1.84199,-12.1133,-0.676197,-86.9457,1.6748,...,52.3883,1.46391,-2.36887,2.90709,3.1647,-1.14863,-1.23994,1.82413,-1.23489,-0.0203551


In [60]:
model = TeSIA(epsilon=0.5, C = 10000)

In [61]:
model.fit(X, y, epochs = 100, learning_rate=0.1)

1回目


企業

....................................................................................................
企業 : [[766.3495   ]
 [ -7.0115457]
 [-15.542332 ]
 [ 23.873764 ]
 [ 87.565094 ]]
イベント : [[ 0.89848594]
 [ 0.17750156]
 [-0.19254803]
 [ 2.27716695]
 [ 0.65630341]
 [-2.40993621]]
センチメント : [[-0.11342811]
 [-1.0810656 ]
 [-0.85953887]
 [ 0.14572328]
 [-0.22527628]
 [-1.04895103]
 [-0.29967393]
 [-0.19867546]]
intercept : [141.9478]


イベント

..........................................
企業 : [[766.3495   ]
 [ -7.0115457]
 [-15.542332 ]
 [ 23.873764 ]
 [ 87.565094 ]]
イベント : [[ 1.0141603]
 [-5.350684 ]
 [ 2.0338206]
 [12.615202 ]
 [26.036161 ]
 [ 4.5410695]]
センチメント : [[-0.11342811]
 [-1.0810656 ]
 [-0.85953887]
 [ 0.14572328]
 [-0.22527628]
 [-1.04895103]
 [-0.29967393]
 [-0.19867546]]
intercept : [49.553593]


センチメント

........
企業 : [[766.3495   ]
 [ -7.0115457]
 [-15.542332 ]
 [ 23.873764 ]
 [ 87.565094 ]]
イベント : [[ 1.0141603]
 [-5.350684 ]
 [ 2.0338206]
 [12.615202 ]
 [26.036161 ]
 [

<__main__.TeSIA at 0x2812902bc48>

In [62]:
model.predict(X)

array([[-25.359684],
       [-28.142914],
       [-28.142914],
       ...,
       [ 82.719444],
       [ 74.31771 ],
       [ 64.174446]], dtype=float32)

In [63]:
model.score(X,y)

0.4970325694643365

In [64]:
print(model.predict(X_test).max())
print(model.predict(X_test).mean())
print(model.predict(X_test).min())

388.15012
127.501205
-206.39844


In [65]:
df_submit = pd.read_table('test.tsv')
df_submit['cnt'] = pd.DataFrame(model.predict(X_test).reshape(8734,1))
df_submit = df_submit[['id', 'cnt']]

In [66]:
df_submit.to_csv('submit3.csv', header=False, index=False)

In [75]:
model.predict(X).tolist()

[[-25.359683990478516],
 [-28.142913818359375],
 [-28.142913818359375],
 [-20.309860229492188],
 [-31.84149932861328],
 [-18.405120849609375],
 [-39.674556732177734],
 [-48.328521728515625],
 [32.80017852783203],
 [46.416603088378906],
 [63.24291229248047],
 [46.70525360107422],
 [97.64842987060547],
 [127.0382308959961],
 [126.68624114990234],
 [119.22588348388672],
 [221.77191162109375],
 [225.0447998046875],
 [162.51742553710938],
 [162.51742553710938],
 [77.3228530883789],
 [75.91255950927734],
 [70.72740936279297],
 [88.37267303466797],
 [33.150177001953125],
 [23.437889099121094],
 [15.469520568847656],
 [25.631744384765625],
 [14.10009765625],
 [-29.49881362915039],
 [18.416419982910156],
 [34.826271057128906],
 [80.13957977294922],
 [66.04637908935547],
 [72.01592254638672],
 [104.59423065185547],
 [103.8661117553711],
 [43.71368408203125],
 [35.176658630371094],
 [153.97207641601562],
 [202.00473022460938],
 [232.51239013671875],
 [210.73980712890625],
 [71.9448013305664],
 [5

In [80]:
test = pd.DataFrame()
test['predict'] = model.predict(X).reshape(len(X)).tolist()
test['y'] = y

In [82]:
test.to_csv('テスト20200619.csv', index = False)

## SVRでもやってみる

In [118]:
class SVR(object):
    def __init__(self, epsilon=0.5, C=1):
        self.epsilon = epsilon
        self.C = C
        
    def fit(self, X, y, epochs=100, learning_rate=0.1):
        self.sess = tf.Session()
        
        feature_len = X.shape[-1] if len(X.shape) > 1 else 1
        
        if len(X.shape) == 1:
            X = X.reshape(-1, 1)
        if len(y.shape) == 1:
            y = y.reshape(-1, 1)
        
        self.X = tf.placeholder(dtype=tf.float32, shape=(None, feature_len))
        self.y = tf.placeholder(dtype=tf.float32, shape=(None, 1))
        
        self.W = tf.Variable(tf.random_normal(shape=(feature_len, 1)))
        self.b = tf.Variable(tf.random_normal(shape=(1,)))
        
        self.y_pred = tf.matmul(self.X, self.W) + self.b
        
        #self.loss = tf.reduce_mean(tf.square(self.y - self.y_pred))
        #self.loss = tf.reduce_mean(tf.cond(self.y_pred - self.y < self.epsilon, lambda: 0, lambda: 1))
        
        # Second part of following equation, loss is a function of how much the error exceeds a defined value, epsilon
        # Error lower than epsilon = no penalty.
        self.loss = tf.norm(self.W)/2 + self.C * tf.reduce_mean(tf.maximum(0., tf.abs(self.y_pred - self.y) - self.epsilon))
#         self.loss = tf.reduce_mean(tf.maximum(0., tf.abs(self.y_pred - self.y) - self.epsilon))
        
        opt = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
        opt_op = opt.minimize(self.loss)

        self.sess.run(tf.global_variables_initializer())
        
        for i in range(epochs):
            loss = self.sess.run(
                self.loss, 
                {
                    self.X: X,
                    self.y: y
                }
            )
            print("{}/{}: loss: {}".format(i + 1, epochs, loss))
            
            self.sess.run(
                opt_op, 
                {
                    self.X: X,
                    self.y: y
                }
            )
            
        return self
            
    def predict(self, X, y=None):
        if len(X.shape) == 1:
            X = X.reshape(-1, 1)
            
        y_pred = self.sess.run(
            self.y_pred, 
            {
                self.X: X #self.Xはスペースホルダーだから、値を入れなおさなきゃいけない
            }
        )
        return y_pred
    
    def coef_(self):
        result_w = self.sess.run(self.W)
        
        return result_w
    
    def intercept_(self):
        result_b = self.sess.run(self.b)
        
        return result_b
    
    def score(self, X, y):
        if len(X.shape) == 1:
            X = X.reshape(-1, 1)
        if len(y.shape) == 1:
            y = y.reshape(-1, 1)
        
        y_pred = self.sess.run(
            self.y_pred, 
            {
                self.X: X #self.Xはスペースホルダーだから、値を入れなおさなきゃいけない
            }
        )

        u = ((y_pred - y)**2).sum()
        v = ((y - y.mean())**2).sum()
        R2 = 1 - u/v
        
        return R2

In [119]:
X_SVR = df[df.columns[df.columns != 'cnt']].values
X_SVR_test = df_test.values

In [120]:
X_SVR.shape

(8645, 19)

In [121]:
model_SVR = SVR(epsilon=5, C=1)

In [122]:
model_SVR.fit(X_SVR, y, epochs = 2000, learning_rate = 0.1)

AttributeError: module 'tensorflow' has no attribute 'Session'

In [123]:
print(model_SVR.intercept_())
print(model_SVR.coef_())

AttributeError: 'SVR' object has no attribute 'sess'

In [124]:
print(model_SVR.predict(X_SVR_test).max())
print(model_SVR.predict(X_SVR_test).mean())
print(model_SVR.predict(X_SVR_test).min())

AttributeError: 'SVR' object has no attribute 'sess'

In [125]:
df_submit = pd.read_table('test.tsv')
df_submit['cnt'] = pd.DataFrame(model_SVR.predict(X_SVR_test).reshape(8734,1))
df_submit = df_submit[['id', 'cnt']]

AttributeError: 'SVR' object has no attribute 'sess'

In [126]:
df_submit.to_csv('submit2.csv', header=False, index=False)

## 線形回帰

In [127]:
from sklearn.linear_model import LinearRegression
lr = LinearRegression()

lr.fit(X_SVR, y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [128]:
print('coefficient = ', lr.coef_) # 説明変数の係数を出力
print('intercept = ', lr.intercept_) # 切片を出力

coefficient =  [[-1.50046383e+01  1.49890296e+02  4.91038616e+01 -5.67506111e+01
  -2.86684279e+01 -1.40425847e+15 -1.40425847e+15 -1.40425847e+15
  -1.40425847e+15 -2.16265879e+14 -2.16265879e+14 -2.16265879e+14
  -2.16265879e+14 -1.81225245e+14 -1.81225245e+14 -1.81225245e+14
  -1.81225245e+14 -1.81225245e+14 -1.81225245e+14]]
intercept =  [1.80174959e+15]


In [129]:
Y_pred = lr.predict(X_SVR_test) # 検証データを用いて目的変数を予測
Y_pred

array([[-13.  ],
       [-18.25],
       [-26.  ],
       ...,
       [ 52.25],
       [ 56.25],
       [ 51.  ]])

In [130]:
print(Y_pred.max())
print(Y_pred.mean())
print(Y_pred.min())

404.25
148.2829173345546
-209.75


In [145]:
Y_pred = lr.predict(X_SVR) 
u = (Y_pred - y)**2
v = (Y_pred - y.mean())**2
u = u.sum()
v = v.sum()
1-u/v

0.2695268162745501

## SVR(sckit-learn)

In [146]:
from sklearn.svm import SVR as SVR_sklearn

In [151]:
regr = SVR_sklearn(kernel="linear")
regr.fit(X_SVR, y)
print("切片と係数")
print(regr.intercept_)
print(regr.coef_)

  y = column_or_1d(y, warn=True)


切片と係数
[81.31338785]
[[ -5.83264081  71.58836662  63.86608004 -48.83639047  -8.83982572
  -25.64111449   6.19842648   6.60508439  12.83760362  12.62395013
    7.27018826 -18.89413839  -1.         -87.93031852 -68.10706413
   21.85623967  24.28189572 120.88830538 -10.98905812]]


In [152]:
print(regr.predict(X_SVR_test).max())
print(regr.predict(X_SVR_test).mean())
print(regr.predict(X_SVR_test).min())

337.44342751108246
125.619535966543
-85.19014341510886


In [166]:
Y_pred = regr.predict(X_SVR) 
u = (Y_pred.reshape(len(Y_pred), 1) - y)**2
v = (Y_pred.reshape(len(Y_pred), 1) - y.mean())**2
u = u.sum()
v = v.sum()
1-u/v

-0.11421599358660872