In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import LassoCV
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import RobustScaler
from keras import backend as K
from keras.models import Sequential
from keras.layers import Dense, InputLayer
from keras.wrappers.scikit_learn import KerasRegressor

from managing_utils import *

Using TensorFlow backend.


In [61]:
# Load the raw train and test tables from the local data folder.
train = pd.read_csv('./robot_data/train_data.csv')
test = pd.read_csv('./robot_data/test_data.csv')

# Row count of the training table; used later to split a stacked
# train+test array back into its two parts.
num_train = train.shape[0]

In [62]:
# Keep the test set's 'year' column: it is dropped from `test` a few cells
# below and reused as the key column of every submission frame.
year_test = test['year']

In [63]:
# Shuffle the training rows. A fixed seed keeps the row order reproducible, so
# everything downstream that depends on it (CV fold membership, the Keras
# validation split) is the same on every Restart & Run All.
train = train.sample(frac=1, random_state=42).reset_index(drop=True)

In [64]:
# Extract the regression label. This runs after the shuffle, so the labels
# stay aligned with the reordered rows, and before 'target' is dropped from
# the feature frame.
y_train = train['target']

In [65]:
# Remove the 'year' key and the 'target' label from both splits so that only
# model inputs remain.
train, test = (
    frame.drop(columns=['year', 'target']) for frame in (train, test)
)

In [66]:
# Run simple_encode on a copy of each split.
# NOTE(review): simple_encode arrives through the star import of managing_utils
# and is applied to train and test independently -- confirm it produces the
# same columns (and the same encoding) for both, since they are stacked next.
train, test = (simple_encode(frame.copy()) for frame in (train, test))

In [67]:
# Stack the two splits row-wise, training rows first; the result is split
# back by position with `num_train` after scaling.
df_all = pd.concat([train, test])

In [68]:
df_all.head()

Unnamed: 0,robot_gear_compression_diff_1,weapon_robot_armour_index_2,robot_gear_compression_diff_3,robot_gear_compression_diff_4,weapon_robot_punch_right_1,robot_gear_compression_diff_6,robot_gear_compression_diff_7,robot_gear_compression_diff_8,robot_gear_compression_diff_9,robot_gear_compression_diff_10,...,weapon_robot_eye_laser_sensor_4,robot_probe_temperature_5,robot_probe_temperature_6,robot_probe_temperature_7,robot_probe_temperature_8,robot_probe_temperature_9,weapon_robot_eye_laser_range_1,weapon_robot_punch_left_4,weapon_robot_punch_left_2,gamma_ray
0,14.77864,15.137062,16.059129,9.700867,17.306024,11.575922,19.362588,13.624536,-15.913761,21.324387,...,0.385419,-9.609263,-6.156877,2.542281,3.088324,1.28631,1.11037,-3.019102,-27.090173,0.25
1,3.630786,3.17774,1.912224,-1.527513,-5.789436,3.889655,3.070223,2.172617,-18.060239,-0.793423,...,0.385419,7.646213,4.227676,1.160998,2.992417,0.683194,0.740192,-0.647793,-3.94622,0.5
2,-18.021881,-17.889957,-30.008182,-86.361924,-47.201019,-15.785587,-27.947707,-25.795492,-97.607635,-34.821692,...,0.385419,7.34176,1.817519,1.612755,2.945107,1.738819,1.512082,-0.647793,-34.318704,0.5
3,-9.425478,-10.598927,-9.973356,-2.208846,-22.276067,-7.360231,-14.095744,-7.314123,16.365859,-21.135145,...,0.528241,6.842155,5.360223,-0.095677,3.1115,1.666459,1.050261,8.203714,13.207808,0.75
4,0.429388,0.711653,-2.661736,-12.004468,-5.32156,1.316416,-1.274352,-1.458662,-31.296882,-6.629988,...,0.385419,7.94643,4.152772,1.31267,3.117873,1.241301,1.421854,-0.647793,-4.945096,0.5


In [69]:
# Learn the robust centring/scaling statistics (median and quantile range)
# from the training rows only, so that test rows do not leak into the
# preprocessing, then apply that single transform to every stacked row.
# The first `num_train` rows of df_all are the training rows.
scaler = RobustScaler()
scaler.fit(df_all[:num_train])

# transform() returns a NumPy array, so df_all is an ndarray from here on.
df_all = scaler.transform(df_all)

In [70]:
# Undo the stacking: the leading `num_train` rows are the training set, the
# remainder is the test set.
train, test = df_all[:num_train], df_all[num_train:]

In [50]:
# Lasso-based feature selection: fit a 5-fold cross-validated Lasso on the
# training data and let SelectFromModel keep the columns selected under
# threshold=1e-07. The same fitted selector is then applied to both splits.
sfm = SelectFromModel(LassoCV(cv=5), threshold=1e-07).fit(train, y_train)
train, test = sfm.transform(train), sfm.transform(test)

In [51]:
sfm.threshold_

1e-07

In [52]:
# Map the selector's boolean support mask back to column names, using the
# header of the raw training file with 'year' and 'target' removed.
# NOTE(review): this assumes simple_encode neither adds, removes nor reorders
# columns relative to the raw file -- confirm, otherwise the names are misaligned.
df = pd.read_csv('./robot_data/train_data.csv').drop(columns=['year', 'target'])
feature_idx = sfm.get_support()
feature_name = df.columns[feature_idx]
feature_name

Index(['weapon_robot_punch_right_1', 'robot_gear_circulation_12',
       'weapon_robot_gun_power_3', 'robot_gear_temperature_11',
       'robotic_circuits_speed_12', 'robot_engine_speed_13',
       'robot_engine_speed_15', 'robot_engine_circulation_6',
       'robot_engine_circulation_7', 'robot_probe_circulation_6',
       'robot_probe_circulation_7', 'robot_probe_temperature_8'],
      dtype='object')

In [53]:
'Number of features: %d' % train.shape[1]

'Number of features: 12'

In [98]:
def create_model(neurons=20):
    """Build and compile a fully connected regression network.

    The network has three tanh hidden layers of ``neurons`` units each and a
    single linear output unit. The input width is read from the notebook-level
    ``train`` array when the function is called, so ``train`` must already
    hold the final feature matrix.

    Args:
        neurons: Number of units in each of the three hidden layers.

    Returns:
        A compiled Keras ``Sequential`` model (MSE loss, Nadam optimizer,
        ``coeff_determination`` as the reported metric).
    """
    net = Sequential()
    net.add(InputLayer(input_shape=(train.shape[1],)))

    # Three identical hidden layers.
    for _ in range(3):
        net.add(Dense(neurons, activation='tanh'))

    net.add(Dense(1, activation='linear'))
    net.compile(
        optimizer='nadam',
        loss='mean_squared_error',
        metrics=[coeff_determination],
    )
    return net

In [73]:
# Wrap the Keras builder in a scikit-learn style regressor so it can be used
# as a GridSearchCV estimator; each fit runs 75 epochs with logging off.
model = KerasRegressor(build_fn=create_model, epochs=75, verbose=0)

In [74]:
# Grid-search the hidden-layer width with 5-fold CV, scored by negative MSE.
# return_train_score=True is set explicitly because the reporting cell reads
# cv_results_['mean_train_score'] and ['std_train_score']; recent scikit-learn
# releases do not compute training scores unless asked to.
gsc = GridSearchCV(
    estimator=model,
    param_grid={
        'neurons': range(20, 80, 4)
    },
    scoring='neg_mean_squared_error',
    cv=5,
    return_train_score=True
)

In [75]:
grid_result = gsc.fit(train, y_train)



In [76]:
"Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_)

"Best: -70.577451 using {'neurons': 68}"

In [77]:
# One line per grid point: mean (std) of the train and test CV scores.
# NOTE: the *_train_score keys are only present when the search was created
# with training scores enabled.
results = grid_result.cv_results_
for train_mean, train_stdev, test_mean, test_stdev, param in zip(
        results['mean_train_score'],
        results['std_train_score'],
        results['mean_test_score'],
        results['std_test_score'],
        results['params']):
    print("Train: %f (%f) // Test : %f (%f) with: %r"
          % (train_mean, train_stdev, test_mean, test_stdev, param))
    

Train: -103.080701 (5.660198) // Test : -131.558517 (16.107492) with: {'neurons': 20}
Train: -86.836912 (3.779908) // Test : -120.941684 (24.246797) with: {'neurons': 24}
Train: -70.340725 (5.061097) // Test : -101.954682 (17.944758) with: {'neurons': 28}
Train: -60.397982 (6.530301) // Test : -93.874045 (16.440557) with: {'neurons': 32}
Train: -51.584305 (1.962680) // Test : -84.141915 (18.102621) with: {'neurons': 36}
Train: -47.013261 (4.423008) // Test : -87.519279 (28.338121) with: {'neurons': 40}
Train: -41.354478 (2.382907) // Test : -86.649531 (21.989518) with: {'neurons': 44}
Train: -37.867747 (1.732104) // Test : -78.352130 (28.353550) with: {'neurons': 48}
Train: -38.528453 (5.611626) // Test : -86.148180 (27.219925) with: {'neurons': 52}
Train: -34.632422 (4.769129) // Test : -74.180238 (27.190082) with: {'neurons': 56}
Train: -35.686812 (11.688889) // Test : -72.775210 (31.389504) with: {'neurons': 60}
Train: -37.867366 (14.382463) // Test : -92.374571 (30.146171) with: {'



In [99]:
model = create_model(**grid_result.best_params_)

In [None]:
model.fit(train, y_train, epochs=75, verbose=2)

In [101]:
y_test = model.predict(test).flatten()

In [102]:
# Pair each test 'year' with its prediction and write the submission file
# without the index column.
df_sub = pd.DataFrame({'year': year_test}).assign(target=y_test)
df_sub.to_csv('./submissions/subm_lasso_02.csv', index=False)

# Keras model selection: grid search over network depth and width

In [54]:
# Model from which we will extract features
def create_model_feature(layers=1, neurons=20):
    """Build and compile a tanh MLP regressor with a configurable depth.

    The input width is read from the notebook-level ``train`` array when the
    function is called.

    Args:
        layers: Number of tanh hidden layers to stack.
        neurons: Number of units in every hidden layer.

    Returns:
        A compiled Keras ``Sequential`` model ending in one linear unit
        (MSE loss, Nadam optimizer, ``coeff_determination`` metric).
    """
    # Layers are created in the order they are stacked: input, hidden, output.
    stack = (
        [InputLayer(input_shape=(train.shape[1],))]
        + [Dense(neurons, activation='tanh') for _ in range(layers)]
        + [Dense(1, activation='linear')]
    )

    net = Sequential()
    for layer in stack:
        net.add(layer)

    net.compile(
        optimizer='nadam',
        loss='mean_squared_error',
        metrics=[coeff_determination],
    )
    return net

In [56]:
# scikit-learn style wrapper around the configurable-depth builder; each fit
# runs 100 epochs with logging off.
model = KerasRegressor(build_fn=create_model_feature, epochs=100, verbose=0)

In [71]:
# Grid-search hidden-layer width and depth jointly with 3-fold CV, scored by R^2.
# return_train_score=True is set explicitly because the reporting cell reads
# cv_results_['mean_train_score'] and ['std_train_score']; recent scikit-learn
# releases do not compute training scores unless asked to.
gsc = GridSearchCV(
    estimator=model,
    param_grid={
        'neurons': range(20, 100, 10),
        'layers': range(1, 6)
    },
    scoring='r2',
    cv=3,
    return_train_score=True
)

In [72]:
grid_result = gsc.fit(train, y_train)

In [73]:
"Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_)

"Best: 0.882499 using {'layers': 3, 'neurons': 90}"

In [74]:
# One line per (layers, neurons) combination: mean (std) of the train and
# test CV scores.
# NOTE: the *_train_score keys are only present when the search was created
# with training scores enabled.
results = grid_result.cv_results_
for train_mean, train_stdev, test_mean, test_stdev, param in zip(
        results['mean_train_score'],
        results['std_train_score'],
        results['mean_test_score'],
        results['std_test_score'],
        results['params']):
    print("Train: %f (%f) // Test : %f (%f) with: %r"
          % (train_mean, train_stdev, test_mean, test_stdev, param))
    

Train: 0.712372 (0.013856) // Test : 0.668258 (0.007072) with: {'layers': 1, 'neurons': 20}
Train: 0.785631 (0.021693) // Test : 0.722505 (0.040267) with: {'layers': 1, 'neurons': 30}
Train: 0.843286 (0.035281) // Test : 0.767591 (0.031533) with: {'layers': 1, 'neurons': 40}
Train: 0.874177 (0.010242) // Test : 0.799726 (0.007777) with: {'layers': 1, 'neurons': 50}
Train: 0.892483 (0.013242) // Test : 0.813930 (0.006813) with: {'layers': 1, 'neurons': 60}
Train: 0.915863 (0.004796) // Test : 0.837334 (0.008430) with: {'layers': 1, 'neurons': 70}
Train: 0.935666 (0.002174) // Test : 0.856807 (0.015428) with: {'layers': 1, 'neurons': 80}
Train: 0.937212 (0.006403) // Test : 0.860044 (0.010010) with: {'layers': 1, 'neurons': 90}
Train: 0.832787 (0.003232) // Test : 0.768517 (0.009593) with: {'layers': 2, 'neurons': 20}
Train: 0.908375 (0.009859) // Test : 0.834448 (0.003437) with: {'layers': 2, 'neurons': 30}
Train: 0.928676 (0.012068) // Test : 0.852672 (0.016290) with: {'layers': 2, 'ne



In [81]:
model = create_model_feature(**grid_result.best_params_)

In [85]:
# Train for 3 epochs while holding out 20% of the rows for validation.
# NOTE(review): the recorded output already starts at loss ~13.98 in epoch 1,
# which suggests this cell was run on a model that had been trained before;
# the total number of epochs behind the saved predictions is not recorded --
# confirm and make the epoch count explicit.
model.fit(train, y_train, epochs=3, verbose=2, validation_split=0.2)

Train on 2610 samples, validate on 653 samples
Epoch 1/3
 - 0s - loss: 13.9756 - coeff_determination: 0.9344 - val_loss: 28.7585 - val_coeff_determination: 0.8880
Epoch 2/3
 - 0s - loss: 13.9777 - coeff_determination: 0.9389 - val_loss: 29.0937 - val_coeff_determination: 0.8864
Epoch 3/3
 - 0s - loss: 13.9917 - coeff_determination: 0.9386 - val_loss: 28.5713 - val_coeff_determination: 0.8879


<keras.callbacks.History at 0x1a6f39ed30>

In [86]:
y_test = model.predict(test).flatten()

In [87]:
# Pair each test 'year' with its prediction and write the submission file
# without the index column.
df_sub = pd.DataFrame({'year': year_test}).assign(target=y_test)
df_sub.to_csv('./submissions/subm_keras_features_01.csv', index=False)

# Hmm: try predicting with a previously saved model instead

In [113]:
# Rebuild the test features from the raw file: drop the key/label columns and
# run simple_encode.
# NOTE(review): unlike the pipeline above, no scaling or feature selection is
# applied here -- presumably the saved model was trained on these encoded
# features directly; confirm.
test = simple_encode(
    pd.read_csv('./robot_data/test_data.csv').drop(columns=['year', 'target'])
)

In [114]:
from keras.models import load_model

In [125]:
# Load a saved Keras model from disk. coeff_determination is passed through
# custom_objects -- presumably because the saved model was compiled with that
# custom metric.
# NOTE(review): the original note read "940_simple looks like right model", but
# the file loaded here is model_979_simple.h5 -- confirm which checkpoint is
# intended.
model = load_model('models/model_979_simple.h5', custom_objects={'coeff_determination': coeff_determination})

In [126]:
y_test = model.predict(test).flatten()

In [127]:
# Pair each test 'year' with the loaded model's prediction and write the
# result without the index column.
df_sub = pd.DataFrame({'year': year_test}).assign(target=y_test)
df_sub.to_csv('./test.csv', index=False)

In [128]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_13 (Dense)             (None, 128)               18560     
_________________________________________________________________
dense_14 (Dense)             (None, 256)               33024     
_________________________________________________________________
dense_15 (Dense)             (None, 256)               65792     
_________________________________________________________________
dense_16 (Dense)             (None, 256)               65792     
_________________________________________________________________
dense_17 (Dense)             (None, 256)               65792     
_________________________________________________________________
dense_18 (Dense)             (None, 1)                 257       
Total params: 249,217
Trainable params: 249,217
Non-trainable params: 0
_________________________________________________________________
