In [17]:
import pandas as pd
import numpy as np
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import LassoCV
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import RobustScaler
from keras import backend as K
from keras.models import Sequential
from keras.layers import Dense, InputLayer
from keras.wrappers.scikit_learn import KerasRegressor

from managing_utils import *

In [85]:
train = pd.read_csv('./robot_data/train_data.csv')
test = pd.read_csv('./robot_data/test_data.csv')

num_train = len(train)

In [86]:
year_test = test['year']

In [87]:
train = train.sample(frac=1).reset_index(drop=True)

In [88]:
y_train = train['target']

In [89]:
test = test.drop(columns=['year', 'target'])
train = train.drop(columns=['year', 'target'])

In [90]:
train = simple_encode(train.copy())
test = simple_encode(test.copy())

In [91]:
df_all = pd.concat([train, test])

In [92]:
df_all.head()

Unnamed: 0,robot_gear_compression_diff_1,weapon_robot_armour_index_2,robot_gear_compression_diff_3,robot_gear_compression_diff_4,weapon_robot_punch_right_1,robot_gear_compression_diff_6,robot_gear_compression_diff_7,robot_gear_compression_diff_8,robot_gear_compression_diff_9,robot_gear_compression_diff_10,...,weapon_robot_eye_laser_sensor_4,robot_probe_temperature_5,robot_probe_temperature_6,robot_probe_temperature_7,robot_probe_temperature_8,robot_probe_temperature_9,weapon_robot_eye_laser_range_1,weapon_robot_punch_left_4,weapon_robot_punch_left_2,gamma_ray
0,-1.535539,-1.85312,-0.386286,15.337403,-6.841122,-2.833555,-6.874889,-5.741406,18.481229,-6.248703,...,0.813811,5.023002,5.236148,-0.561166,2.946505,1.35867,1.173954,5.531442,-6.35684,0.75
1,14.948409,14.917733,16.049968,17.242078,18.633408,11.171869,16.518522,13.288183,-14.432651,17.926551,...,0.528241,7.851436,2.820153,-0.528179,3.099115,2.608178,1.862051,-10.015326,-89.09539,0.5
2,-12.501167,-13.72929,-13.163379,-6.148563,3.606355,-8.416955,-14.73831,-13.636401,20.852301,-22.842443,...,0.813811,8.584943,3.632918,2.358821,3.11759,1.777423,1.496122,3.991699,-11.50771,0.75
3,12.250339,11.463944,15.31075,41.136878,16.249233,11.594227,17.027518,18.362356,46.516629,18.386935,...,0.813811,7.087089,6.010736,1.685909,3.312646,2.181635,1.899507,-3.439233,-14.840626,0.5
4,-4.141261,-5.294652,-2.738368,18.563424,-14.707739,-3.382031,-7.965869,-2.841373,41.201553,-10.566035,...,0.813811,4.681417,3.719504,3.185915,2.946785,2.473498,1.691893,5.330931,-5.664286,0.75


In [93]:
scaler = RobustScaler()

df_all = scaler.fit_transform(df_all)

In [94]:
train = df_all[:num_train]
test = df_all[num_train:]

In [95]:
sfm = SelectFromModel(LassoCV())
sfm.fit(train, y_train)
train = sfm.transform(train)
test = sfm.transform(test)



In [96]:
df = pd.read_csv('./robot_data/train_data.csv')
df = df.drop(columns=['year', 'target'])
feature_idx = sfm.get_support()
feature_name = df.columns[feature_idx]
feature_name

Index(['weapon_robot_punch_right_1', 'robot_gear_circulation_12',
       'weapon_robot_gun_power_3', 'robot_gear_temperature_11',
       'robotic_circuits_speed_12', 'robot_engine_speed_13',
       'robot_engine_speed_15', 'robot_engine_circulation_6',
       'robot_engine_circulation_7', 'robot_probe_circulation_6',
       'robot_probe_circulation_7', 'robot_probe_temperature_8'],
      dtype='object')

In [97]:
'Number of features: %d' % train.shape[1]

'Number of features: 12'

In [98]:
def create_model(neurons=20):
    model = Sequential()
    model.add(InputLayer(input_shape=(train.shape[1],)))
    model.add(Dense(neurons, activation='tanh'))
    model.add(Dense(neurons, activation='tanh'))
    model.add(Dense(neurons, activation='tanh'))
    model.add(Dense(1, activation='linear'))
    model.compile(loss='mean_squared_error', optimizer='nadam', metrics=[coeff_determination])
    return model

In [73]:
model = KerasRegressor(build_fn=create_model, epochs=75, verbose=0)

In [74]:
gsc = GridSearchCV(
    estimator=model,
    param_grid={
        'neurons': range(20, 80, 4)
    },
    scoring='neg_mean_squared_error',
    cv=5
)

In [75]:
grid_result = gsc.fit(train, y_train)



In [76]:
"Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_)

"Best: -70.577451 using {'neurons': 68}"

In [77]:
for test_mean, test_stdev, train_mean, train_stdev, param in zip(
        grid_result.cv_results_['mean_test_score'],
        grid_result.cv_results_['std_test_score'],
        grid_result.cv_results_['mean_train_score'],
        grid_result.cv_results_['std_train_score'],
        grid_result.cv_results_['params']):
    print("Train: %f (%f) // Test : %f (%f) with: %r" % (train_mean, train_stdev, test_mean, test_stdev, param))
    

Train: -103.080701 (5.660198) // Test : -131.558517 (16.107492) with: {'neurons': 20}
Train: -86.836912 (3.779908) // Test : -120.941684 (24.246797) with: {'neurons': 24}
Train: -70.340725 (5.061097) // Test : -101.954682 (17.944758) with: {'neurons': 28}
Train: -60.397982 (6.530301) // Test : -93.874045 (16.440557) with: {'neurons': 32}
Train: -51.584305 (1.962680) // Test : -84.141915 (18.102621) with: {'neurons': 36}
Train: -47.013261 (4.423008) // Test : -87.519279 (28.338121) with: {'neurons': 40}
Train: -41.354478 (2.382907) // Test : -86.649531 (21.989518) with: {'neurons': 44}
Train: -37.867747 (1.732104) // Test : -78.352130 (28.353550) with: {'neurons': 48}
Train: -38.528453 (5.611626) // Test : -86.148180 (27.219925) with: {'neurons': 52}
Train: -34.632422 (4.769129) // Test : -74.180238 (27.190082) with: {'neurons': 56}
Train: -35.686812 (11.688889) // Test : -72.775210 (31.389504) with: {'neurons': 60}
Train: -37.867366 (14.382463) // Test : -92.374571 (30.146171) with: {'



In [99]:
model = create_model(**grid_result.best_params_)

In [None]:
model.fit(train, y_train, epochs=75, verbose=2)

In [101]:
y_test = model.predict(test).flatten()

In [102]:
df_sub = pd.DataFrame({'year': year_test, 'target': y_test})
df_sub.to_csv('./submissions/subm_lasso_02.csv', index=False)