In [1]:
# Mount Google Drive at /content/drive; the data files read later in this
# notebook are addressed under that mount point (Colab runtime only).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import os
import numpy as np
import pandas as pd
import sklearn
import collections
from functools import partial
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from hyperopt import fmin, tpe, hp, rand, Trials, space_eval

In [3]:
def impute_values(data):
    """Replace infinities and missing values with the per-column mean.

    For every column, ``+inf``/``-inf`` are first converted to ``NaN`` and
    then all ``NaN`` entries are filled with that column's mean (computed
    after the infinities have been removed, so they do not distort it).

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to clean. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object, for call-site convenience.
    """
    for c in data.columns:
        # Assign the cleaned column back explicitly: calling
        # ``data[c].replace(..., inplace=True)`` operates on a column
        # selection, which is deprecated and does not update ``data``
        # when pandas Copy-on-Write is active.
        cleaned = data[c].replace([np.inf, -np.inf], np.nan)
        data[c] = cleaned.fillna(cleaned.mean())
    return data

def rf_objective(parameter, feature, label):
    """Return the 5-fold cross-validated mean absolute error of a random forest.

    ``parameter`` is unpacked as keyword arguments into
    ``RandomForestRegressor``; ``feature`` and ``label`` are the training
    inputs and target passed to ``cross_val_score``. The scorer yields
    negated MAE, so the fold mean is negated to obtain a loss to minimise.
    """
    model = RandomForestRegressor(**parameter, n_jobs = -1)
    fold_scores = cross_val_score(model, feature, label,
                                  cv = 5, scoring = 'neg_mean_absolute_error')
    return -fold_scores.mean()

In [4]:
# Hyperopt search space for the random forest. Every grid is built with an
# integer dtype: without it np.linspace yields floats (e.g. max_depth = 6.0),
# which scikit-learn's parameter validation rejects for max_depth.
# NOTE(review): 25 points over [2, 30] is not a whole-number step, so after
# integer truncation some values in that range are never tried for
# min_samples_leaf — confirm this is intended.
rf_parameter = {"n_estimators": hp.choice('n_estimators', np.linspace(10, 200, num = 20, dtype = np.int32)),
            "max_depth": hp.choice('max_depth', np.linspace(1, 20, num = 20, dtype = np.int32)),
            'min_samples_leaf': hp.choice('min_samples_leaf', np.linspace(2, 30, num = 25, dtype = np.int32))}

# Names of the regression targets; used as the prediction column names.
targets = ['CIEX', 'CIEY', 'CIEX_DIFF', 'CIEY_DIFF']

In [5]:
# Load the encoded train/test feature tables, discard the two identifier
# columns, and impute infinities / missing values column by column.
data_path = '/content/drive/My Drive/project/smart_manufacturing/csv_file'
train = impute_values(
    pd.read_csv(os.path.join(data_path, 'encode_train_features.csv'))
      .drop(['Unnamed: 0', 'P_ID'], axis = 1)
)
test = impute_values(
    pd.read_csv(os.path.join(data_path, 'encode_test_features.csv'))
      .drop(['Unnamed: 0', 'P_ID'], axis = 1)
)
# Target array; later cells index it by column, one column per entry in `targets`.
target = np.load('/content/drive/My Drive/project/smart_manufacturing/npy_file/target_array.npy')

In [6]:
# For each target: tune a random forest with hyperopt (TPE, 15 evaluations),
# refit it on the full training set with the best parameters, and predict
# the test set. Predictions are collected per target and stacked as columns.

# Use plain arrays for both fit and predict so the model sees the same input
# type in both calls (fitting on an array and predicting on a DataFrame
# triggers a feature-name mismatch warning in scikit-learn).
train_features = np.array(train)
test_features = np.array(test)

predictions = []
for i, target_name in enumerate(targets):
    objective = partial(rf_objective, feature = train_features, label = target[:, i])
    min_objective = fmin(fn = objective, space = rf_parameter, algo = tpe.suggest, max_evals = 15, trials = Trials())
    # fmin returns choice indices; space_eval maps them back to actual values.
    best_parameter = space_eval(rf_parameter, min_objective)
    print('Target is : ', target_name)
    print("best rf estimate parameters" , best_parameter)
    print("=============================")
    # Look parameters up by name instead of by position in dict.values(),
    # so the assignment cannot be scrambled by a change in key order.
    best_rf = RandomForestRegressor(n_estimators = int(best_parameter['n_estimators']),
                                    max_depth = int(best_parameter['max_depth']),
                                    min_samples_leaf = int(best_parameter['min_samples_leaf']),
                                    n_jobs = -1)
    # Train
    best_rf.fit(train_features, target[:, i])
    # Predict
    predictions.append(best_rf.predict(test_features))

# One column per target, one row per test sample (no hard-coded row count).
predict_data = np.column_stack(predictions)
predict_dataframe = pd.DataFrame(predict_data, columns = targets)

100%|██████████| 15/15 [02:20<00:00,  9.37s/it, best loss: 0.017718137995541675]
Target is :  CIEX
best rf estimate parameters {'max_depth': 6.0, 'min_samples_leaf': 10, 'n_estimators': 170}
100%|██████████| 15/15 [03:08<00:00, 12.57s/it, best loss: 0.017644075428174748]
Target is :  CIEY
best rf estimate parameters {'max_depth': 18.0, 'min_samples_leaf': 2, 'n_estimators': 80}
100%|██████████| 15/15 [02:36<00:00, 10.42s/it, best loss: 0.003908837753637767]
Target is :  CIEX_DIFF
best rf estimate parameters {'max_depth': 7.0, 'min_samples_leaf': 28, 'n_estimators': 200}
100%|██████████| 15/15 [03:16<00:00, 13.07s/it, best loss: 0.0039326756233675885]
Target is :  CIEY_DIFF
best rf estimate parameters {'max_depth': 20.0, 'min_samples_leaf': 28, 'n_estimators': 100}


In [10]:
# Build the submission table: the `id` column from the submission template
# followed by the predicted target columns.
df = pd.read_csv('/content/drive/My Drive/project/smart_manufacturing/data/submission.csv')
# pd.concat(axis = 1) aligns on the index and pads with NaN when the inputs
# differ in length, so fail loudly instead of writing a corrupt submission.
if len(df) != len(predict_dataframe):
    raise ValueError('submission.csv has %d rows but %d predictions were generated'
                     % (len(df), len(predict_dataframe)))
submmit = pd.concat([df['id'], predict_dataframe], axis = 1)
submmit

Unnamed: 0,id,CIEX,CIEY,CIEX_DIFF,CIEY_DIFF
0,2,3.136910,2.160318,0.007365,0.008398
1,4,3.136938,2.159338,0.008269,0.007842
2,5,3.136972,2.160718,0.009126,0.008346
3,7,3.136982,2.159494,0.008139,0.008429
4,10,3.136853,2.158966,0.007041,0.008279
...,...,...,...,...,...
5718,10674,0.732452,4.751103,0.004629,0.004705
5719,10675,0.732392,4.751425,0.004085,0.003483
5720,10676,0.732348,4.751731,0.003650,0.003048
5721,10680,0.732372,4.751463,0.003758,0.003008


In [None]:
# Write the submission table to CSV without the index column.
# NOTE(review): this is a local absolute path, unlike the Drive paths used to
# load the data — confirm the directory exists in the runtime that executes
# this cell.
submmit.to_csv('/home/motionlab/Desktop/weitai/project/test result/second_trial.csv', index = False)