In [15]:
import pandas as pd
import csv
import matplotlib.pyplot as plt
import numpy as np 
from ipykernel import kernelapp as app
from sklearn.model_selection import train_test_split,GridSearchCV,RandomizedSearchCV, KFold,cross_val_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
import optuna
import sklearn
from sklearn import datasets
from sklearn.svm import SVR
import optuna 
from optkeras.optkeras import OptKeras
import optkeras

# Load the dataset: every column except the last is a feature, the last column
# is the target. Casting to float keeps the rescaling below valid even when the
# CSV happens to contain only integers.
df = pd.read_csv('10data.csv')
data = df.to_numpy(dtype=float)
X_features = data[:, :-1]
Y_properties = data[:, -1]

# standard
# Rescale the target by the power of ten of its mean so its values are O(1).
mean = Y_properties.mean()
if not mean > 0:
    # log10 is undefined for a non-positive (or NaN) mean; fail with a clear message.
    raise ValueError(
        "target mean must be positive to derive a power-of-ten scale, got %r" % mean
    )
scale = 10**int(np.log10(mean))
# Out-of-place division: Y_properties is a view into `data`, so `/=` would
# silently rewrite the loaded array (and raise on integer data).
Y_properties = Y_properties / scale

# Remove outliers: keep only rows whose target z-score lies strictly within (-3, 3).
mean = Y_properties.mean()
std = Y_properties.std()
Zs = (Y_properties - mean) / std
mask = (Zs < 3) & (Zs > -3)

X_features = X_features[mask, :]
Y_properties = Y_properties[mask]

# Hold out 20% of the rows as the final test set.
X_train, X_test, Y_train, Y_test = train_test_split(
    X_features, Y_properties, test_size=0.2, random_state=33
)

# Shared 6-fold splitter used for cross-validation on the training split.
kfold = KFold(n_splits=6, shuffle=True, random_state=42)

# Ask which model family to tune; normalise the answer so that stray
# whitespace or capitals ("RF ", "Svm") still select a branch.
model = input('svm or rf choose   ').strip().lower()

if (model == 'rf'):

    def objective(trial):
        """Return the mean cross-validated R^2 of a random forest sampled from `trial`.

        Every sampled hyper-parameter is passed to the estimator, so the best
        parameters reported by the study describe a configuration that was
        actually evaluated (they are later fed to ``RandomForestRegressor(**params)``).
        """
        max_depth = trial.suggest_int("max_depth", 2, 32)
        list_trees = [10, 15, 25, 50, 75, 100, 125, 150, 175, 200]
        n_estimators = trial.suggest_categorical('n_estimators', list_trees)
        max_features = trial.suggest_float('max_features', 0.15, 1.0)
        min_samples_split = trial.suggest_int('min_samples_split', 2, 14)
        min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 14)
        max_samples = trial.suggest_float('max_samples', 0.6, 0.99)

        regressor = RandomForestRegressor(
            n_estimators=n_estimators,
            max_features=max_features,
            min_samples_split=min_samples_split,
            min_samples_leaf=min_samples_leaf,
            max_samples=max_samples,
            max_depth=max_depth,
            bootstrap=True,  # max_samples only applies to bootstrapped forests
            n_jobs=-1,
            verbose=0,
        )

        return cross_val_score(
            regressor, X_train, Y_train, scoring='r2', cv=kfold, n_jobs=-1
        ).mean()

    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=5)
    # print('Number of finished trials:', len(study.trials))
    # print('Best trial:', study.best_trial.params)
    params = study.best_trial.params

    # Refit the best configuration on each CV fold and record R^2 on the fold's
    # training part, its validation part and the held-out test set.
    cvscores_rf = []
    scores_train_rf = []
    scores_test_rf = []
    for train, test in kfold.split(X_train, Y_train):
        np.random.seed(5)
        model_rf = RandomForestRegressor(**params)
        model_rf.fit(X_train[train], Y_train[train])

        # r2_score expects (y_true, y_pred); the metric is not symmetric.
        scores_train_rf.append(r2_score(Y_train[train], model_rf.predict(X_train[train])))
        cvscores_rf.append(r2_score(Y_train[test], model_rf.predict(X_train[test])))

        y_test_pred = model_rf.predict(X_test)
        scores_test_rf.append(r2_score(Y_test, y_test_pred))

        # Density heat-map of true vs. predicted test values for this fold.
        heatmap, xedges, yedges = np.histogram2d(Y_test, y_test_pred, bins=40, density=True)
        extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
        plt.figure(figsize=(5, 5))
        # Dashed diagonal marks perfect predictions.
        plt.plot(np.arange(9), np.arange(9), color='grey', linestyle='dashed')
        # 'gist_heat_r' is the built-in reversed 'gist_heat' colormap.
        plt.imshow(heatmap.T, extent=extent, origin='lower', cmap='gist_heat_r')
        plt.xlim(2, 8)
        plt.ylim(2, 8)
        plt.xlabel("Real Values")
        plt.ylabel("Predicted Value")
        plt.title("Random Forest")
        plt.show()

    print(np.array(scores_train_rf))
    print(np.array(cvscores_rf))
    print(np.array(scores_test_rf))
    
if (model == 'svm'):

    def objective(trial):
        """Return the mean cross-validated R^2 of an RBF-kernel SVR sampled from `trial`."""
        C = trial.suggest_float('C', 0.01, 20)
        gamma = trial.suggest_float('gamma', 0.0001, 0.1)

        modelsv = SVR(kernel='rbf', C=C, gamma=gamma)

        return cross_val_score(
            modelsv, X_train, Y_train, scoring='r2', cv=kfold, n_jobs=-1
        ).mean()

    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=5)
    # print('Number of finished trials:', len(study.trials))
    # print('Best trial:', study.best_trial.params)
    params = study.best_trial.params

    # Refit the best configuration on each CV fold and record R^2 on the fold's
    # training part, its validation part and the held-out test set.
    cvscores = []
    scores_train = []
    scores_test = []
    for train, test in kfold.split(X_train, Y_train):
        np.random.seed(5)
        # Separate name so the `model` selection string is not overwritten.
        model_svr = SVR(**params)
        model_svr.fit(X_train[train], Y_train[train])

        # r2_score expects (y_true, y_pred); the metric is not symmetric.
        scores_train.append(r2_score(Y_train[train], model_svr.predict(X_train[train])))
        cvscores.append(r2_score(Y_train[test], model_svr.predict(X_train[test])))

        y_test_pred = model_svr.predict(X_test)
        scores_test.append(r2_score(Y_test, y_test_pred))

        # Density heat-map of true vs. predicted test values for this fold.
        heatmap, xedges, yedges = np.histogram2d(Y_test, y_test_pred, bins=40, density=True)
        extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
        plt.figure(figsize=(5, 5))
        # Dashed diagonal marks perfect predictions.
        plt.plot(np.arange(9), np.arange(9), color='grey', linestyle='dashed')
        # 'gist_heat_r' is the built-in reversed 'gist_heat' colormap.
        plt.imshow(heatmap.T, extent=extent, origin='lower', cmap='gist_heat_r')
        plt.xlim(2, 8)
        plt.ylim(2, 8)
        plt.xlabel("Real Values")
        plt.ylabel("Predicted Value")
        plt.title("SVM")
        plt.show()

    print(np.array(scores_train))
    print(np.array(cvscores))
    print(np.array(scores_test))

AttributeError: 'Version' object has no attribute 'major'

AttributeError: 'Version' object has no attribute 'major'

In [11]:

import pandas as pd
import csv
import matplotlib.pyplot as plt
import numpy as np 
from ipykernel import kernelapp as app
from sklearn.model_selection import train_test_split,GridSearchCV,RandomizedSearchCV, KFold,cross_val_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
import optuna
import sklearn
from sklearn import datasets
from sklearn.svm import SVR
import optuna 
from optkeras.optkeras import OptKeras
import optkeras
import keras
from keras.models import Sequential
from keras.layers import Activation, Dense, Conv2D
from keras.layers import MaxPooling2D, Dropout, BatchNormalization
from optuna.integration import KerasPruningCallback
from sklearn.metrics import r2_score

# Load the dataset: every column except the last is a feature, the last column
# is the target. Casting to float keeps the rescaling below valid even when the
# CSV happens to contain only integers.
df = pd.read_csv('30data.csv')
data = df.to_numpy(dtype=float)
X_features = data[:, :-1]
Y_properties = data[:, -1]

# standard
# Rescale the target by the power of ten of its mean so its values are O(1).
mean = Y_properties.mean()
if not mean > 0:
    # log10 is undefined for a non-positive (or NaN) mean; fail with a clear message.
    raise ValueError(
        "target mean must be positive to derive a power-of-ten scale, got %r" % mean
    )
scale = 10**int(np.log10(mean))
# Out-of-place division: Y_properties is a view into `data`, so `/=` would
# silently rewrite the loaded array (and raise on integer data).
Y_properties = Y_properties / scale

# Remove outliers: keep only rows whose target z-score lies strictly within (-3, 3).
mean = Y_properties.mean()
std = Y_properties.std()
Zs = (Y_properties - mean) / std
mask = (Zs < 3) & (Zs > -3)

X_features = X_features[mask, :]
Y_properties = Y_properties[mask]

# Hold out 20% of the rows as the final test set.
X_train, X_test, Y_train, Y_test = train_test_split(
    X_features, Y_properties, test_size=0.2, random_state=33
)

# Shared 6-fold splitter used for cross-validation on the training split.
kfold = KFold(n_splits=6, shuffle=True, random_state=42)

AttributeError: 'Version' object has no attribute 'major'

In [12]:
def create_model(trial):
    """Build and compile a dense regression network from Optuna-sampled hyper-parameters.

    Sampled parameters:
      * ``n_layers``            -- number of hidden layers, 1 to 5
      * ``n_units_l{i}``        -- width of hidden layer ``i``, 8 to 16
      * ``dropout_l{i}``        -- dropout rate after hidden layer ``i``, 0.1 to 0.3
      * ``kernel_initializer``  -- one initializer shared by all hidden layers
      * ``lr``                  -- Adam learning rate, log-uniform in [1e-5, 1e-1]

    Args:
        trial: object exposing Optuna's ``suggest_*`` methods.

    Returns:
        The compiled ``Sequential`` model (mean-squared-error loss, one output unit).
    """
    n_layers = trial.suggest_int("n_layers", 1, 5)
    # The same parameter name yields the same value throughout a trial, so the
    # initializer is sampled once rather than on every loop iteration.
    kernel_initializer = trial.suggest_categorical(
        'kernel_initializer', ['he_uniform', 'random_normal']
    )

    model = Sequential()
    for i in range(n_layers):
        num_hidden = trial.suggest_int("n_units_l{}".format(i), 8, 16)
        # Only the first layer needs to declare the input width (24 columns).
        input_kwargs = {"input_dim": 24} if i == 0 else {}
        model.add(
            Dense(
                num_hidden,
                activation="relu",
                kernel_initializer=kernel_initializer,
                **input_kwargs
            )
        )
        dropout = trial.suggest_float("dropout_l{}".format(i), 0.1, 0.3)
        model.add(Dropout(rate=dropout))
    model.add(Dense(1))

    # We compile our model with a sampled learning rate. It must be handed to
    # the optimizer explicitly: the string 'adam' would use the default rate
    # and make the `lr` parameter a no-op.
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    model.compile(
        loss='mean_squared_error',
        optimizer=keras.optimizers.Adam(learning_rate=lr),
        # Mean absolute error is informative for regression; accuracy is not.
        metrics=["mae"],
    )

    return model

# Carve a validation subset (16%) out of the training split for the tuning runs.
x_train, x_valid, y_train, y_valid = train_test_split(
    X_train,
    Y_train,
    test_size=0.16,
    random_state=33,
)


def objective(trial):
    """Train one sampled network and return its R^2 on the validation subset."""
    # Reset Keras' global state before building the next network.
    keras.backend.clear_session()

    network = create_model(trial)

    fit_options = dict(
        batch_size=256,
        epochs=250,
        validation_data=(x_valid, y_valid),
        verbose=1,
    )
    network.fit(x_train, y_train, **fit_options)

    valid_predictions = network.predict(x_valid)
    return r2_score(y_valid, valid_predictions)
    
    
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)
params = study.best_trial.params

# Refit the best architecture on each CV fold and record R^2 on the fold's
# training part, its validation part and the held-out test set.
cvscores = []
scores_train = []
scores_test = []
for train, test in kfold.split(X_train, Y_train):
    np.random.seed(5)
    keras.backend.clear_session()
    # Rebuild the winning network by replaying the best trial through
    # create_model: its suggest_* calls return the stored best values.
    model = create_model(study.best_trial)
    # Same batch size / epoch budget as the tuning runs in `objective`, so the
    # refit model is trained the way the selected configuration was scored.
    model.fit(X_train[train], Y_train[train], batch_size=256, epochs=250, verbose=0)

    # r2_score expects (y_true, y_pred); the metric is not symmetric.
    scores2 = r2_score(Y_train[train], model.predict(X_train[train]).ravel())

    scores = r2_score(Y_train[test], model.predict(X_train[test]).ravel())

    y_test_pred = model.predict(X_test).ravel()
    scores3 = r2_score(Y_test, y_test_pred)

    scores_train.append(scores2)
    cvscores.append(scores)
    scores_test.append(scores3)

print(np.array(scores_train))
print(np.array(cvscores))
print(np.array(scores_test))
    


NameError: name 'X_train' is not defined

In [None]:
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error
model = XGBRegressor(objective="reg:squarederror")
def objective(trial):
    """Return the validation RMSE of an XGBoost regressor sampled from `trial`.

    The model is fitted and early-stopped on a validation subset carved out of
    the training split, so the held-out test set plays no part in choosing
    hyper-parameters.
    """
    param = {
            #'tree_method':'gpu_hist',  # this parameter means using the GPU when training our model to speedup the training process
            # 'lambda' / 'alpha' are aliases of 'reg_lambda' / 'reg_alpha';
            # each regularisation term is sampled once, under one name.
            'reg_lambda': trial.suggest_float('reg_lambda', 1e-4, 1.0, log=True),
            'gamma': trial.suggest_float('gamma', 1e-8, 1.0, log=True),
            'reg_alpha': trial.suggest_float('reg_alpha', 1e-4, 1.0, log=True),
            'colsample_bytree': trial.suggest_categorical('colsample_bytree', [0.7,0.8,0.9, 1]),
            'subsample': trial.suggest_categorical('subsample', [0.7,0.8,0.9,1.0]),
            'eta' : trial.suggest_float('eta', 1e-8, 1.0, log=True),
            'grow_policy' : trial.suggest_categorical('grow_policy', ['depthwise','lossguide']),
            # Single-choice categorical so the value is recorded in the best params.
            'n_estimators': trial.suggest_categorical('n_estimators',[300]),
            'max_depth': trial.suggest_categorical('max_depth', [1,2,3,4,5,6,7,8,9])
            #'sample_type' : trial.suggest_categorical('sample_type', ['uniform','weighted']),
            #'rate_drop': trial.suggest_loguniform('rate_drop', 1e-8, 1.0),
            #'skip_drop': trial.suggest_loguniform('skip_drop', 1e-8, 1.0),
            #'min_child_weight': trial.suggest_int('min_child_weight', 1, 300)
            }

    # Fixed random_state: every trial is scored on the same validation rows.
    fit_x, valid_x, fit_y, valid_y = train_test_split(
        X_train, Y_train, test_size=0.2, random_state=33
    )

    model = XGBRegressor(**param)

    # NOTE(review): newer xgboost releases expect early_stopping_rounds in the
    # constructor rather than in fit() -- confirm against the installed version.
    model.fit(fit_x, fit_y, eval_set=[(valid_x, valid_y)], early_stopping_rounds=100, verbose=False)

    preds = model.predict(valid_x)

    # mean_squared_error returns the MSE by default; its square root is the RMSE.
    rmse = np.sqrt(mean_squared_error(valid_y, preds))

    return rmse
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials= 10)
# print('Number of finished trials:', len(study.trials))
# print('Best trial:', study.best_trial.params)
params = study.best_trial.params

# Refit the best configuration on each CV fold and record R^2 on the fold's
# training part, its validation part and the held-out test set.
cvscores = []
scores_train = []
scores_test = []
for train, test in kfold.split(X_train, Y_train):
    np.random.seed(5)
    model = XGBRegressor(**params)
    model.fit(X_train[train], Y_train[train])

    # r2_score expects (y_true, y_pred); the metric is not symmetric.
    scores2 = r2_score(Y_train[train], model.predict(X_train[train]))

    scores = r2_score(Y_train[test], model.predict(X_train[test]))

    y_test_pred = model.predict(X_test)
    scores3 = r2_score(Y_test, y_test_pred)

    scores_train.append(scores2)
    cvscores.append(scores)
    scores_test.append(scores3)
print(np.array(scores_train))
print(np.array(cvscores))
print(np.array(scores_test))

In [10]:
from xgboost import XGBRegressor