In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.ensemble import RandomForestRegressor
import matplotlib.pyplot as plt
from sklearn.inspection import permutation_importance
from sklearn import metrics
from sklearn.metrics import r2_score
from sklearn.feature_selection import VarianceThreshold
%matplotlib inline

# Result containers for the hyper-parameter sweep; both start out empty.
List_r2test = dict()
List_cv = dict()

# Grid of forest sizes (number of trees, n_est) to evaluate.
List_estim = [
    40, 45, 50, 55, 60, 70, 100,
    120, 150, 170, 200, 220, 250, 300,
]

# Grid of maximum tree depths (maxd) to evaluate.
List_depth = [
    4, 5, 6, 7, 8, 9,
    10, 11, 12, 15, 20,
]

# Columns to read from the descriptor file: the target ('K_off')
# followed by the descriptor columns used as model inputs.
Features = [
    'K_off',
    'AATSC5i',
    'MID_h',
    'Xp-4d',
    'Xpc-5d',
]

#Reading and processing of Features
# Only the columns named in `Features` are loaded from the semicolon-separated
# file. `sep` is passed by keyword: pandas >= 2.0 accepts only the path
# positionally, so a positional ';' raises TypeError there.
data = pd.read_csv(
    "Mordred_Features.csv",
    sep=';',
    low_memory=False,
    usecols=lambda x: x in Features,
)

# One-hot encode any non-numeric columns so every remaining column is numeric.
features = pd.get_dummies(data)

# Regression target: log10 of 'K_off'.
labels = np.array(np.log10(features['K_off']))

# Everything except the target becomes the model input; column names are kept
# in `feature_list` because the array conversion below discards them.
features = features.drop('K_off', axis = 1)
feature_list = list(features.columns)
features = np.array(features)

#**************************************************************************************************
#Sorting Features by descending variance
#
# Each column is min-max scaled to [0, 1] and its population variance
# (divide by n) is computed on the scaled values. Columns whose variance
# exceeds `variance_threshold` are kept and reordered from highest to lowest
# variance; `feature_list` is reordered to match.

# A threshold of -1 keeps every column, since a variance is never negative.
variance_threshold=-1

features = np.asarray(features, dtype=float)
if features.ndim != 2 or features.shape[0] == 0:
    raise ValueError("features must be a non-empty 2-D array (samples x features)")

col_min = features.min(axis=0)
col_range = features.max(axis=0) - col_min
# A constant column has range 0; dividing by it would produce NaN (or raise)
# and the column would silently vanish. Dividing by 1 instead scales it to
# all zeros, i.e. variance 0, which is then compared against the threshold
# like any other column.
safe_range = np.where(col_range == 0, 1.0, col_range)
scaled = (features - col_min) / safe_range
variances = scaled.var(axis=0)  # ddof=0 -> population variance

# Feat_var maps (feature name, variance) -> original (unscaled) column values.
# Columns containing NaN get a NaN variance, fail the comparison below and
# are therefore left out.
Feat_var = {
    (name, float(var)): features[:, col].tolist()
    for col, (name, var) in enumerate(zip(feature_list, variances))
    if var > variance_threshold
}

# Keys ordered by variance, largest first; ties keep their original order.
Feat_var2 = sorted(Feat_var, key=lambda key: key[1], reverse=True)

feature_list = [name for name, _ in Feat_var2]
# Rows of the intermediate array are features; transpose back to
# (samples x features).
features = np.array([Feat_var[key] for key in Feat_var2]).transpose()

#**************************************************************************************************

#Define random state:
rnd=7

#Splitting of Data
# The split does not depend on n_est or maxd, so it is done once here instead
# of being recomputed identically on every grid point. It is seeded with `rnd`
# so that the split and the forests share one configurable seed.
train_features, test_features, train_labels, test_labels = train_test_split(
    features, labels, test_size = 0.20, random_state = rnd)

# Exhaustive sweep over (number of trees, maximum depth). For every
# combination the hold-out r2 and the mean cross-validation r2 are stored in
# List_r2test / List_cv under the key (n_est, maxd) so they can be ranked
# after the sweep.
for n_est in List_estim:

    for maxd in List_depth:
        print(f'Number of Trees: {n_est}')
        print(f'Maxmimum Depth: {maxd}')
        print(f'Random State: {rnd}')
        print('')

        #Random Forest Regressor
        rf = RandomForestRegressor(
            n_estimators = n_est, random_state = rnd, max_depth = maxd, max_features='log2')
        rf.fit(train_features, train_labels)
        predictions_test = rf.predict(test_features)
        predictions_train = rf.predict(train_features)


        #Calculation of Metrics
        mae_train = metrics.mean_absolute_error(train_labels, predictions_train)
        mse_train = metrics.mean_squared_error(train_labels, predictions_train)
        rmse_train = mse_train**0.5
        r2_train = r2_score(train_labels, predictions_train)

        mae_test = metrics.mean_absolute_error(test_labels, predictions_test)
        mse_test = metrics.mean_squared_error(test_labels, predictions_test)
        rmse_test = mse_test**0.5
        r2_test = r2_score(test_labels, predictions_test)

        # Record the hold-out score for this grid point.
        List_r2test[(n_est, maxd)] = r2_test

        print('MAE (Training Set): ',mae_train)
        print('RMSE (Training Set): ',rmse_train)
        print('r2 score (Training Set): ', r2_train)
        print('')
        print('MAE (Test Set): ',mae_test)
        print('RMSE (Test Set): ',rmse_test)
        print('r2 score (Test Set): ', r2_test)


        #Calculation of Cross Validation Scores
        # 7-fold CV on the training portion only; the test set stays untouched.
        n_scores = cross_val_score(rf, train_features, train_labels, cv=7, scoring='r2')
        n_score = n_scores.mean()

        # Record the mean CV score for this grid point.
        List_cv[(n_est, maxd)] = n_score

        print('')
        print('Cross Validation Values: ',n_scores)
        print('Cross Validation Score: ',n_score)
        print('')
        print('***********************************')
        print('')

In [None]:
# Rank the List_cv entries by their stored score, highest first.
# Left as a bare expression so the notebook displays the ranked list.
sorted(
    List_cv.items(),
    key=lambda entry: entry[1],
    reverse=True,
)