In [1]:
import pandas as pd
import numpy as np

#
import rfr_remote
import rfr_ipynb

In [2]:
def rmse_calc(traindf, testdf, output_type='none'):
    """Build a one-row DataFrame holding the train and test RMSE.

    The reference values are read from the ``'dft_train'`` / ``'dft_test'``
    columns and the predictions from ``'mean_train'`` / ``'mean_test'``; the
    two RMSE values themselves are computed by ``rfr_remote.rmse``.

    Parameters
    ----------
    traindf : pandas.DataFrame
        Must contain ``'dft_train'`` and ``'mean_train'``; also ``'Type'`` or
        ``'Site'`` when the matching ``output_type`` is requested.
    testdf : pandas.DataFrame
        Must contain ``'dft_test'`` and ``'mean_test'``.
    output_type : {'none', 'type', 'site'}, default 'none'
        ``'none'`` returns only the two RMSE columns. ``'type'`` / ``'site'``
        prepend a ``'Type'`` / ``'Site'`` column filled with the first value
        of that column in ``traindf``.

    Returns
    -------
    pandas.DataFrame
        A single row with columns ``[<label>,] 'train_rmse', 'test_rmse'``.

    Raises
    ------
    ValueError
        If ``output_type`` is not one of the supported modes. (Previously this
        surfaced as an ``UnboundLocalError`` only after the RMSE was computed.)
    """
    # Maps each supported mode to the label column it prepends (None = no label).
    label_columns = {'none': None, 'type': 'Type', 'site': 'Site'}
    if output_type not in label_columns:
        raise ValueError(
            "output_type must be one of {}, got {!r}".format(
                sorted(label_columns), output_type))

    train_rmse, test_rmse = rfr_remote.rmse(y_train=traindf['dft_train'],
                                            y_test=testdf['dft_test'],
                                            trainpred=traindf['mean_train'],
                                            testpred=testdf['mean_test'])

    # Insertion order fixes the column order: label first, then the two scores.
    data = {}
    label = label_columns[output_type]
    if label is not None:
        data[label] = [traindf[label].iloc[0]]
    data['train_rmse'] = [train_rmse]
    data['test_rmse'] = [test_rmse]

    return pd.DataFrame(data=data)

In [3]:
def df_type(output_df):
    """Split ``output_df`` into one sub-frame per distinct ``'Type'`` value.

    Returns a dict that maps every value found in the ``'Type'`` column to the
    rows of ``output_df`` carrying that value.
    """
    return {label: subset for label, subset in output_df.groupby('Type')}

In [4]:
def pp_rmse_cal(traindf, testdf, output_type='none'):
    """Compute train/test RMSE overall or broken down by ``'Type'``.

    Parameters
    ----------
    traindf, testdf : pandas.DataFrame
        Frames passed on to ``rmse_calc``; for ``output_type='type'`` both are
        first split on their ``'Type'`` column via ``df_type``.
    output_type : {'none', 'type'}, default 'none'
        ``'none'`` returns the single-row frame produced by ``rmse_calc`` on
        the full inputs. ``'type'`` returns one row per semiconductor type, in
        the fixed order ``'II-VI'``, ``'III-V'``, ``'IV-IV'``.

    Returns
    -------
    pandas.DataFrame
        RMSE table; in ``'type'`` mode the rows are concatenated without
        resetting the index.

    Raises
    ------
    ValueError
        If ``output_type`` is anything other than ``'none'`` or ``'type'``.
        (Previously such calls silently returned ``None``.)
    KeyError
        In ``'type'`` mode, if one of the three expected types is missing from
        either frame.
    """
    if output_type == 'none':
        return rmse_calc(traindf, testdf, output_type='none')

    if output_type == 'type':
        train_type_frames = df_type(traindf)
        test_type_frames = df_type(testdf)

        # Fixed order keeps the output rows stable across runs.
        type_list = [
            rmse_calc(train_type_frames[typ], test_type_frames[typ], output_type='type')
            for typ in ['II-VI', 'III-V', 'IV-IV']
        ]
        return pd.concat(type_list)

    # Fail loudly instead of falling off the end and returning None.
    raise ValueError(
        "output_type must be 'none' or 'type', got {!r}".format(output_type))

In [12]:
# Load inlier_KNN_FE.csv from the 210929_originaldescriptors folder; this table
# is the input to the dHA and dHB predictor cells below.
fe_og_descrip = pd.read_csv('./xiaofeng_lasso/210929_originaldescriptors/inlier_KNN_FE.csv')

In [13]:
# Load inlier_KNN_TL.csv from the 210929_originaldescriptors folder; this table
# is the input to the (+3, +2) ... (-2, -3) predictor cells below.
tl_og_descrip = pd.read_csv('./xiaofeng_lasso/210929_originaldescriptors/inlier_KNN_TL.csv')

# dHA

In [16]:
# dHA: run the 5-fold predictor on the FE descriptor table.
# NOTE(review): `o` presumably selects the target column and `d_start` the first
# descriptor column — confirm against rfr_remote.rfr_pp_predictor.
# NOTE(review): if max_feat is forwarded to scikit-learn's max_features, 'auto'
# is no longer accepted by recent releases — confirm.
dHA_traindf, dHA_testdf = rfr_remote.rfr_pp_predictor(
    fe_og_descrip,
    o=4,
    d_start=6,
    max_depth=10,
    max_feat='auto',
    min_samp_leaf=2,
    min_samples_split=15,
    num_trees=600,
    folds=5,
    label_type=0,
    label_site=2,
)

HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))




In [17]:
# Overall (ungrouped) train/test RMSE for the dHA run; the bare name displays it.
dHA_ogd_rmse = pp_rmse_cal(traindf=dHA_traindf, testdf=dHA_testdf, output_type='none')
dHA_ogd_rmse

Unnamed: 0,train_rmse,test_rmse
0,0.723265,1.114461


# dHB

In [19]:
# dHB: same FE descriptor table and hyper-parameters as the dHA cell, with o=5.
# NOTE(review): `o` presumably selects the target column — confirm against
# rfr_remote.rfr_pp_predictor.
# NOTE(review): if max_feat is forwarded to scikit-learn's max_features, 'auto'
# is no longer accepted by recent releases — confirm.
dHB_traindf, dHB_testdf = rfr_remote.rfr_pp_predictor(
    fe_og_descrip,
    o=5,
    d_start=6,
    max_depth=10,
    max_feat='auto',
    min_samp_leaf=2,
    min_samples_split=15,
    num_trees=600,
    folds=5,
    label_type=0,
    label_site=2,
)

HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))




In [20]:
# Overall (ungrouped) train/test RMSE for the dHB run; the bare name displays it.
dHB_ogd_rmse = pp_rmse_cal(traindf=dHB_traindf, testdf=dHB_testdf, output_type='none')
dHB_ogd_rmse

Unnamed: 0,train_rmse,test_rmse
0,0.788857,1.211009


# (+3, +2)

In [25]:
# (+3, +2): run the 5-fold predictor on the TL descriptor table with o=4.
# NOTE(review): `o` presumably selects the target column and `d_start` the first
# descriptor column — confirm against rfr_remote.rfr_pp_predictor.
# NOTE(review): if max_feat is forwarded to scikit-learn's max_features, 'auto'
# is no longer accepted by recent releases — confirm.
p32_traindf, p32_testdf = rfr_remote.rfr_pp_predictor(
    tl_og_descrip,
    o=4,
    d_start=10,
    max_depth=7,
    max_feat='auto',
    min_samp_leaf=3,
    min_samples_split=3,
    num_trees=450,
    folds=5,
    label_type=0,
    label_site=2,
)

HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))




In [26]:
# Overall (ungrouped) train/test RMSE for the (+3, +2) run; the bare name displays it.
p32_ogd_rmse = pp_rmse_cal(traindf=p32_traindf, testdf=p32_testdf, output_type='none')
p32_ogd_rmse

Unnamed: 0,train_rmse,test_rmse
0,0.230062,0.341668


# (+2, +1)

In [27]:
# (+2, +1): run the 5-fold predictor on the TL descriptor table with o=5.
# NOTE(review): `o` presumably selects the target column and `d_start` the first
# descriptor column — confirm against rfr_remote.rfr_pp_predictor.
# NOTE(review): if max_feat is forwarded to scikit-learn's max_features, 'auto'
# is no longer accepted by recent releases — confirm.
p21_traindf, p21_testdf = rfr_remote.rfr_pp_predictor(
    tl_og_descrip,
    o=5,
    d_start=10,
    max_depth=9,
    max_feat='auto',
    min_samp_leaf=7,
    min_samples_split=2,
    num_trees=600,
    folds=5,
    label_type=0,
    label_site=2,
)

HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))




In [28]:
# Overall (ungrouped) train/test RMSE for the (+2, +1) run; the bare name displays it.
p21_ogd_rmse = pp_rmse_cal(traindf=p21_traindf, testdf=p21_testdf, output_type='none')
p21_ogd_rmse

Unnamed: 0,train_rmse,test_rmse
0,0.292671,0.413341


# (+1, 0)

In [29]:
# (+1, 0): run the 5-fold predictor on the TL descriptor table with o=6.
# NOTE(review): `o` presumably selects the target column and `d_start` the first
# descriptor column — confirm against rfr_remote.rfr_pp_predictor.
# NOTE(review): if max_feat is forwarded to scikit-learn's max_features, 'auto'
# is no longer accepted by recent releases — confirm.
p01_traindf, p01_testdf = rfr_remote.rfr_pp_predictor(
    tl_og_descrip,
    o=6,
    d_start=10,
    max_depth=10,
    max_feat='auto',
    min_samp_leaf=8,
    min_samples_split=12,
    num_trees=150,
    folds=5,
    label_type=0,
    label_site=2,
)

HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))




In [30]:
# Overall (ungrouped) train/test RMSE for the (+1, 0) run; the bare name displays it.
p01_ogd_rmse = pp_rmse_cal(traindf=p01_traindf, testdf=p01_testdf, output_type='none')
p01_ogd_rmse

Unnamed: 0,train_rmse,test_rmse
0,0.287344,0.397241


# (0, -1)

In [31]:
# (0, -1): run the 5-fold predictor on the TL descriptor table with o=7.
# NOTE(review): `o` presumably selects the target column and `d_start` the first
# descriptor column — confirm against rfr_remote.rfr_pp_predictor.
# NOTE(review): if max_feat is forwarded to scikit-learn's max_features, 'auto'
# is no longer accepted by recent releases — confirm.
m01_traindf, m01_testdf = rfr_remote.rfr_pp_predictor(
    tl_og_descrip,
    o=7,
    d_start=10,
    max_depth=8,
    max_feat='auto',
    min_samp_leaf=5,
    min_samples_split=3,
    num_trees=150,
    folds=5,
    label_type=0,
    label_site=2,
)

HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))




In [32]:
# Overall (ungrouped) train/test RMSE for the (0, -1) run; the bare name displays it.
m01_ogd_rmse = pp_rmse_cal(traindf=m01_traindf, testdf=m01_testdf, output_type='none')
m01_ogd_rmse

Unnamed: 0,train_rmse,test_rmse
0,0.231541,0.352672


# (-1, -2)

In [34]:
# (-1, -2): run the 5-fold predictor on the TL descriptor table with o=8.
# NOTE(review): `o` presumably selects the target column and `d_start` the first
# descriptor column — confirm against rfr_remote.rfr_pp_predictor.
# NOTE(review): if max_feat is forwarded to scikit-learn's max_features, 'auto'
# is no longer accepted by recent releases — confirm.
m12_traindf, m12_testdf = rfr_remote.rfr_pp_predictor(
    tl_og_descrip,
    o=8,
    d_start=10,
    max_depth=10,
    max_feat='auto',
    min_samp_leaf=8,
    min_samples_split=12,
    num_trees=750,
    folds=5,
    label_type=0,
    label_site=2,
)

HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))




In [35]:
# Overall (ungrouped) train/test RMSE for the (-1, -2) run; the bare name displays it.
m12_ogd_rmse = pp_rmse_cal(traindf=m12_traindf, testdf=m12_testdf, output_type='none')
m12_ogd_rmse

Unnamed: 0,train_rmse,test_rmse
0,0.234708,0.318384


# (-2, -3)

In [36]:
# (-2, -3): run the 5-fold predictor on the TL descriptor table with o=9.
# This is the only run that uses max_feat='sqrt' instead of 'auto'.
# NOTE(review): `o` presumably selects the target column and `d_start` the first
# descriptor column — confirm against rfr_remote.rfr_pp_predictor.
m23_traindf, m23_testdf = rfr_remote.rfr_pp_predictor(
    tl_og_descrip,
    o=9,
    d_start=10,
    max_depth=10,
    max_feat='sqrt',
    min_samp_leaf=2,
    min_samples_split=14,
    num_trees=450,
    folds=5,
    label_type=0,
    label_site=2,
)

HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))




In [37]:
# Overall (ungrouped) train/test RMSE for the (-2, -3) run; the bare name displays it.
m23_ogd_rmse = pp_rmse_cal(traindf=m23_traindf, testdf=m23_testdf, output_type='none')
m23_ogd_rmse

Unnamed: 0,train_rmse,test_rmse
0,0.185758,0.253446
