In [None]:
# Main libraries
import os
import pandas as pd
import numpy as np

# Visualization libraries
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from plotly.offline import init_notebook_mode, iplot, plot
import plotly as py
init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.express as px
#import plotly.graph_objects as go
#from plotly.subplots import make_subplots
# Classifiers and other relevant libraries
import sklearn
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler, scale
from sklearn.impute import SimpleImputer #, KNNImputer
from scipy.spatial.distance import cdist
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_samples, silhouette_score
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.svm import LinearSVC, SVC
from sklearn.metrics import classification_report, confusion_matrix #, plot_confusion_matrix,
from imblearn.over_sampling import SMOTE, RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline
from sklearn.utils import resample
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegressionCV
from sklearn.preprocessing import QuantileTransformer
from sklearn.metrics import f1_score, accuracy_score, plot_confusion_matrix
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split, RepeatedStratifiedKFold, cross_val_score

In [None]:
# Show up to 200 rows and 50 columns when a DataFrame is rendered
pd.set_option("display.max_rows", 200, "display.max_columns", 50)
#pd.set_option('display.max_colwidth', None)
plt.style.use('bmh')
# create constants
RS=42  # seed passed as random_state to the transformers, models and the train/test split below

In [None]:
print(os.listdir('../input'))


In [None]:
signal = pd.read_csv('../input/uci-semcom/uci-secom.csv')
print(f'There are {signal.shape[0]} rows and {signal.shape[1]} columns\n')
display(signal.head())
display(signal.tail())

In [None]:
# Let's check the data types first
signal.dtypes

In [None]:
# The time stamp is not used in this analysis, so it is removed here.
# `columns=` is required: the positional axis argument of DataFrame.drop was removed in pandas 2.0.
signal = signal.drop(columns='Time')
# Recode the target only (-1 -> 0). A frame-wide replace would also rewrite every
# sensor reading that happens to equal -1.
signal['Pass/Fail'] = signal['Pass/Fail'].replace({-1: 0})
signal = signal.rename(columns={"Pass/Fail": "Fail"})
# Thus, Fail == 1 means the product failed and Fail == 0 means it passed
signal.head()

In [None]:
df=signal.isna().sum()*100/signal.shape[0]
fig = px.line(x=df.index, y=df,title="Percentage of missing values in all the features (data: signal)")
fig.update_xaxes(title_text= 'Features')
fig.update_yaxes(title_text= 'Percentage of Missing values',range=[0,100])
fig.show()

**FLAG 1:** There are features with a large number of missing values (up to 91%), which need to be handled

In [None]:
df=signal.isna().sum()*100/signal.shape[0]
df = df[df>5].sort_values(ascending=False)
fig = px.bar(x=df.index, 
             y = df, 
             title='Percentage of missing values per feature (with >5% NaNs), data="signal"',
             text = round(df,1))
fig.update_xaxes(title_text='Features with more than 5% missing value (sorted)',type='category')
fig.update_yaxes(title_text='Percentage of missing values')
fig.show()

**Remark:** There is a big jump from 17.4% to 45.6%. Generally, features with more than 35% missing data do not offer much value in prediction

In [None]:
df=(signal == 0).sum()*100/signal.shape[0]
fig = px.line(x=df.index, y=df,title="Percentage of zeros in all the features (data: signal)")
fig.update_xaxes(title_text= 'Features')
fig.update_yaxes(title_text= 'Percentage of zeros',range=[0,100])
fig.show()

**FLAG 2:** Large number of zeros are present. Many features have only 1 value, i.e. 0 throughout

In [None]:
df = pd.cut(signal.var().round(2),[-0.1,0,0.1,0.2,1,10,50,100,500,1000,float('inf')]).value_counts().sort_index()
df.index = df.index.map(str)

fig = px.bar(x=df.index, y=df,title="variance (rounded off to 2 decimal places) vs number of features (data: signal)", text = df)
fig.update_xaxes(title='variance intervals')
fig.update_yaxes(title='Number of features')
fig.show()

**FLAG 3:** More than 250 features have extremely low variance (<0.1), thus having minimal contribution in the output

## 1.2 Data Modification: 2: drop features with high missing values and low variance

In [None]:
# Collect features with missing values more than 30%
df = signal.isna().sum()*100/signal.shape[0]
missing_features = df[df>30].index.tolist()

# Collect features whose variance (rounded to 2 decimals) is less than or equal to 0.1; the target is excluded
df = signal.drop(columns='Fail').var().round(2)
low_var_features = df[df<=0.1].index.tolist()

# Combine both lists and remove them from the main dataset.
# `columns=` is used because the positional axis argument of DataFrame.drop was removed in pandas 2.0.
signal2 = signal.drop(columns=np.unique(low_var_features + missing_features).tolist())
print(f'There are {signal2.shape[0]} rows and {signal2.shape[1]} columns\n')
print(f'Features left: {round(signal2.shape[1]*100/signal.shape[1],2)}%\n')
signal2.head(10)

In [None]:
# VIF per feature = absolute diagonal of the inverse correlation matrix (target excluded).
# `columns=` replaces the positional axis argument, which pandas 2.0 no longer accepts.
df = signal2.drop(columns='Fail')
vif = pd.Series(np.linalg.inv(df.corr().values).diagonal(),index=df.columns,
          name='VIF').abs().sort_values(ascending=False).round(2)
# Bucket the VIF values and count the features per bucket
df = pd.cut(vif.round(1),[0,1,10,50,100,500,1000,float('inf')]).value_counts().sort_index()
df.index = df.index.map(str)  # interval labels must be strings for plotly

fig = px.bar(x=df.index, y=df,title="vif (absolute, rounded off to 1 decimal place) vs Number of features (data: signal2)", text = df)
fig.update_xaxes(title='vif intervals')
fig.update_yaxes(title='Number of features')
fig.show()

**FLAG 4:** There are several highly multicollinear (high vif value) features. Generally vif>10 is considered as high. Let's remove these features as well

In [None]:
# Objective: To keep removing the highest vif feature one-by-one, until the highest vif is less than the limit passed
def capture_vif(df,limit):
    """Iteratively drop the column with the largest VIF until none exceeds ``limit``.

    The VIF of each column is taken as the absolute diagonal entry of the
    inverse of ``df.corr()``, rounded to 2 decimals. Exactly one column is
    removed per iteration and the VIFs are recomputed on the remaining ones.

    Parameters
    ----------
    df : pandas.DataFrame
        Columns to screen. The frame passed in is not modified.
    limit : float
        Largest (rounded) VIF tolerated in the returned frame.

    Returns
    -------
    pandas.DataFrame
        ``df`` without the columns that were removed.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the correlation matrix cannot be inverted.
    """
    while True:
        vif = pd.Series(np.linalg.inv(df.corr().values).diagonal(), index=df.columns,
                        name='VIF').abs().sort_values(ascending=False).round(2)
        if not vif.max() > limit:
            return df
        # Sorted in descending order, so the first label carries the maximum.
        # `columns=` is required: the positional axis argument was removed in pandas 2.0.
        df = df.drop(columns=vif.index[0])

## 1.3 Data Modification: 3: drop features with high multicollinearity

In [None]:
# Let's remove features with vif>10
# NOTE(review): signal2 still contains the 'Fail' target column, so the target takes part in the
# correlation matrix used for the VIFs here — confirm this is intended.
signal3 = capture_vif(signal2,10)
print(f'There are {signal3.shape[0]} rows and {signal3.shape[1]} columns\n')
print(f'Overall Features left: {round(signal3.shape[1]*100/signal.shape[1],2)}%\n')
signal3.head(7)

In [None]:
df=((signal3 == 0).sum() + signal3.isna().sum())*100/signal3.shape[0]
fig = go.Figure()
fig.add_trace(go.Scatter(x=df.index, y=df,mode='lines'))#,name='markers'
fig.layout = dict(title = 'Percentage of zeros + NA in all the features (data: signal3)',
              xaxis= dict(title= 'Features'),
                  yaxis= dict(title= 'Percentage of zeros + NA',range=[0,100]))
iplot(fig)

In [None]:
# Number of distinct values per feature (target excluded); keep those with at most 20.
# `columns=` replaces the positional axis argument removed in pandas 2.0.
df = signal3.drop(columns='Fail').nunique()
Drop = df[df<=20]
Drop

These two are the same features, which have high zeros, as shown in the previous plotly graph

In [None]:
x = Drop.index[0]
print('Pie plot (value_count) for feature: '+x)
# Plot straight from the value_counts Series: the column names produced by
# value_counts().reset_index() changed in pandas 2.0, so vc['index'] no longer exists there.
vc = signal3[x].value_counts()
fig = go.Figure(data=[go.Pie(labels=vc.index, values=vc.values)])
iplot(fig)

In [None]:
x = Drop.index[1]
print('Pie plot (value_count) plot for feature: '+x)
# Plot straight from the value_counts Series: the column names produced by
# value_counts().reset_index() changed in pandas 2.0, so vc['index'] no longer exists there.
vc = signal3[x].value_counts()
fig = go.Figure(data=[go.Pie(labels=vc.index, values=vc.values)])
iplot(fig)

**FLAG 5:** These two features offer no value in terms of predicting target column.  

Let's also check if any other features is dominated by any value other than zero

In [None]:
# Share (in %) of the most frequent value in each column.
# Taking the maximum count per column avoids building one huge frame indexed by every
# distinct value in the data, and avoids the deprecated top-level pd.value_counts.
df = signal3.apply(lambda col: col.value_counts().max())*100/signal3.shape[0]

fig = go.Figure()
fig.add_trace(go.Scatter(x=df.index, y=df,mode='lines'))#,name='markers'
fig.layout = dict(title = 'Frequency of most frequent element (percentage) in all the features (data: after 3rd modification)',
              xaxis= dict(title= 'Features'),
                  yaxis= dict(title= 'frequency in percentage',range=[0,100]))
iplot(fig)

Thus, other than these two, no other feature is heavily dominated by a single value

## 1.4 Data Modification: 4: drop 2 features which are not adding any value (Flag:5)

In [None]:
# Remove the low-cardinality features collected in `Drop`.
# `columns=` replaces the positional axis argument removed in pandas 2.0.
signal4 = signal3.drop(columns=Drop.index)
print(f'There are {signal4.shape[0]} rows and {signal4.shape[1]} columns\n')
print(f'Overall Features left: {round(signal4.shape[1]*100/signal.shape[1],2)}%\n')
signal4.head()

In [None]:
# Skewness of every feature (target excluded); `columns=` replaces the positional axis removed in pandas 2.0
df = signal4.drop(columns='Fail').skew()
fig = go.Figure()
fig.add_trace(go.Scatter(x=df.index, y=df,mode='lines'))#,name='markers'
fig.layout = dict(title='skewness for each feature (data:after 4th modification)',
              xaxis= dict(title= 'features'),
                  yaxis= dict(title= 'skewness'))
iplot(fig)

In [None]:
# `columns=` replaces the positional axis argument removed in pandas 2.0
df = signal4.drop(columns='Fail')
# Bucket the (rounded) skewness values and count the features per bucket
df = pd.cut(df.skew().round(1),[float('-inf'),-1,0,1,10,float('inf')]).value_counts().sort_index()
df.index = df.index.map(str)  # interval labels must be strings for plotly

fig = go.Figure()
fig.add_trace(go.Bar(x=df.index, y=df,text = df,textposition='auto'))
fig.layout = dict(title='skewness (rounded off to 1 decimal) vs number of features (data:after 4th modification)',
              xaxis= dict(title= 'skewness intervals'),
                  yaxis= dict(title= 'Number of features'))
iplot(fig)

**FLAG 6:** Generally, skewness of more than +1 or less than -1 is considered high. In this case, skewness is extremely high, i.e. the distributions of many features are highly non-normal and are expected to have extreme outliers, which could affect the prediction accuracy of many classifiers

In [None]:
print('Top 5 features with highest positive skewness')
# `columns=` replaces the positional axis argument removed in pandas 2.0
df = signal4.drop(columns='Fail').skew().sort_values(ascending=False)
df[:5]

In [None]:
# `columns=` replaces the positional axis argument removed in pandas 2.0
df = signal4.drop(columns='Fail').skew().sort_values(ascending=False)
x = df.index[0]
print('Distribution of feature with highest skewness: '+x)
# Only the last expression of a cell is rendered, so the counts must be displayed explicitly
display(signal4[x].value_counts().head(10))
signal4[x].hist(bins=100, figsize=(15,3))

Thus, these features have high outliers

In [None]:
# return percentage of outliers for each numerical column
def IQR_outliers(data,limit=1.5):
    """Percentage of rows flagged as IQR outliers, for each numeric column.

    A value is flagged when it lies below ``Q1 - limit*IQR`` or above
    ``Q3 + limit*IQR`` of its own column. Missing values are never flagged,
    but they still count in the denominator (the total number of rows).

    Parameters
    ----------
    data : pandas.DataFrame
        Input frame; non-numeric columns are ignored.
    limit : float, default 1.5
        Multiplier applied to the inter-quartile range.

    Returns
    -------
    pandas.Series
        Percentage of outliers, indexed by numeric column name.
    """
    # Restrict to numeric columns *before* taking quantiles, so that a frame holding
    # text/object columns does not make DataFrame.quantile fail.
    numeric = data.select_dtypes(include=np.number)
    q1 = numeric.quantile(0.25)
    q3 = numeric.quantile(0.75)
    iqr = q3 - q1
    flagged = (numeric < (q1 - limit*iqr)) | (numeric > (q3 + limit*iqr))
    return flagged.sum()*100/data.shape[0]

In [None]:
# Features only; `columns=` replaces the positional axis argument removed in pandas 2.0
df = signal4.drop(columns=['Fail']).copy()
outliers = IQR_outliers(df)

trace1 = go.Scatter(x=outliers.index, y=outliers,mode='lines',
                    name='Outliers Before transformation')
data = [trace1]
layout = go.Layout(xaxis = dict(title= 'Features'),
                   yaxis = dict(title= 'Percentage of IQR outliers'),
                   title='Percentage of IQR outliers in all the features (data= signal4)')
fig = go.Figure(data=data, layout=layout)
iplot(fig)

There are too many IQR outliers to remove. If they were removed, the nature of the data might change, given the small size of the 'Fail' class. Thus it might be better to use a different strategy.

Let's check their box plots as well to get a better understanding

In [None]:
skw = signal4.skew().sort_values(ascending=False)
# Features with |skewness| > 1, most positively skewed first; the target column is filtered out
# explicitly instead of relying on a bare `except` around list.remove.
skewed_features = [col for col in skw[(skw>1) | (skw<-1)].index.tolist() if col != 'Fail']

df = signal4[skewed_features].copy()
df = df-df.mean()  # centre every feature at 0 so the animation frames share a comparable axis
# `axis=1` must be passed by keyword: positional arguments other than `objs` were removed from pd.concat in pandas 2.0
df_melt = pd.concat([df,signal4['Fail']],axis=1).melt(id_vars=['Fail'], value_vars=df.columns, var_name='signal_name',
                                                 value_name='signal_value', ignore_index=False)

fig = px.box(data_frame=df_melt, x='signal_value', color='Fail', animation_frame = 'signal_name',
      title = 'Box plot of all features with skewness greater than +1 or less than -1 (data: signal4)')
fig.update_layout(autosize=True)
fig.show()

Thus, some features have extreme outliers, some have distribution closer to uniform distribution than normal distribution, some have clusters.

In [None]:
# Let's plot data with all centered columns
# (`columns=` / `axis=` keywords replace the positional axis arguments removed in pandas 2.0)
df = signal4.drop(columns=['Fail']).copy()
df = df-df.mean()
# Long format: one row per (sample, feature) pair; the original row index is kept for the x axis
signal4_melt = pd.melt(pd.concat([df,signal4['Fail']],axis=1),
                        id_vars=['Fail'],
                        value_vars=df.columns,
                        var_name='signal_name',
                        value_name='signal_value',
                        ignore_index=False)
fig = px.line(signal4_melt,
              x=signal4_melt.index,
              y='signal_value',
              color='Fail',
              labels={'y':'signal_value'},
              animation_frame='signal_name',
             title='Visualisation of all the signals, with all features centered at 0 (data=signal4)')
limit = 2000  # fixed y-range so that the animation frames are comparable
fig.update_layout(yaxis_range=[-1*limit,limit])
fig.show()

In these plots, we can clearly visualise the outliers and the difference in values amongst signals.  
(Use 'Autoscale' in case the signal values are too small to visualise.)

Rather than dropping or manipulating outliers from the data, it might be better to use transformation (eg: quantile transformation) to increase normality in the data, and thereby reducing the number of IQR outliers and skewness without any data loss

In [None]:
# Features only; `columns=` replaces the positional axis argument removed in pandas 2.0
df = signal4.drop(columns=['Fail']).copy()
quantile_transformer = QuantileTransformer(output_distribution='normal', random_state=RS)
df1 = pd.DataFrame(quantile_transformer.fit_transform(df),columns=df.columns)
# IQR outlier percentages before (df) and after (df1) the transformation
outliers = IQR_outliers(df)
outliers1=IQR_outliers(df1)

# From here on df / df1 hold the per-feature skewness, not the data itself
df = df.skew()
df1 = df1.skew()

fig = go.Figure()
fig.add_trace(go.Scatter(x=df.index, y=df,mode='lines',name='Skewness before transformation'))
fig.add_trace(go.Scatter(x=df1.index, y=df1,mode='lines',name='Skewness after transformation'))
fig.layout = dict(title='skewness for each feature (data:after 4th modification)',
              xaxis= dict(title= 'features'),yaxis= dict(title= 'skewness'))
iplot(fig)

fig1 = go.Figure()
fig1.add_trace(go.Scatter(x=outliers.index, y=outliers,mode='lines',name='Outliers before transformation'))
fig1.add_trace(go.Scatter(x=outliers1.index, y=outliers1,mode='lines',name='Outliers after transformation'))
fig1.layout = dict(title='Percentage of IQR outliers for each feature (data:after 4th modification)',
              xaxis= dict(title= 'features'), yaxis= dict(title= 'percentage of IQR outliers'))
iplot(fig1)

Skewness has reduced greatly (even though there are still some features with skewness less than -1) and number of features with IQR outliers has also reduced greatly after the transformation. Thus, the overall impact of outliers on classifier accuracy will also reduce and without manipulating or removing outliers

## 1.5 Data Manipulation: 5: imputation (with 0) on main data

In [None]:
# Here, imputation is done with replacing NaN with zeros, since missing data can be considered as zero output from the machine.
# Other than this, KNNImputation can also be used, which can give good results
# fillna(0) is used instead of replace(np.NaN, 0): the `np.NaN` alias was removed in NumPy 2.0.
signal5 = signal4.fillna(0)
# `columns=` replaces the positional axis argument removed in pandas 2.0
signal_X = signal5.drop(columns='Fail')
# Take the target from the same frame as the features so X and Y always share one lineage
Y = signal5['Fail']

print('Actions performed: \n1. Imputation on main data (signal)\n2. X(signal_X) and Y(target) separated)')
print(f'signal dataframe: {signal5.shape[0]} rows and {signal5.shape[1]} columns\n')
print(f'signal_X dataframe {signal_X.shape[0]} rows and {signal_X.shape[1]} columns\n')

In [None]:
Y.value_counts()

In [None]:
# Plot straight from the value_counts Series: the column names produced by
# value_counts().reset_index() changed in pandas 2.0, so 'index' / 'Fail' no longer exist there.
target_counts = Y.value_counts()
fig = px.pie(values=target_counts.values, names=target_counts.index, title='Pie plot for target column')
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.show()

Thus, the target column is highly imbalanced.  
Therefore, it might be better to use 'f1_score' as metric and imblearn and class_balance methods for classification

## Model Building

In [None]:
def display_sbs(*args):
    """Render the given DataFrames side by side in the notebook output.

    Parameters
    ----------
    *args : pandas.DataFrame
        Frames to show, left to right. Only objects with ``to_html`` work.

    Returns
    -------
    None
    """
    from IPython.display import display_html
    html_str = ''.join(df.to_html() for df in args)
    # Only opening tags get the inline style. Replacing the bare word 'table' would also
    # rewrite every closing '</table>' tag and any cell text containing "table".
    display_html(html_str.replace('<table', '<table style="display:inline"'), raw=True)
    return

In [None]:
# find number of features required to capture a given variance (default: 95)
def find_pca(data,var=95, verbosity=0):
    """Find the smallest number of principal components explaining ``var`` % of the variance.

    A single full decomposition provides the cumulative explained-variance
    ratios for every candidate count, so PCA is fitted twice in total (full
    fit + final fit) instead of once per candidate count.

    Parameters
    ----------
    data : array-like
        Data handed to ``PCA.fit`` (scale it beforehand if required).
    var : float, default 95
        Target cumulative explained variance, in percent.
    verbosity : int, default 0
        ``1`` shows an area plot of the cumulative explained variance; any
        other value prints the captured variance instead. Nothing is shown
        when the target is never reached.

    Returns
    -------
    tuple
        ``(i, pca)``: the number of components and a ``PCA`` fitted on
        ``data`` with that many components. If the target is never reached,
        all available components are used.
    """
    var/=100
    full_pca = PCA(random_state=RS, whiten=True).fit(data)
    cumulative = np.cumsum(full_pca.explained_variance_ratio_)
    hits = np.flatnonzero(cumulative >= var)
    reached = hits.size > 0
    # First position at which the target is met (+1 to turn the index into a count)
    i = int(hits[0]) + 1 if reached else cumulative.shape[0]

    pca = PCA(n_components=i, random_state=RS, whiten=True)
    pca.fit(data)
    if reached:
        evr = np.cumsum(pca.explained_variance_ratio_)
        if verbosity == 1:
            fig = px.area(
                            x=range(1, evr.shape[0] + 1),
                            y=evr,
                            labels={"x": "# Components", "y": "Explained Variance"}
                        )
            fig.show()
        else:
            print("Overall variances captured: ",evr[-1])
    return i, pca

In [None]:
# Module-level score tables: one column is added per model by save_scores()
cv_scores = pd.DataFrame(index=['mean', 'std'])
scores = pd.DataFrame(index=['train', 'test', 'CV'])


def save_scores(name, cv, test, train):
    """Record the scores of one model in the ``cv_scores`` and ``scores`` tables.

    Parameters
    ----------
    name : str
        Column label under which the model is stored.
    cv : sequence
        ``cv[0]`` is the mean cross-validation score, ``cv[1]`` its standard deviation.
    test : float
        Score on the test set.
    train : float
        Score on the training set.
    """
    cv_mean, cv_std = cv[0], cv[1]
    # The frames are updated in place, so no `global` declaration is needed
    for row, value in (('mean', cv_mean), ('std', cv_std)):
        cv_scores.loc[row, name] = value
    for row, value in (('train', train), ('test', test), ('CV', cv_mean)):
        scores.loc[row, name] = value
    return

In [None]:
# Find number of features required for capturing 95% variance
p95, _= find_pca(scale(signal_X),verbosity=1)
print('Features required: ',p95, '\ni.e. Percentage of features: ',round(p95*100/signal_X.shape[1],2),'%')

In [None]:
# Find number of features required for capturing 99% variance
p99, _= find_pca(scale(signal_X),99,verbosity=1)
print('Features required: ',p99, '\ni.e. Percentage of features: ',round(p99*100/signal_X.shape[1],2),'%')

i.e. we are able to capture 95% of the variance with 72.5% (103) features  
and 99% of the variance with 88%(125) features

In [None]:
# train-test split
X_train, X_test, y_train, y_test = train_test_split(signal_X, Y, test_size=0.25, stratify=Y, random_state=RS)

In [None]:
# Number of components comes from the 95%-variance search above (p95) rather than a hard-coded
# count, so it follows any change in the upstream feature selection.
Pca = PCA(n_components=p95, random_state=RS, whiten=True) # to capture most important features with 95% variance
Smot = SMOTE(random_state=RS) # to handle imbalanced classes
Trans = QuantileTransformer(output_distribution='normal', random_state=RS) # Transformation to reduce outliers
MinMax = MinMaxScaler() # Scaling for pca or classifier
Scaler = RobustScaler() # Scaling for pca or classifier
# NOTE(review): these instances are placed in several pipelines below; fitting a later pipeline
# refits the shared objects, so previously fitted pipelines should not be reused afterwards.

In [None]:
#Objective: To show the standard scores required and also save them in a dataframe
def give_scores(name,model,X_train, X_test, y_train, y_test):
    """Print, plot and store the evaluation of an already fitted ``model``.

    Shows the 5-fold cross-validated f1 score (computed on the training split
    only), train/test accuracy, both classification reports side by side and
    the test-set confusion matrix. The CV mean/std and the f1 score of class
    ``'1'`` on train and test are stored through ``save_scores``.

    Parameters
    ----------
    name : str
        Label under which the scores are saved.
    model : estimator
        Fitted classifier/pipeline exposing ``predict``.
    X_train, X_test : pandas.DataFrame
        Feature splits.
    y_train, y_test : pandas.Series
        Target splits.
    """
    # A first cross_val_score over train+test used to run here; its result was overwritten
    # immediately, so it only cost five extra fits and has been removed.
    cvs = cross_val_score(model,X_train,y_train,scoring='f1',cv=5)
    # Predict once per split and reuse the result for accuracy and report
    train_pred = model.predict(X_train)
    test_pred = model.predict(X_test)

    print('CV score: ', cvs.mean().round(4))
    print('\nTrain Accuracy scores: ',round(accuracy_score(y_train, train_pred),4))
    print('\nTest Accuracy scores: ',round(accuracy_score(y_test, test_pred),4))
    print('\nClassification reports of train and test set, respectively '+name)
    train_report = pd.DataFrame(classification_report(y_train, train_pred,output_dict=True)).T.round(3)
    test_report = pd.DataFrame(classification_report(y_test, test_pred,output_dict=True)).T.round(3)
    display_sbs(train_report,test_report)

    # NOTE(review): plot_confusion_matrix was removed in scikit-learn 1.2;
    # ConfusionMatrixDisplay.from_estimator is its replacement on newer versions.
    plot_confusion_matrix(model, X_test, y_test,cmap=plt.cm.Blues)
                                 #display_labels=class_names,

    save_scores(name,
                [cvs.mean().round(4), cvs.std().round(4)],
                test_report.loc['1','f1-score'],
               train_report.loc['1','f1-score'])
    return

In [None]:
%%time
# 1.1. SVM Classifier With PCA
svc = SVC(C = 40,gamma = 0.0001, kernel='rbf',random_state=RS)
SVM_pipe1 = Pipeline([('trans',Trans),('scaler',Scaler),('pca',Pca),('smt', Smot), ('svc', svc)])
SVM_pipe1.fit(X_train,y_train)

give_scores('svc-pca',SVM_pipe1,X_train, X_test, y_train, y_test) # show the scores and save them for plotting

In [None]:
%%time
# 1.2. SVM Classifier Without PCA
svc = SVC(C = 40,gamma = 0.0061, kernel='rbf',random_state=RS)
SVM_pipe2 = Pipeline([('trans',Trans),('minmax',MinMax),('smt', Smot), ('svc', svc)])
SVM_pipe2.fit(X_train,y_train)

give_scores('svc',SVM_pipe2,X_train, X_test, y_train, y_test) # show the scores and save them for plotting

In [None]:
%%time
##### 2.1. xgboost Classifier With PCA
# `early_stopping_rounds` is not passed: the pipeline supplies no eval_set, so early stopping can
# never run, and xgboost >= 1.6 raises an error in fit() in that situation.
# `n_jobs` replaces `nthreads`, which is not a recognised XGBClassifier parameter.
xgb_model = XGBClassifier(min_child_weight=2, max_depth=10,learning_rate=0.03, gamma=3,
                    eval_metric = 'auc', verbosity = 0, random_state=RS,n_jobs=-1)
##### since xgboost is robust to outliers, transformation is not required
xgb_pipe = Pipeline([('scaler',Scaler),('pca',Pca),('smt', Smot),('xgb', xgb_model)])
xgb_pipe.fit(X_train,y_train)

give_scores('xgb-pca',xgb_pipe,X_train, X_test, y_train, y_test) # show the scores and save them for plotting

In [None]:
%%time
##### 2.2. xgboost Classifier Without pca
# `early_stopping_rounds` is not passed: the pipeline supplies no eval_set, so early stopping can
# never run, and xgboost >= 1.6 raises an error in fit() in that situation.
# `n_jobs` replaces `nthreads`, which is not a recognised XGBClassifier parameter.
xgb = XGBClassifier(min_child_weight=2, max_depth=6,learning_rate=0.05, gamma=15,
                    eval_metric = 'auc', verbosity = 0, random_state=RS,n_jobs=-1)
xgb_pipe2 = Pipeline([('smt', Smot),('xgb', xgb)])
xgb_pipe2.fit(X_train,y_train)

give_scores('xgb',xgb_pipe2,X_train, X_test, y_train, y_test) # show the scores and save them for plotting

In [None]:
%%time
# 3.1 Logistic regression with pca
# with class_weight=balanced, smote doesn't have much impact, thus removed
LR_cv = LogisticRegressionCV( scoring = 'f1', random_state=RS, class_weight='balanced',
                              verbose=0, n_jobs=-1, max_iter=10000)
LR_model1 = Pipeline([('trans',Trans),('scaler',Scaler),('pca',Pca),('LR', LR_cv)])
LR_model1.fit(X_train,y_train)

give_scores('LR-pca',LR_model1,X_train, X_test, y_train, y_test) # show the scores and save them for plotting

In [None]:
%%time
# 3.2 Logistic regression without pca and scaling
LR_cv = LogisticRegressionCV( scoring = 'f1', random_state=RS, class_weight='balanced',
                              verbose=0, n_jobs=-1, max_iter=10000)
LR_model2 = Pipeline([('trans',Trans),('LR', LR_cv)])
LR_model2.fit(X_train,y_train)

give_scores('LR',LR_model2,X_train, X_test, y_train, y_test) # show the scores and save them for plotting

Overall performance  
1. **Best f1 score (test, Fail=1):** xgb without pca: 0.34
1. **Best cv score:** Logistic regression without PCA: 0.2021
1. **Best precision (test set, Fail=1)**: xgb with pca:  0.385
1. **Best Recall (test set, Fail=1)**: Logistic regression with pca:  0.692

In [None]:
# Objective: To plot the scores from different models saved in the 'scores' dataframe
def plot_scores(df=None):
    """Plot the saved model scores.

    Draws a horizontal grouped bar chart (one group per column of ``df``, one
    bar per row) with the value printed next to each bar, followed by a plotly
    line chart of the mean CV score per model with the standard deviation from
    ``cv_scores`` as error bars.

    Parameters
    ----------
    df : pandas.DataFrame, optional
        Scores to plot, models in columns and score types in rows.
        Defaults to the module-level ``scores`` frame.
    """
    # `is None` is required: `df == None` compares element-wise when a DataFrame is passed
    # and its truth value is ambiguous, which raises ValueError.
    if df is None:
        df = scores
    fig, ax = plt.subplots(figsize=(10,15))
    colors = ['orange','green', 'blue', 'red', 'yellow']
    rectangles=[]
    N =len(df.columns)
    ind = np.arange(N)
    xlabels = df.columns
    width = 0.2       # the width of the bars
    ax.set_yticks(ind + width)
    ax.set_yticklabels(xlabels,fontsize=10)
    ax.set_ylabel("Models", fontsize=12)
    ax.set_xlabel("scores", fontsize=12)
    ax.set_title('scores with different Models')
    def labelvalues(rects):
        # Write each bar's value (as a percentage with 2 decimals) at the end of the bar
        for rect in rects:
            height = rect.get_width()*100
            ax.text(height/100, rect.get_y() + rect.get_height()/2., '{0:1.2f}'.format(height),va='center', ha='left')
    for i in range(df.shape[0]):
        # Cycle through the palette so frames with more rows than colours still plot
        rectangles.append(ax.barh(ind+width*i, df.iloc[i,:], width, color=colors[i % len(colors)]))
        labelvalues(rectangles[i])
    # Legend entries are reversed to match the top-to-bottom order of the bars within a group
    rect_leg = [item[0] for item in rectangles]
    rect_leg.reverse()
    scor = df.index.tolist()
    scor.reverse()
    ax.legend((rect_leg),(scor),bbox_to_anchor=(1.13, 1.01))
    plt.show()

    fig = go.Figure(data=go.Scatter(
        x=cv_scores.columns.tolist(),y=cv_scores.loc['mean'],
        error_y=dict(type='data', array=cv_scores.loc['std'], visible=True)))
    fig.update_layout(title='CV scores with stadard deviation for different models',
                      yaxis_zeroline=False, xaxis_zeroline=False)
    fig.show()
    return

In [None]:
plot_scores()

Thus, other than xgb-pca, all other algorithms have almost similar mean cv score.

## END