# Mice Protein Expression Classification

The dataset contains expression levels of 77 proteins measured in the cerebral cortex of 8 classes of control and Down syndrome mice exposed to context fear conditioning, a task used to assess associative learning.

We will explore the data and train models for classification.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

warnings.filterwarnings('ignore')

In [None]:
# Load the raw protein-expression table; the path is relative to the notebook's working directory
df = pd.read_csv('../input/mice-protein-expression/Data_Cortex_Nuclear.csv')
# Preview the first two rows to confirm the file parsed as expected
df.head(2)

In [None]:
# Tally the missing cells and draw the boolean null-mask so the gaps are visible
null_mask = df.isnull()
print('total number of missing values:', null_mask.sum().sum())
sns.heatmap(null_mask);

From the above heatmap we see there are too many missing values for us to simply delete the rows that contain them. We do not want to remove the columns that contain numerous missing values, because there are five such columns, and they may contain important information.

There are two options. One is to perform correlation analysis and see if the columns with many (>100) missing values correlate highly with columns that are complete, in which case we can delete them. Second, we can impute the missing values, which is what we will do here, but before we do that let's do some exploratory analysis.

## Exploratory Data Analysis

In [None]:
# Summary statistics of the columns pandas treats as numeric
df.describe()

In [None]:
#correlation heatmap
# Only the numeric columns are correlated: the frame also carries string
# columns (e.g. 'class', 'Genotype'), and DataFrame.corr() raises on those
# in pandas >= 2.0 instead of silently skipping them.
plt.figure(figsize=(10,6));
sns.heatmap(df.select_dtypes(include='number').corr());

In [None]:
#clustermap
# Hierarchically clustered view of the same correlation matrix. String
# columns are excluded first because DataFrame.corr() raises on them in
# pandas >= 2.0.
sns.clustermap(df.select_dtypes(include='number').corr(),cmap='vlag');

Categorical Features

In [None]:
# function to separate categorical from numeric features
def feature_types(df):
    """Split the columns of ``df`` into categorical and numeric column names.

    A column is numeric when pandas reports a numeric dtype for it
    (integers, floats, booleans). Every other dtype -- object/strings,
    ``category``, datetimes -- is reported as categorical. Testing only for
    the ``object`` dtype would misfile those other non-numeric dtypes as
    numeric.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are to be classified.

    Returns
    -------
    (list, list)
        ``(categ, numer)``: categorical and numeric column names, each in
        the frame's column order.
    """
    categ = []
    numer = []
    for c in df.columns:
        if pd.api.types.is_numeric_dtype(df[c].dtype):
            numer.append(c)
        else:
            categ.append(c)
    return categ, numer

In [None]:
# Report the categorical columns and the value distribution of the inner ones
categ, numer = feature_types(df)
print('categorical features:', categ)
print('')

# the slice leaves out the first and the last categorical column
for col in categ[1:-1]:
    counts = df[col].value_counts()
    print('value counts of feature: {}'.format(col))
    print(counts)
    print('        ---------------')
    print('')

In [None]:
# Summarise the target column: how many classes, their names, and their balance
class_col = df['class']
print('number of classes: ', class_col.nunique())
print('class names: ', class_col.unique())

# left panel: absolute counts per class; right panel: share of each class
fig, (ax_count, ax_pie) = plt.subplots(1, 2, figsize=(12,4))
sns.countplot(x='class', data=df, color='Grey', ax=ax_count);
ax_count.set_title('class_count');
class_col.value_counts().plot(kind='pie', autopct='%1.1f%%', ax=ax_pie);
ax_pie.set_title('class_proportions');

From the countplots below, we see that the classes are derived from the values of the categorical features. For example, class 'c-CS-m' means that the Genotype feature has value 'control', the Behavior feature has value 'C/S', and the Treatment feature has value 'memantine'. Therefore these features are encoded into the classes and we will not use them for model training.

Here are the countplots

In [None]:
#visualize classes for categorical features
# One count plot per categorical feature: bars are the classes, hue is the
# value of that feature within the class.
for feature in ['Genotype', 'Treatment', 'Behavior']:
    plt.figure(figsize=(7,3))
    sns.countplot(x='class', data=df, hue=feature, palette=['firebrick','tomato']);
    plt.legend(loc=(1.01,0.8));
    plt.title('Classes for feature: {}'.format(feature));
    # tight_layout() only accepts keyword arguments on current matplotlib, so
    # the former positional `True` raises TypeError; use the default padding.
    plt.tight_layout()

Before we impute the data and train models for classification, let's visualize the non-null entries to see what they look like.

First, we will min-max scale them so that they take values from 0 to 1, and then display the dataset as an image. Next, we will plot a PCA scatter. Finally, we will select the most important features and visualize their distributions.

In [None]:
# temporarily remove null-values and min-max scale data
from sklearn.preprocessing import MinMaxScaler

# keep the columns with at least 901 non-null entries, then only the rows
# that are fully observed in the remaining columns
nonna = df.dropna(axis=1, thresh=901).dropna(axis=0, how='any')
categ, numeri = feature_types(nonna)

# rescale every numeric column of the complete-case frame to the [0, 1] range
scaler = MinMaxScaler()
scaled = scaler.fit_transform(nonna.loc[:, numeri].values)

In [None]:
#display dataset as image
# Plot the min-max scaled matrix (rows: samples, columns: proteins). With the
# raw values, the few proteins with large ranges would dominate the colour
# scale and hide the rest.
plt.figure(figsize=(20,10));
sns.heatmap(scaled);

#### PCA visualization

In [None]:
from sklearn.decomposition import PCA

# PCA with no component limit, fitted on the scaled complete-case matrix;
# `dec` holds the samples projected onto the principal components
pca = PCA()
dec = pca.fit_transform(scaled)

In [None]:
from mpl_toolkits import mplot3d
from sklearn.preprocessing import OrdinalEncoder

#2d scatterplot: first two principal components, coloured by class name
sns.scatterplot(x=dec[:,0], y=dec[:,1], hue=nonna['class']);
plt.title('PCA__ 2 components')
plt.legend(loc=(1.01,0.35));

#3d scatterplot: integer class codes drive the colour map
ordinal = OrdinalEncoder()
labels = ordinal.fit_transform(nonna['class'].values.reshape(-1, 1)).squeeze().astype('int')
plt.figure(figsize=(10,6));
ax = plt.axes(projection='3d');
ax.scatter(dec[:,0], dec[:,1], dec[:,2], cmap='winter', c=labels);
plt.title('PCA__ 3 components');

# variance share of each component and the 1-based component numbers
evr = pca.explained_variance_ratio_
component_ids = np.arange(evr.shape[0]) + 1

#explained variance
plt.figure(figsize=(15,3));
sns.barplot(x=component_ids, y=evr, color='Grey');
plt.title('Explained Variance');

#cumulative explained variance
plt.figure(figsize=(5,3));
sns.lineplot(x=component_ids, y=evr.cumsum());
plt.title('Cumulative Explained Variance');

TSNE

In [None]:
from sklearn.manifold import TSNE

# Three-component t-SNE embedding of the scaled complete-case matrix
tsne = TSNE(n_components=3, random_state=33)
ts = tsne.fit_transform(scaled)

# x and y are passed by keyword: seaborn >= 0.12 no longer accepts the data
# vectors as positional arguments
sns.scatterplot(x=ts[:,0], y=ts[:,1], hue=labels);
plt.figure();
ax = plt.axes(projection='3d');
ax.scatter(ts[:,0], ts[:,1], ts[:,2], c=labels, cmap='winter');

Other Dimensionality Reduction Methods

In [None]:
from sklearn.manifold import Isomap,LocallyLinearEmbedding,MDS,SpectralEmbedding

# Two-dimensional embeddings of the scaled data. The estimators that accept a
# random_state are seeded so the figure is reproducible across runs.
iso = Isomap().fit_transform(scaled)
lle = LocallyLinearEmbedding(random_state=33).fit_transform(scaled)
mds = MDS(random_state=33).fit_transform(scaled)
se = SpectralEmbedding(random_state=33).fit_transform(scaled)

# one panel per method, laid out on a 2x2 grid and coloured by class
embeddings = [('Isomap', iso),
              ('Locally Linear Embedding', lle),
              ('MDS', mds),
              ('Spectral Embedding', se)]

plt.figure(figsize=(14,10))
for position, (method_name, emb) in enumerate(embeddings, start=1):
    plt.subplot(2,2,position)
    sns.scatterplot(x=emb[:,0],y=emb[:,1],hue=nonna['class']);
    plt.legend(loc='best');
    plt.title(method_name);

#### Feature Selection and Exploration

In [None]:
#identify most important features via Random Forest
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OrdinalEncoder

# integer-encode the class names so they can serve as training targets
ordinal = OrdinalEncoder()
labels = ordinal.fit_transform(nonna['class'].values.reshape(-1, 1)).squeeze().astype('int')

# fit on the scaled complete-case matrix; the importances are read in the next cell
forest = RandomForestClassifier(random_state=33, max_depth=8, n_estimators=1000)
forest.fit(scaled, labels)

In [None]:
# positions of the ten largest importances (ascending), then reversed so the
# most important feature is drawn first
feats = np.argsort(forest.feature_importances_)[-10:]
top_desc = feats[::-1]

plt.figure(figsize=(6,4));
sns.barplot(x=forest.feature_importances_[top_desc],
            y=nonna[numeri].columns[top_desc],
            color='Grey');
plt.title('Significance of Most Important Features');

The features are too many to plot all of them but, for demonstration purposes, we will pick the five most important features and visualize their distributions through pairgrids, violin plots, kde plots, and histograms.

In [None]:
# Hard-coded list of the five features used by the detailed plots that follow.
# NOTE(review): presumably taken from an earlier random-forest ranking --
# confirm it still matches the ranking computed in this run.
feats = ['SOD1_N', 'pPKCG_N', 'pERK_N', 'APP_N', 'CaNA_N']

#pairplot
# pairwise scatter/density grid of the selected features, coloured by class
sns.pairplot(nonna, vars=feats, hue='class');

In [None]:
#boxenplots
# one figure per selected feature, walking the list from last to first
for feature_name in reversed(feats):
    plt.figure(figsize=(6,4))
    sns.boxenplot(x='class', y=feature_name, data=nonna);
    plt.title(str(feature_name));

In [None]:
#kde plots of most important features
# one colour per class, paired with the classes in order of first appearance
colors = ['b', 'y', 'g', 'r', 'm', 'darkgoldenrod', 'tab:pink', 'grey']
classes = nonna['class'].unique()

for f in feats:
    plt.figure(figsize=(12,4))
    for color, clas in zip(colors,classes):
        # kdeplot draws the density curve directly; distplot(hist=False) is
        # deprecated in seaborn and scheduled for removal
        sns.kdeplot(nonna.loc[nonna['class']==clas, f], color=color, label=clas);
    plt.legend(loc='best');
    plt.title(f);
    plt.tight_layout();

In [None]:
#distribution histograms of most important features
# one colour per class, paired with the classes in order of first appearance
colors = ['b', 'y', 'g', 'r', 'm', 'darkgoldenrod', 'tab:pink', 'grey']
classes = nonna['class'].unique()

for feature_name in feats:
    plt.figure(figsize=(12,4))
    # overlay one histogram per class on the same axes
    for color, clas in zip(colors, classes):
        in_class = nonna['class'] == clas
        sns.distplot(nonna[in_class][feature_name], kde=False, label=clas, color=color);
    plt.legend(loc='best');
    plt.title(feature_name);
    plt.tight_layout();

We will try one more feature-selection algorithm, namely the chi-square test, and compare its results with those of the Random Forest.

In [None]:
#compute chi2-selected features
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.preprocessing import OrdinalEncoder

# integer class codes as a flat vector
OE = OrdinalEncoder()
labs = OE.fit_transform(nonna['class'].values.reshape(-1, 1)).astype('int').squeeze()

# score every scaled feature against the class codes with the chi-square
# statistic; the fitted selector keeps the ten best and exposes `scores_`
s_feats = SelectKBest(k=10, score_func=chi2).fit(scaled, labs)

In [None]:
#create comparative dataframe
# Both rankings were computed on `scaled`, which is built from nonna[numeri].
# The score positions must therefore be mapped to names through that column
# index. `numer` comes from the full frame and still contains the columns that
# dropna(axis=1, thresh=901) removed, so looking names up there shifts them.
feature_names = nonna[numeri].columns
top_forest = np.argsort(forest.feature_importances_)[-10:][::-1]
top_chi2 = np.argsort(s_feats.scores_)[-10:][::-1]

# ten best features per method, most important first
FS = pd.DataFrame([])
FS['Random Forest'] = pd.Series(feature_names[top_forest])
FS['chi2'] = pd.Series(feature_names[top_chi2])
FS

These are the features both algorithms have in common in their selected ten most important features (they are seven):

In [None]:
# Random Forest picks that also appear among the chi2 picks
in_both = FS['Random Forest'].isin(FS['chi2'])
FS.loc[in_both, 'Random Forest']

## Classification

#### Preprocessing

In [None]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OrdinalEncoder,MinMaxScaler
from sklearn.model_selection import train_test_split

# Raw feature matrix (still containing NaNs) and the class names
x = df.loc[:,numer].values
y = df['class']

# Integer-encode the targets. The column vector is built from the underlying
# numpy array: indexing a pandas Series with [:, np.newaxis] is not supported
# by current pandas.
ordi = OrdinalEncoder()
y = ordi.fit_transform(y.values[:,np.newaxis]).squeeze().astype('int')
cat = ordi.categories_[0]

# Split BEFORE fitting any preprocessing so that the imputation means and the
# scaling ranges are learned from the training rows only; fitting them on the
# whole table leaks test-set statistics into training.
x_tr, x_ts, y_tr, y_ts = train_test_split(x,y,test_size=0.1, random_state=33)

# mean imputation: column means come from the training split and are reused
# unchanged for the test split
imp = SimpleImputer(missing_values=np.nan, strategy='mean')
x_tr = imp.fit_transform(x_tr)
x_ts = imp.transform(x_ts)

# min-max scaling with the training split's ranges
minmax = MinMaxScaler()
x_tr = minmax.fit_transform(x_tr)
x_ts = minmax.transform(x_ts)

# keep `x` as the fully preprocessed matrix, as before
x = minmax.transform(imp.transform(x))

x_tr.shape,y_tr.shape,x_ts.shape,y_ts.shape

#### Machine Learning

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

# Maps a model's display name to [accuracy, macro F1]; filled in by the evaluation cells
results = {} #dictionary to store accuracy and f1-score of models

In [None]:
# K-nearest neighbours with library defaults: fit on the training split,
# predict the held-out split
knn = KNeighborsClassifier().fit(x_tr, y_tr)
pred_knn = knn.predict(x_ts)

# overall accuracy and unweighted mean of the per-class F1 scores
acc_knn = accuracy_score(y_ts, pred_knn)
f1_knn = f1_score(y_ts, pred_knn, average='macro')
results['K-Nearest Neighbors'] = [acc_knn, f1_knn]

print('accuracy:  ', acc_knn)
print('f1_score:  ', f1_knn)
print('')

# confusion matrix annotated with integer counts, axes labelled by class name
cm_knn = confusion_matrix(y_ts, pred_knn)
sns.heatmap(cm_knn, annot=True, fmt='1d', cbar=False,
            xticklabels=cat, yticklabels=cat);
plt.title('Confusion Matrix');

In [None]:
# Logistic regression with library defaults
lr = LogisticRegression().fit(x_tr, y_tr)
pred_lr = lr.predict(x_ts)

# score the held-out predictions and record them under the model's name
acc_lr, f1_lr = accuracy_score(y_ts, pred_lr), f1_score(y_ts, pred_lr, average='macro')
results['Logistic Regression'] = [acc_lr, f1_lr]

print('accuracy:  ', acc_lr)
print('f1_score:  ', f1_lr)
print('')

# confusion matrix annotated with integer counts, axes labelled by class name
cm_lr = confusion_matrix(y_ts, pred_lr)
sns.heatmap(cm_lr, xticklabels=cat, yticklabels=cat,
            cbar=False, annot=True, fmt='1d');
plt.title('Confusion Matrix');

In [None]:
# Random forest: 1000 trees capped at depth 8, seeded for repeatability
rf = RandomForestClassifier(random_state=33, n_estimators=1000, max_depth=8)
rf.fit(x_tr, y_tr)
pred_rf = rf.predict(x_ts)

# held-out accuracy and macro-averaged F1
acc_rf = accuracy_score(y_ts, pred_rf)
f1_rf = f1_score(y_ts, pred_rf, average='macro')
results['Random Forest'] = [acc_rf, f1_rf]

for caption, score in (('accuracy:  ', acc_rf), ('f1_score:  ', f1_rf)):
    print(caption, score)
print('')

# confusion matrix annotated with integer counts, axes labelled by class name
cm_rf = confusion_matrix(y_ts, pred_rf)
sns.heatmap(cm_rf, annot=True, fmt='1d', cbar=False,
            xticklabels=cat, yticklabels=cat);
plt.title('Confusion Matrix');

In [None]:
# Gradient-boosted trees: 1000 boosting stages of depth-5 trees, seeded
gb = GradientBoostingClassifier(random_state=33, max_depth=5, n_estimators=1000)
gb.fit(x_tr, y_tr)
pred_gb = gb.predict(x_ts)

# held-out accuracy and macro-averaged F1
acc_gb = accuracy_score(y_ts, pred_gb)
f1_gb = f1_score(y_ts, pred_gb, average='macro')
results['Gradient Boosting Tree'] = [acc_gb, f1_gb]

print('accuracy:  ', acc_gb)
print('f1_score:  ', f1_gb)
print('')

# confusion matrix annotated with integer counts, axes labelled by class name
cm_gb = confusion_matrix(y_ts, pred_gb)
sns.heatmap(cm_gb, cbar=False, annot=True, fmt='1d',
            xticklabels=cat, yticklabels=cat);
plt.title('Confusion Matrix');

In [None]:
# AdaBoost over decision trees with library-default settings.
tree = DecisionTreeClassifier()
# The weak learner is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn and the old name no longer
# exists in current releases, while the first positional slot works on both.
ada = AdaBoostClassifier(tree,n_estimators=1000,random_state=33)
ada.fit(x_tr,y_tr)
pred_ada = ada.predict(x_ts)

# held-out accuracy and macro-averaged F1, recorded under the model's name
acc_ada = accuracy_score(y_ts, pred_ada)
f1_ada = f1_score(y_ts, pred_ada, average='macro')
results['AdaBoost Tree'] = [acc_ada, f1_ada]

print('accuracy:  ', acc_ada)
print('f1_score:  ', f1_ada)
print('')

# confusion matrix annotated with integer counts, axes labelled by class name
sns.heatmap(confusion_matrix(y_ts,pred_ada),
            xticklabels=cat, yticklabels=cat,
            annot=True, fmt='1d', cbar=False);
plt.title('Confusion Matrix');

#### Deep Learning

In [None]:
from keras.models import Sequential
from keras.layers import Dense,Conv2D,MaxPooling2D,Flatten
from keras.callbacks import EarlyStopping
from keras.optimizers import Adam, RMSprop
from keras.losses import mean_squared_error as mse

DNN

In [None]:
# Fully connected classifier: two ReLU hidden layers and an 8-way output.
dnn = Sequential()
dnn.add(Dense(110, input_shape=[x_tr.shape[1]], activation='relu'))
dnn.add(Dense(220,activation='relu'))
# softmax (not sigmoid): the 8 classes are mutually exclusive and the
# sparse categorical cross-entropy loss expects one probability distribution
# per sample, as the CNN in this notebook already provides
dnn.add(Dense(8,activation='softmax'))
dnn.compile(optimizer='adam',loss='sparse_categorical_crossentropy',
            metrics=['acc'])
dnn.summary()

In [None]:
# Stop training once the callback's monitored quantity (Keras default) has
# not improved for 4 consecutive epochs
stop = EarlyStopping(patience=4,verbose=1)
# 10% of the training split is held out for validation; at most 30 epochs
dnn.fit(x_tr,y_tr,validation_split=0.1,callbacks=[stop],verbose=0,epochs=30)

In [None]:
# predicted class = index of the largest output score for each test sample
pred_dnn = dnn.predict(x_ts).argmax(axis=1)

# held-out accuracy and macro-averaged F1, recorded under the model's name
acc_dnn = accuracy_score(y_ts, pred_dnn)
f1_dnn = f1_score(y_ts, pred_dnn, average='macro')
results['DNN'] = [acc_dnn, f1_dnn]

print('accuracy:  ', acc_dnn)
print('f1_score:  ', f1_dnn)
print('')

# confusion matrix annotated with integer counts, axes labelled by class name
cm_dnn = confusion_matrix(y_ts, pred_dnn)
sns.heatmap(cm_dnn, annot=True, fmt='1d', cbar=False,
            xticklabels=cat, yticklabels=cat);
plt.title('Confusion Matrix');

CNN2d

In [None]:
#convert training data to images for CNN training
# Each feature vector is zero-padded to side*side values and reshaped into a
# square single-channel image.
side = 9                   # must match the CNN's input_shape=[9,9,1]
n_feat = x_tr.shape[1]     # number of real features per sample

ims_tr = np.zeros((x_tr.shape[0],side*side))
ims_ts = np.zeros((x_ts.shape[0],side*side))

# vectorised copy; the former per-row loop also rebound the global name `x`
ims_tr[:,:n_feat] = x_tr
ims_ts[:,:n_feat] = x_ts

ims_tr = np.reshape(ims_tr,(x_tr.shape[0],side,side))
ims_ts = np.reshape(ims_ts,(x_ts.shape[0],side,side))

# show one training sample as an image
plt.matshow(ims_tr[1]);
# tight_layout() only accepts keyword arguments on current matplotlib
plt.tight_layout();

In [None]:
# Small 2-D CNN over the 9x9 single-channel images: two conv/pool stages,
# one dense hidden layer, 8-way softmax output
cnn = Sequential([
    Conv2D(64, (3,3), input_shape=[9,9,1], activation='relu', padding='same'),
    MaxPooling2D((2,2), padding='same'),
    Conv2D(128, (3,3), activation='relu', padding='same'),
    MaxPooling2D((2,2), padding='same'),
    Flatten(),
    Dense(100, activation='relu'),
    Dense(8, activation='softmax'),
])

cnn.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['acc'])
cnn.summary()

In [None]:
# Stop training once the callback's monitored quantity (Keras default) has
# not improved for 4 consecutive epochs
stop = EarlyStopping(patience=4,verbose=1)
# a trailing channel axis is appended so the images match input_shape=[9,9,1];
# 10% of the training split is held out for validation, at most 20 epochs
cnn.fit(ims_tr[:,:,:,np.newaxis],y_tr,validation_split=0.1,
        epochs=20,verbose=1,callbacks=[stop])

In [None]:
# add the channel axis to the test images, then take the highest-scoring class
scores_cnn = cnn.predict(ims_ts[..., np.newaxis])
pred_cnn = np.argmax(scores_cnn, axis=1)

# held-out accuracy and macro-averaged F1, recorded under the model's name
acc_cnn = accuracy_score(y_ts, pred_cnn)
f1_cnn = f1_score(y_ts, pred_cnn, average='macro')
results['CNN'] = [acc_cnn, f1_cnn]

print('accuracy:  ', acc_cnn)
print('f1_score:  ', f1_cnn)
print('')

# confusion matrix annotated with integer counts, axes labelled by class name
sns.heatmap(confusion_matrix(y_ts, pred_cnn), cbar=False, annot=True, fmt='1d',
            xticklabels=cat, yticklabels=cat);
plt.title('Confusion Matrix');

### Classification on Extracted Features

#### PCA-to-DNN

In [None]:
from sklearn.decomposition import PCA

# Fit PCA on the training split only and apply the same projection to the
# test split.
pca = PCA()
dec_tr = pca.fit_transform(x_tr)
dec_ts = pca.transform(x_ts)


# Dense classifier on the PCA scores. The input width is taken from the
# projected data the network is actually trained on, not from x_tr.
dnn = Sequential()
dnn.add(Dense(110, input_shape=[dec_tr.shape[1]], activation='relu'))
dnn.add(Dense(220,activation='relu'))
# softmax (not sigmoid): the 8 classes are mutually exclusive and the loss
# expects one probability distribution per sample
dnn.add(Dense(8,activation='softmax'))
dnn.compile(optimizer='adam',loss='sparse_categorical_crossentropy',
            metrics=['acc'])

# stop once the monitored quantity has not improved for 4 consecutive epochs
stop = EarlyStopping(patience=4,verbose=1)

dnn.fit(dec_tr,y_tr,validation_split=0.1,callbacks=[stop],verbose=0,epochs=30)

In [None]:
# predicted class = index of the largest output score on the projected test data
scores_pca = dnn.predict(dec_ts)
pred_dnn = np.argmax(scores_pca, axis=1)

# held-out accuracy and macro-averaged F1, recorded under the pipeline's name
acc_dnn, f1_dnn = accuracy_score(y_ts, pred_dnn), f1_score(y_ts, pred_dnn, average='macro')
results['PCA->DNN'] = [acc_dnn, f1_dnn]

print('accuracy:  ', acc_dnn)
print('f1_score:  ', f1_dnn)
print('')

# confusion matrix annotated with integer counts, axes labelled by class name
sns.heatmap(confusion_matrix(y_ts, pred_dnn), annot=True, fmt='1d', cbar=False,
            xticklabels=cat, yticklabels=cat);
plt.title('Confusion Matrix');

#### AutoEncoder-to-DNN

In [None]:
dim = x_tr.shape[1]

# hidden-layer widths on the way down to the bottleneck: dim/1.5, dim/2 ... dim/7
shrink = [int(dim//1.5)] + [dim//k for k in range(2, 8)]

# Symmetric dense autoencoder: ELU hidden layers, a linear bottleneck named
# 'encoder' of width dim//8, and a sigmoid reconstruction layer
ae = Sequential()
ae.add(Dense(dim,activation='elu',kernel_initializer='he_uniform',input_shape=[dim]))
for width in shrink:
    ae.add(Dense(width,activation='elu',kernel_initializer='he_uniform'))
ae.add(Dense(dim//8,activation='linear',kernel_initializer='he_uniform', name='encoder'))
# the decoder mirrors the encoder widths in reverse order
for width in reversed(shrink):
    ae.add(Dense(width,activation='elu',kernel_initializer='he_uniform'))
ae.add(Dense(dim,activation='sigmoid'))
ae.compile(loss='mse',optimizer=RMSprop(learning_rate=0.001))

# train the network to reproduce its own input
ae.fit(x_tr,x_tr,epochs=30,verbose=1)

In [None]:
#extract features
from keras import Model
# sub-model from the autoencoder's input up to the layer named 'encoder'
encoder = Model(ae.input, ae.get_layer('encoder').output)

# bottleneck activations used as the extracted features for both splits
x_ae_tr = encoder.predict(x_tr)
x_ae_ts = encoder.predict(x_ts)

In [None]:
#DNN
#train model with AE-extracted features
dnn = Sequential()
dnn.add(Dense(15, input_shape=[x_ae_tr.shape[1]], activation='relu'))
dnn.add(Dense(30,activation='relu'))
# softmax (not sigmoid): the 8 classes are mutually exclusive and the loss
# expects one probability distribution per sample
dnn.add(Dense(8,activation='softmax'))
dnn.compile(optimizer='adam',loss='sparse_categorical_crossentropy',
            metrics=['acc'])

# stop once the monitored quantity has not improved for 4 consecutive epochs
stop = EarlyStopping(patience=4,verbose=1)

dnn.fit(x_ae_tr,y_tr,validation_split=0.1,callbacks=[stop],verbose=0,epochs=30)

In [None]:
# predicted class = index of the largest output score on the encoded test data
pred_dnn = dnn.predict(x_ae_ts).argmax(axis=1)

# held-out accuracy and macro-averaged F1, recorded under the pipeline's name
acc_dnn = accuracy_score(y_ts, pred_dnn)
f1_dnn = f1_score(y_ts, pred_dnn, average='macro')
results['AE->DNN'] = [acc_dnn, f1_dnn]

for caption, score in (('accuracy:  ', acc_dnn), ('f1_score:  ', f1_dnn)):
    print(caption, score)
print('')

# confusion matrix annotated with integer counts, axes labelled by class name
sns.heatmap(confusion_matrix(y_ts, pred_dnn), cbar=False, annot=True, fmt='1d',
            xticklabels=cat, yticklabels=cat);
plt.title('Confusion Matrix');

### Pre-trained CNN

In [None]:
#convert training data to images for CNN training
# Each feature vector is zero-padded to side*side values and reshaped into a
# square image.
# NOTE: this cell rebinds x_tr / x_ts from 2-D feature matrices to image
# stacks, so it can only be run once after the preprocessing cell.
side = 9
n_feat = x_tr.shape[1]     # number of real features per sample

x_train = np.zeros((x_tr.shape[0],side*side))
x_test = np.zeros((x_ts.shape[0],side*side))

# vectorised copy; the former per-row loop also rebound the global name `x`
x_train[:,:n_feat] = x_tr
x_test[:,:n_feat] = x_ts

x_tr = np.reshape(x_train,(x_train.shape[0],side,side))
x_ts = np.reshape(x_test,(x_test.shape[0],side,side))

# show one training sample as an image
plt.matshow(x_tr[1]);
# tight_layout() only accepts keyword arguments on current matplotlib
plt.tight_layout();

In [None]:
#magnify images
# every pixel becomes a 10x10 patch: repeat along the row axis, then the column axis
x_tr = np.repeat(np.repeat(x_tr, 10, axis=1), 10, axis=2)
x_ts = np.repeat(np.repeat(x_ts, 10, axis=1), 10, axis=2)

print(x_tr.shape, x_ts.shape)
plt.matshow(x_tr[1]);

In [None]:
#create RGB dimension, three channels
# append a channel axis and copy the single channel three times along it
x_tr = np.repeat(x_tr[..., np.newaxis], 3, axis=-1)
x_ts = np.repeat(x_ts[..., np.newaxis], 3, axis=-1)
x_tr.shape, x_ts.shape

In [None]:
import keras
from keras import backend as K
from keras import models
from keras.models import Model, load_model
from keras import layers
from keras.layers import Dense,Conv2D,MaxPooling2D, Flatten, Input
from keras.layers.core import Dropout
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report
from tensorflow.keras.applications.xception import Xception


#weight and notops from pretrained models
!mkdir ~/.keras
!mkdir ~/.keras/models
!cp ../input/keras-pretrained-models/*notop* ~/.keras/models/
!cp ../input/keras-pretrained-models/imagenet_class_index.json ~/.keras/models/


from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.preprocessing import image
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D

# create the base pre-trained model
base_model = Xception(weights='imagenet', include_top=False, input_shape=(90,90,3))

# first: train only the top layers (which were randomly initialized)
# i.e. freeze all convolutional InceptionV3 layers
for layer in base_model.layers:
    layer.trainable = False
base_model_output = base_model.output
new_concatenated_model = Flatten()(base_model_output)
new_concatenated_model = (Dense(8, activation='softmax'))(new_concatenated_model)

# this is the model we will train
model = Model(inputs=base_model.input, outputs=new_concatenated_model)


# compile the model (should be done *after* setting layers to non-trainable)
stop= EarlyStopping(patience=4, verbose=1)

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['acc'])
model.fit(x_tr,y_tr,epochs=10,batch_size=20,callbacks=[stop],validation_split=0.1)

pred = model.predict(x_ts)
print('-----')
pred = np.argmax(pred, axis=1)
print('accuracy:',accuracy_score(y_ts,pred))
print('f1-score:',f1_score(y_ts,pred,average='macro'))
conf=confusion_matrix(y_ts,pred)
sns.heatmap(conf, annot=True, fmt='1d');

In [None]:
# predicted class = index of the largest output score for each test image
scores_keras = model.predict(x_ts)
pred_keras = scores_keras.argmax(axis=1)

# held-out accuracy and macro-averaged F1, recorded under the model's name
acc_keras = accuracy_score(y_ts, pred_keras)
f1_keras = f1_score(y_ts, pred_keras, average='macro')
results['Pretrained Xception'] = [acc_keras, f1_keras]

print('accuracy:  ', acc_keras)
print('f1_score:  ', f1_keras)
print('')

# confusion matrix annotated with integer counts, axes labelled by class name
sns.heatmap(confusion_matrix(y_ts, pred_keras), annot=True, fmt='1d', cbar=False,
            xticklabels=cat, yticklabels=cat);
plt.title('Confusion Matrix');

## Overall Results

In [None]:
#display performance of models in a dataframe
# one row per model, in the order the models were evaluated
model_names = list(results.keys())
score_rows = list(results.values())
pd.DataFrame(score_rows, index=model_names, columns=['accuracy','f1-score'])

In [None]:
#prepare the results-data for barplot
# Long-format table: every model appears twice, once per metric.
models = list(results.keys())
accuracies = [results[m][0] for m in models]
f1_scores = [results[m][1] for m in models]
ml_models = pd.Series(models + models)
value = pd.Series(accuracies + f1_scores)
metric = pd.Series(['accuracy']*len(models) + ['f1_score']*len(models))
results_df = pd.DataFrame({'model': ml_models, 'value': value, 'metric': metric})

#generate barplot
plt.figure(figsize=(10,5));
sns.barplot(y='model', x='value', data=results_df, 
            hue='metric',palette=['firebrick','lightcoral']);
plt.legend(loc=(1.01,0.8));
# the title is set before the layout pass so it is accounted for;
# tight_layout() only accepts keyword arguments on current matplotlib
plt.title('Model Performance');
plt.tight_layout();