**Author**: Devanshi Verma
<BR>
**Objective**: Use Evidently to track data shift for the Plant Seedlings multi-class classification model
<br>
**Topics Covered**: Target drift and Classification Performance Metrics

# 1. Installing and Loading the Data

In [None]:
#installing the libraries
!pip install evidently

Collecting evidently
  Downloading evidently-0.1.22.dev0-py3-none-any.whl (15.2 MB)
[K     |████████████████████████████████| 15.2 MB 117 kB/s 
Collecting dataclasses
  Downloading dataclasses-0.6-py3-none-any.whl (14 kB)
Installing collected packages: dataclasses, evidently
Successfully installed dataclasses-0.6 evidently-0.1.22.dev0


In [None]:
#mounting the google drive
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
!cd /content/drive/MyDrive/ModelMonitoringBlog

Mounted at /content/drive


In [None]:
!pip install evidently



In [None]:
#loading the libraries
import os
import pandas as pd
import sklearn
import tensorflow as tf
import numpy as np
from evidently.dashboard import Dashboard
from evidently.tabs import DataDriftTab,CatTargetDriftTab,ClassificationPerformanceTab
from evidently.profile_sections import ClassificationPerformanceProfileSection, ProbClassificationPerformanceProfileSection

In [None]:
# Dataset locations on the mounted Drive: the data root and its "train" sub-folder.
BASE_DATA_FOLDER = '/content/drive/My Drive/ModelMonitoringBlog/Rupa_Data/'
Train_DATA_FOLDER = os.path.join(BASE_DATA_FOLDER, 'train')

# 2. Tracking the Target Shift 

In [None]:
# Build the training image index: one row per file found under the training root,
# labelled with the name of the folder that directly contains the file.
pathToTrainData='/content/drive/My Drive/ModelMonitoringBlog/Rupas_Files_WIP/Rupa_Data/train'

# Collect plain records first and create the DataFrame once. Growing a frame with
# DataFrame.append inside the loop copies it on every iteration, and the method
# no longer exists in pandas 2.x.
training_records = [
    {'filename': os.path.join(dirname, filename), 'target': dirname.split('/')[-1]}
    for dirname, _, filenames in os.walk(pathToTrainData)
    for filename in filenames
]
# Passing `columns` keeps the expected schema even when no files are found.
training_img_list = pd.DataFrame(training_records, columns=['filename', 'target'])

In [None]:
# Build the hold-out image index: one row per file found under the hold-out root,
# labelled with the name of the folder that directly contains the file.
pathToTestData='/content/drive/My Drive/ModelMonitoringBlog/Rupas_Files_WIP/Rupa_Data/hold_out_images'

# Collect plain records first and create the DataFrame once. Growing a frame with
# DataFrame.append inside the loop copies it on every iteration, and the method
# no longer exists in pandas 2.x.
testing_records = [
    {'filename': os.path.join(dirname, filename), 'target': dirname.split('/')[-1]}
    for dirname, _, filenames in os.walk(pathToTestData)
    for filename in filenames
]
# Passing `columns` keeps the expected schema even when no files are found.
testing_img_list = pd.DataFrame(testing_records, columns=['filename', 'target'])

In [None]:
# Shuffle the training rows. A fixed random_state keeps the row order (and the
# preview below) identical across kernel restarts.
training_img_list = sklearn.utils.shuffle(training_img_list, random_state=42)
training_img_list.head(10)

Unnamed: 0,filename,target
1478,/content/drive/My Drive/ModelMonitoringBlog/Ru...,Loose Silky-bent
655,/content/drive/My Drive/ModelMonitoringBlog/Ru...,Cleavers
477,/content/drive/My Drive/ModelMonitoringBlog/Ru...,Charlock
1320,/content/drive/My Drive/ModelMonitoringBlog/Ru...,Common wheat
1735,/content/drive/My Drive/ModelMonitoringBlog/Ru...,Loose Silky-bent
29,/content/drive/My Drive/ModelMonitoringBlog/Ru...,Black-grass
2489,/content/drive/My Drive/ModelMonitoringBlog/Ru...,Common Chickweed
2555,/content/drive/My Drive/ModelMonitoringBlog/Ru...,Maize
1813,/content/drive/My Drive/ModelMonitoringBlog/Ru...,Loose Silky-bent
3026,/content/drive/My Drive/ModelMonitoringBlog/Ru...,Scentless Mayweed


In [None]:
# Shuffle the hold-out (testing) rows. A fixed random_state keeps the row order
# (and the preview below) identical across kernel restarts.
testing_img_list = sklearn.utils.shuffle(testing_img_list, random_state=42)
testing_img_list.head(10)

Unnamed: 0,filename,target
41,/content/drive/My Drive/ModelMonitoringBlog/Ru...,Small-flowered Cranesbill
54,/content/drive/My Drive/ModelMonitoringBlog/Ru...,Small-flowered Cranesbill
39,/content/drive/My Drive/ModelMonitoringBlog/Ru...,Small-flowered Cranesbill
142,/content/drive/My Drive/ModelMonitoringBlog/Ru...,Maize
99,/content/drive/My Drive/ModelMonitoringBlog/Ru...,Scentless Mayweed
66,/content/drive/My Drive/ModelMonitoringBlog/Ru...,Shepherds Purse
19,/content/drive/My Drive/ModelMonitoringBlog/Ru...,Cleavers
33,/content/drive/My Drive/ModelMonitoringBlog/Ru...,Fat Hen
63,/content/drive/My Drive/ModelMonitoringBlog/Ru...,Shepherds Purse
64,/content/drive/My Drive/ModelMonitoringBlog/Ru...,Shepherds Purse


In [None]:
# Sanity check: (row count, column count) of the training image index.
training_img_list.shape

(3921, 2)

In [None]:
# Sanity check: (row count, column count) of the hold-out image index.
testing_img_list.shape

(147, 2)

In [None]:
# Discard rows whose label is a container-folder name instead of a class name
# (files sitting directly in a root folder are labelled with that folder's name).
# NOTE(review): 'train' matches the training root's folder name, but 'train2' does not
# match the hold-out root ('hold_out_images') — confirm the intended literal.
testing_img_list = testing_img_list.loc[testing_img_list['target'].ne('train2')]
training_img_list = training_img_list.loc[training_img_list['target'].ne('train')]

In [None]:
# Print the distinct class labels present in the training and hold-out sets.
print(training_img_list['target'].unique())
print(testing_img_list['target'].unique())

['Loose Silky-bent' 'Cleavers' 'Charlock' 'Common wheat' 'Black-grass'
 'Common Chickweed' 'Maize' 'Scentless Mayweed'
 'Small-flowered Cranesbill' 'Sugar beet' 'Fat Hen' 'Shepherds Purse']
['Black-grass' 'Charlock' 'Sugar beet' 'Cleavers' 'Common Chickweed'
 'Common wheat' 'Fat Hen' 'Small-flowered Cranesbill' 'Shepherds Purse'
 'Scentless Mayweed' 'Loose Silky-bent' 'Maize']


In [None]:
# Target-drift report: compare the label column of the training set (reference)
# with the label column of the hold-out set (production).
np.seterr(divide='ignore', invalid='ignore')  # silence NumPy divide/invalid floating-point warnings from here on
seedling_report = Dashboard(tabs=[CatTargetDriftTab])
seedling_report.calculate(training_img_list[['target']], testing_img_list[['target']], column_mapping = None)
# Ensure the output folder exists so the HTML report has somewhere to be written.
os.makedirs("reports", exist_ok=True)
seedling_report.save("reports/seedling_report.html")

# 3. Understanding the Classification Performance

In [None]:
# Side length, in pixels, of the square images fed to the model. This is consistent
# with the (None, 254, 254, 64) output of the first 3x3 convolution in the model summary.
image_size = 256

# Loader used to restore the saved Keras model from disk.
from keras.models import load_model


In [None]:
# Restore the saved plant classifier from its .h5 file on Drive.
model_checkpoint_path = "/content/drive/My Drive/ModelMonitoringBlog/Rupas_Files_WIP/Rupa_Data/CNN_Models_Aug1/plant_classifier_81340_fold_no_4.h5"
model = load_model(model_checkpoint_path)

In [None]:
# Print the layer-by-layer architecture of the loaded model.
model.summary()

Model: "sequential_13"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_65 (Conv2D)           (None, 254, 254, 64)      1792      
_________________________________________________________________
conv2d_66 (Conv2D)           (None, 252, 252, 64)      36928     
_________________________________________________________________
max_pooling2d_52 (MaxPooling (None, 126, 126, 64)      0         
_________________________________________________________________
batch_normalization_52 (Batc (None, 126, 126, 64)      256       
_________________________________________________________________
conv2d_67 (Conv2D)           (None, 124, 124, 64)      36928     
_________________________________________________________________
max_pooling2d_53 (MaxPooling (None, 62, 62, 64)        0         
_________________________________________________________________
batch_normalization_53 (Batc (None, 62, 62, 64)      

In [None]:
# Class labels, indexed by the position of the model's output unit.
# NOTE(review): assumes the model's output order is this alphabetical order — confirm
# against the label encoding used at training time.
Class_names = [
    'Black-grass',
    'Charlock',
    'Cleavers',
    'Common Chickweed',
    'Common wheat',
    'Fat Hen',
    'Loose Silky-bent',
    'Maize',
    'Scentless Mayweed',
    'Shepherds Purse',
    'Small-flowered Cranesbill',
    'Sugar beet',
]

In [None]:
# Predict a class name for every hold-out image, one image at a time.
predictions_list = []
list_test_img = list(testing_img_list['filename'])
for image_path in list_test_img:
    # Resize on load using the shared image_size constant rather than a repeated literal,
    # so the input size is configured in exactly one place.
    img = tf.keras.preprocessing.image.load_img(image_path, target_size=(image_size, image_size))
    img_array = tf.keras.preprocessing.image.img_to_array(img)
    img_array = tf.expand_dims(img_array, 0)  # add a leading batch axis of size 1

    predictions = model.predict(img_array)
    score = tf.nn.softmax(predictions[0])
    # The highest-scoring output position selects the class name.
    predictions_list.append(Class_names[np.argmax(score)])

In [None]:
# Number of hold-out predictions produced by the loop above.
len(predictions_list)

147

In [None]:
# Predict a class name for every training image, one image at a time.
# Despite its name, target_list holds the model's predictions on the training set.
target_list = []
list_train_img = list(training_img_list['filename'])
for image_path in list_train_img:
    # Resize on load using the shared image_size constant rather than a repeated literal,
    # so the input size is configured in exactly one place.
    img = tf.keras.preprocessing.image.load_img(image_path, target_size=(image_size, image_size))
    img_array = tf.keras.preprocessing.image.img_to_array(img)
    img_array = tf.expand_dims(img_array, 0)  # add a leading batch axis of size 1

    predictions = model.predict(img_array)
    score = tf.nn.softmax(predictions[0])
    # The highest-scoring output position selects the class name.
    target_list.append(Class_names[np.argmax(score)])

In [None]:
# Number of training-set predictions; should equal the training row count.
len(target_list)

3921

In [None]:
# Peek at the first five predicted class names for the training set.
target_list[0:5]

In [None]:
# Re-check the number of hold-out predictions.
# NOTE(review): the recorded output here is 0 while the same check earlier shows 147 —
# presumably the cells were run out of order; re-run top to bottom to confirm.
len(predictions_list)

0

In [None]:
# Pair every true label with the model's predicted label:
# training set -> reference frame, hold-out set -> production frame.
# The prediction lists are attached positionally, in the same row order as the source frames.
reference = training_img_list[['target']].assign(prediction=target_list)
production = testing_img_list[['target']].assign(prediction=predictions_list)

In [None]:
# Column mapping handed to Evidently: which columns hold the true and predicted labels.
data_dict = {
    'target': 'target',
    'prediction': 'prediction',
}

In [None]:
# Classification-performance report: reference (training) vs production (hold-out)
# true labels and predictions.
classification_performance_report = Dashboard(tabs=[ClassificationPerformanceTab])
classification_performance_report.calculate(reference, production, column_mapping=data_dict)
# Ensure the output folder exists so the HTML report has somewhere to be written.
os.makedirs("reports", exist_ok=True)
classification_performance_report.save("reports/classification_performance_report.html")


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



# 4. Walking through the Iris example shared by Evidently

In [None]:
# Index of the highest-scoring class for each row of test_data.
# NOTE(review): `test_data` is not defined in any visible cell, so this line raises
# NameError on a fresh run — presumably a leftover; confirm and define or remove.
predictions= np.argmax(model.predict(test_data), axis=-1)

In [None]:
import pandas as pd

from sklearn import datasets, model_selection, neighbors

from evidently.dashboard import Dashboard
from evidently.tabs import ClassificationPerformanceTab

from evidently.model_profile import Profile
from evidently.profile_sections import ClassificationPerformanceProfileSection

In [None]:
# Load the Iris dataset bundled with scikit-learn.
iris = datasets.load_iris()

In [None]:
# Put the feature matrix in a DataFrame with one named column per feature.
iris_frame = pd.DataFrame(iris.data, columns = iris.feature_names)

In [None]:

# Split features and labels into a reference part and a production part;
# random_state=0 makes the split repeatable.
iris_split = model_selection.train_test_split(iris_frame, iris.target, random_state=0)
reference, production, y_train, y_test = iris_split

In [None]:
# 1-nearest-neighbour classifier for the Iris example.
# Note: this rebinds `model`, the name earlier cells use for the loaded Keras model.
model = neighbors.KNeighborsClassifier(n_neighbors=1)

In [None]:

# Fit on the reference split, then predict on both the reference and production splits.
model.fit(reference, y_train)
train_predictions = model.predict(reference)
test_predictions = model.predict(production)

In [None]:
# Attach the true labels and the model's predictions as new columns.
# `assign` returns new frames, so nothing is written in place into the frames handed
# back by train_test_split (in-place writes there can raise SettingWithCopyWarning).
reference = reference.assign(target=y_train, prediction=train_predictions)
production = production.assign(target=y_test, prediction=test_predictions)

In [None]:

# Replace class codes with their names from iris.target_names,
# in both label columns of both frames.
for frame in (reference, production):
    for column in ('target', 'prediction'):
        frame[column] = frame[column].map(lambda code: iris.target_names[code])

In [None]:
# Display the reference frame: features plus target and prediction columns.
reference

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target,prediction
61,5.9,3.0,4.2,1.5,versicolor,versicolor
92,5.8,2.6,4.0,1.2,versicolor,versicolor
112,6.8,3.0,5.5,2.1,virginica,virginica
2,4.7,3.2,1.3,0.2,setosa,setosa
141,6.9,3.1,5.1,2.3,virginica,virginica
...,...,...,...,...,...,...
9,4.9,3.1,1.5,0.1,setosa,setosa
103,6.3,2.9,5.6,1.8,virginica,virginica
67,5.8,2.7,4.1,1.0,versicolor,versicolor
117,7.7,3.8,6.7,2.2,virginica,virginica


In [None]:
# Display the production frame: features plus target and prediction columns.
production

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target,prediction
114,5.8,2.8,5.1,2.4,virginica,virginica
62,6.0,2.2,4.0,1.0,versicolor,versicolor
33,5.5,4.2,1.4,0.2,setosa,setosa
107,7.3,2.9,6.3,1.8,virginica,virginica
7,5.0,3.4,1.5,0.2,setosa,setosa
100,6.3,3.3,6.0,2.5,virginica,virginica
40,5.0,3.5,1.3,0.3,setosa,setosa
86,6.7,3.1,4.7,1.5,versicolor,versicolor
76,6.8,2.8,4.8,1.4,versicolor,versicolor
71,6.1,2.8,4.0,1.3,versicolor,versicolor


In [None]:

# Column mapping for Evidently: label column, prediction column and the numerical feature columns.
iris_column_mapping = {
    'target': 'target',
    'prediction': 'prediction',
    'numerical_features': iris.feature_names,
}

In [None]:
# Display the column mapping that will be passed to the dashboard.
iris_column_mapping

{'numerical_features': ['sepal length (cm)',
  'sepal width (cm)',
  'petal length (cm)',
  'petal width (cm)'],
 'prediction': 'prediction',
 'target': 'target'}

In [None]:
# Build the classification-performance dashboard from the Iris reference and
# production frames, then display it with show().
iris_model_performance_dashboard = Dashboard(tabs=[ClassificationPerformanceTab])
iris_model_performance_dashboard.calculate(reference, production, column_mapping = iris_column_mapping)
iris_model_performance_dashboard.show()

In [None]:
# Rebuild both frames with only the label and prediction columns (no feature columns),
# using the raw y_train / y_test values and the predictions computed earlier.
reference = pd.DataFrame({'target': y_train, 'prediction': train_predictions})
production = pd.DataFrame({'target': y_test, 'prediction': test_predictions})

In [None]:

# Minimal column mapping: only the label and prediction columns.
iris_column_mapping = {
    'target': 'target',
    'prediction': 'prediction',
}

In [None]:
# Display the reduced column mapping.
iris_column_mapping

{'prediction': 'prediction', 'target': 'target'}

In [None]:
# Recompute the classification-performance dashboard from the two-column frames
# (target and prediction only), then display it with show().
iris_model_performance_dashboard = Dashboard(tabs=[ClassificationPerformanceTab])
iris_model_performance_dashboard.calculate(reference, production, column_mapping = iris_column_mapping)
iris_model_performance_dashboard.show()