# Data Drift and Concept Drift Analyses Using Evidently 

### 1. Importing necessary packages 

In [None]:
# Importing all the relevant libraries
import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import LabelEncoder
import evidently

### 2. Importing the dataset

In [217]:
# Locations of the two dataset snapshots: Version 1 was recorded before the
# drift, Version 2 after it.
v1_path = '/Users/harish/Desktop/Human Acivity Recognition/Data/Drift_data/v1.gzip'
v2_path = '/Users/harish/Desktop/Human Acivity Recognition/Data/Drift_data/v2.gzip'

# Read both snapshots into DataFrames with the parquet reader.
v1_df, v2_df = (pd.read_parquet(path) for path in (v1_path, v2_path))

# Report the (rows, columns) shape of each version.
print('Version 1:',v1_df.shape, '\n', 'Version 2:', v2_df.shape)

Version 1: (100000, 563) 
 Version 2: (100000, 563)


In [219]:
# Compare how often each activity label occurs in the two versions.
v1_activity_counts = v1_df['Activity'].value_counts()
v2_activity_counts = v2_df['Activity'].value_counts()
print (v1_activity_counts, '\n\n', v2_activity_counts)

LAYING                16762
WALKING               16728
WALKING_UPSTAIRS      16675
STANDING              16645
WALKING_DOWNSTAIRS    16627
SITTING               16563
Name: Activity, dtype: int64 

 SITTING               28827
LAYING                16762
WALKING_UPSTAIRS      16675
STANDING              16645
WALKING_DOWNSTAIRS    16627
WALKING                4464
Name: Activity, dtype: int64


In [220]:
# Encode the string activity labels as integers.
# The encoder is fitted once, on Version 1 (the reference data), and that same
# mapping is reused for Version 2. Re-fitting on Version 2 would silently
# produce a different label -> integer mapping whenever the two versions do not
# contain exactly the same set of activities, making the encoded targets of
# the two versions incomparable.
# NOTE(review): the integers must also match the encoding used when the model
# was trained -- confirm, or load the encoder that was fitted at training time.
le = LabelEncoder()
v1_df['Activity'] = le.fit_transform(v1_df['Activity'])
# transform() raises ValueError if Version 2 contains a label unseen in Version 1.
v2_df['Activity'] = le.transform(v2_df['Activity'])

In [221]:
# Rename the label column from 'Activity' to 'target' in both versions,
# modifying each DataFrame in place.
for versioned_df in (v1_df, v2_df):
    versioned_df.rename(columns={'Activity': 'target'}, inplace=True)

### Prediction

In [223]:
## Load the feature list and the fitted model
# NOTE: joblib.load unpickles its input, which can execute arbitrary code --
# only load artifacts that come from a trusted source.
features_path = "/Users/harish/Desktop/Human Acivity Recognition/Notebooks/model_features/K12_train_features.joblib"
model_path = "/Users/harish/Desktop/Human Acivity Recognition/Notebooks/model_registry/K12-tuned-random_forest.joblib"

train_features = joblib.load(features_path)
model = joblib.load(model_path)


In [224]:
# Keep only the columns named in train_features, then list them.
v1_test_features = v1_df.loc[:, train_features]
v1_test_features.columns

Index(['tGravityAcc-energy()-X', 'tGravityAcc-mean()-X', 'tGravityAcc-max()-X',
       'tGravityAcc-min()-X', 'angle(X,gravityMean)', 'tGravityAcc-min()-Y',
       'tGravityAcc-mean()-Y', 'tGravityAcc-max()-Y', 'angle(Y,gravityMean)',
       'tBodyAcc-max()-X', 'tGravityAcc-energy()-Y', 'fBodyAcc-entropy()-X'],
      dtype='object')

In [225]:
## V1 prediction
# Take an explicit copy of the feature columns: adding columns to the bare
# selection v1_df[train_features] triggers pandas' SettingWithCopyWarning,
# because pandas cannot tell whether the selection is still tied to v1_df.
v1_test_features = v1_df[train_features].copy()
# Predict while the frame still holds only the feature columns.
v1_test_features['prediction'] = model.predict(v1_test_features)
# Attach the true labels next to the predictions.
v1_test_features['target'] = v1_df['target']

In [226]:
# Confirm that 'prediction' and 'target' now sit alongside the feature columns.
v1_test_features.columns

Index(['tGravityAcc-energy()-X', 'tGravityAcc-mean()-X', 'tGravityAcc-max()-X',
       'tGravityAcc-min()-X', 'angle(X,gravityMean)', 'tGravityAcc-min()-Y',
       'tGravityAcc-mean()-Y', 'tGravityAcc-max()-Y', 'angle(Y,gravityMean)',
       'tBodyAcc-max()-X', 'tGravityAcc-energy()-Y', 'fBodyAcc-entropy()-X',
       'prediction', 'target'],
      dtype='object')

In [227]:
# Peek at the first few predicted class labels for Version 1.
v1_prediction_preview = v1_test_features['prediction'].head()
print(v1_prediction_preview)

0    4
1    0
2    3
3    4
4    1
Name: prediction, dtype: int64


In [228]:
# (rows, columns) of the Version 1 frame after adding 'prediction' and 'target'.
v1_test_features.shape

(100000, 14)

In [229]:
## V2 prediction
# Take an explicit copy of the feature columns: adding columns to the bare
# selection v2_df[train_features] triggers pandas' SettingWithCopyWarning,
# because pandas cannot tell whether the selection is still tied to v2_df.
v2_test_features = v2_df[train_features].copy()
# Predict while the frame still holds only the feature columns.
v2_test_features['prediction'] = model.predict(v2_test_features)
# Attach the true labels next to the predictions.
v2_test_features['target'] = v2_df['target']

In [230]:
# Confirm that 'prediction' and 'target' now sit alongside the feature columns.
v2_test_features.columns

Index(['tGravityAcc-energy()-X', 'tGravityAcc-mean()-X', 'tGravityAcc-max()-X',
       'tGravityAcc-min()-X', 'angle(X,gravityMean)', 'tGravityAcc-min()-Y',
       'tGravityAcc-mean()-Y', 'tGravityAcc-max()-Y', 'angle(Y,gravityMean)',
       'tBodyAcc-max()-X', 'tGravityAcc-energy()-Y', 'fBodyAcc-entropy()-X',
       'prediction', 'target'],
      dtype='object')

In [231]:
# (rows, columns) of the Version 2 frame after adding 'prediction' and 'target'.
v2_test_features.shape

(100000, 14)

### There has been a software update, and we have data from both the V1 and V2 versions of the software

#### Let's compare the two datasets and see if there has been any data drift

### Data Drift

In [236]:
pip install evidently

Note: you may need to restart the kernel to use updated packages.


In [233]:
from evidently.report import Report

from evidently.test_suite import TestSuite
from evidently.test_preset import DataStabilityTestPreset
from evidently.test_preset import DataQualityTestPreset
from evidently.metric_preset import TargetDriftPreset, DataQualityPreset
from evidently.metric_preset import DataDriftPreset , ClassificationPreset


In [234]:
# Data drift report: Version 1 is the reference data set, Version 2 the
# current one.
data_drift_report = Report(metrics=[
    DataDriftPreset(),
])
# current_data must be the Version 2 frame; passing the Version 1 frame for
# both arguments compares the data with itself, so drift can never be detected.
data_drift_report.run(reference_data=v1_test_features, current_data=v2_test_features)

In [235]:
# Write the data drift report to an HTML file.
# NOTE(review): this path spells the project folder "Human Activity Recognition",
# while the data and model are loaded from "Human Acivity Recognition" --
# confirm which folder actually exists.
drift_report_path = "/Users/harish/Desktop/Human Activity Recognition/Notebooks/model_results/drift_report.html"
data_drift_report.save_html(drift_report_path)


### Data Quality Report

In [169]:
# Data quality report with Version 1 as reference and Version 2 as current data.
quality_metrics = [DataQualityPreset()]
data_quality_report = Report(metrics=quality_metrics)
data_quality_report.run(
    reference_data=v1_test_features,
    current_data=v2_test_features,
)

# Last expression of the cell: display the report inline.
data_quality_report


elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison



In [170]:
# Write the data quality report to an HTML file.
# NOTE(review): this path spells the project folder "Human Activity Recognition",
# while the data and model are loaded from "Human Acivity Recognition" --
# confirm which folder actually exists.
quality_report_path = "/Users/harish/Desktop/Human Activity Recognition/Notebooks/model_results/quality_report.html"
data_quality_report.save_html(quality_report_path)

### Concept Drift

In [171]:
# Target drift report with Version 1 as reference and Version 2 as current data.
# NOTE(review): 'target' holds integer-encoded class labels; check that Evidently
# treats it as a categorical column (for example through a column mapping that
# declares a classification task) rather than as a numerical one -- confirm.
target_drift_metrics = [TargetDriftPreset()]
num_target_drift_report = Report(metrics=target_drift_metrics)
num_target_drift_report.run(
    reference_data=v1_test_features,
    current_data=v2_test_features,
)

In [172]:
# Write the target drift report to an HTML file.
# NOTE(review): this path spells the project folder "Human Activity Recognition",
# while the data and model are loaded from "Human Acivity Recognition" --
# confirm which folder actually exists.
target_drift_report_path = "/Users/harish/Desktop/Human Activity Recognition/Notebooks/model_results/concept_drift.html"
num_target_drift_report.save_html(target_drift_report_path)

## Classification Report 

In [175]:
# List the Version 1 columns that go into the classification report.
# NOTE(review): the recorded output of this cell shows a 'Target' column, but
# the rename cell earlier in the notebook produces 'target' -- the output looks
# stale; re-run the notebook from a fresh kernel to refresh it.
v1_test_features.columns

Index(['tGravityAcc-energy()-X', 'tGravityAcc-mean()-X', 'tGravityAcc-max()-X',
       'tGravityAcc-min()-X', 'angle(X,gravityMean)', 'tGravityAcc-min()-Y',
       'tGravityAcc-mean()-Y', 'tGravityAcc-max()-Y', 'angle(Y,gravityMean)',
       'tBodyAcc-max()-X', 'tGravityAcc-energy()-Y', 'fBodyAcc-entropy()-X',
       'prediction', 'Target'],
      dtype='object')

In [176]:
# List the Version 2 columns that go into the classification report.
# NOTE(review): the recorded output of this cell shows a 'Target' column, but
# the rename cell earlier in the notebook produces 'target' -- the output looks
# stale; re-run the notebook from a fresh kernel to refresh it.
v2_test_features.columns

Index(['tGravityAcc-energy()-X', 'tGravityAcc-mean()-X', 'tGravityAcc-max()-X',
       'tGravityAcc-min()-X', 'angle(X,gravityMean)', 'tGravityAcc-min()-Y',
       'tGravityAcc-mean()-Y', 'tGravityAcc-max()-Y', 'angle(Y,gravityMean)',
       'tBodyAcc-max()-X', 'tGravityAcc-energy()-Y', 'fBodyAcc-entropy()-X',
       'prediction', 'Target'],
      dtype='object')

In [206]:
# Classification performance report with Version 1 as reference and Version 2
# as current data; both frames carry 'target' and 'prediction' columns.
# NOTE(review): both columns hold integer-encoded class labels; check that
# Evidently treats them as categorical (for example through a column mapping
# that declares a classification task) -- confirm.
performance_metrics = [ClassificationPreset()]
classification_performance_report = Report(metrics=performance_metrics)
classification_performance_report.run(
    reference_data=v1_test_features,
    current_data=v2_test_features,
)

# Last expression of the cell: display the report inline.
classification_performance_report

In [207]:
# Write the classification performance report to an HTML file.
# NOTE(review): this path spells the project folder "Human Activity Recognition",
# while the data and model are loaded from "Human Acivity Recognition" --
# confirm which folder actually exists.
classification_report_path = "/Users/harish/Desktop/Human Activity Recognition/Notebooks/model_results/classification_report.html"
classification_performance_report.save_html(classification_report_path)