# Getting Started Tutorial

To install Evidently using the pip package manager, run:

```$ pip install evidently```


If you want to see reports inside a Jupyter notebook, you also need to install the Jupyter nbextension. After installing Evidently, run the following two commands in the terminal from the Evidently directory.

To install jupyter nbextension, run:

```$ jupyter nbextension install --sys-prefix --symlink --overwrite --py evidently```

To enable it, run:

```$ jupyter nbextension enable evidently --py --sys-prefix```

That's it!

In [None]:
#run following commands one by one on the command prompt/terminal with admin rights
pip install evidently
jupyter nbextension install --sys-prefix --symlink --overwrite --py evidently
jupyter nbextension enable evidently --py --sys-prefix

In [4]:
try:
    import evidently
except:
    !pip install git+https://github.com/evidentlyai/evidently.git

In [None]:
import evidently

In [1]:
import pandas as pd
import numpy as np

from sklearn.datasets import fetch_california_housing

from evidently import ColumnMapping

from evidently.report import Report
from evidently.metrics.base_metric import generate_column_metrics
from evidently.metric_preset import DataDriftPreset, TargetDriftPreset, DataQualityPreset, RegressionPreset
from evidently.metrics import *

from evidently.test_suite import TestSuite
from evidently.tests.base_test import generate_column_tests
from evidently.test_preset import DataStabilityTestPreset, NoTargetPerformanceTestPreset, RegressionTestPreset
from evidently.tests import *

In [2]:
import warnings

# Silence all warnings so the tutorial output stays readable.
# One call is enough: filterwarnings('ignore') without a message or module
# pattern installs the same filter entry as simplefilter('ignore').
warnings.simplefilter('ignore')

## Load Data

In [3]:
# Load the California housing dataset; as_frame=True makes `data.frame` a
# pandas DataFrame holding the feature columns together with the target column.
# Exercise: try the same code on the Boston housing dataset.
# NOTE(review): scikit-learn removed `load_boston` in version 1.2, so the
# exercise needs another source for that data - confirm before relying on it.
data = fetch_california_housing(as_frame=True)
housing_data = data.frame

In [4]:
# Confirm that the loaded object is a pandas DataFrame.
type(housing_data)

pandas.core.frame.DataFrame

In [5]:
# Preview the first rows of the raw dataset.
housing_data.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseVal
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


In [11]:
# (number of rows, number of columns) of the full dataset.
housing_data.shape

(20640, 9)

In [12]:
# Rename the label column 'MedHouseVal' to 'target'; inplace=True modifies
# housing_data itself instead of returning a renamed copy.
# NOTE(review): presumably 'target' is the column name Evidently looks for by
# default - confirm against the ColumnMapping documentation.
housing_data.rename(columns={'MedHouseVal': 'target'}, inplace=True)

In [13]:
 np.random.normal(0, 5, housing_data.shape[0])

array([ 1.63143607, -4.25460749, -0.38110453, ...,  5.57301301,
       -3.42196101,  1.32324102])

In [None]:
# Build an ML model and make predictions (simulated below by adding random noise to the target)

In [14]:
# Preview the target values with fresh Gaussian noise added (mean 0, std 5).
# Nothing is stored here, and every call draws new random numbers.
housing_data['target'].values + np.random.normal(0, 5, housing_data.shape[0])

array([ 5.64674777,  2.1991729 ,  6.50331735, ..., -3.422815  ,
       -7.56748161, -2.07044836])

In [15]:
# Simulate model predictions: the true target plus Gaussian noise
# (mean 0, standard deviation 5), one draw per row. Then preview the result.
housing_data['prediction'] = housing_data['target'].values + np.random.normal(
    loc=0, scale=5, size=len(housing_data)
)
housing_data.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,target,prediction
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526,9.136062
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585,6.351146
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521,9.295092
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413,-2.974831
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422,8.934268


In [16]:
# Draw two separate random samples of equal size, each without replacement,
# to act as the reference dataset and the current dataset.
SAMPLE_SIZE = 5000
reference = housing_data.sample(n=SAMPLE_SIZE, replace=False)
current = housing_data.sample(n=SAMPLE_SIZE, replace=False)

In [17]:
# Preview the first rows of the reference sample.
reference.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,target,prediction
12914,2.774,22.0,5.114024,1.107471,1870.0,2.450852,38.61,-121.32,1.902,3.073662
13327,2.5391,33.0,4.324324,0.972973,831.0,3.743243,34.05,-117.66,1.086,3.476598
12907,6.2817,25.0,6.65625,0.948438,1635.0,2.554688,38.6,-121.33,2.882,5.22352
16899,5.6206,44.0,4.84375,0.934028,736.0,2.555556,37.59,-122.34,3.864,0.754028
6176,4.125,36.0,5.5875,0.990625,1139.0,3.559375,34.08,-117.93,1.858,-4.938751


In [18]:
# Preview the first rows of the current sample.
current.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,target,prediction
300,1.9479,50.0,4.825203,1.060976,907.0,3.686992,37.76,-122.18,0.895,-3.185097
1761,3.9417,42.0,5.07485,1.032934,719.0,2.152695,37.94,-122.33,1.391,-0.101593
17091,3.3015,33.0,4.334728,1.122734,1895.0,2.642957,37.48,-122.23,2.677,5.299589
19097,3.7115,20.0,6.091549,1.059859,1707.0,3.005282,38.25,-122.63,1.819,4.528876
1485,6.0901,8.0,7.536062,1.050682,1555.0,3.031189,37.95,-122.01,2.982,-6.898002


## Report

In [19]:
# Build a report from the data-drift preset and compute it on the two samples.
# `report` is left as the last expression so the notebook displays it.
report = Report(metrics=[DataDriftPreset()])
report.run(reference_data=reference, current_data=current)
report

In [22]:
# Rebuild `report` with a single metric: the summary of the 'AveRooms' column,
# computed on the reference and current samples. The last line displays it.
report = Report(metrics=[ColumnSummaryMetric(column_name='AveRooms')])
report.run(reference_data=reference, current_data=current)
report

In [14]:
# Write the report to 'report.html' (path is relative to the working directory).
report.save_html('report.html')

In [15]:
# Get the report results as a Python dictionary.
report.as_dict()

{'metrics': [{'metric': 'ColumnSummaryMetric',
   'result': {'column_name': 'AveRooms',
    'column_type': 'num',
    'reference_characteristics': {'number_of_rows': 5000,
     'count': 5000,
     'missing': 0,
     'missing_percentage': 0.0,
     'mean': 5.37,
     'std': 1.88,
     'min': 0.89,
     'p25': 4.46,
     'p50': 5.21,
     'p75': 6.03,
     'max': 56.27,
     'unique': 4905,
     'unique_percentage': 98.1,
     'infinite_count': 0,
     'infinite_percentage': 0.0,
     'most_common': 5.0,
     'most_common_percentage': 0.14},
    'current_characteristics': {'number_of_rows': 5000,
     'count': 5000,
     'missing': 0,
     'missing_percentage': 0.0,
     'mean': 5.45,
     'std': 2.29,
     'min': 1.0,
     'p25': 4.43,
     'p50': 5.24,
     'p75': 6.08,
     'max': 61.81,
     'unique': 4898,
     'unique_percentage': 97.96,
     'infinite_count': 0,
     'infinite_percentage': 0.0,
     'most_common': 5.0,
     'most_common_percentage': 0.2}}},
  {'metric': 'ColumnQua

In [16]:
# Get the report results as a JSON string.
report.json()

'{"version": "0.4.5", "metrics": [{"metric": "ColumnSummaryMetric", "result": {"column_name": "AveRooms", "column_type": "num", "reference_characteristics": {"number_of_rows": 5000, "count": 5000, "missing": 0, "missing_percentage": 0.0, "mean": 5.37, "std": 1.88, "min": 0.89, "p25": 4.46, "p50": 5.21, "p75": 6.03, "max": 56.27, "unique": 4905, "unique_percentage": 98.1, "infinite_count": 0, "infinite_percentage": 0.0, "most_common": 5.0, "most_common_percentage": 0.14}, "current_characteristics": {"number_of_rows": 5000, "count": 5000, "missing": 0, "missing_percentage": 0.0, "mean": 5.45, "std": 2.29, "min": 1.0, "p25": 4.43, "p50": 5.24, "p75": 6.08, "max": 61.81, "unique": 4898, "unique_percentage": 97.96, "infinite_count": 0, "infinite_percentage": 0.0, "most_common": 5.0, "most_common_percentage": 0.2}}}, {"metric": "ColumnQuantileMetric", "result": {"column_name": "AveRooms", "column_type": "num", "quantile": 0.25, "current": {"value": 4.432022683460071}, "reference": {"value": 

In [None]:
# Write the report results to 'report.json' (path is relative to the working directory).
report.save_json('report.json')

## Test Suite 

In [24]:
tests = TestSuite(tests=[
    TestNumberOfColumnsWithMissingValues(),
    TestNumberOfRowsWithMissingValues(),
    TestNumberOfConstantColumns(),
    TestNumberOfDuplicatedRows(),
    TestNumberOfDuplicatedColumns(),
    TestColumnsType(),
    TestNumberOfDriftedColumns(),
    
])

tests.run(reference_data=reference, current_data=current)
tests

In [None]:
suite.as_dict()

In [None]:
suite.json()

In [None]:
suite.save_html('test_suite.html')

In [None]:
suite.save_json('test_suite.json')