# Getting Started Tutorial

To install Evidently using the pip package manager, run:

```$ pip install evidently```


If you want to see reports inside a Jupyter notebook, you also need to install the Jupyter nbextension. After installing Evidently, run the following two commands in the terminal from the Evidently directory.

To install the Jupyter nbextension, run:

```$ jupyter nbextension install --sys-prefix --symlink --overwrite --py evidently```

To enable it, run:

```$ jupyter nbextension enable evidently --py --sys-prefix```

That's it!

In [None]:
#run following commands one by one on the command prompt/terminal with admin rights
pip install evidently
jupyter nbextension install --sys-prefix --symlink --overwrite --py evidently
jupyter nbextension enable evidently --py --sys-prefix

In [4]:
try:
    import evidently
except:
    !pip install git+https://github.com/evidentlyai/evidently.git

In [1]:
import pandas as pd
import numpy as np

from sklearn.datasets import fetch_california_housing

from evidently import ColumnMapping

from evidently.report import Report
from evidently.metrics.base_metric import generate_column_metrics
from evidently.metric_preset import DataDriftPreset, TargetDriftPreset, DataQualityPreset, RegressionPreset
from evidently.metrics import *

from evidently.test_suite import TestSuite
from evidently.tests.base_test import generate_column_tests
from evidently.test_preset import DataStabilityTestPreset, NoTargetPerformanceTestPreset, RegressionTestPreset
from evidently.tests import *

In [2]:
import warnings
# Register a blanket "ignore" filter so warnings are hidden in the cells below.
# NOTE(review): with no message/module arguments these two calls look
# redundant — confirm before removing either one.
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

## Load Data

In [3]:
# Fetch the California housing dataset through scikit-learn. With as_frame=True
# the returned object exposes the full table (features plus the MedHouseVal
# column) as a pandas DataFrame via its `.frame` attribute.
data = fetch_california_housing(as_frame=True)
housing_data = data.frame

In [4]:
type(housing_data)

pandas.core.frame.DataFrame

In [5]:
housing_data.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseVal
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


In [6]:
housing_data.shape

(20640, 9)

In [7]:
# Rename the label column to 'target'. Rebinding to the renamed copy (instead of
# inplace=True) leaves the originally loaded frame in `data.frame` untouched,
# so the cell stays safe to re-run and earlier displays are not invalidated.
housing_data = housing_data.rename(columns={'MedHouseVal': 'target'})

In [8]:
 np.random.normal(0, 5, housing_data.shape[0])

array([ 4.4569605 ,  1.56058866,  2.57662987, ...,  1.73087704,
       -0.68102046,  2.26708524])

In [9]:
housing_data['target'].values + np.random.normal(0, 5, housing_data.shape[0])

array([-0.86023245, 13.4799254 ,  4.33017248, ...,  4.99174932,
        1.2616727 ,  6.9174899 ])

In [10]:
# Simulate model output: the true target plus Gaussian noise (mean 0, std 5),
# one draw per row. A locally seeded Generator makes the 'prediction' column --
# and every report or test computed from it -- identical on each re-run.
rng = np.random.default_rng(42)
housing_data['prediction'] = housing_data['target'].values + rng.normal(0, 5, housing_data.shape[0])
housing_data.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,target,prediction
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526,-3.896043
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585,2.204695
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521,3.936666
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413,0.018146
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422,6.989847


In [11]:
# Draw two 5000-row samples (without replacement) to act as the reference and
# current datasets. Fixed but different random_state values keep the two
# samples distinct from each other while making them reproducible across runs.
reference = housing_data.sample(n=5000, replace=False, random_state=0)
current = housing_data.sample(n=5000, replace=False, random_state=1)

## Report

In [12]:
# Run the data-drift preset over the reference and current samples.
# Leaving `report` as the last expression displays it in the notebook.
report = Report(metrics=[DataDriftPreset()])
report.run(current_data=current, reference_data=reference)
report

In [13]:
# Column-level metrics for the single feature 'AveRooms': summary statistics,
# the 0.25 quantile, and a drift metric for that column.
column_metrics = [
    ColumnSummaryMetric(column_name='AveRooms'),
    ColumnQuantileMetric(column_name='AveRooms', quantile=0.25),
    ColumnDriftMetric(column_name='AveRooms'),
]

# Rebinds `report`; the export cells that follow operate on this report.
report = Report(metrics=column_metrics)
report.run(current_data=current, reference_data=reference)
report

In [14]:
report.save_html('report.html')

In [15]:
report.as_dict()

{'metrics': [{'metric': 'ColumnSummaryMetric',
   'result': {'column_name': 'AveRooms',
    'column_type': 'num',
    'reference_characteristics': {'number_of_rows': 5000,
     'count': 5000,
     'missing': 0,
     'missing_percentage': 0.0,
     'mean': 5.37,
     'std': 1.88,
     'min': 0.89,
     'p25': 4.46,
     'p50': 5.21,
     'p75': 6.03,
     'max': 56.27,
     'unique': 4905,
     'unique_percentage': 98.1,
     'infinite_count': 0,
     'infinite_percentage': 0.0,
     'most_common': 5.0,
     'most_common_percentage': 0.14},
    'current_characteristics': {'number_of_rows': 5000,
     'count': 5000,
     'missing': 0,
     'missing_percentage': 0.0,
     'mean': 5.45,
     'std': 2.29,
     'min': 1.0,
     'p25': 4.43,
     'p50': 5.24,
     'p75': 6.08,
     'max': 61.81,
     'unique': 4898,
     'unique_percentage': 97.96,
     'infinite_count': 0,
     'infinite_percentage': 0.0,
     'most_common': 5.0,
     'most_common_percentage': 0.2}}},
  {'metric': 'ColumnQua

In [16]:
report.json()

'{"version": "0.4.5", "metrics": [{"metric": "ColumnSummaryMetric", "result": {"column_name": "AveRooms", "column_type": "num", "reference_characteristics": {"number_of_rows": 5000, "count": 5000, "missing": 0, "missing_percentage": 0.0, "mean": 5.37, "std": 1.88, "min": 0.89, "p25": 4.46, "p50": 5.21, "p75": 6.03, "max": 56.27, "unique": 4905, "unique_percentage": 98.1, "infinite_count": 0, "infinite_percentage": 0.0, "most_common": 5.0, "most_common_percentage": 0.14}, "current_characteristics": {"number_of_rows": 5000, "count": 5000, "missing": 0, "missing_percentage": 0.0, "mean": 5.45, "std": 2.29, "min": 1.0, "p25": 4.43, "p50": 5.24, "p75": 6.08, "max": 61.81, "unique": 4898, "unique_percentage": 97.96, "infinite_count": 0, "infinite_percentage": 0.0, "most_common": 5.0, "most_common_percentage": 0.2}}}, {"metric": "ColumnQuantileMetric", "result": {"column_name": "AveRooms", "column_type": "num", "quantile": 0.25, "current": {"value": 4.432022683460071}, "reference": {"value": 

In [None]:
report.save_json('report.json')

## Test Suite 

In [17]:
# Build and run a test suite over the reference and current samples.
# The suite is bound to `suite` because the export cells that follow call
# `suite.as_dict()`, `suite.json()`, `suite.save_html(...)` and
# `suite.save_json(...)`; binding it to any other name makes those cells fail
# with a NameError on a fresh kernel.
suite = TestSuite(tests=[
    TestNumberOfColumnsWithMissingValues(),
    TestNumberOfRowsWithMissingValues(),
    TestNumberOfConstantColumns(),
    TestNumberOfDuplicatedRows(),
    TestNumberOfDuplicatedColumns(),
    TestColumnsType(),
    TestNumberOfDriftedColumns(),
])

suite.run(reference_data=reference, current_data=current)
suite

In [None]:
suite.as_dict()

In [None]:
suite.json()

In [None]:
suite.save_html('test_suite.html')

In [None]:
suite.save_json('test_suite.json')