# Getting Started Tutorial

To install Evidently using the pip package manager, run:

```$ pip install evidently```


If you want to see reports inside a Jupyter notebook, you also need to install the Jupyter nbextension. After installing Evidently, run the following two commands in a terminal from the Evidently directory.

To install the Jupyter nbextension, run:

```$ jupyter nbextension install --sys-prefix --symlink --overwrite --py evidently```

To enable it, run:

```$ jupyter nbextension enable evidently --py --sys-prefix```

That's it!

In [1]:
import os

# Move the working directory up one level (presumably so the `CreditCard`
# package imported in the next cell resolves — confirm the notebook location).
# NOTE(review): not idempotent — every re-run climbs one more directory.
os.chdir(os.pardir)

In [2]:
from CreditCard.constants import DATABASE_FILE
from CreditCard.utils import read_yaml
from pathlib import Path

In [3]:
# Display the path that the project's DATABASE_FILE constant resolves to.
DATABASE_FILE

WindowsPath('c:/Users/princ/OneDrive/Desktop/project-python/creditcard/configs/database.yaml')

In [4]:
from CreditCard.Database import MongoDB

In [5]:
# Create a handle through the project's MongoDB wrapper.
# NOTE(review): "test" is presumably the collection/database name — confirm
# against CreditCard.Database.
test_cont = MongoDB("test")

2023-03-16 14:33:37.000 | INFO     | CreditCard.utils.common:read_yaml:34 - yaml file: c:\Users\princ\OneDrive\Desktop\project-python\creditcard\configs\database.yaml loaded successfully
2023-03-16 14:33:37.070 | DEBUG    | CreditCard.Database:__init__:27 - connection to mongo db successful
2023-03-16 14:33:37.071 | DEBUG    | CreditCard.Database:__init__:37 - connection to mongo db successful


In [1]:
import pandas as pd 

In [2]:
# Load test.csv from the Stage00 data-ingestion artifacts.
# NOTE(review): absolute, machine-specific path — only resolves on the
# original author's machine.
df = pd.read_csv(
    filepath_or_buffer=r'C:\Users\princ\OneDrive\Desktop\project-python\creditcard\artifact\Stage00_data_ingestion\ingested_data\test.csv',
)

In [3]:
# Load the raw UCI credit-card CSV into its own frame.
# The previous chained assignment (`df1 = df = ...`) also rebound `df`, silently
# discarding the test.csv frame loaded in the preceding cell and making every
# later df-vs-df1 comparison compare an object with itself.
df1 = pd.read_csv(r'C:\Users\princ\OneDrive\Desktop\project-python\creditcard\artifact\Stage00_data_ingestion\raw_data\UCI_Credit_Card.csv')

In [22]:
# `TestSuite` and the test classes are only imported much further down the
# notebook, so this cell raised NameError on a fresh kernel. Import exactly
# what it needs here.
from evidently.test_suite import TestSuite
from evidently.tests import TestNumberOfDuplicatedRows

# Single check: number of fully duplicated rows.
tests = TestSuite(tests=[
    TestNumberOfDuplicatedRows(),
])


In [32]:
# Run the suite using the same frame as both reference and current data.
tests.run(current_data=df, reference_data=df)

In [4]:
# Show cell-level differences between df1 and df (an empty result means no differences).
# NOTE(review): DataFrame.compare only accepts identically-labelled frames and
# raises otherwise.
df1.compare(df)

In [27]:
# Count rows that repeat an earlier row across all columns.
df1.duplicated(subset=None, keep="first").sum()

0

In [31]:
# Preview df1 with duplicate rows removed (first occurrence kept).
# The result is only displayed — df1 itself is not modified.
df1.drop_duplicates()

Unnamed: 0,ID,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,...,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,default.payment.next.month
0,1,20000.0,2,2,1,24,2,2,-1,-1,...,0.0,0.0,0.0,0.0,689.0,0.0,0.0,0.0,0.0,1
1,2,120000.0,2,2,2,26,-1,2,0,0,...,3272.0,3455.0,3261.0,0.0,1000.0,1000.0,1000.0,0.0,2000.0,1
2,3,90000.0,2,2,2,34,0,0,0,0,...,14331.0,14948.0,15549.0,1518.0,1500.0,1000.0,1000.0,1000.0,5000.0,0
3,4,50000.0,2,2,1,37,0,0,0,0,...,28314.0,28959.0,29547.0,2000.0,2019.0,1200.0,1100.0,1069.0,1000.0,0
4,5,50000.0,1,2,1,57,-1,0,-1,0,...,20940.0,19146.0,19131.0,2000.0,36681.0,10000.0,9000.0,689.0,679.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29995,29996,220000.0,1,3,1,39,0,0,0,0,...,88004.0,31237.0,15980.0,8500.0,20000.0,5003.0,3047.0,5000.0,1000.0,0
29996,29997,150000.0,1,3,2,43,-1,-1,-1,-1,...,8979.0,5190.0,0.0,1837.0,3526.0,8998.0,129.0,0.0,0.0,0
29997,29998,30000.0,1,2,2,37,4,3,2,-1,...,20878.0,20582.0,19357.0,0.0,0.0,22000.0,4200.0,2000.0,3100.0,1
29998,29999,80000.0,1,3,1,41,1,-1,0,0,...,52774.0,11855.0,48944.0,85900.0,3409.0,1178.0,1926.0,52964.0,1804.0,1


In [24]:
# Suite results as a plain dict: per-test entries plus an overall summary.
tests.as_dict()

{'tests': [{'name': 'Number of Duplicate Rows',
   'description': 'The number of duplicate rows is 0. The test threshold is eq=0 ± 1e-12.',
   'status': 'SUCCESS',
   'group': 'data_integrity',
   'parameters': {'condition': {'eq': 0.0 ± 1e-12},
    'number_of_duplicated_rows': 0}}],
 'summary': {'all_passed': True,
  'total_tests': 1,
  'success_tests': 1,
  'failed_tests': 0,
  'by_status': Counter({'SUCCESS': 1})}}

In [20]:
# (rows, columns) of df1.
df1.shape

(24000, 25)

In [None]:
# Turn the frame into one dict per row and hand the list to the Mongo wrapper.
# NOTE(review): presumably appends on every run — confirm before re-executing.
test_cont.Insert_Many(df.to_dict("records"))

In [7]:
# Read the stored documents back from MongoDB as a DataFrame
# (per the wrapper's own log message).
data = test_cont.find_many_as_df()

2023-03-16 14:33:53.739 | DEBUG    | CreditCard.Database:find_many_as_df:141 - find many data from mongo db and return as dataframe


In [9]:
# (rows, columns) of the frame read back from MongoDB.
data.shape

(6000, 25)

In [13]:
# Round-trip check: the frame read back from MongoDB must match df exactly.
df.equals(data)

True

In [None]:
# Re-wrap the fetched data as a DataFrame and rebind `df` to it.
df = pd.DataFrame(data)

In [None]:
# Preview only the first rows instead of rendering the whole frame, which
# bloats the saved notebook and can expose full records in the output.
df.head()

In [None]:
try:
    import evidently
except:
    !pip install git+https://github.com/evidentlyai/evidently.git -q

In [17]:
import pandas as pd
import numpy as np

from sklearn.datasets import fetch_california_housing

from evidently import ColumnMapping

from evidently.report import Report
from evidently.metrics.base_metric import generate_column_metrics
from evidently.metric_preset import DataDriftPreset, TargetDriftPreset, DataQualityPreset, RegressionPreset
from evidently.metrics import *

from evidently.test_suite import TestSuite
from evidently.tests.base_test import generate_column_tests
from evidently.test_preset import DataStabilityTestPreset, NoTargetPerformanceTestPreset
from evidently.tests import *

In [18]:
import warnings

# Suppress all warnings for the remainder of the session. Both filter entry
# points are called, exactly as before.
warnings.filterwarnings(action='ignore')
warnings.simplefilter(action='ignore')

## Load Data

In [19]:
# Fetch the California housing dataset (as_frame=True) and take its DataFrame.
data = fetch_california_housing(as_frame=True)
housing_data = data["frame"]

In [20]:
# Rename the label column to `target` and add a synthetic `prediction`
# column: target plus Gaussian noise (mean 0, std 5).
# Changes from the original:
# - the noise comes from a seeded Generator, so reports are reproducible
#   across kernel restarts;
# - the rename returns a new frame instead of mutating in place.
housing_data = housing_data.rename(columns={'MedHouseVal': 'target'})
rng = np.random.default_rng(42)
housing_data['prediction'] = housing_data['target'].values + rng.normal(0, 5, housing_data.shape[0])

In [21]:
# Draw two 5,000-row samples (each without replacement; the two may overlap).
# Fixed but different seeds keep the split reproducible while still giving
# `reference` and `current` different rows.
reference = housing_data.sample(n=5000, replace=False, random_state=0)
current = housing_data.sample(n=5000, replace=False, random_state=1)

## Report

In [22]:
# Build and run a data-drift report of `current` against `reference`.
report = Report(metrics=[DataDriftPreset()])
report.run(current_data=current, reference_data=reference)

In [23]:
from box import ConfigBox

# Wrap the report dict for attribute-style access.
report_data = ConfigBox(report.as_dict())
# Always bind `drift_status`. The original assigned it only inside an `if`,
# so the name stayed undefined (NameError on later use) whenever no drift was
# detected — which is the case in the recorded run.
drift_status = bool(report_data.metrics[0].result.dataset_drift)

In [25]:
# Dataset-level drift flag from the first metric entry of the report.
report_data.metrics[0].result.dataset_drift

False

In [10]:
from CreditCard.utils import write_yaml , read_yaml
from CreditCard.constants import ROOT_DIR , CONFIG_DIR 
import os 
from pathlib import Path

In [11]:
# Dump the report dict to <CONFIG_DIR>/drift_report.yaml via the project's
# YAML helper. Note that this rebinds the name `data`.
data = report.as_dict()
file_path = Path(CONFIG_DIR, "drift_report.yaml")
write_yaml(file_path=file_path, data=data)

In [12]:
# Persist the report as JSON in the config directory.
report_json_path = os.path.join(CONFIG_DIR, "report.json")
report.save_json(report_json_path)

In [13]:
from box import ConfigBox

# Wrap the report dict so nested keys can be read with attribute access.
data1 = ConfigBox(data)

In [16]:
# Dataset-level drift flag from the first metric entry.
data1.metrics[0].result.dataset_drift

False

In [14]:
# Dataset-level drift flag from the second metric entry.
data1.metrics[1].result.dataset_drift

False

In [None]:
# Column-level report for `AveRooms`: summary, 0.25 quantile and drift.
report = Report(
    metrics=[
        ColumnSummaryMetric(column_name='AveRooms'),
        ColumnQuantileMetric(quantile=0.25, column_name='AveRooms'),
        ColumnDriftMetric(column_name='AveRooms'),
    ]
)
report.run(current_data=current, reference_data=reference)
report

In [None]:
# Generate the 0.25-quantile metric for each listed column in one call.
report = Report(
    metrics=[
        generate_column_metrics(
            ColumnQuantileMetric,
            columns=['AveRooms', 'AveBedrms'],
            parameters={'quantile': 0.25},
        ),
    ]
)
report.run(current_data=current, reference_data=reference)
report

In [None]:
# Mix a single-column metric, generated per-column metrics and a preset.
# NOTE(review): columns='num' presumably selects all numerical columns —
# confirm in the Evidently docs.
report = Report(
    metrics=[
        ColumnSummaryMetric(column_name='AveRooms'),
        generate_column_metrics(
            ColumnQuantileMetric,
            columns='num',
            parameters={'quantile': 0.25},
        ),
        DataDriftPreset(),
    ]
)
report.run(current_data=current, reference_data=reference)
report

In [None]:
# Report results as a Python dict.
report.as_dict()

In [None]:
# Report results in JSON form (presumably a JSON string — confirm in the Evidently docs).
report.json()

In [None]:
#report.save_html('report.html')

In [None]:
#report.save_json('report.json')

## Test Suite 

In [None]:
# Instantiate each check with its default settings, in the listed order,
# then run the suite on the reference/current pair.
tests = TestSuite(tests=[
    check()
    for check in (
        TestNumberOfColumnsWithMissingValues,
        TestNumberOfRowsWithMissingValues,
        TestNumberOfConstantColumns,
        TestNumberOfDuplicatedRows,
        TestNumberOfDuplicatedColumns,
        TestColumnsType,
        TestNumberOfDriftedColumns,
    )
])
tests.run(current_data=current, reference_data=reference)
tests

In [None]:
# Write the test-suite results as HTML to <CONFIG_DIR>/test.html.
tests.save_html(os.path.join(CONFIG_DIR, "test.html"))

In [None]:
# Write the test-suite results as JSON to <CONFIG_DIR>/test.json.
json_save = os.path.join(CONFIG_DIR, "test.json")
tests.save_json(json_save)

In [None]:
# NOTE(review): `json` is imported here but not used in this cell.
import json
# Materialise the suite results and display the type of the returned object.
data_dic =  tests.as_dict()
type(data_dic)

In [None]:
# Wrap the suite results for attribute-style access.
data_test = ConfigBox(tests.as_dict())

In [None]:
# Overall verdict from the suite summary: whether all tests passed.
data_test.summary.all_passed 

In [None]:
# Combined data-drift and data-quality report on the same reference/current pair.
report = Report(metrics=[DataDriftPreset(), DataQualityPreset()])
report.run(current_data=current, reference_data=reference)

In [None]:
# Write the report as HTML. The path is relative, so the file lands in the
# current working directory.
report.save_html("report.html")

In [None]:
# Write the report as JSON. The path is relative, so the file lands in the
# current working directory.
report.save_json("report.json")

In [None]:
# Wrap the report dict for attribute-style access.
# NOTE: this rebinds `data`, which referred to other objects earlier in the notebook.
data = ConfigBox( report.as_dict())

In [None]:
# Number of metric entries in the report.
len(data.metrics)

In [None]:
# Dataset-level drift flag from the first metric entry.
data.metrics[0].result.dataset_drift

In [None]:
# Run the no-target performance preset on the reference/current pair.
suite = TestSuite(tests=[NoTargetPerformanceTestPreset()])
suite.run(current_data=current, reference_data=reference)
suite

In [None]:
# Two single-column tests plus the no-target preset restricted to three columns.
suite = TestSuite(
    tests=[
        TestColumnDrift('Population'),
        TestMeanInNSigmas('HouseAge'),
        NoTargetPerformanceTestPreset(
            columns=['AveRooms', 'AveBedrms', 'AveOccup'],
        ),
    ]
)
suite.run(current_data=current, reference_data=reference)
suite

In [None]:
# Two `Population` tests plus a generated TestMeanInNSigmas per column.
# NOTE(review): columns='num' presumably selects all numerical columns —
# confirm in the Evidently docs.
suite = TestSuite(
    tests=[
        TestColumnDrift('Population'),
        TestShareOfOutRangeValues('Population'),
        generate_column_tests(TestMeanInNSigmas, columns='num'),
    ]
)
suite.run(current_data=current, reference_data=reference)
suite

In [None]:
# Suite results as a Python dict.
suite.as_dict()

In [None]:
# Suite results in JSON form (presumably a JSON string — confirm in the Evidently docs).
suite.json()

In [None]:
#suite.save_html('test_suite.html')

In [None]:
#suite.save_json('test_suite.json')

In [1]:
import numpy as np 

In [5]:
# 100 x 2 array of float zeros.
x = np.zeros(shape=(100, 2))

In [6]:
# Second row of x (index 1).
x[1]

array([0., 0.])