# Goal
This notebook takes one of the previous training/inference pipelines and ties it to Evidently.

In [1]:
from evidently.ui.workspace import RemoteWorkspace

### Create Project

In [2]:
# Connection settings and project metadata, kept together so they are easy to tune.
WORKSPACE_URL = "http://localhost:8000"
PROJECT_NAME = "Test"
PROJECT_DESCRIPTION = "My project description"

# Connect to the remote workspace and create a project on it.
# NOTE(review): re-running this cell presumably creates another project with
# the same name — confirm before relying on Restart & Run All.
ws = RemoteWorkspace(WORKSPACE_URL)
project = ws.create_project(PROJECT_NAME)

# Set the description on the project object, then call save() to store it.
project.description = PROJECT_DESCRIPTION
project.save()

In [None]:
import pandas as pd
from sklearn import datasets
    
from evidently import Dataset
from evidently import DataDefinition
from evidently import Report
from evidently.presets import DataDriftPreset, DataSummaryPreset


### Get data & create report

In [None]:
# Education levels that select the "prod" slice; rows with any other level
# form the reference slice.
PROD_EDUCATION_LEVELS = ["Some-college", "HS-grad", "Bachelors"]

# Column roles handed to the Evidently data definition.
NUMERICAL_COLUMNS = [
    "education-num",
    "age",
    "capital-gain",
    "hours-per-week",
    "capital-loss",
    "fnlwgt",
]
CATEGORICAL_COLUMNS = [
    "education",
    "occupation",
    "native-country",
    "workclass",
    "marital-status",
    "relationship",
    "race",
    "sex",
    "class",
]

# Fetch the OpenML "adult" dataset (version 2) and take its frame.
adult_data = datasets.fetch_openml(name="adult", version=2, as_frame="auto")
adult = adult_data.frame

# Split the rows by education level using a single boolean mask.
is_prod_education = adult.education.isin(PROD_EDUCATION_LEVELS)
adult_ref = adult[~is_prod_education]
adult_prod = adult[is_prod_education]

schema = DataDefinition(
    numerical_columns=NUMERICAL_COLUMNS,
    categorical_columns=CATEGORICAL_COLUMNS,
)

# Wrap both slices as Evidently datasets that share the same schema.
eval_data_1 = Dataset.from_pandas(pd.DataFrame(adult_prod), data_definition=schema)
eval_data_2 = Dataset.from_pandas(pd.DataFrame(adult_ref), data_definition=schema)

# Run the data drift preset with the prod slice first and the reference slice
# second.
# NOTE(review): presumably the argument order is (current, reference) — confirm
# against the Evidently docs.
report = Report([DataDriftPreset()])
my_eval = report.run(eval_data_1, eval_data_2)


In [10]:
# Add the evaluation result to the project on the workspace.
# NOTE(review): include_data=False presumably skips uploading the underlying
# datasets — confirm against the Evidently docs.
ws.add_run(project.id, my_eval, include_data=False)

### Data analysis

In [11]:
# Summary statistics for the adult_prod slice; include="all" reports both the
# numeric columns (mean/std/quantiles) and the non-numeric ones (unique/top/freq).
df = pd.DataFrame(adult_prod)
df.describe(include="all")

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,class
count,34687.0,32784,34687.0,34687,34687.0,34687,32780,34687,34687,34687,34687.0,34687.0,34687.0,34124,34687
unique,,8,,3,,7,14,6,5,2,,,,41,2
top,,Private,,HS-grad,,Married-civ-spouse,Adm-clerical,Husband,White,Male,,,,United-States,<=50K
freq,,24520,,15784,,15558,4670,13682,29710,22935,,,,31848,26808
mean,37.967509,,188997.4,,10.239023,,,,,,844.265489,83.323925,40.51022,,
std,13.308068,,105374.9,,1.574856,,,,,,6159.971515,391.262436,11.944506,,
min,17.0,,12285.0,,9.0,,,,,,0.0,0.0,1.0,,
25%,27.0,,117618.0,,9.0,,,,,,0.0,0.0,40.0,,
50%,36.0,,177675.0,,10.0,,,,,,0.0,0.0,40.0,,
75%,47.0,,236393.5,,10.0,,,,,,0.0,0.0,45.0,,


In [19]:


# Display the evaluation result as a JSON string (metric ids and their values).
my_eval.json()


'{"metrics": [{"id": "15e89f895b482f9b84ba7274ed18a106", "metric_id": "DriftedColumnsCount(drift_share=0.5)", "value": {"count": 5.0, "share": 0.3333333333333333}}, {"id": "84c249c384495467eda04c510443557a", "metric_id": "ValueDrift(column=education-num)", "value": 0.6176968555693167}, {"id": "1cf7e5c2a8eb2a62a2667e70ee06d259", "metric_id": "ValueDrift(column=age)", "value": 0.18534692319042428}, {"id": "5cf10e0f4a964f3f2c5fbeb480939633", "metric_id": "ValueDrift(column=capital-gain)", "value": 0.0817732650223179}, {"id": "2e8359ba1142f9250d66ab64b4bde735", "metric_id": "ValueDrift(column=hours-per-week)", "value": 0.08859914569715911}, {"id": "732ea745dc73e93174c7cab970ff7513", "metric_id": "ValueDrift(column=capital-loss)", "value": 0.03378837284314874}, {"id": "e4e7c5caf091394a8fba77dca1af3931", "metric_id": "ValueDrift(column=fnlwgt)", "value": 0.02364332069106699}, {"id": "3a4389f62f6a9720415e27025a4f8bd6", "metric_id": "ValueDrift(column=education)", "value": 0.8325546111576977},