## Baseline model with monitoring

In [None]:
import os
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error

In [None]:
from evidently import ColumnMapping
from evidently.report import Report
from evidently.metrics import ColumnDriftMetric, DatasetDriftMetric, DatasetMissingValuesMetric, DatasetCorrelationsMetric

In [None]:
from joblib import load, dump
from tqdm import tqdm

In [None]:
# Absolute location of the hourly CSV, resolved against the current working directory.
dataset_path = os.path.abspath(os.path.join('data', 'hour.csv'))

In [None]:
# Load the raw dataset into memory and preview its first rows.
df = pd.read_csv(filepath_or_buffer=dataset_path)
df.head()

In [None]:
# Summary statistics, transposed so each column of `df` becomes one row.
df.describe().transpose()

In [None]:
# Input columns fed to the model; the order here is the column order of X.
features = [
    'season',
    'holiday',
    'workingday',
    'weathersit',
    'temp',
    'atemp',
    'hum',
    'windspeed',
    'hr',
    'mnth',
    'yr',
]

In [None]:
# (rows, columns) of the full dataset as loaded from the CSV.
df.shape

In [None]:
# Split the frame into the model inputs (X) and the regression target (y, the 'cnt' column).
X, y = df[features], df['cnt']
X.shape

In [None]:
# Baseline model: scikit-learn LinearRegression constructed with its default settings.
model = LinearRegression()

In [None]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Fit the baseline model on the training split.
model.fit(X_train, y_train)

In [None]:
# Predict on the training split exactly once. Selecting `features` explicitly
# keeps this cell re-runnable: after the first run X_train also carries the
# 'prediction' column, which must not be fed back into the model.
train_pred = model.predict(X_train[features])
X_train_preds = train_pred  # kept as an alias for the original variable name

# `assign` returns a new frame instead of writing into the split in place,
# which avoids pandas' SettingWithCopyWarning on frames derived from `df`.
X_train = X_train.assign(prediction=X_train_preds)

In [None]:
# Predict on the held-out split exactly once. Selecting `features` explicitly
# keeps this cell re-runnable: after the first run X_test also carries the
# 'prediction' column, which must not be fed back into the model.
test_pred = model.predict(X_test[features])
X_test_preds = test_pred  # kept as an alias for the original variable name

# `assign` returns a new frame instead of writing into the split in place,
# which avoids pandas' SettingWithCopyWarning on frames derived from `df`.
X_test = X_test.assign(prediction=X_test_preds)

In [None]:
# Mean absolute error on the training split, then on the held-out split,
# to compare in-sample and out-of-sample fit.
train_mae = mean_absolute_error(y_true=y_train, y_pred=train_pred)
print(train_mae)

test_mae = mean_absolute_error(y_true=y_test, y_pred=test_pred)
print(test_mae)

In [None]:
# Persist the fitted model with joblib. The target directory is created first
# so a fresh checkout without a `models/` folder does not fail on open().
os.makedirs('models', exist_ok=True)
with open('models/lin_reg.bin', 'wb') as f_out:
    dump(model, f_out)

In [None]:
# Save the held-out features together with their predictions as the reference dataset.
# NOTE(review): the DataFrame index is written too (pandas default), so readers
# will see an extra leading unnamed column unless they pass index_col=0.
X_test.to_csv(path_or_buf='data/reference.csv')

---

## Evidently Report

In [None]:
# Tell Evidently how to interpret each column. Integer-coded category columns
# are declared as categorical so that drift tests and quality statistics
# treat them as discrete labels rather than continuous measurements.
# NOTE(review): the categorical list follows the public Bike Sharing data
# dictionary (season, holiday, workingday, weathersit are coded categories);
# confirm whether 'hr', 'mnth' and 'yr' should move here as well.
categorical_features = ['season', 'holiday', 'workingday', 'weathersit']

# Everything else in `features` stays numerical; deriving the list keeps the
# two groups in sync if `features` changes.
numerical_features = [name for name in features if name not in categorical_features]

column_mapping = ColumnMapping(
    target=None,  # no ground-truth column is present in the monitored frames
    prediction='prediction',
    numerical_features=numerical_features,
    categorical_features=categorical_features,
)

In [None]:
# Metrics for the drift report, in the order they appear in `report.as_dict()['metrics']`:
# [0] drift of the 'prediction' column, [1] dataset-level drift, [2] missing values.
drift_metrics = [
    ColumnDriftMetric(column_name='prediction'),
    DatasetDriftMetric(),
    DatasetMissingValuesMetric(),
]
report = Report(metrics=drift_metrics)

In [None]:
# Compare the held-out split (current) against the training split (reference).
report.run(
    reference_data=X_train,
    current_data=X_test,
    column_mapping=column_mapping,
)

In [None]:
# Render the computed report using the 'inline' display mode.
report.show('inline')

In [None]:
# Export the report as a plain dictionary so individual metric values can be read programmatically.
result = report.as_dict()
result

In [None]:
# Prediction drift: the first entry in `result['metrics']` corresponds to the
# first metric passed to the report (the column drift metric on 'prediction').
prediction_drift = result['metrics'][0]['result']
prediction_drift['drift_score']

In [None]:
# Dataset-level drift: the second entry matches the second metric passed to the report (DatasetDriftMetric).
result['metrics'][1]['result']

---

## Evidently Dashboard

In [None]:
from evidently.metric_preset import DataDriftPreset, DataQualityPreset
from evidently.ui.workspace import Workspace
from evidently.ui.dashboards import DashboardPanelCounter, DashboardPanelPlot, CounterAgg, PanelValue, PlotType,ReportFilter
from evidently.renderers.html_widgets import WidgetSize

In [None]:
# Evidently workspace addressed by the relative path 'workspace'.
# NOTE(review): presumably this is where projects and report snapshots are stored on disk — confirm against the Evidently version in use.
ws = Workspace('workspace')

In [None]:
# Register a project in the workspace, attach a human-readable description, and persist it.
# NOTE(review): every run of this cell calls create_project again — check whether
# that produces duplicate projects with the same name in the workspace.
project_description = ''.join([
    'This project aims to improve the data quality of the Bike Sharing dataset. ',
    'Tasks include data cleaning, handling missing values, outlier detection, ',
    'and generating data quality reports to enhance dataset reliability for analysis.',
])

project = ws.create_project('Bike Sharing Data Quality Project')
project.description = project_description
project.save()

In [None]:
# Data-quality snapshot of the current batch only: no reference data is supplied,
# so the report describes X_test on its own.
quality_metrics = [
    DataQualityPreset(),
    DatasetCorrelationsMetric(),
]
regular_report = Report(metrics=quality_metrics)
regular_report.run(
    current_data=X_test,
    reference_data=None,
    column_mapping=column_mapping,
)

# Display the report
regular_report.show()

In [None]:
# Attach the computed data-quality report to the project in the workspace.
ws.add_report(project.id, regular_report)

In [None]:
# configure dashboard
project.dashboard.add_panel(
    DashboardPanelCounter(
    filter = ReportFilter(metadata_values = {}, tag_values = []),
        agg = CounterAgg.NONE,
        title = 'Bike sharing data dashboard')
    )


project.dashboard.add_panel(
    DashboardPanelPlot(
    filter = ReportFilter(metadata_values = {}, tag_values = []),
        title = 'Inference Count',
        values  = [PanelValue(
            metric_id = 'DatasetSummaryMetric',
            field_path = 'current.number_of_rows',
            legend = 'count'),
                  ], 
        plot_type = PlotType.BAR,
        size = WidgetSize.HALF
    ),

    )


project.dashboard.add_panel(
    DashboardPanelPlot(
    filter = ReportFilter(metadata_values = {}, tag_values = []),
        title = 'Number of Missing Values',
        values  = [PanelValue(
            metric_id = 'DatasetSummaryMetric',
            field_path = 'current.number_of_missing_values',
            legend = 'count'),
                  ], 
        plot_type = PlotType.LINE,
        size = WidgetSize.HALF,
    )
    )

project.save()