**Monitoring the performance of a Regression model using Evidently AI**

In [12]:
#Importing Evidently library for monitoring the model
try:
    import evidently
except:
    !pip install git+https://github.com/evidentlyai/evidently.git

In [13]:
#Importing the Basic libraries
import pandas as pd
import numpy as np
#import requests
#import zipfile
#import io

#from datetime import datetime, time
from sklearn import ensemble

#Importing the required classes from Evidently
from evidently import ColumnMapping
from evidently.report import Report
from evidently.metric_preset import DataDriftPreset, TargetDriftPreset, RegressionPreset

In [14]:
#Load the AirBnb listings CSV into a pandas dataframe.
#The location is kept in one named constant so it is easy to change.
DATA_PATH = "/content/AirBnb_listings.csv"
df = pd.read_csv(DATA_PATH)

In [15]:
#Preview the first 5 rows to sanity-check that the file was parsed as expected
df.head()

Unnamed: 0,latitude,longitude,is_location_exact,property_type,room_type,accommodates,bathrooms,bedrooms,beds,bed_type,...,availability_365,number_of_reviews,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,host_listing_count
0,40.756852,-73.964754,1,0,0,2,1.0,1,1,4,...,322,62,86,9,7,9,9,10,9,1
1,40.830599,-73.941014,0,0,0,10,1.0,3,3,4,...,348,22,85,8,8,9,8,7,8,2
2,40.692189,-73.92412,1,0,1,2,1.0,1,2,4,...,227,35,98,10,10,10,10,9,10,4
3,40.734751,-74.002592,1,0,0,2,1.0,1,1,4,...,274,26,96,10,9,10,10,10,9,1
4,40.745282,-73.997836,1,0,0,2,1.0,1,2,4,...,365,1,100,10,10,10,10,10,10,1


In [16]:
#Display the (rows, columns) shape of the full dataframe
df.shape

(18131, 28)

In [17]:
#Name of the dependent variable (ground truth) column in the dataset
target = 'price'

#Name of the column that will hold the model's predicted value
prediction = 'prediction'

#Numerical feature columns, grouped by theme (order is unchanged)
numerical_features = [
    #location
    'latitude', 'longitude',
    #size of the listing
    'accommodates', 'bathrooms', 'bedrooms', 'beds',
    #booking terms
    'guests_included', 'extra_people', 'minimum_nights', 'maximum_nights',
    #availability windows
    'availability_30', 'availability_60', 'availability_90', 'availability_365',
    #reviews
    'number_of_reviews',
    'review_scores_rating', 'review_scores_accuracy', 'review_scores_cleanliness',
    'review_scores_checkin', 'review_scores_communication',
    'review_scores_location', 'review_scores_value',
    #host
    'host_listing_count',
]

#Columns deliberately left out of the feature lists (kept here for reference):
#  dates:       'calendar_last_scraped', 'first_review', 'last_review'
#  categorical: 'is_location_exact', 'property_type', 'room_type', 'bed_type'

In [18]:
#Shuffle the rows so the reference/current split below is random.
#A fixed random_state makes the shuffle (and therefore every report that
#follows) reproducible after Restart & Run All; without it each run produced
#a different split even though the regressor itself is seeded.
df = df.sample(frac=1, random_state=0)

In [19]:
#Split the shuffled data: the first REFERENCE_SIZE rows train the model
#(reference), the remaining rows are used to evaluate its performance (current).
#.copy() makes both frames independent of df, so adding columns to them later
#does not raise pandas' SettingWithCopyWarning.
REFERENCE_SIZE = 12000
reference = df.iloc[:REFERENCE_SIZE].copy()
current = df.iloc[REFERENCE_SIZE:].copy()

In [20]:
#Display the (rows, columns) shape of the reference dataframe (training rows)
reference.shape

(12000, 28)

In [21]:
#Display the (rows, columns) shape of the current dataframe (evaluation rows)
current.shape

(6131, 28)

In [22]:
#Random forest regressor with 50 trees; the fixed seed keeps training repeatable
regressor = ensemble.RandomForestRegressor(n_estimators=50, random_state=0)

In [23]:
#Train the model using the reference dataframe
regressor.fit(reference#[numerical_features + categorical_features]
              , reference[target])

In [24]:
#Make the model predict using the reference dataframe
ref_prediction = regressor.predict(reference#[numerical_features + categorical_features]
                                   )

#Make the model predict using the current dataframe
current_prediction = regressor.predict(current#[numerical_features + categorical_features]
                                       )

In [25]:
#Storing the predictions in the reference and current dataframes
reference['prediction'] = ref_prediction
current['prediction'] = current_prediction

In [26]:
#Describe the dataframe layout to Evidently in one step: which column is the
#target, which one holds the prediction, and which ones are numerical features.
#No categorical features are declared.
column_mapping = ColumnMapping(
    target=target,
    prediction=prediction,
    numerical_features=numerical_features,
)

In [27]:
#Evaluate the regression model on the reference dataframe alone.
#The reference rows are passed as current_data and reference_data is None,
#so a single dataset is reported on, with nothing to compare it against.
#These are the rows the regressor was fitted on, i.e. in-sample performance.
regression_perfomance = Report(metrics=[RegressionPreset()])
regression_perfomance.run(current_data=reference, reference_data=None, column_mapping=column_mapping)



In [28]:
#Render the regression performance report computed in the previous cell
regression_perfomance.show()

Output hidden; open in https://colab.research.google.com to view.

In [29]:
#Model performance on the first batch of the current dataframe
#(positions 0-1999), compared against the reference dataframe
current_batch = current.iloc[:2000]

regression_perfomance = Report(metrics=[RegressionPreset()])
regression_perfomance.run(
    reference_data=reference,
    current_data=current_batch,
    column_mapping=column_mapping,
)

regression_perfomance.show()

Output hidden; open in https://colab.research.google.com to view.

In [30]:
#Drift of the dependent variable on the first batch of the current dataframe
#(positions 0-1999), compared against the reference dataframe
current_batch = current.iloc[:2000]

target_drift = Report(metrics=[TargetDriftPreset()])
target_drift.run(
    reference_data=reference,
    current_data=current_batch,
    column_mapping=column_mapping,
)

target_drift.show()

Output hidden; open in https://colab.research.google.com to view.

In [31]:
#Model performance on the second batch of the current dataframe
#(positions 2000-3999), compared against the reference dataframe
current_batch = current.iloc[2000:4000]

regression_perfomance = Report(metrics=[RegressionPreset()])
regression_perfomance.run(
    reference_data=reference,
    current_data=current_batch,
    column_mapping=column_mapping,
)

regression_perfomance.show()

Output hidden; open in https://colab.research.google.com to view.

In [32]:
#Drift of the dependent variable on the second batch of the current dataframe
#(positions 2000-3999), compared against the reference dataframe
current_batch = current.iloc[2000:4000]

target_drift = Report(metrics=[TargetDriftPreset()])
target_drift.run(
    reference_data=reference,
    current_data=current_batch,
    column_mapping=column_mapping,
)

target_drift.show()

Output hidden; open in https://colab.research.google.com to view.

In [33]:
#Model performance on the last batch of the current dataframe
#(position 4000 to the end), compared against the reference dataframe
current_batch = current.iloc[4000:]

regression_perfomance = Report(metrics=[RegressionPreset()])
regression_perfomance.run(
    reference_data=reference,
    current_data=current_batch,
    column_mapping=column_mapping,
)

regression_perfomance.show()

Output hidden; open in https://colab.research.google.com to view.

In [34]:
#Drift of the dependent variable on the last batch of the current dataframe
#(position 4000 to the end), compared against the reference dataframe
current_batch = current.iloc[4000:]

target_drift = Report(metrics=[TargetDriftPreset()])
target_drift.run(
    reference_data=reference,
    current_data=current_batch,
    column_mapping=column_mapping,
)

target_drift.show()

Output hidden; open in https://colab.research.google.com to view.

In [35]:
#Data drift of the numerical features on the first batch of the current
#dataframe (positions 0-1999), compared against the reference dataframe.
#A separate mapping object with its own name is used here: it declares only the
#numerical features, and rebinding the shared `column_mapping` would silently
#drop the target/prediction settings for any earlier report cell that is re-run.
drift_column_mapping = ColumnMapping()
drift_column_mapping.numerical_features = numerical_features

data_drift = Report(metrics=[DataDriftPreset()])
data_drift.run(current_data=current.iloc[:2000],
               reference_data=reference,
               column_mapping=drift_column_mapping)

data_drift.show()

Output hidden; open in https://colab.research.google.com to view.