For context, see the previous notebook, i.e. 'linRegModel - scikit+plotly.ipynb'.

# Recap code...

## Fitted linear regression model

In [205]:
from sklearn import datasets
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score
#------------------------
# Load the Boston dataset
# NOTE(review): load_boston is reported as deprecated/removed in newer
# scikit-learn releases -- confirm against the installed version.
myData = datasets.load_boston()

# Assigning identifiers to hold the data and target field values respectively...
x_all = myData.data # Could equivalently use myData['data']
y_all = myData.target # Could equivalently use myData['target']
#------------------------
# Training data (choosing first 30 rows for this purpose)
x_training = x_all[:30]
y_training = y_all[:30]

# Testing data (choosing last 20 rows for this purpose)
x_testing = x_all[-20:]
y_testing = y_all[-20:]
#------------------------
# Creating linear regression model object
lrm = linear_model.LinearRegression()

# Using the fit method of the LinearRegression class to fit the model
# to the training rows selected above.
lrm.fit(x_training, y_training)
#------------------------
# Getting predicted target values for the testing rows.
# Each row of x_testing is passed through the fitted model, which returns
# one predicted target value per row.
y_predicted = lrm.predict(x_testing)

# Comparing the predictions against the actual testing targets
print("Mean squared error =", mean_squared_error(y_testing, y_predicted))
print("R-squared =", r2_score(y_testing, y_predicted))

Mean squared error = 255.8368395932173
R-squared = -9.942504077964477


## Graph of fitted regression model

In [206]:
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
#------------------------
# DATAFRAME FEEDING BOTH PLOTS
# plotly.express plotting functions are given a dataframe first; the x and y
# arguments then name columns of that dataframe.
# x_testing is a 2D array, so list() is applied to it to get one entry
# (one row of feature values) per record.
d = pd.DataFrame({
    "x_testing": list(x_testing),
    "y_testing": y_testing,
    "y_predicted": y_predicted,
})
#------------------------
# SCATTER PLOT (actual testing targets) AND LINE PLOT (model predictions)
# Each call creates its own figure object.
f1 = px.scatter(d, x = "x_testing", y = "y_testing")
f2 = px.line(d, x = "x_testing", y = "y_predicted")
#------------------------
# DISPLAYING THE PLOTS TOGETHER
# A third figure object holds the traces of both figures above.
f = go.Figure(data = (*f1.data, *f2.data))
f.show()

# Presenting graphs using dash

In [207]:
import dash
from dash import dcc
from dash import html
import plotly.express as px

## Some additional features before making the Dash application

(Not strictly necessary, but desirable)...

### Data table (for presenting the dataset)

**Modules necessary for this...**

In [210]:
import pandas as pd
from dash import dash_table

**We will use the feature names given for the dataset as column headings...**

In [216]:
# Heading, then one feature name per line (the dataset has several features)
print("Feature names\n------------\n")
for feature_name in myData.feature_names:
    print(feature_name)

Feature names
------------

CRIM
ZN
INDUS
CHAS
NOX
RM
AGE
DIS
RAD
TAX
PTRATIO
B
LSTAT


Note that there is one target, which we know to be named "MEDV" in the dataset (median value of owner-occupied homes, measured in $1000's).

**Creating a suitably small dataframe from the dataset used above (Boston dataset)...**

In [212]:
# Number of leading rows shown in the preview table
PREVIEW_ROWS = 10

# One column per feature, labelled "<feature name><column index>".
# Iterating over feature_names (instead of a hard-coded count of 13) keeps the
# loop in step with whatever dataset myData holds.
preview_columns = {
    f"{feature_name}{col_idx}": x_all[:PREVIEW_ROWS, col_idx]
    for col_idx, feature_name in enumerate(myData.feature_names)
}
# Target (there is only 1 target field here, named MEDV)
preview_columns["*MEDV"] = y_all[:PREVIEW_ROWS]
#------------------------
# Creating the dataframe used by the Dash data table below
d = pd.DataFrame(preview_columns)
d

Unnamed: 0,CRIM0,ZN1,INDUS2,CHAS3,NOX4,RM5,AGE6,DIS7,RAD8,TAX9,PTRATIO10,B11,LSTAT12,*MEDV
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2
5,0.02985,0.0,2.18,0.0,0.458,6.43,58.7,6.0622,3.0,222.0,18.7,394.12,5.21,28.7
6,0.08829,12.5,7.87,0.0,0.524,6.012,66.6,5.5605,5.0,311.0,15.2,395.6,12.43,22.9
7,0.14455,12.5,7.87,0.0,0.524,6.172,96.1,5.9505,5.0,311.0,15.2,396.9,19.15,27.1
8,0.21124,12.5,7.87,0.0,0.524,5.631,100.0,6.0821,5.0,311.0,15.2,386.63,29.93,16.5
9,0.17004,12.5,7.87,0.0,0.524,6.004,85.9,6.5921,5.0,311.0,15.2,386.71,17.1,18.9


**Creating the Dash data table element from the above dataframe**

In [213]:
# Column specification: each dataframe column is used as both the displayed
# name and the id of a table column.
table_columns = [{"name": col, "id": col} for col in d.columns]
# Row data: one dictionary per dataframe row
table_rows = d.to_dict('records')

dataTable = dash_table.DataTable(
    id='dataTable',
    columns=table_columns,
    data=table_rows)

### Information table (for describing each attribute name)

In [247]:
# (attribute, description) pairs, kept side by side so that each attribute
# stays next to its own description.
attribute_notes = [
    ('CRIM', "Per capita crime rate by town"),
    ('ZN', "Proportion of residential land zoned for lots over 25,000 sq.ft."),
    ('INDUS', "Proportion of non-retail business acres per town"),
    ('CHAS', "Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)"),
    ('NOX', "Nitric oxides concentration (parts per 10 million)"),
    ('RM', "Average number of rooms per dwelling"),
    ('AGE', "Proportion of owner-occupied units built prior to 1940"),
    ('DIS', "Weighted distances to five Boston employment centres"),
    ('RAD', "Index of accessibility to radial highways"),
    ('TAX', "Full-value property-tax rate per $10,000"),
    ('PTRATIO', "Pupil-teacher ratio by town"),
    ('B', "1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town"),
    ('LSTAT', "Percentage lower status of the population"),
    ('MEDV (target)', "Median value of owner-occupied homes in $1000's"),
]
# Separate lists of attributes and their respective descriptions
attr = [name for name, _ in attribute_notes]
descr = [text for _, text in attribute_notes]
#------------------------
# Dataframe with one row per attribute
info = pd.DataFrame({'ATTRIBUTE': attr, 'DESCRIPTION': descr})
#------------------------
# Dash data table built from that dataframe (cell text left-aligned)
info_columns = [{"name": col, "id": col} for col in info.columns]
infoTable = dash_table.DataTable(
    id='infoTable',
    columns=info_columns,
    data=info.to_dict('records'),
    style_cell={'textAlign': 'left'})

### Styles and texts

In [271]:
# Texts that will make up the paragraphs of the Dash application...
# The key statistics are formatted from the model's own testing results
# (y_testing / y_predicted) rather than typed in by hand, so the dashboard
# text cannot drift away from the metrics printed when the model was fitted.
mse_text = f"Mean squared error = {mean_squared_error(y_testing, y_predicted):.4f}"
r2_text = f"R-squared score = {r2_score(y_testing, y_predicted):.4f}"
P = [mse_text, html.Br(), r2_text]

# Interpretation paragraph 1: how to read the graph.
# Each text starts with a line break, followed by its lines joined by line breaks.
Q = "\n".join([
    "",
    "Here, we see how the regression line fits for each combination of",
    "feature values (i.e. the factor variables) (one combination of values",
    "is one point in the x-axis). The points represent the actual target",
    "values for the given combination of feature values.",
])

# Interpretation paragraph 2: comment on the quality of the fit.
R = "\n".join([
    "",
    "As the  𝑅-squared score suggested, the fit is extremely bad overall,",
    "and it does seem like a horizontal line at the mean target value",
    "would serve as a better fit, given the sizeable deviations of the",
    "fitted model from the actual data, even when the actual data points",
    "do not vary too much in vertical distance.",
])

In [272]:
# Styles that will be used in the Dash application...
# Keys are written in camelCase (textAlign, not text-align), the form Dash
# documents for style dictionaries and the one already used for the
# information table's style_cell.

# Centred text (used for the main heading)
A = {"textAlign": "center"}

# Monospaced, widely spaced text (used for the key-statistics paragraph)
B = {"fontFamily": "Monospace",
     "padding": "10px",
     "lineHeight": "300%",
     "fontSize": "15px"}

# Style of the outermost Div of the layout.
# The layout refers to `D`, but no definition of it exists in this notebook,
# so building the layout on a fresh kernel raised a NameError.
# NOTE(review): the originally intended values are unknown; this is a neutral
# placeholder -- replace it with the intended page style if one is known.
D = {"padding": "20px"}

## Actual creation of Dash application object and its layout

In [273]:
# Creating a Dash object
myApp = dash.Dash()

# Elements of the page, in display order. They become the children of the
# single outer HTML 'Div' element that forms the layout.
page_sections = [
    html.H1("Multiple linear regression model", style = A),

    html.H2("Data table"),
    dataTable,

    html.H2("Attribute descriptions"),
    infoTable,

    html.H2("Some key statistics of the model"),
    html.P(P, style = B),

    html.H2("Graph of model against actual data points"),
    dcc.Graph(id='mlrm_graph', figure = f),

    html.H2("Interpretation"),
    html.P(Q),
    html.P(R),
]

# Creating layout for the Dash object (the contents and layout of the HTML document).
# `D` (the style of the outer Div) must already be defined by an earlier cell.
myApp.layout = html.Div(style = D, children = page_sections)

In [275]:
# Running the application: starts Dash's web server for myApp, hosted on your own computer.
# NOTE(review): newer Dash releases appear to prefer `myApp.run()` over
# `run_server()` -- confirm against the installed Dash version.
myApp.run_server()