In [0]:
# imports 
from pyspark.ml.regression import LinearRegression
from pyspark.ml.feature import VectorAssembler
from pyspark.ml import Pipeline
from pyspark.ml.evaluation import RegressionEvaluator 
from mlflow.tracking import MlflowClient
import mlflow
import mlflow.spark 
import pandas as pd
from pyspark.ml.feature import RFormula

# Session-level setup: turn on Arrow for Spark <-> pandas conversion and
# create the MLflow client that later cells use to read runs back.
spark.conf.set('spark.sql.execution.arrow.enabled', 'true')
client = MlflowClient()

# Both wine datasets live as parquet files under the user's DBFS home directory.
userhome = 'dbfs:/user/skim658@gwu.edu'

redDF = spark.read.parquet('{}/final-project/reddf.parquet'.format(userhome))
whiteDF = spark.read.parquet('{}/final-project/whitedf.parquet'.format(userhome))

In [0]:
# 80/20 train/test split of each dataset (8 partitions, fixed seed so the
# split is repeatable). Only the training halves are cached; the printed
# numbers are the training row counts, red first and then white.
redTrainDF, redTestDF = redDF.repartition(8).randomSplit(weights=[0.8, 0.2], seed=42)
whiteTrainDF, whiteTestDF = whiteDF.repartition(8).randomSplit(weights=[0.8, 0.2], seed=42)

redTrainDF.cache()
print(redTrainDF.count())

whiteTrainDF.cache()
print(whiteTrainDF.count())

In [0]:
# Preview the red-wine test split; this also shows the column names the
# model cells below refer to (e.g. alcohol, sulphates, pH, quality).
display(redTestDF)

fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,alcohol,quality
5.4,0.74,0.0,1.2,0.041,16.0,46.0,0.99258,4.01,0.59,12.5,6
5.9,0.61,0.08,2.1,0.071,16.0,24.0,0.99376,3.56,0.77,11.1,6
6.0,0.5,0.0,1.4,0.057,15.0,26.0,0.99448,3.36,0.45,9.5,5
6.2,0.64,0.09,2.5,0.081,15.0,26.0,0.99538,3.57,0.63,12.0,5
6.4,0.56,0.15,1.8,0.078,17.0,65.0,0.99294,3.33,0.6,10.5,6
6.6,0.815,0.02,2.7,0.072,17.0,34.0,0.9955,3.58,0.89,12.3,7
6.8,0.91,0.06,2.0,0.06,4.0,11.0,0.99592,3.53,0.64,10.9,4
7.0,0.36,0.21,2.4,0.086,24.0,69.0,0.99556,3.4,0.53,10.1,6
7.0,0.685,0.0,1.9,0.067,40.0,63.0,0.9979,3.6,0.81,9.9,5
7.0,0.78,0.08,2.0,0.093,10.0,19.0,0.9956,3.4,0.47,10.0,5


## Single Feature Linear Regression

## Red Wine (Alcohol %)

In [0]:
# Red wine, single predictor (alcohol): fit, log to MLflow, score on the test split
with mlflow.start_run(run_name='RED-LR-Single-Feature-Alcohol') as run:
  # pipeline: pack the one input column into a vector, then a linear regression on it
  vecAssembler = VectorAssembler(outputCol='features', inputCols=['alcohol'])
  lr = LinearRegression(labelCol='quality', featuresCol='features')
  pipeline = Pipeline(stages=[vecAssembler, lr])
  pipelineModel = pipeline.fit(redTrainDF)

  # record what this run covers, plus the fitted pipeline itself
  mlflow.log_param('features', 'alcohol')
  mlflow.log_param('color', 'red')
  mlflow.spark.log_model(pipelineModel, 'model')

  # predict on the test split and compute RMSE and R^2 with one evaluator
  redPredDF = pipelineModel.transform(redTestDF)
  regressionEvaluator = RegressionEvaluator(labelCol='quality', predictionCol='prediction')
  regressionEvaluator.setMetricName('rmse')
  rmse = regressionEvaluator.evaluate(redPredDF)
  regressionEvaluator.setMetricName('r2')
  r2 = regressionEvaluator.evaluate(redPredDF)
  mlflow.log_metric('rmse', rmse)
  mlflow.log_metric('r2', r2)

  display(redPredDF.select('features', 'quality', 'prediction'))

features,quality,prediction
"Map(vectorType -> dense, length -> 1, values -> List(12.5))",6,6.373933646022195
"Map(vectorType -> dense, length -> 1, values -> List(11.1))",6,5.878303748322224
"Map(vectorType -> dense, length -> 1, values -> List(9.5))",5,5.3118695795222575
"Map(vectorType -> dense, length -> 1, values -> List(12.0))",5,6.1969229682722045
"Map(vectorType -> dense, length -> 1, values -> List(10.5))",6,5.665890935022237
"Map(vectorType -> dense, length -> 1, values -> List(12.3))",7,6.303129374922198
"Map(vectorType -> dense, length -> 1, values -> List(10.9))",4,5.807499477222228
"Map(vectorType -> dense, length -> 1, values -> List(10.1))",6,5.524282392822245
"Map(vectorType -> dense, length -> 1, values -> List(9.9))",5,5.453478121722249
"Map(vectorType -> dense, length -> 1, values -> List(10.0))",5,5.488880257272247


In [0]:
# Show the metrics of the run logged by the preceding cell.
# The run is fetched by its own ID rather than by "most recent run in the
# experiment": a start-time search can return a different run if anything
# else logs to the same experiment in between, whereas the ID is unambiguous.
experiment_id = run.info.experiment_id  # kept in case other cells read it
runs = [client.get_run(run.info.run_id)]  # one-element list so runs[0] still works
redMetricsDF = pd.DataFrame(runs[0].data.metrics, index = [0])
display(redMetricsDF)

r2,rmse
0.2473137230820193,0.7038935289263599


## White Wine (Alcohol %)

In [0]:
# White wine, single predictor (alcohol): fit, log to MLflow, score on the test split
with mlflow.start_run(run_name='WHITE-LR-Single-Feature-Alcohol') as run:
  # pipeline: pack the one input column into a vector, then a linear regression on it
  vecAssembler = VectorAssembler(outputCol='features', inputCols=['alcohol'])
  lr = LinearRegression(labelCol='quality', featuresCol='features')
  pipeline = Pipeline(stages=[vecAssembler, lr])
  pipelineModel = pipeline.fit(whiteTrainDF)

  # record what this run covers, plus the fitted pipeline itself
  mlflow.log_param('features', 'alcohol')
  mlflow.log_param('color', 'white')
  mlflow.spark.log_model(pipelineModel, 'model')

  # predict on the test split and compute RMSE and R^2 with one evaluator
  whitePredDF = pipelineModel.transform(whiteTestDF)
  regressionEvaluator = RegressionEvaluator(labelCol='quality', predictionCol='prediction')
  regressionEvaluator.setMetricName('rmse')
  rmse = regressionEvaluator.evaluate(whitePredDF)
  regressionEvaluator.setMetricName('r2')
  r2 = regressionEvaluator.evaluate(whitePredDF)
  mlflow.log_metric('rmse', rmse)
  mlflow.log_metric('r2', r2)

  display(whitePredDF.select('features', 'quality', 'prediction'))

features,quality,prediction
"Map(vectorType -> dense, length -> 1, values -> List(11.5))",6,6.192774849229745
"Map(vectorType -> dense, length -> 1, values -> List(11.5))",6,6.192774849229745
"Map(vectorType -> dense, length -> 1, values -> List(13.0))",8,6.671527175362573
"Map(vectorType -> dense, length -> 1, values -> List(12.4))",6,6.480026244909442
"Map(vectorType -> dense, length -> 1, values -> List(12.9))",6,6.639610353620385
"Map(vectorType -> dense, length -> 1, values -> List(10.2))",6,5.777856166581294
"Map(vectorType -> dense, length -> 1, values -> List(9.9))",6,5.682105701354729
"Map(vectorType -> dense, length -> 1, values -> List(11.3))",6,6.1289412057453685
"Map(vectorType -> dense, length -> 1, values -> List(12.6))",6,6.543859888393818
"Map(vectorType -> dense, length -> 1, values -> List(9.4))",6,5.522521592643786


In [0]:
# Show the metrics of the run logged by the preceding cell.
# The run is fetched by its own ID rather than by "most recent run in the
# experiment": a start-time search can return a different run if anything
# else logs to the same experiment in between, whereas the ID is unambiguous.
experiment_id = run.info.experiment_id  # kept in case other cells read it
runs = [client.get_run(run.info.run_id)]  # one-element list so runs[0] still works
whiteMetricsDF = pd.DataFrame(runs[0].data.metrics, index = [0])
display(whiteMetricsDF)

r2,rmse
0.1627795241709205,0.8106369758700381


## Red Wine (Sulphates)

Sulphates has the second-highest correlation with quality in the red wine dataset.

In [0]:
# Red wine, single predictor (sulphates): fit, log to MLflow, score on the test split
with mlflow.start_run(run_name='RED-LR-Single-Feature-Sulphates') as run:
  # pipeline: pack the one input column into a vector, then a linear regression on it
  vecAssembler = VectorAssembler(outputCol='features', inputCols=['sulphates'])
  lr = LinearRegression(labelCol='quality', featuresCol='features')
  pipeline = Pipeline(stages=[vecAssembler, lr])
  pipelineModel = pipeline.fit(redTrainDF)

  # record what this run covers, plus the fitted pipeline itself
  mlflow.log_param('features', 'sulphates')
  mlflow.log_param('color', 'red')
  mlflow.spark.log_model(pipelineModel, 'model')

  # predict on the test split and compute RMSE and R^2 with one evaluator
  redPredDF = pipelineModel.transform(redTestDF)
  regressionEvaluator = RegressionEvaluator(labelCol='quality', predictionCol='prediction')
  regressionEvaluator.setMetricName('rmse')
  rmse = regressionEvaluator.evaluate(redPredDF)
  regressionEvaluator.setMetricName('r2')
  r2 = regressionEvaluator.evaluate(redPredDF)
  mlflow.log_metric('rmse', rmse)
  mlflow.log_metric('r2', r2)

  display(redPredDF.select('features', 'quality', 'prediction'))

features,quality,prediction
"Map(vectorType -> dense, length -> 1, values -> List(0.59))",6,5.553306077453898
"Map(vectorType -> dense, length -> 1, values -> List(0.77))",6,5.772169409980988
"Map(vectorType -> dense, length -> 1, values -> List(0.45))",5,5.383079041043938
"Map(vectorType -> dense, length -> 1, values -> List(0.63))",5,5.6019423735710285
"Map(vectorType -> dense, length -> 1, values -> List(0.6))",6,5.56546515148318
"Map(vectorType -> dense, length -> 1, values -> List(0.89))",7,5.918078298332381
"Map(vectorType -> dense, length -> 1, values -> List(0.64))",4,5.614101447600311
"Map(vectorType -> dense, length -> 1, values -> List(0.53))",6,5.4803516332782
"Map(vectorType -> dense, length -> 1, values -> List(0.81))",5,5.82080570609812
"Map(vectorType -> dense, length -> 1, values -> List(0.47))",5,5.407397189102504


In [0]:
# Show the metrics of the run logged by the preceding cell.
# The run is fetched by its own ID rather than by "most recent run in the
# experiment": a start-time search can return a different run if anything
# else logs to the same experiment in between, whereas the ID is unambiguous.
experiment_id = run.info.experiment_id  # kept in case other cells read it
runs = [client.get_run(run.info.run_id)]  # one-element list so runs[0] still works
redMetricsDF = pd.DataFrame(runs[0].data.metrics, index = [0])
display(redMetricsDF)

r2,rmse
0.0548546086216144,0.7887680018439472


## White Wine (Sulphates)

In [0]:
# White wine, single predictor (sulphates): fit, log to MLflow, score on the test split
with mlflow.start_run(run_name='WHITE-LR-Single-Feature-Sulphates') as run:
  # pipeline: pack the one input column into a vector, then a linear regression on it
  vecAssembler = VectorAssembler(outputCol='features', inputCols=['sulphates'])
  lr = LinearRegression(labelCol='quality', featuresCol='features')
  pipeline = Pipeline(stages=[vecAssembler, lr])
  pipelineModel = pipeline.fit(whiteTrainDF)

  # record what this run covers, plus the fitted pipeline itself
  mlflow.log_param('features', 'sulphates')
  mlflow.log_param('color', 'white')
  mlflow.spark.log_model(pipelineModel, 'model')

  # predict on the test split and compute RMSE and R^2 with one evaluator
  whitePredDF = pipelineModel.transform(whiteTestDF)
  regressionEvaluator = RegressionEvaluator(labelCol='quality', predictionCol='prediction')
  regressionEvaluator.setMetricName('rmse')
  rmse = regressionEvaluator.evaluate(whitePredDF)
  regressionEvaluator.setMetricName('r2')
  r2 = regressionEvaluator.evaluate(whitePredDF)
  mlflow.log_metric('rmse', rmse)
  mlflow.log_metric('r2', r2)

  display(whitePredDF.select('features', 'quality', 'prediction'))

features,quality,prediction
"Map(vectorType -> dense, length -> 1, values -> List(0.35))",6,5.814456517824979
"Map(vectorType -> dense, length -> 1, values -> List(0.4))",6,5.836500699528692
"Map(vectorType -> dense, length -> 1, values -> List(0.48))",8,5.871771390254632
"Map(vectorType -> dense, length -> 1, values -> List(0.36))",6,5.818865354165721
"Map(vectorType -> dense, length -> 1, values -> List(0.37))",6,5.823274190506464
"Map(vectorType -> dense, length -> 1, values -> List(0.35))",6,5.814456517824979
"Map(vectorType -> dense, length -> 1, values -> List(0.5))",6,5.880589062936117
"Map(vectorType -> dense, length -> 1, values -> List(0.69))",6,5.964356953410225
"Map(vectorType -> dense, length -> 1, values -> List(0.35))",6,5.814456517824979
"Map(vectorType -> dense, length -> 1, values -> List(0.31))",6,5.796821172462009


In [0]:
# Show the metrics of the run logged by the preceding cell.
# The run is fetched by its own ID rather than by "most recent run in the
# experiment": a start-time search can return a different run if anything
# else logs to the same experiment in between, whereas the ID is unambiguous.
experiment_id = run.info.experiment_id  # kept in case other cells read it
runs = [client.get_run(run.info.run_id)]  # one-element list so runs[0] still works
whiteMetricsDF = pd.DataFrame(runs[0].data.metrics, index = [0])
display(whiteMetricsDF)

r2,rmse
0.0013904339602293,0.8853283059841446


## Red Wine (pH)

In [0]:
# Red wine, single predictor (pH): fit, log to MLflow, score on the test split
with mlflow.start_run(run_name='RED-LR-Single-Feature-pH') as run:
  # pipeline: pack the one input column into a vector, then a linear regression on it
  vecAssembler = VectorAssembler(outputCol='features', inputCols=['pH'])
  lr = LinearRegression(labelCol='quality', featuresCol='features')
  pipeline = Pipeline(stages=[vecAssembler, lr])
  pipelineModel = pipeline.fit(redTrainDF)

  # record what this run covers, plus the fitted pipeline itself
  mlflow.log_param('features', 'pH')
  mlflow.log_param('color', 'red')
  mlflow.spark.log_model(pipelineModel, 'model')

  # predict on the test split and compute RMSE and R^2 with one evaluator
  redPredDF = pipelineModel.transform(redTestDF)
  regressionEvaluator = RegressionEvaluator(labelCol='quality', predictionCol='prediction')
  regressionEvaluator.setMetricName('rmse')
  rmse = regressionEvaluator.evaluate(redPredDF)
  regressionEvaluator.setMetricName('r2')
  r2 = regressionEvaluator.evaluate(redPredDF)
  mlflow.log_metric('rmse', rmse)
  mlflow.log_metric('r2', r2)

  display(redPredDF.select('features', 'quality', 'prediction'))

features,quality,prediction
"Map(vectorType -> dense, length -> 1, values -> List(4.01))",6,5.512768048425997
"Map(vectorType -> dense, length -> 1, values -> List(3.56))",6,5.591717664522763
"Map(vectorType -> dense, length -> 1, values -> List(3.36))",5,5.626806382787992
"Map(vectorType -> dense, length -> 1, values -> List(3.57))",5,5.589963228609501
"Map(vectorType -> dense, length -> 1, values -> List(3.33))",6,5.632069690527777
"Map(vectorType -> dense, length -> 1, values -> List(3.58))",7,5.58820879269624
"Map(vectorType -> dense, length -> 1, values -> List(3.53))",4,5.596980972262547
"Map(vectorType -> dense, length -> 1, values -> List(3.4))",6,5.619788639134946
"Map(vectorType -> dense, length -> 1, values -> List(3.6))",5,5.584699920869717
"Map(vectorType -> dense, length -> 1, values -> List(3.4))",5,5.619788639134946


In [0]:
# Show the metrics of the run logged by the preceding cell.
# The run is fetched by its own ID rather than by "most recent run in the
# experiment": a start-time search can return a different run if anything
# else logs to the same experiment in between, whereas the ID is unambiguous.
experiment_id = run.info.experiment_id  # kept in case other cells read it
runs = [client.get_run(run.info.run_id)]  # one-element list so runs[0] still works
redMetricsDF = pd.DataFrame(runs[0].data.metrics, index = [0])
display(redMetricsDF)

r2,rmse
0.0090250524533936,0.8076650894867797


## White Wine (pH)

pH has the second-highest correlation with quality in the white wine dataset.

In [0]:
# White wine, single predictor (pH): fit, log to MLflow, score on the test split
with mlflow.start_run(run_name='WHITE-LR-Single-Feature-pH') as run:
  # pipeline: pack the one input column into a vector, then a linear regression on it
  vecAssembler = VectorAssembler(outputCol='features', inputCols=['pH'])
  lr = LinearRegression(labelCol='quality', featuresCol='features')
  pipeline = Pipeline(stages=[vecAssembler, lr])
  pipelineModel = pipeline.fit(whiteTrainDF)

  # record what this run covers, plus the fitted pipeline itself
  mlflow.log_param('features', 'pH')
  mlflow.log_param('color', 'white')
  mlflow.spark.log_model(pipelineModel, 'model')

  # predict on the test split and compute RMSE and R^2 with one evaluator
  whitePredDF = pipelineModel.transform(whiteTestDF)
  regressionEvaluator = RegressionEvaluator(labelCol='quality', predictionCol='prediction')
  regressionEvaluator.setMetricName('rmse')
  rmse = regressionEvaluator.evaluate(whitePredDF)
  regressionEvaluator.setMetricName('r2')
  r2 = regressionEvaluator.evaluate(whitePredDF)
  mlflow.log_metric('rmse', rmse)
  mlflow.log_metric('r2', r2)

  display(whitePredDF.select('features', 'quality', 'prediction'))

features,quality,prediction
"Map(vectorType -> dense, length -> 1, values -> List(3.27))",6,5.925143789194573
"Map(vectorType -> dense, length -> 1, values -> List(3.38))",6,5.990510008073503
"Map(vectorType -> dense, length -> 1, values -> List(3.56))",8,6.097472911693573
"Map(vectorType -> dense, length -> 1, values -> List(3.28))",6,5.931086172729021
"Map(vectorType -> dense, length -> 1, values -> List(3.35))",6,5.972682857470159
"Map(vectorType -> dense, length -> 1, values -> List(3.32))",6,5.954855706866814
"Map(vectorType -> dense, length -> 1, values -> List(3.34))",6,5.966740473935711
"Map(vectorType -> dense, length -> 1, values -> List(3.44))",6,6.026164309280194
"Map(vectorType -> dense, length -> 1, values -> List(3.24))",6,5.907316638591228
"Map(vectorType -> dense, length -> 1, values -> List(3.38))",6,5.990510008073503


In [0]:
# Show the metrics of the run logged by the preceding cell.
# The run is fetched by its own ID rather than by "most recent run in the
# experiment": a start-time search can return a different run if anything
# else logs to the same experiment in between, whereas the ID is unambiguous.
experiment_id = run.info.experiment_id  # kept in case other cells read it
runs = [client.get_run(run.info.run_id)]  # one-element list so runs[0] still works
whiteMetricsDF = pd.DataFrame(runs[0].data.metrics, index = [0])
display(whiteMetricsDF)

r2,rmse
0.0090621708996575,0.8819210178867934


## Linear Regression - All Features

## Red Wine

In [0]:
# Red wine, all predictors: fit, log to MLflow, score on the test split
with mlflow.start_run(run_name='RED-LR-All-Features') as run:
  # 'quality ~ .' uses every column other than quality as a feature;
  # handleInvalid='skip' is passed so rows RFormula deems invalid are left out
  rFormula = RFormula(formula='quality ~ .', labelCol='quality', featuresCol='features', handleInvalid='skip')
  lr = LinearRegression(featuresCol='features', labelCol='quality')

  # two-stage pipeline: build the feature vector, then fit the regression
  pipeline = Pipeline(stages=[rFormula, lr])
  pipelineModel = pipeline.fit(redTrainDF)

  # record what this run covers, plus the fitted pipeline itself
  mlflow.log_param('features', 'all')
  mlflow.log_param('color', 'red')
  mlflow.spark.log_model(pipelineModel, 'model')

  # predict on the test split and compute RMSE and R^2 with one evaluator
  redPredDFAll = pipelineModel.transform(redTestDF)
  regressionEvaluator = RegressionEvaluator(labelCol='quality', predictionCol='prediction')
  regressionEvaluator.setMetricName('rmse')
  rmse = regressionEvaluator.evaluate(redPredDFAll)
  regressionEvaluator.setMetricName('r2')
  r2 = regressionEvaluator.evaluate(redPredDFAll)
  mlflow.log_metric('rmse', rmse)
  mlflow.log_metric('r2', r2)

  display(redPredDFAll.select('features', 'quality', 'prediction'))

features,quality,prediction
"Map(vectorType -> dense, length -> 11, values -> List(5.4, 0.74, 0.0, 1.2, 0.041, 16.0, 46.0, 0.99258, 4.01, 0.59, 12.5))",6,5.799852164667122
"Map(vectorType -> dense, length -> 11, values -> List(5.9, 0.61, 0.08, 2.1, 0.071, 16.0, 24.0, 0.99376, 3.56, 0.77, 11.1))",6,5.874464860345169
"Map(vectorType -> dense, length -> 11, values -> List(6.0, 0.5, 0.0, 1.4, 0.057, 15.0, 26.0, 0.99448, 3.36, 0.45, 9.5))",5,5.328606057177634
"Map(vectorType -> dense, length -> 11, values -> List(6.2, 0.64, 0.09, 2.5, 0.081, 15.0, 26.0, 0.99538, 3.57, 0.63, 12.0))",5,5.900042067312366
"Map(vectorType -> dense, length -> 11, values -> List(6.4, 0.56, 0.15, 1.8, 0.078, 17.0, 65.0, 0.99294, 3.33, 0.6, 10.5))",6,5.568492534637997
"Map(vectorType -> dense, length -> 11, values -> List(6.6, 0.815, 0.02, 2.7, 0.072, 17.0, 34.0, 0.9955, 3.58, 0.89, 12.3))",7,6.071215930290659
"Map(vectorType -> dense, length -> 11, values -> List(6.8, 0.91, 0.06, 2.0, 0.06, 4.0, 11.0, 0.99592, 3.53, 0.64, 10.9))",4,5.380965651640169
"Map(vectorType -> dense, length -> 11, values -> List(7.0, 0.36, 0.21, 2.4, 0.086, 24.0, 69.0, 0.99556, 3.4, 0.53, 10.1))",6,5.552201835941496
"Map(vectorType -> dense, length -> 11, values -> List(7.0, 0.685, 0.0, 1.9, 0.067, 40.0, 63.0, 0.9979, 3.6, 0.81, 9.9))",5,5.459960755815846
"Map(vectorType -> dense, length -> 11, values -> List(7.0, 0.78, 0.08, 2.0, 0.093, 10.0, 19.0, 0.9956, 3.4, 0.47, 10.0))",5,5.095899963535805


In [0]:
# Show the metrics of the run logged by the preceding cell.
# The run is fetched by its own ID rather than by "most recent run in the
# experiment": a start-time search can return a different run if anything
# else logs to the same experiment in between, whereas the ID is unambiguous.
experiment_id = run.info.experiment_id  # kept in case other cells read it
runs = [client.get_run(run.info.run_id)]  # one-element list so runs[0] still works
redMetricsDF = pd.DataFrame(runs[0].data.metrics, index = [0])
display(redMetricsDF)

r2,rmse
0.3981479276382962,0.6294262539583688


## White Wine

In [0]:
# White wine, all predictors: fit, log to MLflow, score on the test split
with mlflow.start_run(run_name='WHITE-LR-All-Features') as run:
  # 'quality ~ .' uses every column other than quality as a feature;
  # handleInvalid='skip' is passed so rows RFormula deems invalid are left out
  rFormula = RFormula(formula='quality ~ .', labelCol='quality', featuresCol='features', handleInvalid='skip')
  lr = LinearRegression(featuresCol='features', labelCol='quality')

  # two-stage pipeline: build the feature vector, then fit the regression
  pipeline = Pipeline(stages=[rFormula, lr])
  pipelineModel = pipeline.fit(whiteTrainDF)

  # record what this run covers, plus the fitted pipeline itself
  mlflow.log_param('features', 'all')
  mlflow.log_param('color', 'white')
  mlflow.spark.log_model(pipelineModel, 'model')

  # predict on the test split and compute RMSE and R^2 with one evaluator
  whitePredDFAll = pipelineModel.transform(whiteTestDF)
  regressionEvaluator = RegressionEvaluator(labelCol='quality', predictionCol='prediction')
  regressionEvaluator.setMetricName('rmse')
  rmse = regressionEvaluator.evaluate(whitePredDFAll)
  regressionEvaluator.setMetricName('r2')
  r2 = regressionEvaluator.evaluate(whitePredDFAll)
  mlflow.log_metric('rmse', rmse)
  mlflow.log_metric('r2', r2)

  display(whitePredDFAll.select('features', 'quality', 'prediction'))

features,quality,prediction
"Map(vectorType -> dense, length -> 11, values -> List(4.9, 0.47, 0.17, 1.9, 0.035, 60.0, 148.0, 0.98964, 3.27, 0.35, 11.5))",6,5.929938787065538
"Map(vectorType -> dense, length -> 11, values -> List(5.1, 0.35, 0.26, 6.8, 0.034, 36.0, 120.0, 0.99188, 3.38, 0.4, 11.5))",6,6.263919458944571
"Map(vectorType -> dense, length -> 11, values -> List(5.2, 0.3, 0.34, 1.5, 0.038, 18.0, 96.0, 0.98942, 3.56, 0.48, 13.0))",8,6.687563488037853
"Map(vectorType -> dense, length -> 11, values -> List(5.4, 0.29, 0.38, 1.2, 0.029, 31.0, 132.0, 0.98895, 3.28, 0.36, 12.4))",6,6.4462042700581605
"Map(vectorType -> dense, length -> 11, values -> List(5.6, 0.18, 0.27, 1.7, 0.03, 31.0, 103.0, 0.98892, 3.35, 0.37, 12.9))",6,6.852963284695676
"Map(vectorType -> dense, length -> 11, values -> List(5.6, 0.26, 0.18, 1.4, 0.034, 18.0, 135.0, 0.99174, 3.32, 0.35, 10.2))",6,5.675923980476597
"Map(vectorType -> dense, length -> 11, values -> List(5.7, 0.2, 0.3, 2.5, 0.046, 38.0, 125.0, 0.99276, 3.34, 0.5, 9.9))",6,5.8264370590817975
"Map(vectorType -> dense, length -> 11, values -> List(5.7, 0.28, 0.35, 1.2, 0.052, 39.0, 141.0, 0.99108, 3.44, 0.69, 11.3))",6,6.302038283858366
"Map(vectorType -> dense, length -> 11, values -> List(5.8, 0.22, 0.29, 1.3, 0.036, 25.0, 68.0, 0.98865, 3.24, 0.35, 12.6))",6,6.648074644212528
"Map(vectorType -> dense, length -> 11, values -> List(5.8, 0.23, 0.27, 1.8, 0.043, 24.0, 69.0, 0.9933, 3.38, 0.31, 9.4))",6,5.379019648120334


In [0]:
# Show the metrics of the run logged by the preceding cell.
# The run is fetched by its own ID rather than by "most recent run in the
# experiment": a start-time search can return a different run if anything
# else logs to the same experiment in between, whereas the ID is unambiguous.
experiment_id = run.info.experiment_id  # kept in case other cells read it
runs = [client.get_run(run.info.run_id)]  # one-element list so runs[0] still works
whiteMetricsDF = pd.DataFrame(runs[0].data.metrics, index = [0])
display(whiteMetricsDF)

r2,rmse
0.2337080882736285,0.7755389415881734
