diff --git a/spark/jobs/current_job.py b/spark/jobs/current_job.py index 12c4c39..73e874e 100644 --- a/spark/jobs/current_job.py +++ b/spark/jobs/current_job.py @@ -9,7 +9,6 @@ from metrics.statistics import calculate_statistics_current from models.current_dataset import CurrentDataset from models.reference_dataset import ReferenceDataset -from utils.reference_regression import ReferenceMetricsRegressionService from utils.current_binary import CurrentMetricsService from utils.current_multiclass import CurrentMetricsMulticlassService from utils.models import JobStatus, ModelOut, ModelType @@ -57,9 +56,8 @@ def main( case ModelType.BINARY: metrics_service = CurrentMetricsService( spark_session=spark_session, - current=current_dataset.current, - reference=reference_dataset.reference, - model=model, + current=current_dataset, + reference=reference_dataset, ) statistics = calculate_statistics_current(current_dataset) data_quality = metrics_service.calculate_data_quality() @@ -80,25 +78,25 @@ def main( case ModelType.MULTI_CLASS: metrics_service = CurrentMetricsMulticlassService( spark_session=spark_session, - current=current_dataset.current, - reference=reference_dataset.reference, - model=model, + current=current_dataset, + reference=reference_dataset, ) statistics = calculate_statistics_current(current_dataset) data_quality = metrics_service.calculate_data_quality() + model_quality = metrics_service.calculate_model_quality() + drift = metrics_service.calculate_drift() complete_record["STATISTICS"] = statistics.model_dump_json( serialize_as_any=True ) complete_record["DATA_QUALITY"] = data_quality.model_dump_json( serialize_as_any=True ) - case ModelType.REGRESSION: - metrics_service = ReferenceMetricsRegressionService( - reference=reference_dataset + complete_record["MODEL_QUALITY"] = orjson.dumps(model_quality).decode( + "utf-8" ) + complete_record["DRIFT"] = orjson.dumps(drift).decode("utf-8") + case ModelType.REGRESSION: statistics = calculate_statistics_current(current_dataset) - data_quality = metrics_service.calculate_data_quality() - complete_record["STATISTICS"] = statistics.model_dump_json( serialize_as_any=True ) diff --git a/spark/jobs/utils/chi2.py b/spark/jobs/metrics/chi2.py similarity index 100% rename from spark/jobs/utils/chi2.py rename to spark/jobs/metrics/chi2.py diff --git a/spark/jobs/metrics/drift_calculator.py b/spark/jobs/metrics/drift_calculator.py new file mode 100644 index 0000000..a9e7bc9 --- /dev/null +++ b/spark/jobs/metrics/drift_calculator.py @@ -0,0 +1,79 @@ +from pyspark.sql import SparkSession + +from metrics.chi2 import Chi2Test +from metrics.ks import KolmogorovSmirnovTest +from models.current_dataset import CurrentDataset +from models.reference_dataset import ReferenceDataset + + +class DriftCalculator: + @staticmethod + def calculate_drift( + spark_session: SparkSession, + reference_dataset: ReferenceDataset, + current_dataset: CurrentDataset, + ): + drift_result = dict() + drift_result["feature_metrics"] = [] + + categorical_features = [ + categorical.name + for categorical in reference_dataset.model.get_categorical_features() + ] + chi2 = Chi2Test( + spark_session=spark_session, + reference_data=reference_dataset.reference, + current_data=current_dataset.current, + ) + + for column in categorical_features: + feature_dict_to_append = { + "feature_name": column, + "drift_calc": { + "type": "CHI2", + }, + } + if ( + reference_dataset.reference_count > 5 + and current_dataset.current_count > 5 + ): + result_tmp = chi2.test(column, column) + feature_dict_to_append["drift_calc"]["value"] = float( + result_tmp["pValue"] + ) + feature_dict_to_append["drift_calc"]["has_drift"] = bool( + result_tmp["pValue"] <= 0.05 + ) + else: + feature_dict_to_append["drift_calc"]["value"] = None + feature_dict_to_append["drift_calc"]["has_drift"] = False + drift_result["feature_metrics"].append(feature_dict_to_append) + + numerical_features = [ + numerical.name + for numerical in reference_dataset.model.get_numerical_features() + ] + ks = KolmogorovSmirnovTest( + reference_data=reference_dataset.reference, + current_data=current_dataset.current, + alpha=0.05, + phi=0.004, + ) + + for column in numerical_features: + feature_dict_to_append = { + "feature_name": column, + "drift_calc": { + "type": "KS", + }, + } + result_tmp = ks.test(column, column) + feature_dict_to_append["drift_calc"]["value"] = float( + result_tmp["ks_statistic"] + ) + feature_dict_to_append["drift_calc"]["has_drift"] = bool( + result_tmp["ks_statistic"] > result_tmp["critical_value"] + ) + drift_result["feature_metrics"].append(feature_dict_to_append) + + return drift_result diff --git a/spark/jobs/utils/ks.py b/spark/jobs/metrics/ks.py similarity index 100% rename from spark/jobs/utils/ks.py rename to spark/jobs/metrics/ks.py diff --git a/spark/jobs/models/current_dataset.py b/spark/jobs/models/current_dataset.py index 07154bf..4c988fc 100644 --- a/spark/jobs/models/current_dataset.py +++ b/spark/jobs/models/current_dataset.py @@ -1,8 +1,10 @@ from typing import List +from pyspark.ml.feature import StringIndexer from pyspark.sql import DataFrame from pyspark.sql.types import DoubleType, StructField, StructType +from models.reference_dataset import ReferenceDataset from utils.models import ModelOut, ModelType, ColumnDefinition from utils.spark import apply_schema_to_dataframe @@ -95,3 +97,47 @@ def get_all_variables(self) -> List[ColumnDefinition]: + [self.model.timestamp] + self.model.outputs.output ) + + def get_string_indexed_dataframe(self, reference: ReferenceDataset): + """ + Source: https://stackoverflow.com/questions/65911146/how-to-transform-multiple-categorical-columns-to-integers-maintaining-shared-val + Current dataset will be indexed with columns from both reference and current in order to have complete data + """ + predictions_df_current = self.current.select( + self.model.outputs.prediction.name + ).withColumnRenamed(self.model.outputs.prediction.name, "classes") + target_df_current = self.current.select( + self.model.target.name + ).withColumnRenamed(self.model.target.name, "classes") + predictions_df_reference = reference.reference.select( + self.model.outputs.prediction.name + ).withColumnRenamed(self.model.outputs.prediction.name, "classes") + target_df_reference = reference.reference.select( + self.model.target.name + ).withColumnRenamed(self.model.target.name, "classes") + prediction_target_df = ( + predictions_df_current.union(target_df_current) + .union(predictions_df_reference) + .union(target_df_reference) + ) + indexer = StringIndexer( + inputCol="classes", + outputCol="classes_index", + stringOrderType="alphabetAsc", + handleInvalid="skip", + ) + indexer_model = indexer.fit(prediction_target_df) + indexer_prediction = indexer_model.setInputCol( + self.model.outputs.prediction.name + ).setOutputCol(f"{self.model.outputs.prediction.name}-idx") + indexed_prediction_df = indexer_prediction.transform(self.current) + indexer_target = indexer_model.setInputCol(self.model.target.name).setOutputCol( + f"{self.model.target.name}-idx" + ) + indexed_target_df = indexer_target.transform(indexed_prediction_df) + + index_label_map = { + str(float(index)): str(label) + for index, label in enumerate(indexer_model.labelsArray[0]) + } + return index_label_map, indexed_target_df diff --git a/spark/jobs/reference_job.py b/spark/jobs/reference_job.py index b4e4ca7..627e4be 100644 --- a/spark/jobs/reference_job.py +++ b/spark/jobs/reference_job.py @@ -53,9 +53,7 @@ def main( match model.model_type: case ModelType.BINARY: - metrics_service = ReferenceMetricsService( - reference_dataset.reference, model=model - ) + metrics_service = ReferenceMetricsService(reference=reference_dataset) model_quality = metrics_service.calculate_model_quality() statistics = calculate_statistics_reference(reference_dataset) data_quality = metrics_service.calculate_data_quality() diff --git a/spark/jobs/utils/current_binary.py b/spark/jobs/utils/current_binary.py index 20939c4..82b1c7f 100644 --- a/spark/jobs/utils/current_binary.py +++ b/spark/jobs/utils/current_binary.py @@ -9,15 +9,17 @@ import pyspark.sql.functions as f from metrics.data_quality_calculator import DataQualityCalculator +from metrics.drift_calculator import DriftCalculator +from models.current_dataset import CurrentDataset from models.data_quality import ( NumericalFeatureMetrics, CategoricalFeatureMetrics, ClassMetrics, BinaryClassDataQuality, ) -from .models import ModelOut, Granularity -from .ks import KolmogorovSmirnovTest -from .chi2 import Chi2Test +from models.reference_dataset import ReferenceDataset +from .misc import create_time_format +from .models import Granularity class CurrentMetricsService: @@ -56,36 +58,34 @@ class CurrentMetricsService: def __init__( self, spark_session: SparkSession, - current: DataFrame, - reference: DataFrame, - model: ModelOut, + current: CurrentDataset, + reference: ReferenceDataset, ): self.spark_session = spark_session self.current = current self.reference = reference - self.current_count = self.current.count() - self.reference_count = self.reference.count() - self.model = model def calculate_data_quality_numerical(self) -> List[NumericalFeatureMetrics]: return DataQualityCalculator.calculate_combined_data_quality_numerical( - model=self.model, - current_dataframe=self.current, - current_count=self.current_count, - reference_dataframe=self.reference, + model=self.current.model, + current_dataframe=self.current.current, + current_count=self.current.current_count, + reference_dataframe=self.reference.reference, spark_session=self.spark_session, ) def calculate_data_quality_categorical(self) -> List[CategoricalFeatureMetrics]: return DataQualityCalculator.categorical_metrics( - model=self.model, dataframe=self.current, dataframe_count=self.current_count + model=self.current.model, + dataframe=self.current.current, + dataframe_count=self.current.current_count, ) def calculate_class_metrics(self) -> List[ClassMetrics]: metrics = DataQualityCalculator.class_metrics( - class_column=self.model.target.name, - dataframe=self.current, - dataframe_count=self.current_count, + class_column=self.current.model.target.name, + dataframe=self.current.current, + dataframe_count=self.current.current_count, ) # FIXME this should be avoided if we are sure that we have all classes in the file @@ -112,12 +112,12 @@ def calculate_class_metrics(self) -> List[ClassMetrics]: def calculate_data_quality(self) -> BinaryClassDataQuality: feature_metrics = [] - if self.model.get_numerical_features(): + if self.current.model.get_numerical_features(): feature_metrics.extend(self.calculate_data_quality_numerical()) - if self.model.get_categorical_features(): + if self.current.model.get_categorical_features(): feature_metrics.extend(self.calculate_data_quality_categorical()) return BinaryClassDataQuality( - n_observations=self.current_count, + n_observations=self.current.current_count, class_metrics=self.calculate_class_metrics(), feature_metrics=feature_metrics, ) @@ -125,14 +125,16 @@ def calculate_data_quality(self) -> BinaryClassDataQuality: # FIXME use pydantic struct like data quality def __calc_bc_metrics(self) -> dict[str, float]: return { - label: self.__evaluate_binary_classification(self.current, name) + label: self.__evaluate_binary_classification(self.current.current, name) for (name, label) in self.model_quality_binary_classificator.items() } # FIXME use pydantic struct like data quality def __calc_mc_metrics(self) -> dict[str, float]: return { - label: self.__evaluate_multi_class_classification(self.current, name) + label: self.__evaluate_multi_class_classification( + self.current.current, name + ) for (name, label) in self.model_quality_multiclass_classificator.items() } @@ -142,8 +144,8 @@ def __evaluate_binary_classification( try: return BinaryClassificationEvaluator( metricName=metric_name, - labelCol=self.model.target.name, - rawPredictionCol=self.model.outputs.prediction_proba.name, + labelCol=self.current.model.target.name, + rawPredictionCol=self.current.model.outputs.prediction_proba.name, ).evaluate(dataset) except Exception: return float("nan") @@ -156,37 +158,28 @@ def __evaluate_multi_class_classification( try: return MulticlassClassificationEvaluator( metricName=metric_name, - predictionCol=self.model.outputs.prediction.name, - labelCol=self.model.target.name, + predictionCol=self.current.model.outputs.prediction.name, + labelCol=self.current.model.target.name, metricLabel=1, ).evaluate(dataset) except Exception: return float("nan") def calculate_multiclass_model_quality_group_by_timestamp(self): - def create_time_format(granularity: Granularity): - match granularity: - case Granularity.HOUR: - return "yyyy-MM-dd HH" - case Granularity.DAY: - return "yyyy-MM-dd" - case Granularity.WEEK: - return "yyyy-MM-dd" - case Granularity.MONTH: - return "yyyy-MM" - - if self.model.granularity == Granularity.WEEK: - dataset_with_group = self.current.select( + if self.current.model.granularity == Granularity.WEEK: + dataset_with_group = self.current.current.select( [ - self.model.outputs.prediction.name, - self.model.target.name, + self.current.model.outputs.prediction.name, + self.current.model.target.name, f.date_format( f.to_timestamp( f.date_sub( f.next_day( f.date_format( - self.model.timestamp.name, - create_time_format(self.model.granularity), + self.current.model.timestamp.name, + create_time_format( + self.current.model.granularity + ), ), "sunday", ), @@ -198,15 +191,15 @@ def create_time_format(granularity: Granularity): ] ) else: - dataset_with_group = self.current.select( + dataset_with_group = self.current.current.select( [ - self.model.outputs.prediction.name, - self.model.target.name, + self.current.model.outputs.prediction.name, + self.current.model.target.name, f.date_format( f.to_timestamp( f.date_format( - self.model.timestamp.name, - create_time_format(self.model.granularity), + self.current.model.timestamp.name, + create_time_format(self.current.model.granularity), ) ), "yyyy-MM-dd HH:mm:ss", @@ -240,29 +233,20 @@ def create_time_format(granularity: Granularity): } def calculate_binary_class_model_quality_group_by_timestamp(self): - def create_time_format(granularity: Granularity): - match granularity: - case Granularity.HOUR: - return "yyyy-MM-dd HH" - case Granularity.DAY: - return "yyyy-MM-dd" - case Granularity.WEEK: - return "yyyy-MM-dd" - case Granularity.MONTH: - return "yyyy-MM" - - if self.model.granularity == Granularity.WEEK: - dataset_with_group = self.current.select( + if self.current.model.granularity == Granularity.WEEK: + dataset_with_group = self.current.current.select( [ - self.model.outputs.prediction_proba.name, - self.model.target.name, + self.current.model.outputs.prediction_proba.name, + self.current.model.target.name, f.date_format( f.to_timestamp( f.date_sub( f.next_day( f.date_format( - self.model.timestamp.name, - create_time_format(self.model.granularity), + self.current.model.timestamp.name, + create_time_format( + self.current.model.granularity + ), ), "sunday", ), @@ -274,15 +258,15 @@ def create_time_format(granularity: Granularity): ] ) else: - dataset_with_group = self.current.select( + dataset_with_group = self.current.current.select( [ - self.model.outputs.prediction_proba.name, - self.model.target.name, + self.current.model.outputs.prediction_proba.name, + self.current.model.target.name, f.date_format( f.to_timestamp( f.date_format( - self.model.timestamp.name, - create_time_format(self.model.granularity), + self.current.model.timestamp.name, + create_time_format(self.current.model.granularity), ) ), "yyyy-MM-dd HH:mm:ss", @@ -315,28 +299,33 @@ def create_time_format(granularity: Granularity): def calculate_confusion_matrix(self) -> dict[str, float]: prediction_and_label = ( - self.current.select( - [self.model.outputs.prediction.name, self.model.target.name] + self.current.current.select( + [ + self.current.model.outputs.prediction.name, + self.current.model.target.name, + ] ) - .withColumn(self.model.target.name, f.col(self.model.target.name)) - .orderBy(self.model.target.name) + .withColumn( + self.current.model.target.name, f.col(self.current.model.target.name) + ) + .orderBy(self.current.model.target.name) ) tp = prediction_and_label.filter( - (col(self.model.outputs.prediction.name) == 1) - & (col(self.model.target.name) == 1) + (col(self.current.model.outputs.prediction.name) == 1) + & (col(self.current.model.target.name) == 1) ).count() tn = prediction_and_label.filter( - (col(self.model.outputs.prediction.name) == 0) - & (col(self.model.target.name) == 0) + (col(self.current.model.outputs.prediction.name) == 0) + & (col(self.current.model.target.name) == 0) ).count() fp = prediction_and_label.filter( - (col(self.model.outputs.prediction.name) == 1) - & (col(self.model.target.name) == 0) + (col(self.current.model.outputs.prediction.name) == 1) + & (col(self.current.model.target.name) == 0) ).count() fn = prediction_and_label.filter( - (col(self.model.outputs.prediction.name) == 0) - & (col(self.model.target.name) == 1) + (col(self.current.model.outputs.prediction.name) == 0) + & (col(self.current.model.target.name) == 1) ).count() return { @@ -354,7 +343,7 @@ def calculate_model_quality_with_group_by_timestamp(self): self.calculate_multiclass_model_quality_group_by_timestamp() ) metrics["global_metrics"].update(self.calculate_confusion_matrix()) - if self.model.outputs.prediction_proba is not None: + if self.current.model.outputs.prediction_proba is not None: metrics["global_metrics"].update(self.__calc_bc_metrics()) binary_class_metrics = ( self.calculate_binary_class_model_quality_group_by_timestamp() @@ -363,62 +352,8 @@ def calculate_model_quality_with_group_by_timestamp(self): return metrics def calculate_drift(self): - drift_result = dict() - drift_result["feature_metrics"] = [] - - categorical_features = [ - categorical.name for categorical in self.model.get_categorical_features() - ] - chi2 = Chi2Test( + return DriftCalculator.calculate_drift( spark_session=self.spark_session, - reference_data=self.reference, - current_data=self.current, + reference_dataset=self.reference, + current_dataset=self.current, ) - - for column in categorical_features: - feature_dict_to_append = { - "feature_name": column, - "drift_calc": { - "type": "CHI2", - }, - } - if self.reference_count > 5 and self.current_count > 5: - result_tmp = chi2.test(column, column) - feature_dict_to_append["drift_calc"]["value"] = float( - result_tmp["pValue"] - ) - feature_dict_to_append["drift_calc"]["has_drift"] = bool( - result_tmp["pValue"] <= 0.05 - ) - else: - feature_dict_to_append["drift_calc"]["value"] = None - feature_dict_to_append["drift_calc"]["has_drift"] = False - drift_result["feature_metrics"].append(feature_dict_to_append) - - numerical_features = [ - numerical.name for numerical in self.model.get_numerical_features() - ] - ks = KolmogorovSmirnovTest( - reference_data=self.reference, - current_data=self.current, - alpha=0.05, - phi=0.004, - ) - - for column in numerical_features: - feature_dict_to_append = { - "feature_name": column, - "drift_calc": { - "type": "KS", - }, - } - result_tmp = ks.test(column, column) - feature_dict_to_append["drift_calc"]["value"] = float( - result_tmp["ks_statistic"] - ) - feature_dict_to_append["drift_calc"]["has_drift"] = bool( - result_tmp["ks_statistic"] > result_tmp["critical_value"] - ) - drift_result["feature_metrics"].append(feature_dict_to_append) - - return drift_result diff --git a/spark/jobs/utils/current_multiclass.py b/spark/jobs/utils/current_multiclass.py index ffd9071..0bddfa3 100644 --- a/spark/jobs/utils/current_multiclass.py +++ b/spark/jobs/utils/current_multiclass.py @@ -1,63 +1,231 @@ -from typing import List +from typing import List, Dict -from pyspark.sql import DataFrame, SparkSession +from pandas import DataFrame +from pyspark.ml.evaluation import MulticlassClassificationEvaluator +from pyspark.mllib.evaluation import MulticlassMetrics +from pyspark.sql import SparkSession +import pyspark.sql.functions as F from metrics.data_quality_calculator import DataQualityCalculator +from metrics.drift_calculator import DriftCalculator +from models.current_dataset import CurrentDataset from models.data_quality import ( NumericalFeatureMetrics, CategoricalFeatureMetrics, ClassMetrics, MultiClassDataQuality, ) -from utils.models import ModelOut +from models.reference_dataset import ReferenceDataset +from utils.misc import create_time_format +from utils.models import Granularity class CurrentMetricsMulticlassService: def __init__( self, spark_session: SparkSession, - current: DataFrame, - reference: DataFrame, - model: ModelOut, + current: CurrentDataset, + reference: ReferenceDataset, ): self.spark_session = spark_session self.current = current self.reference = reference - self.current_count = self.current.count() - self.reference_count = self.reference.count() - self.model = model + index_label_map, indexed_current = current.get_string_indexed_dataframe( + self.reference + ) + self.index_label_map = index_label_map + self.indexed_current = indexed_current + self.model_quality_multiclass_classificator_global = { + "f1": "f1", + "accuracy": "accuracy", + "weightedPrecision": "weighted_precision", + "weightedRecall": "weighted_recall", + "weightedTruePositiveRate": "weighted_true_positive_rate", + "weightedFalsePositiveRate": "weighted_false_positive_rate", + "weightedFMeasure": "weighted_f_measure", + } + self.model_quality_multiclass_classificator_by_label = { + "truePositiveRateByLabel": "true_positive_rate", + "falsePositiveRateByLabel": "false_positive_rate", + "precisionByLabel": "precision", + "recallByLabel": "recall", + "fMeasureByLabel": "f_measure", + } def calculate_data_quality_numerical(self) -> List[NumericalFeatureMetrics]: return DataQualityCalculator.calculate_combined_data_quality_numerical( - model=self.model, - current_dataframe=self.current, - current_count=self.current_count, - reference_dataframe=self.reference, + model=self.current.model, + current_dataframe=self.current.current, + current_count=self.current.current_count, + reference_dataframe=self.reference.reference, spark_session=self.spark_session, ) def calculate_data_quality_categorical(self) -> List[CategoricalFeatureMetrics]: return DataQualityCalculator.categorical_metrics( - model=self.model, - dataframe=self.reference, - dataframe_count=self.reference_count, + model=self.current.model, + dataframe=self.current.current, + dataframe_count=self.current.current_count, ) def calculate_class_metrics(self) -> List[ClassMetrics]: return DataQualityCalculator.class_metrics( - class_column=self.model.target.name, - dataframe=self.reference, - dataframe_count=self.reference_count, + class_column=self.current.model.target.name, + dataframe=self.current.current, + dataframe_count=self.current.current_count, ) + def calculate_multiclass_model_quality_group_by_timestamp(self): + if self.current.model.granularity == Granularity.WEEK: + dataset_with_group = self.indexed_current.select( + [ + f"{self.reference.model.outputs.prediction.name}-idx", + f"{self.current.model.target.name}-idx", + self.current.model.outputs.prediction.name, + self.current.model.target.name, + F.date_format( + F.to_timestamp( + F.date_sub( + F.next_day( + F.date_format( + self.current.model.timestamp.name, + create_time_format( + self.current.model.granularity + ), + ), + "sunday", + ), + 7, + ) + ), + "yyyy-MM-dd HH:mm:ss", + ).alias("time_group"), + ] + ) + else: + dataset_with_group = self.indexed_current.select( + [ + f"{self.reference.model.outputs.prediction.name}-idx", + f"{self.current.model.target.name}-idx", + self.current.model.outputs.prediction.name, + self.current.model.target.name, + F.date_format( + F.to_timestamp( + F.date_format( + self.current.model.timestamp.name, + create_time_format(self.current.model.granularity), + ) + ), + "yyyy-MM-dd HH:mm:ss", + ).alias("time_group"), + ] + ) + + dataset_with_group.show() + + list_of_time_group = ( + dataset_with_group.select("time_group") + .distinct() + .orderBy(F.col("time_group").asc()) + .rdd.flatMap(lambda x: x) + .collect() + ) + array_of_groups = [ + dataset_with_group.where(F.col("time_group") == x) + for x in list_of_time_group + ] + + return [ + { + "class_name": label, + "metrics": { + metric_label: self.__evaluate_multi_class_classification( + self.indexed_current, metric_name, float(index) + ) + for ( + metric_name, + metric_label, + ) in self.model_quality_multiclass_classificator_by_label.items() + }, + "grouped_metrics": { + metric_label: [ + { + "timestamp": group, + "value": self.__evaluate_multi_class_classification( + group_dataset, metric_name, float(index) + ), + } + for group, group_dataset in zip( + list_of_time_group, array_of_groups + ) + ] + for metric_name, metric_label in self.model_quality_multiclass_classificator_by_label.items() + }, + } + for index, label in self.index_label_map.items() + ] + + def __evaluate_multi_class_classification( + self, dataset: DataFrame, metric_name: str, class_index: float + ) -> float: + try: + return MulticlassClassificationEvaluator( + metricName=metric_name, + predictionCol=f"{self.current.model.outputs.prediction.name}-idx", + labelCol=f"{self.current.model.target.name}-idx", + metricLabel=class_index, + ).evaluate(dataset) + except Exception: + return float("nan") + + def __calc_multiclass_global_metrics(self) -> Dict: + return { + metric_label: self.__evaluate_multi_class_classification( + self.indexed_current, metric_name, 0.0 + ) + for ( + metric_name, + metric_label, + ) in self.model_quality_multiclass_classificator_global.items() + } + + def __calc_confusion_matrix(self): + prediction_and_labels = self.indexed_current.select( + *[ + f"{self.reference.model.outputs.prediction.name}-idx", + f"{self.reference.model.target.name}-idx", + ] + ).rdd + multiclass_metrics_calculator = MulticlassMetrics(prediction_and_labels) + return multiclass_metrics_calculator.confusionMatrix().toArray().tolist() + + def calculate_model_quality(self) -> Dict: + metrics_by_label = self.calculate_multiclass_model_quality_group_by_timestamp() + global_metrics = self.__calc_multiclass_global_metrics() + global_metrics["confusion_matrix"] = self.__calc_confusion_matrix() + metrics = { + "classes": list(self.index_label_map.values()), + "class_metrics": metrics_by_label, + "global_metrics": global_metrics, + } + + return metrics + def calculate_data_quality(self) -> MultiClassDataQuality: feature_metrics = [] - if self.model.get_numerical_features(): + if self.current.model.get_numerical_features(): feature_metrics.extend(self.calculate_data_quality_numerical()) - if self.model.get_categorical_features(): + if self.current.model.get_categorical_features(): feature_metrics.extend(self.calculate_data_quality_categorical()) return MultiClassDataQuality( - n_observations=self.reference_count, + n_observations=self.current.current_count, class_metrics=self.calculate_class_metrics(), feature_metrics=feature_metrics, ) + + def calculate_drift(self): + return DriftCalculator.calculate_drift( + spark_session=self.spark_session, + reference_dataset=self.reference, + current_dataset=self.current, + ) diff --git a/spark/jobs/utils/misc.py b/spark/jobs/utils/misc.py index 244010b..23c4dd2 100644 --- a/spark/jobs/utils/misc.py +++ b/spark/jobs/utils/misc.py @@ -1,6 +1,21 @@ +from utils.models import Granularity + + def split_dict(dictionary): cleaned_dict = dict() for k, v in dictionary.items(): feature, metric = tuple(k.rsplit("-", 1)) cleaned_dict.setdefault(feature, dict())[metric] = v return cleaned_dict + + +def create_time_format(granularity: Granularity): + match granularity: + case Granularity.HOUR: + return "yyyy-MM-dd HH" + case Granularity.DAY: + return "yyyy-MM-dd" + case Granularity.WEEK: + return "yyyy-MM-dd" + case Granularity.MONTH: + return "yyyy-MM" diff --git a/spark/jobs/utils/reference_binary.py b/spark/jobs/utils/reference_binary.py index 9592baa..6682085 100644 --- a/spark/jobs/utils/reference_binary.py +++ b/spark/jobs/utils/reference_binary.py @@ -15,7 +15,7 @@ ClassMetrics, BinaryClassDataQuality, ) -from .models import ModelOut +from models.reference_dataset import ReferenceDataset class ReferenceMetricsService: @@ -40,10 +40,8 @@ class ReferenceMetricsService: "fMeasureByLabel": "f_measure", } - def __init__(self, reference: DataFrame, model: ModelOut): - self.model = model + def __init__(self, reference: ReferenceDataset): self.reference = reference - self.reference_count = self.reference.count() def __evaluate_binary_classification( self, dataset: DataFrame, metric_name: str @@ -51,8 +49,8 @@ def __evaluate_binary_classification( try: return BinaryClassificationEvaluator( metricName=metric_name, - labelCol=self.model.target.name, - rawPredictionCol=self.model.outputs.prediction_proba.name, + labelCol=self.reference.model.target.name, + rawPredictionCol=self.reference.model.outputs.prediction_proba.name, ).evaluate(dataset) except Exception: return float("nan") @@ -65,8 +63,8 @@ def __evaluate_multi_class_classification( try: return MulticlassClassificationEvaluator( metricName=metric_name, - predictionCol=self.model.outputs.prediction.name, - labelCol=self.model.target.name, + predictionCol=self.reference.model.outputs.prediction.name, + labelCol=self.reference.model.target.name, metricLabel=1, ).evaluate(dataset) except Exception: @@ -75,14 +73,16 @@ def __evaluate_multi_class_classification( # FIXME use pydantic struct like data quality def __calc_bc_metrics(self) -> dict[str, float]: return { - label: self.__evaluate_binary_classification(self.reference, name) + label: self.__evaluate_binary_classification(self.reference.reference, name) for (name, label) in self.model_quality_binary_classificator.items() } # FIXME use pydantic struct like data quality def __calc_mc_metrics(self) -> dict[str, float]: return { - label: self.__evaluate_multi_class_classification(self.reference, name) + label: self.__evaluate_multi_class_classification( + self.reference.reference, name + ) for (name, label) in self.model_quality_multiclass_classificator.items() } @@ -90,7 +90,7 @@ def __calc_mc_metrics(self) -> dict[str, float]: def calculate_model_quality(self) -> dict[str, float]: metrics = self.__calc_mc_metrics() metrics.update(self.calculate_confusion_matrix()) - if self.model.outputs.prediction_proba is not None: + if self.reference.model.outputs.prediction_proba is not None: metrics.update(self.__calc_bc_metrics()) return metrics @@ -98,28 +98,34 @@ def calculate_model_quality(self) -> dict[str, float]: # FIXME use pydantic struct like data quality def calculate_confusion_matrix(self) -> dict[str, float]: prediction_and_label = ( - self.reference.select( - [self.model.outputs.prediction.name, self.model.target.name] + self.reference.reference.select( + [ + self.reference.model.outputs.prediction.name, + self.reference.model.target.name, + ] + ) + .withColumn( + self.reference.model.target.name, + f.col(self.reference.model.target.name), ) - .withColumn(self.model.target.name, f.col(self.model.target.name)) - .orderBy(self.model.target.name) + .orderBy(self.reference.model.target.name) ) tp = prediction_and_label.filter( - (col(self.model.outputs.prediction.name) == 1) - & (col(self.model.target.name) == 1) + (col(self.reference.model.outputs.prediction.name) == 1) + & (col(self.reference.model.target.name) == 1) ).count() tn = prediction_and_label.filter( - (col(self.model.outputs.prediction.name) == 0) - & (col(self.model.target.name) == 0) + (col(self.reference.model.outputs.prediction.name) == 0) + & (col(self.reference.model.target.name) == 0) ).count() fp = prediction_and_label.filter( - (col(self.model.outputs.prediction.name) == 1) - & (col(self.model.target.name) == 0) + (col(self.reference.model.outputs.prediction.name) == 1) + & (col(self.reference.model.target.name) == 0) ).count() fn = prediction_and_label.filter( - (col(self.model.outputs.prediction.name) == 0) - & (col(self.model.target.name) == 1) + (col(self.reference.model.outputs.prediction.name) == 0) + & (col(self.reference.model.target.name) == 1) ).count() return { @@ -131,23 +137,23 @@ def calculate_confusion_matrix(self) -> dict[str, float]: def calculate_data_quality_numerical(self) -> List[NumericalFeatureMetrics]: return DataQualityCalculator.numerical_metrics( - model=self.model, - dataframe=self.reference, - dataframe_count=self.reference_count, + model=self.reference.model, + dataframe=self.reference.reference, + dataframe_count=self.reference.reference_count, ) def calculate_data_quality_categorical(self) -> List[CategoricalFeatureMetrics]: return DataQualityCalculator.categorical_metrics( - model=self.model, - dataframe=self.reference, - dataframe_count=self.reference_count, + model=self.reference.model, + dataframe=self.reference.reference, + dataframe_count=self.reference.reference_count, ) def calculate_class_metrics(self) -> List[ClassMetrics]: metrics = DataQualityCalculator.class_metrics( - class_column=self.model.target.name, - dataframe=self.reference, - dataframe_count=self.reference_count, + class_column=self.reference.model.target.name, + dataframe=self.reference.reference, + dataframe_count=self.reference.reference_count, ) # FIXME this should be avoided if we are sure that we have all classes in the file @@ -174,12 +180,12 @@ def calculate_class_metrics(self) -> List[ClassMetrics]: def calculate_data_quality(self) -> BinaryClassDataQuality: feature_metrics = [] - if self.model.get_numerical_features(): + if self.reference.model.get_numerical_features(): feature_metrics.extend(self.calculate_data_quality_numerical()) - if self.model.get_categorical_features(): + if self.reference.model.get_categorical_features(): feature_metrics.extend(self.calculate_data_quality_categorical()) return BinaryClassDataQuality( - n_observations=self.reference_count, + n_observations=self.reference.reference_count, class_metrics=self.calculate_class_metrics(), feature_metrics=feature_metrics, ) diff --git a/spark/tests/binary_current_test.py b/spark/tests/binary_current_test.py index 282ce6d..cc39571 100644 --- a/spark/tests/binary_current_test.py +++ b/spark/tests/binary_current_test.py @@ -185,9 +185,8 @@ def test_calculation(spark_fixture, dataset): metrics_service = CurrentMetricsService( spark_session=spark_fixture, - current=current_dataset.current, - reference=reference_dataset.reference, - model=model, + current=current_dataset, + reference=reference_dataset, ) stats = calculate_statistics_current(current_dataset) @@ -423,9 +422,8 @@ def test_calculation_current_joined(spark_fixture, current_joined): metrics_service = CurrentMetricsService( spark_session=spark_fixture, - current=current_dataset.current, - reference=reference_dataset.reference, - model=model, + current=current_dataset, + reference=reference_dataset, ) stats = calculate_statistics_current(current_dataset) @@ -816,9 +814,8 @@ def test_calculation_complete(spark_fixture, complete_dataset): metrics_service = CurrentMetricsService( spark_session=spark_fixture, - current=current_dataset.current, - reference=reference_dataset.reference, - model=model, + current=current_dataset, + reference=reference_dataset, ) stats = calculate_statistics_current(current_dataset) @@ -965,9 +962,8 @@ def test_calculation_easy_dataset(spark_fixture, easy_dataset): metrics_service = CurrentMetricsService( spark_session=spark_fixture, - current=current_dataset.current, - reference=reference_dataset.reference, - model=model, + current=current_dataset, + reference=reference_dataset, ) stats = calculate_statistics_current(current_dataset) @@ -1114,9 +1110,8 @@ def test_calculation_dataset_cat_missing(spark_fixture, dataset_cat_missing): metrics_service = CurrentMetricsService( spark_session=spark_fixture, - current=current_dataset.current, - reference=reference_dataset.reference, - model=model, + current=current_dataset, + reference=reference_dataset, ) stats = calculate_statistics_current(current_dataset) @@ -1278,9 +1273,8 @@ def test_calculation_dataset_with_datetime(spark_fixture, dataset_with_datetime) metrics_service = CurrentMetricsService( spark_session=spark_fixture, - current=current_dataset.current, - reference=reference_dataset.reference, - model=model, + current=current_dataset, + reference=reference_dataset, ) stats = calculate_statistics_current(current_dataset) @@ -1442,9 +1436,8 @@ def test_calculation_easy_dataset_bucket_test(spark_fixture, easy_dataset_bucket metrics_service = CurrentMetricsService( spark_session=spark_fixture, - current=current_dataset.current, - reference=reference_dataset.reference, - model=model, + current=current_dataset, + reference=reference_dataset, ) stats = calculate_statistics_current(current_dataset) @@ -1619,9 +1612,8 @@ def test_calculation_for_hour(spark_fixture, dataset_for_hour): metrics_service = CurrentMetricsService( spark_session=spark_fixture, - current=current_dataset.current, - reference=reference_dataset.reference, - model=model, + current=current_dataset, + reference=reference_dataset, ) stats = calculate_statistics_current(current_dataset) @@ -1912,9 +1904,8 @@ def test_calculation_for_day(spark_fixture, dataset_for_day): metrics_service = CurrentMetricsService( spark_session=spark_fixture, - current=current_dataset.current, - reference=reference_dataset.reference, - model=model, + current=current_dataset, + reference=reference_dataset, ) stats = calculate_statistics_current(current_dataset) @@ -2191,9 +2182,8 @@ def test_calculation_for_week(spark_fixture, dataset_for_week): metrics_service = CurrentMetricsService( spark_session=spark_fixture, - current=current_dataset.current, - reference=reference_dataset.reference, - model=model, + current=current_dataset, + reference=reference_dataset, ) stats = calculate_statistics_current(current_dataset) @@ -2470,9 +2460,8 @@ def test_calculation_for_month(spark_fixture, dataset_for_month): metrics_service = CurrentMetricsService( spark_session=spark_fixture, - current=current_dataset.current, - reference=reference_dataset.reference, - model=model, + current=current_dataset, + reference=reference_dataset, ) stats = calculate_statistics_current(current_dataset) diff --git a/spark/tests/binary_reference_test.py b/spark/tests/binary_reference_test.py index 06d7e64..31d0163 100644 --- a/spark/tests/binary_reference_test.py +++ b/spark/tests/binary_reference_test.py @@ -112,7 +112,7 @@ def test_calculation(spark_fixture, dataset): ) reference_dataset = ReferenceDataset(model=model, raw_dataframe=dataset) - metrics_service = ReferenceMetricsService(reference_dataset.reference, model=model) + metrics_service = ReferenceMetricsService(reference_dataset) stats = calculate_statistics_reference(reference_dataset) model_quality = metrics_service.calculate_model_quality() @@ -294,7 +294,7 @@ def test_calculation_reference_joined(spark_fixture, reference_joined): ) reference_dataset = ReferenceDataset(model=model, raw_dataframe=reference_joined) - metrics_service = ReferenceMetricsService(reference_dataset.reference, model=model) + metrics_service = ReferenceMetricsService(reference_dataset) stats = calculate_statistics_reference(reference_dataset) model_quality = metrics_service.calculate_model_quality() @@ -690,7 +690,7 @@ def test_calculation_complete(spark_fixture, complete_dataset): ) reference_dataset = ReferenceDataset(model=model, raw_dataframe=complete_dataset) - metrics_service = ReferenceMetricsService(reference_dataset.reference, model=model) + metrics_service = ReferenceMetricsService(reference_dataset) stats = calculate_statistics_reference(reference_dataset) model_quality = metrics_service.calculate_model_quality() @@ -845,7 +845,7 @@ def test_calculation_easy_dataset(spark_fixture, easy_dataset): ) reference_dataset = ReferenceDataset(model=model, raw_dataframe=easy_dataset) - metrics_service = ReferenceMetricsService(reference_dataset.reference, model=model) + metrics_service = ReferenceMetricsService(reference_dataset) stats = calculate_statistics_reference(reference_dataset) model_quality = metrics_service.calculate_model_quality() @@ -999,7 +999,7 @@ def test_calculation_dataset_cat_missing(spark_fixture, dataset_cat_missing): ) reference_dataset = ReferenceDataset(model=model, raw_dataframe=dataset_cat_missing) - metrics_service = ReferenceMetricsService(reference_dataset.reference, model=model) + metrics_service = ReferenceMetricsService(reference_dataset) stats = calculate_statistics_reference(reference_dataset) model_quality = metrics_service.calculate_model_quality() @@ -1176,7 +1176,7 @@ def test_calculation_dataset_with_datetime(spark_fixture, dataset_with_datetime) reference_dataset = ReferenceDataset( model=model, raw_dataframe=dataset_with_datetime ) - metrics_service = ReferenceMetricsService(reference_dataset.reference, model=model) + metrics_service = ReferenceMetricsService(reference_dataset) stats = calculate_statistics_reference(reference_dataset) model_quality = metrics_service.calculate_model_quality() @@ -1359,7 +1359,7 @@ def test_calculation_enhanced_data(spark_fixture, enhanced_data): ) reference_dataset = ReferenceDataset(model=model, raw_dataframe=enhanced_data) - metrics_service = ReferenceMetricsService(reference_dataset.reference, model=model) + metrics_service = ReferenceMetricsService(reference_dataset) stats = calculate_statistics_reference(reference_dataset) model_quality = metrics_service.calculate_model_quality() @@ -1899,7 +1899,7 @@ def test_calculation_dataset_bool_missing(spark_fixture, dataset_bool_missing): reference_dataset = ReferenceDataset( model=model, raw_dataframe=dataset_bool_missing ) - metrics_service = ReferenceMetricsService(reference_dataset.reference, model=model) + metrics_service = ReferenceMetricsService(reference_dataset) stats = calculate_statistics_reference(reference_dataset) model_quality = metrics_service.calculate_model_quality() diff --git a/spark/tests/binary_drift_test.py b/spark/tests/drift_calculator_test.py similarity index 93% rename from spark/tests/binary_drift_test.py rename to spark/tests/drift_calculator_test.py index 2d1f681..0d16407 100644 --- a/spark/tests/binary_drift_test.py +++ b/spark/tests/drift_calculator_test.py @@ -6,7 +6,6 @@ from jobs.models.current_dataset import CurrentDataset from jobs.models.reference_dataset import ReferenceDataset -from jobs.utils.current_binary import CurrentMetricsService from jobs.utils.models import ( ModelOut, ModelType, @@ -16,6 +15,7 @@ SupportedTypes, Granularity, ) +from metrics.drift_calculator import DriftCalculator @pytest.fixture() @@ -102,15 +102,13 @@ def test_drift(spark_fixture, drift_dataset): reference_dataset = ReferenceDataset( model=model, raw_dataframe=raw_reference_dataset ) - metrics_service = CurrentMetricsService( + + drift = DriftCalculator.calculate_drift( spark_session=spark_fixture, - current=current_dataset.current, - reference=reference_dataset.reference, - model=model, + current_dataset=current_dataset, + reference_dataset=reference_dataset, ) - drift = metrics_service.calculate_drift() - assert not deepdiff.DeepDiff( drift, { @@ -188,15 +186,12 @@ def test_drift_small(spark_fixture, drift_small_dataset): reference_dataset = ReferenceDataset( model=model, raw_dataframe=raw_reference_dataset ) - metrics_service = CurrentMetricsService( + drift = DriftCalculator.calculate_drift( spark_session=spark_fixture, - current=current_dataset.current, - reference=reference_dataset.reference, - model=model, + current_dataset=current_dataset, + reference_dataset=reference_dataset, ) - drift = metrics_service.calculate_drift() - assert not deepdiff.DeepDiff( drift, { @@ -266,15 +261,12 @@ def test_drift_boolean(spark_fixture, drift_dataset_bool): reference_dataset = ReferenceDataset( model=model, raw_dataframe=raw_reference_dataset ) - metrics_service = CurrentMetricsService( + drift = DriftCalculator.calculate_drift( spark_session=spark_fixture, - current=current_dataset.current, - reference=reference_dataset.reference, - model=model, + current_dataset=current_dataset, + reference_dataset=reference_dataset, ) - drift = metrics_service.calculate_drift() - assert not deepdiff.DeepDiff( drift, { @@ -352,15 +344,12 @@ def test_drift_bigger_file(spark_fixture, drift_dataset_bigger_file): reference_dataset = ReferenceDataset( model=model, raw_dataframe=raw_reference_dataset ) - metrics_service = CurrentMetricsService( + drift = DriftCalculator.calculate_drift( spark_session=spark_fixture, - current=current_dataset.current, - reference=reference_dataset.reference, - model=model, + current_dataset=current_dataset, + reference_dataset=reference_dataset, ) - drift = metrics_service.calculate_drift() - assert not deepdiff.DeepDiff( drift, { diff --git a/spark/tests/models/current_dataset_test.py b/spark/tests/models/current_dataset_test.py new file mode 100644 index 0000000..bdcfaa7 --- /dev/null +++ b/spark/tests/models/current_dataset_test.py @@ -0,0 +1,78 @@ +import datetime +import uuid + +import pytest + +from models.current_dataset import CurrentDataset +from models.reference_dataset import ReferenceDataset +from utils.models import ( + SupportedTypes, + OutputType, + ColumnDefinition, + Granularity, + ModelOut, + ModelType, + DataType, +) + + +@pytest.fixture() +def dataset_target_string(spark_fixture, test_data_dir): + yield ( + spark_fixture.read.csv( + f"{test_data_dir}/current/multiclass/dataset_target_string_missing_classes.csv", + header=True, + ), + spark_fixture.read.csv( + f"{test_data_dir}/reference/multiclass/dataset_target_string.csv", + header=True, + ), + ) + + +def test_indexer(spark_fixture, dataset_target_string): + output = OutputType( + prediction=ColumnDefinition(name="prediction", type=SupportedTypes.string), + prediction_proba=None, + output=[ColumnDefinition(name="prediction", type=SupportedTypes.string)], + ) + target = ColumnDefinition(name="target", type=SupportedTypes.string) + timestamp = ColumnDefinition(name="datetime", type=SupportedTypes.datetime) + granularity = Granularity.HOUR + features = [ + ColumnDefinition(name="cat1", type=SupportedTypes.string), + ColumnDefinition(name="cat2", type=SupportedTypes.string), + ColumnDefinition(name="num1", type=SupportedTypes.float), + ColumnDefinition(name="num2", type=SupportedTypes.float), + ] + model = ModelOut( + uuid=uuid.uuid4(), + name="model", + description="description", + model_type=ModelType.MULTI_CLASS, + data_type=DataType.TABULAR, + timestamp=timestamp, + granularity=granularity, + outputs=output, + target=target, + features=features, + frameworks="framework", + algorithm="algorithm", + created_at=str(datetime.datetime.now()), + updated_at=str(datetime.datetime.now()), + ) + + current_dataframe, reference_dataframe = dataset_target_string + current_dataset = CurrentDataset(model=model, raw_dataframe=current_dataframe) + reference_dataset = ReferenceDataset(model=model, raw_dataframe=reference_dataframe) + + index_label_map, indexed_dataset = current_dataset.get_string_indexed_dataframe( + reference_dataset + ) + + assert index_label_map == { + "0.0": "HEALTHY", + "1.0": "ORPHAN", + "2.0": "UNHEALTHY", + "3.0": "UNKNOWN", + } diff --git a/spark/tests/multiclass_current_test.py b/spark/tests/multiclass_current_test.py index 39e8ae0..3b26961 100644 --- a/spark/tests/multiclass_current_test.py +++ b/spark/tests/multiclass_current_test.py @@ -62,6 +62,20 @@ def dataset_perfect_classes(spark_fixture, test_data_dir): ) +@pytest.fixture() +def dataset_for_hour(spark_fixture, test_data_dir): + yield ( + spark_fixture.read.csv( + f"{test_data_dir}/reference/multiclass/dataset_for_hour.csv", + header=True, + ), + spark_fixture.read.csv( + f"{test_data_dir}/current/multiclass/dataset_for_hour.csv", + header=True, + ), + ) + + def test_calculation_dataset_target_int(spark_fixture, dataset_target_int): output = OutputType( prediction=ColumnDefinition(name="prediction", type=SupportedTypes.int), @@ -100,12 +114,12 @@ def test_calculation_dataset_target_int(spark_fixture, dataset_target_int): metrics_service = CurrentMetricsMulticlassService( spark_session=spark_fixture, - current=current_dataset.current, - reference=reference_dataset.reference, - model=model, + current=current_dataset, + reference=reference_dataset, ) data_quality = metrics_service.calculate_data_quality() + model_quality = metrics_service.calculate_model_quality() stats = calculate_statistics_current(current_dataset) @@ -221,6 +235,160 @@ def test_calculation_dataset_target_int(spark_fixture, dataset_target_int): significant_digits=6, ) + assert not deepdiff.DeepDiff( + model_quality, + { + "classes": ["0", "1", "2", "3"], + "class_metrics": [ + { + "class_name": "0", + "metrics": { + "true_positive_rate": 0.6666666666666666, + "false_positive_rate": 0.0, + "precision": 1.0, + "recall": 0.6666666666666666, + "f_measure": 0.8, + }, + "grouped_metrics": { + "true_positive_rate": [ + { + "timestamp": "2024-06-16 00:00:00", + "value": 0.6666666666666666, + } + ], + "false_positive_rate": [ + {"timestamp": "2024-06-16 00:00:00", "value": 0.0} + ], + "precision": [ + {"timestamp": "2024-06-16 00:00:00", "value": 1.0} + ], + "recall": [ + { + "timestamp": "2024-06-16 00:00:00", + "value": 0.6666666666666666, + } + ], + "f_measure": [ + {"timestamp": "2024-06-16 00:00:00", "value": 0.8} + ], + }, + }, + { + "class_name": "1", + "metrics": { + "true_positive_rate": 1.0, + "false_positive_rate": 0.14285714285714285, + "precision": 0.75, + "recall": 1.0, + "f_measure": 0.8571428571428571, + }, + "grouped_metrics": { + "true_positive_rate": [ + {"timestamp": "2024-06-16 00:00:00", "value": 1.0} + ], + "false_positive_rate": [ + { + "timestamp": "2024-06-16 00:00:00", + "value": 0.14285714285714285, + } + ], + "precision": [ + {"timestamp": "2024-06-16 00:00:00", "value": 0.75} + ], + "recall": [{"timestamp": "2024-06-16 00:00:00", "value": 1.0}], + "f_measure": [ + { + "timestamp": "2024-06-16 00:00:00", + "value": 0.8571428571428571, + } + ], + }, + }, + { + "class_name": "2", + "metrics": { + "true_positive_rate": 0.3333333333333333, + "false_positive_rate": 0.14285714285714285, + "precision": 0.5, + "recall": 0.3333333333333333, + "f_measure": 0.4, + }, + "grouped_metrics": { + "true_positive_rate": [ + { + "timestamp": "2024-06-16 00:00:00", + "value": 0.3333333333333333, + } + ], + "false_positive_rate": [ + { + "timestamp": "2024-06-16 00:00:00", + "value": 0.14285714285714285, + } + ], + "precision": [ + {"timestamp": "2024-06-16 00:00:00", "value": 0.5} + ], + "recall": [ + { + "timestamp": "2024-06-16 00:00:00", + "value": 0.3333333333333333, + } + ], + "f_measure": [ + {"timestamp": "2024-06-16 00:00:00", "value": 0.4} + ], + }, + }, + { + "class_name": "3", + "metrics": { + "true_positive_rate": 0.0, + "false_positive_rate": 0.2222222222222222, + "precision": 0.0, + "recall": 0.0, + "f_measure": 0.0, + }, + "grouped_metrics": { + "true_positive_rate": [ + {"timestamp": "2024-06-16 00:00:00", "value": 0.0} + ], + "false_positive_rate": [ + { + "timestamp": "2024-06-16 00:00:00", + "value": 0.2222222222222222, + } + ], + "precision": [ + {"timestamp": "2024-06-16 00:00:00", "value": 0.0} + ], + "recall": [{"timestamp": "2024-06-16 00:00:00", "value": 0.0}], + "f_measure": [ + {"timestamp": "2024-06-16 00:00:00", "value": 0.0} + ], + }, + }, + ], + "global_metrics": { + "f1": 0.6171428571428572, + "accuracy": 0.6, + "weighted_precision": 0.675, + "weighted_recall": 0.6000000000000001, + "weighted_true_positive_rate": 0.6000000000000001, + "weighted_false_positive_rate": 0.10793650793650794, + "weighted_f_measure": 0.6171428571428572, + "confusion_matrix": [ + [2.0, 0.0, 1.0, 0.0], + [0.0, 3.0, 0.0, 0.0], + [0.0, 0.0, 1.0, 2.0], + [0.0, 1.0, 0.0, 0.0], + ], + }, + }, + ignore_order=True, + significant_digits=6, + ) + def test_calculation_dataset_target_string(spark_fixture, dataset_target_string): output = OutputType( @@ -260,13 +428,12 @@ def test_calculation_dataset_target_string(spark_fixture, dataset_target_string) metrics_service = CurrentMetricsMulticlassService( spark_session=spark_fixture, - current=current_dataset.current, - reference=reference_dataset.reference, - model=model, + current=current_dataset, + reference=reference_dataset, ) data_quality = metrics_service.calculate_data_quality() - + model_quality = metrics_service.calculate_model_quality() stats = calculate_statistics_current(current_dataset) assert stats.model_dump(serialize_as_any=True) == my_approx( @@ -381,6 +548,160 @@ def test_calculation_dataset_target_string(spark_fixture, dataset_target_string) significant_digits=6, ) + assert not deepdiff.DeepDiff( + model_quality, + { + "classes": ["HEALTHY", "ORPHAN", "UNHEALTHY", "UNKNOWN"], + "class_metrics": [ + { + "class_name": "HEALTHY", + "metrics": { + "true_positive_rate": 1.0, + "false_positive_rate": 0.14285714285714285, + "precision": 0.75, + "recall": 1.0, + "f_measure": 0.8571428571428571, + }, + "grouped_metrics": { + "true_positive_rate": [ + {"timestamp": "2024-06-16 00:00:00", "value": 1.0} + ], + "false_positive_rate": [ + { + "timestamp": "2024-06-16 00:00:00", + "value": 0.14285714285714285, + } + ], + "precision": [ + {"timestamp": "2024-06-16 00:00:00", "value": 0.75} + ], + "recall": [{"timestamp": "2024-06-16 00:00:00", "value": 1.0}], + "f_measure": [ + { + "timestamp": "2024-06-16 00:00:00", + "value": 0.8571428571428571, + } + ], + }, + }, + { + "class_name": "ORPHAN", + "metrics": { + "true_positive_rate": 0.0, + "false_positive_rate": 0.2222222222222222, + "precision": 0.0, + "recall": 0.0, + "f_measure": 0.0, + }, + "grouped_metrics": { + "true_positive_rate": [ + {"timestamp": "2024-06-16 00:00:00", "value": 0.0} + ], + "false_positive_rate": [ + { + "timestamp": "2024-06-16 00:00:00", + "value": 0.2222222222222222, + } + ], + "precision": [ + {"timestamp": "2024-06-16 00:00:00", "value": 0.0} + ], + "recall": [{"timestamp": "2024-06-16 00:00:00", "value": 0.0}], + "f_measure": [ + {"timestamp": "2024-06-16 00:00:00", "value": 0.0} + ], + }, + }, + { + "class_name": "UNHEALTHY", + "metrics": { + "true_positive_rate": 0.6666666666666666, + "false_positive_rate": 0.0, + "precision": 1.0, + "recall": 0.6666666666666666, + "f_measure": 0.8, + }, + "grouped_metrics": { + "true_positive_rate": [ + { + "timestamp": "2024-06-16 00:00:00", + "value": 0.6666666666666666, + } + ], + "false_positive_rate": [ + {"timestamp": "2024-06-16 00:00:00", "value": 0.0} + ], + "precision": [ + {"timestamp": "2024-06-16 00:00:00", "value": 1.0} + ], + "recall": [ + { + "timestamp": "2024-06-16 00:00:00", + "value": 0.6666666666666666, + } + ], + "f_measure": [ + {"timestamp": "2024-06-16 00:00:00", "value": 0.8} + ], + }, + }, + { + "class_name": "UNKNOWN", + "metrics": { + "true_positive_rate": 0.3333333333333333, + "false_positive_rate": 0.14285714285714285, + "precision": 0.5, + "recall": 0.3333333333333333, + "f_measure": 0.4, + }, + "grouped_metrics": { + "true_positive_rate": [ + { + "timestamp": "2024-06-16 00:00:00", + "value": 0.3333333333333333, + } + ], + "false_positive_rate": [ + { + "timestamp": "2024-06-16 00:00:00", + "value": 0.14285714285714285, + } + ], + "precision": [ + {"timestamp": "2024-06-16 00:00:00", "value": 0.5} + ], + "recall": [ + { + "timestamp": "2024-06-16 00:00:00", + "value": 0.3333333333333333, + } + ], + "f_measure": [ + {"timestamp": "2024-06-16 00:00:00", "value": 0.4} + ], + }, + }, + ], + "global_metrics": { + "f1": 0.6171428571428572, + "accuracy": 0.6, + "weighted_precision": 0.6749999999999999, + "weighted_recall": 0.6000000000000001, + "weighted_true_positive_rate": 0.6000000000000001, + "weighted_false_positive_rate": 0.10793650793650793, + "weighted_f_measure": 0.6171428571428572, + "confusion_matrix": [ + [3.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 2.0, 1.0], + [0.0, 2.0, 0.0, 1.0], + ], + }, + }, + ignore_order=True, + significant_digits=6, + ) + def test_calculation_dataset_perfect_classes(spark_fixture, dataset_perfect_classes): output = OutputType( @@ -420,13 +741,12 @@ def test_calculation_dataset_perfect_classes(spark_fixture, dataset_perfect_clas metrics_service = CurrentMetricsMulticlassService( spark_session=spark_fixture, - current=current_dataset.current, - reference=reference_dataset.reference, - model=model, + current=current_dataset, + reference=reference_dataset, ) data_quality = metrics_service.calculate_data_quality() - + model_quality = metrics_service.calculate_model_quality() stats = calculate_statistics_current(current_dataset) assert stats.model_dump(serialize_as_any=True) == my_approx( @@ -540,3 +860,457 @@ def test_calculation_dataset_perfect_classes(spark_fixture, dataset_perfect_clas ignore_order=True, significant_digits=6, ) + + assert not deepdiff.DeepDiff( + model_quality, + { + "classes": ["HEALTHY", "ORPHAN", "UNHEALTHY", "UNKNOWN"], + "class_metrics": [ + { + "class_name": "HEALTHY", + "metrics": { + "true_positive_rate": 1.0, + "false_positive_rate": 0.0, + "precision": 1.0, + "recall": 1.0, + "f_measure": 1.0, + }, + "grouped_metrics": { + "true_positive_rate": [ + {"timestamp": "2024-06-16 00:00:00", "value": 1.0} + ], + "false_positive_rate": [ + {"timestamp": "2024-06-16 00:00:00", "value": 0.0} + ], + "precision": [ + {"timestamp": "2024-06-16 00:00:00", "value": 1.0} + ], + "recall": [{"timestamp": "2024-06-16 00:00:00", "value": 1.0}], + "f_measure": [ + {"timestamp": "2024-06-16 00:00:00", "value": 1.0} + ], + }, + }, + { + "class_name": "ORPHAN", + "metrics": { + "true_positive_rate": 1.0, + "false_positive_rate": 0.0, + "precision": 1.0, + "recall": 1.0, + "f_measure": 1.0, + }, + "grouped_metrics": { + "true_positive_rate": [ + {"timestamp": "2024-06-16 00:00:00", "value": 1.0} + ], + "false_positive_rate": [ + {"timestamp": "2024-06-16 00:00:00", "value": 0.0} + ], + "precision": [ + {"timestamp": "2024-06-16 00:00:00", "value": 1.0} + ], + "recall": [{"timestamp": "2024-06-16 00:00:00", "value": 1.0}], + "f_measure": [ + {"timestamp": "2024-06-16 00:00:00", "value": 1.0} + ], + }, + }, + { + "class_name": "UNHEALTHY", + "metrics": { + "true_positive_rate": 1.0, + "false_positive_rate": 0.0, + "precision": 1.0, + "recall": 1.0, + "f_measure": 1.0, + }, + "grouped_metrics": { + "true_positive_rate": [ + {"timestamp": "2024-06-16 00:00:00", "value": 1.0} + ], + "false_positive_rate": [ + {"timestamp": "2024-06-16 00:00:00", "value": 0.0} + ], + "precision": [ + {"timestamp": "2024-06-16 00:00:00", "value": 1.0} + ], + "recall": [{"timestamp": "2024-06-16 00:00:00", "value": 1.0}], + "f_measure": [ + {"timestamp": "2024-06-16 00:00:00", "value": 1.0} + ], + }, + }, + { + "class_name": "UNKNOWN", + "metrics": { + "true_positive_rate": 1.0, + "false_positive_rate": 0.0, + "precision": 1.0, + "recall": 1.0, + "f_measure": 1.0, + }, + "grouped_metrics": { + "true_positive_rate": [ + {"timestamp": "2024-06-16 00:00:00", "value": 1.0} + ], + "false_positive_rate": [ + {"timestamp": "2024-06-16 00:00:00", "value": 0.0} + ], + "precision": [ + {"timestamp": "2024-06-16 00:00:00", "value": 1.0} + ], + "recall": [{"timestamp": "2024-06-16 00:00:00", "value": 1.0}], + "f_measure": [ + {"timestamp": "2024-06-16 00:00:00", "value": 1.0} + ], + }, + }, + ], + "global_metrics": { + "f1": 1.0, + "accuracy": 1.0, + "weighted_precision": 1.0, + "weighted_recall": 1.0, + "weighted_true_positive_rate": 1.0, + "weighted_false_positive_rate": 0.0, + "weighted_f_measure": 1.0, + "confusion_matrix": [ + [4.0, 0.0, 0.0, 0.0], + [0.0, 2.0, 0.0, 0.0], + [0.0, 0.0, 2.0, 0.0], + [0.0, 0.0, 0.0, 2.0], + ], + }, + }, + ignore_order=True, + significant_digits=6, + ) + + +def test_calculation_dataset_for_hour(spark_fixture, dataset_for_hour): + output = OutputType( + prediction=ColumnDefinition(name="prediction", type=SupportedTypes.string), + prediction_proba=None, + output=[ColumnDefinition(name="prediction", type=SupportedTypes.string)], + ) + target = ColumnDefinition(name="target", type=SupportedTypes.string) + timestamp = ColumnDefinition(name="datetime", type=SupportedTypes.datetime) + granularity = Granularity.HOUR + features = [ + ColumnDefinition(name="cat1", type=SupportedTypes.string), + ColumnDefinition(name="cat2", type=SupportedTypes.string), + ColumnDefinition(name="num1", type=SupportedTypes.float), + ColumnDefinition(name="num2", type=SupportedTypes.float), + ] + model = ModelOut( + uuid=uuid.uuid4(), + name="model", + description="description", + model_type=ModelType.MULTI_CLASS, + data_type=DataType.TABULAR, + timestamp=timestamp, + granularity=granularity, + outputs=output, + target=target, + features=features, + frameworks="framework", + algorithm="algorithm", + created_at=str(datetime.datetime.now()), + updated_at=str(datetime.datetime.now()), + ) + + current_dataframe, reference_dataframe = dataset_for_hour + current_dataset = CurrentDataset(model=model, raw_dataframe=current_dataframe) + reference_dataset = ReferenceDataset(model=model, raw_dataframe=reference_dataframe) + + metrics_service = CurrentMetricsMulticlassService( + spark_session=spark_fixture, + current=current_dataset, + reference=reference_dataset, + ) + + data_quality = metrics_service.calculate_data_quality() + + model_quality = metrics_service.calculate_model_quality() + + stats = calculate_statistics_current(current_dataset) + + assert stats.model_dump(serialize_as_any=True) == my_approx( + { + "categorical": 4, + "datetime": 1, + "duplicate_rows": 1, + "duplicate_rows_perc": 10.0, + "missing_cells": 3, + "missing_cells_perc": 4.285714285714286, + "n_observations": 10, + "n_variables": 7, + "numeric": 2, + } + ) + + assert not deepdiff.DeepDiff( + data_quality.model_dump(serialize_as_any=True, exclude_none=True), + { + "n_observations": 10, + "class_metrics": [ + {"name": "DOG", "count": 2, "percentage": 20.0}, + {"name": "COW", "count": 3, "percentage": 30.0}, + {"name": "CAT", "count": 5, "percentage": 50.0}, + ], + "feature_metrics": [ + { + "feature_name": "num1", + "type": "numerical", + "missing_value": {"count": 1, "percentage": 10.0}, + "mean": 1.1666666666666667, + "std": 0.75, + "min": 0.5, + "max": 3.0, + "median_metrics": {"perc_25": 1.0, "median": 1.0, "perc_75": 1.0}, + "class_median_metrics": [], + "histogram": { + "buckets": [ + 0.5, + 0.75, + 1.0, + 1.25, + 1.5, + 1.75, + 2.0, + 2.25, + 2.5, + 2.75, + 3.0, + ], + "reference_values": [2, 0, 5, 0, 1, 0, 0, 0, 0, 1], + "current_values": [2, 0, 5, 0, 1, 0, 0, 0, 0, 1], + }, + }, + { + "feature_name": "num2", + "type": "numerical", + "missing_value": {"count": 2, "percentage": 20.0}, + "mean": 277.675, + "std": 201.88635947695215, + "min": 1.4, + "max": 499.0, + "median_metrics": { + "perc_25": 117.25, + "median": 250.0, + "perc_75": 499.0, + }, + "class_median_metrics": [], + "histogram": { + "buckets": [ + 1.4, + 51.160000000000004, + 100.92000000000002, + 150.68000000000004, + 200.44000000000003, + 250.20000000000002, + 299.96000000000004, + 349.72, + 399.48, + 449.24, + 499.0, + ], + "reference_values": [1, 1, 1, 1, 0, 0, 1, 0, 0, 3], + "current_values": [1, 1, 1, 1, 0, 0, 1, 0, 0, 3], + }, + }, + { + "feature_name": "cat1", + "type": "categorical", + "missing_value": {"count": 0, "percentage": 0.0}, + "category_frequency": [ + {"name": "B", "count": 4, "frequency": 0.4}, + {"name": "C", "count": 1, "frequency": 0.1}, + {"name": "A", "count": 5, "frequency": 0.5}, + ], + "distinct_value": 3, + }, + { + "feature_name": "cat2", + "type": "categorical", + "missing_value": {"count": 0, "percentage": 0.0}, + "category_frequency": [ + {"name": "Y", "count": 1, "frequency": 0.1}, + {"name": "X", "count": 9, "frequency": 0.9}, + ], + "distinct_value": 2, + }, + ], + }, + ignore_order=True, + significant_digits=6, + ) + + assert not deepdiff.DeepDiff( + model_quality, + { + "classes": ["CAT", "COW", "DOG"], + "class_metrics": [ + { + "class_name": "CAT", + "metrics": { + "true_positive_rate": 0.6, + "false_positive_rate": 0.2, + "precision": 0.75, + "recall": 0.6, + "f_measure": 0.6666666666666665, + }, + "grouped_metrics": { + "true_positive_rate": [ + {"timestamp": "2024-06-16 00:00:00", "value": 1.0}, + {"timestamp": "2024-06-16 01:00:00", "value": float("nan")}, + {"timestamp": "2024-06-16 02:00:00", "value": 0.0}, + {"timestamp": "2024-06-16 03:00:00", "value": 1.0}, + {"timestamp": "2024-06-16 04:00:00", "value": 1.0}, + ], + "false_positive_rate": [ + {"timestamp": "2024-06-16 00:00:00", "value": 0.0}, + {"timestamp": "2024-06-16 01:00:00", "value": float("nan")}, + {"timestamp": "2024-06-16 02:00:00", "value": 0.0}, + {"timestamp": "2024-06-16 03:00:00", "value": 0.0}, + {"timestamp": "2024-06-16 04:00:00", "value": float("nan")}, + ], + "precision": [ + {"timestamp": "2024-06-16 00:00:00", "value": 1.0}, + {"timestamp": "2024-06-16 01:00:00", "value": float("nan")}, + {"timestamp": "2024-06-16 02:00:00", "value": 0.0}, + {"timestamp": "2024-06-16 03:00:00", "value": 1.0}, + {"timestamp": "2024-06-16 04:00:00", "value": 1.0}, + ], + "recall": [ + {"timestamp": "2024-06-16 00:00:00", "value": 1.0}, + {"timestamp": "2024-06-16 01:00:00", "value": float("nan")}, + {"timestamp": "2024-06-16 02:00:00", "value": 0.0}, + {"timestamp": "2024-06-16 03:00:00", "value": 1.0}, + {"timestamp": "2024-06-16 04:00:00", "value": 1.0}, + ], + "f_measure": [ + {"timestamp": "2024-06-16 00:00:00", "value": 1.0}, + {"timestamp": "2024-06-16 01:00:00", "value": float("nan")}, + {"timestamp": "2024-06-16 02:00:00", "value": 0.0}, + {"timestamp": "2024-06-16 03:00:00", "value": 1.0}, + {"timestamp": "2024-06-16 04:00:00", "value": 1.0}, + ], + }, + }, + { + "class_name": "COW", + "metrics": { + "true_positive_rate": 0.6666666666666666, + "false_positive_rate": 0.14285714285714285, + "precision": 0.6666666666666666, + "recall": 0.6666666666666666, + "f_measure": 0.6666666666666666, + }, + "grouped_metrics": { + "true_positive_rate": [ + {"timestamp": "2024-06-16 00:00:00", "value": float("nan")}, + {"timestamp": "2024-06-16 01:00:00", "value": 0.5}, + {"timestamp": "2024-06-16 02:00:00", "value": float("nan")}, + {"timestamp": "2024-06-16 03:00:00", "value": 1.0}, + {"timestamp": "2024-06-16 04:00:00", "value": float("nan")}, + ], + "false_positive_rate": [ + {"timestamp": "2024-06-16 00:00:00", "value": float("nan")}, + {"timestamp": "2024-06-16 01:00:00", "value": float("nan")}, + {"timestamp": "2024-06-16 02:00:00", "value": float("nan")}, + {"timestamp": "2024-06-16 03:00:00", "value": 0.0}, + {"timestamp": "2024-06-16 04:00:00", "value": float("nan")}, + ], + "precision": [ + {"timestamp": "2024-06-16 00:00:00", "value": float("nan")}, + {"timestamp": "2024-06-16 01:00:00", "value": 1.0}, + {"timestamp": "2024-06-16 02:00:00", "value": float("nan")}, + {"timestamp": "2024-06-16 03:00:00", "value": 1.0}, + {"timestamp": "2024-06-16 04:00:00", "value": float("nan")}, + ], + "recall": [ + {"timestamp": "2024-06-16 00:00:00", "value": float("nan")}, + {"timestamp": "2024-06-16 01:00:00", "value": 0.5}, + {"timestamp": "2024-06-16 02:00:00", "value": float("nan")}, + {"timestamp": "2024-06-16 03:00:00", "value": 1.0}, + {"timestamp": "2024-06-16 04:00:00", "value": float("nan")}, + ], + "f_measure": [ + {"timestamp": "2024-06-16 00:00:00", "value": float("nan")}, + { + "timestamp": "2024-06-16 01:00:00", + "value": 0.6666666666666666, + }, + {"timestamp": "2024-06-16 02:00:00", "value": float("nan")}, + {"timestamp": "2024-06-16 03:00:00", "value": 1.0}, + {"timestamp": "2024-06-16 04:00:00", "value": float("nan")}, + ], + }, + }, + { + "class_name": "DOG", + "metrics": { + "true_positive_rate": 1.0, + "false_positive_rate": 0.125, + "precision": 0.6666666666666666, + "recall": 1.0, + "f_measure": 0.8, + }, + "grouped_metrics": { + "true_positive_rate": [ + {"timestamp": "2024-06-16 00:00:00", "value": 1.0}, + {"timestamp": "2024-06-16 01:00:00", "value": float("nan")}, + {"timestamp": "2024-06-16 02:00:00", "value": 1.0}, + {"timestamp": "2024-06-16 03:00:00", "value": float("nan")}, + {"timestamp": "2024-06-16 04:00:00", "value": float("nan")}, + ], + "false_positive_rate": [ + {"timestamp": "2024-06-16 00:00:00", "value": 0.0}, + {"timestamp": "2024-06-16 01:00:00", "value": float("nan")}, + {"timestamp": "2024-06-16 02:00:00", "value": 0.5}, + {"timestamp": "2024-06-16 03:00:00", "value": float("nan")}, + {"timestamp": "2024-06-16 04:00:00", "value": float("nan")}, + ], + "precision": [ + {"timestamp": "2024-06-16 00:00:00", "value": 1.0}, + {"timestamp": "2024-06-16 01:00:00", "value": float("nan")}, + {"timestamp": "2024-06-16 02:00:00", "value": 0.5}, + {"timestamp": "2024-06-16 03:00:00", "value": float("nan")}, + {"timestamp": "2024-06-16 04:00:00", "value": float("nan")}, + ], + "recall": [ + {"timestamp": "2024-06-16 00:00:00", "value": 1.0}, + {"timestamp": "2024-06-16 01:00:00", "value": float("nan")}, + {"timestamp": "2024-06-16 02:00:00", "value": 1.0}, + {"timestamp": "2024-06-16 03:00:00", "value": float("nan")}, + {"timestamp": "2024-06-16 04:00:00", "value": float("nan")}, + ], + "f_measure": [ + {"timestamp": "2024-06-16 00:00:00", "value": 1.0}, + {"timestamp": "2024-06-16 01:00:00", "value": float("nan")}, + { + "timestamp": "2024-06-16 02:00:00", + "value": 0.6666666666666666, + }, + {"timestamp": "2024-06-16 03:00:00", "value": float("nan")}, + {"timestamp": "2024-06-16 04:00:00", "value": float("nan")}, + ], + }, + }, + ], + "global_metrics": { + "f1": 0.6933333333333332, + "accuracy": 0.7, + "weighted_precision": 0.7083333333333334, + "weighted_recall": 0.7, + "weighted_true_positive_rate": 0.7, + "weighted_false_positive_rate": 0.16785714285714287, + "weighted_f_measure": 0.6933333333333332, + "confusion_matrix": [[3.0, 1.0, 1.0], [1.0, 2.0, 0.0], [0.0, 0.0, 2.0]], + }, + }, + ignore_order=True, + significant_digits=6, + ) diff --git a/spark/tests/resources/current/complete_dataset.csv b/spark/tests/resources/current/complete_dataset.csv index c187969..8b2baad 100644 --- a/spark/tests/resources/current/complete_dataset.csv +++ b/spark/tests/resources/current/complete_dataset.csv @@ -1,8 +1,8 @@ id,cat1,cat2,num1,num2,prediction,prediction_proba,target,datetime -1,A,A,1,100,1,1,1,2024-06-16 00:01:00-05:00 -2,B,B,1,100,1,1,1,2024-06-16 00:02:00-05:00 -3,C,C,1,100,1,1,1,2024-06-16 00:03:00-05:00 -4,D,D,1,100,1,1,1,2024-06-16 00:04:00-05:00 -5,E,E,1,100,1,1,1,2024-06-16 00:05:00-05:00 -6,F,F,1,100,1,1,1,2024-06-16 00:06:00-05:00 -7,G,G,1,100,1,1,1,2024-06-16 00:07:00-05:00 \ No newline at end of file +1,A,A,1,100,1,1,1,2024-06-16 00:01:00 +2,B,B,1,100,1,1,1,2024-06-16 00:02:00 +3,C,C,1,100,1,1,1,2024-06-16 00:03:00 +4,D,D,1,100,1,1,1,2024-06-16 00:04:00 +5,E,E,1,100,1,1,1,2024-06-16 00:05:00 +6,F,F,1,100,1,1,1,2024-06-16 00:06:00 +7,G,G,1,100,1,1,1,2024-06-16 00:07:00 \ No newline at end of file diff --git a/spark/tests/resources/current/current_joined.csv b/spark/tests/resources/current/current_joined.csv index bd02f0e..2e6abfd 100644 --- a/spark/tests/resources/current/current_joined.csv +++ b/spark/tests/resources/current/current_joined.csv @@ -1,239 +1,239 @@ age,chest_pain_type,resting_blood_pressure,cholesterol,fasting_blood_sugar,resting_ecg,max_heart_rate_achieved,exercise_induced_angina,st_depression,st_slope,prediction,prediction_proba,target,datetime,sex -61,4,160,0,1,1,145,0,1.0,2,1,0.94,1,2024-06-16 06:30:00-05:00,M -54,3,120,237,0,0,150,1,1.5,2,0,0.54,1,2024-06-16 04:07:00-05:00,M -55,2,122,320,0,0,155,0,0.0,1,0,0.97,0,2024-06-16 04:20:00-05:00,F -56,4,155,342,1,0,150,1,3.0,2,1,0.97,1,2024-06-16 02:35:00-05:00,M -60,4,150,258,0,2,157,0,2.6,2,1,0.85,1,2024-06-16 16:24:00-05:00,F -56,4,125,0,1,0,103,1,1.0,2,1,0.99,1,2024-06-16 06:53:00-05:00,M -43,3,130,315,0,0,162,0,1.9,1,0,0.97,0,2024-06-16 13:22:00-05:00,M -54,4,150,365,0,1,134,0,1.0,1,0,0.61,0,2024-06-16 00:58:00-05:00,M -46,3,142,177,0,2,160,1,1.4,3,0,0.82,0,2024-06-16 12:32:00-05:00,F -52,3,172,199,1,0,162,0,0.5,1,0,0.99,0,2024-06-16 15:01:00-05:00,M -60,3,120,178,1,0,96,0,0.0,1,0,0.98,0,2024-06-16 18:51:00-05:00,F -50,3,140,233,0,0,163,0,0.6,2,1,0.83,1,2024-06-16 12:39:00-05:00,M -64,3,140,313,0,0,133,0,0.2,1,0,0.94,0,2024-06-16 11:55:00-05:00,F -68,3,120,211,0,2,115,0,1.5,2,0,0.61,0,2024-06-16 13:10:00-05:00,F -50,4,150,243,0,2,128,0,2.6,2,1,0.91,1,2024-06-16 11:46:00-05:00,M -57,2,140,265,0,1,145,1,1.0,2,1,0.71,1,2024-06-16 01:10:00-05:00,M -61,4,120,282,0,1,135,1,4.0,3,1,0.91,1,2024-06-16 08:42:00-05:00,M -60,4,140,293,0,2,170,0,1.2,2,1,0.82,1,2024-06-16 17:25:00-05:00,M -54,2,192,283,0,2,195,0,0.0,1,0,0.93,1,2024-06-16 12:41:00-05:00,M -65,4,170,263,1,0,112,1,2.0,2,1,0.92,1,2024-06-16 01:26:00-05:00,M -70,3,160,269,0,0,112,1,2.9,2,1,0.93,1,2024-06-16 17:37:00-05:00,M -47,4,110,275,0,2,118,1,1.0,2,1,0.95,1,2024-06-16 18:54:00-05:00,M -51,3,135,160,0,0,150,0,2.0,2,1,0.58,1,2024-06-16 04:37:00-05:00,M -56,4,115,0,1,1,82,0,-1.0,1,1,0.67,1,2024-06-16 05:32:00-05:00,M -54,4,125,216,0,0,140,0,0.0,2,1,0.79,1,2024-06-16 03:30:00-05:00,M -52,4,160,331,0,0,94,1,2.5,2,1,0.99,1,2024-06-16 02:20:00-05:00,M -56,1,120,193,0,2,162,0,1.9,2,0,0.86,0,2024-06-16 18:22:00-05:00,M -58,4,128,259,0,2,130,1,3.0,2,1,0.97,1,2024-06-16 18:13:00-05:00,M -59,4,174,249,0,0,143,1,0.0,2,1,0.88,1,2024-06-16 11:15:00-05:00,F -74,1,145,216,1,0,116,1,1.8,2,1,0.69,1,2024-06-16 09:14:00-05:00,M -43,4,140,0,0,1,140,1,0.5,1,1,0.57,1,2024-06-16 05:52:00-05:00,M -60,3,140,185,0,2,155,0,3.0,2,1,0.9,1,2024-06-16 14:25:00-05:00,M -54,3,133,203,0,1,137,0,0.2,1,0,0.98,0,2024-06-16 09:21:00-05:00,M -54,4,130,0,1,0,110,1,3.0,2,1,0.94,1,2024-06-16 06:55:00-05:00,M -48,4,160,193,0,0,102,1,3.0,2,1,0.97,1,2024-06-16 04:00:00-05:00,M -66,3,120,0,0,1,120,0,-0.5,1,0,0.56,0,2024-06-16 07:02:00-05:00,M -44,2,150,288,0,0,150,1,3.0,2,1,0.67,1,2024-06-16 00:23:00-05:00,M -52,4,130,0,1,0,120,0,0.0,2,1,1.0,1,2024-06-16 05:44:00-05:00,M -41,4,110,289,0,0,170,0,0.0,2,1,0.81,1,2024-06-16 00:49:00-05:00,M -64,4,110,0,1,0,114,1,1.3,3,1,0.93,1,2024-06-16 05:42:00-05:00,M -35,2,110,257,0,0,140,0,0.0,2,1,0.52,1,2024-06-16 03:28:00-05:00,M -45,2,128,308,0,2,170,0,0.0,1,0,0.99,0,2024-06-16 17:15:00-05:00,M -38,4,105,0,1,0,166,0,2.8,1,1,0.56,1,2024-06-16 05:23:00-05:00,F -34,2,150,214,0,1,168,0,0.0,1,0,0.98,0,2024-06-16 01:47:00-05:00,M -42,4,136,315,0,0,125,1,1.8,2,1,0.99,1,2024-06-16 11:07:00-05:00,M -55,4,180,327,0,1,117,1,3.4,2,1,0.91,1,2024-06-16 12:07:00-05:00,F -57,4,130,311,1,1,148,1,2.0,2,1,0.94,1,2024-06-16 07:59:00-05:00,M -54,4,136,220,0,0,140,1,3.0,2,1,0.99,1,2024-06-16 08:10:00-05:00,M -58,4,130,0,0,1,100,1,1.0,2,1,0.91,1,2024-06-16 05:54:00-05:00,M -62,4,120,267,0,0,99,1,1.8,2,1,0.98,1,2024-06-16 15:59:00-05:00,M -63,4,170,177,0,0,84,1,2.5,3,1,0.99,1,2024-06-16 07:13:00-05:00,M -55,3,133,185,0,1,136,0,0.2,1,0,0.99,0,2024-06-16 09:58:00-05:00,M -47,4,160,0,0,0,124,1,0.0,2,1,0.97,1,2024-06-16 06:02:00-05:00,M -52,1,118,186,0,2,190,0,0.0,2,0,0.87,0,2024-06-16 16:39:00-05:00,M -55,4,158,217,0,0,110,1,2.5,2,1,0.99,1,2024-06-16 09:30:00-05:00,M -60,4,135,0,0,0,63,1,0.5,1,1,0.65,1,2024-06-16 06:11:00-05:00,M -57,4,128,303,0,2,159,0,0.0,1,0,0.73,0,2024-06-16 10:31:00-05:00,F -51,3,120,295,0,2,157,0,0.6,1,0,0.99,0,2024-06-16 19:00:00-05:00,F -47,3,138,257,0,2,156,0,0.0,1,0,0.86,0,2024-06-16 14:15:00-05:00,M -38,4,150,0,1,0,120,1,0.7,2,1,0.98,1,2024-06-16 06:34:00-05:00,M -52,4,130,180,0,0,140,1,1.5,2,1,0.88,0,2024-06-16 00:54:00-05:00,F -47,3,130,235,0,0,145,0,2.0,2,0,0.6,0,2024-06-16 04:30:00-05:00,F -54,3,120,217,0,0,137,0,0.0,1,0,1.0,0,2024-06-16 03:38:00-05:00,M -58,3,105,240,0,2,154,1,0.6,2,0,0.79,0,2024-06-16 17:11:00-05:00,M -31,4,120,270,0,0,153,1,1.5,2,1,0.78,1,2024-06-16 00:56:00-05:00,M -52,3,122,0,0,0,110,1,2.0,3,1,0.84,1,2024-06-16 07:31:00-05:00,M -50,2,120,244,0,0,162,0,1.1,1,0,0.98,0,2024-06-16 11:56:00-05:00,F -52,4,128,204,1,0,156,1,1.0,2,1,0.86,1,2024-06-16 19:13:00-05:00,M -67,4,100,299,0,2,125,1,0.9,2,1,0.91,1,2024-06-16 13:09:00-05:00,M -63,4,150,407,0,2,154,0,4.0,2,1,0.78,1,2024-06-16 10:26:00-05:00,F -56,2,120,236,0,0,178,0,0.8,1,0,0.96,0,2024-06-16 14:52:00-05:00,M -51,3,94,227,0,0,154,1,0.0,1,0,0.72,0,2024-06-16 16:58:00-05:00,M -64,4,143,306,1,1,115,1,1.8,2,1,0.99,1,2024-06-16 08:52:00-05:00,M -47,3,155,0,0,0,118,1,1.0,2,1,0.93,1,2024-06-16 06:17:00-05:00,M -51,4,140,299,0,0,173,1,1.6,1,1,0.82,1,2024-06-16 13:40:00-05:00,M -58,1,150,283,1,2,162,0,1.0,1,0,0.8,0,2024-06-16 15:08:00-05:00,F -57,4,110,201,0,0,126,1,1.5,2,0,0.64,0,2024-06-16 18:57:00-05:00,M -54,4,110,239,0,0,126,1,2.8,2,1,0.9,1,2024-06-16 16:54:00-05:00,M -53,4,120,246,0,0,116,1,0.0,2,1,0.94,1,2024-06-16 03:19:00-05:00,M -55,4,122,223,1,1,100,0,0.0,2,1,0.96,1,2024-06-16 10:14:00-05:00,M -38,2,140,297,0,0,150,0,0.0,1,0,0.99,0,2024-06-16 01:53:00-05:00,M -68,3,118,277,0,0,151,0,1.0,1,0,0.87,0,2024-06-16 17:26:00-05:00,M -39,2,190,241,0,0,106,0,0.0,1,0,0.95,0,2024-06-16 01:49:00-05:00,M -51,4,130,179,0,0,100,0,0.0,1,0,0.93,0,2024-06-16 01:41:00-05:00,M -49,3,140,187,0,0,172,0,0.0,1,0,0.99,0,2024-06-16 02:54:00-05:00,M -54,2,120,238,0,0,154,0,0.0,1,0,0.96,0,2024-06-16 01:21:00-05:00,M -59,2,140,221,0,0,164,1,0.0,1,0,0.95,0,2024-06-16 11:47:00-05:00,M -37,3,118,240,0,2,165,0,1.0,2,0,0.78,0,2024-06-16 09:48:00-05:00,M -56,3,155,0,0,1,99,0,0.0,2,1,0.7,1,2024-06-16 06:03:00-05:00,M -62,3,160,0,0,0,72,1,0.0,2,1,0.95,1,2024-06-16 06:01:00-05:00,M -38,2,120,275,0,0,129,0,0.0,1,0,0.98,0,2024-06-16 03:53:00-05:00,F -45,2,180,295,0,0,180,0,0.0,1,0,0.97,0,2024-06-16 04:35:00-05:00,F -67,4,160,286,0,2,108,1,1.5,2,1,1.0,1,2024-06-16 14:48:00-05:00,M -44,4,130,290,0,0,100,1,2.0,2,1,0.97,1,2024-06-16 02:38:00-05:00,M -48,2,133,308,0,1,156,0,2.0,1,0,0.94,0,2024-06-16 04:49:00-05:00,F -54,2,160,305,0,0,175,0,0.0,1,0,0.97,0,2024-06-16 04:26:00-05:00,M -54,2,120,230,1,0,140,0,0.0,1,0,1.0,0,2024-06-16 02:08:00-05:00,F -54,2,192,283,0,2,195,0,0.0,1,0,0.93,1,2024-06-16 17:55:00-05:00,M -61,4,130,0,1,0,77,0,2.5,2,1,0.98,1,2024-06-16 05:19:00-05:00,M -49,1,130,0,0,1,145,0,3.0,2,1,0.63,1,2024-06-16 08:13:00-05:00,M -44,2,130,219,0,2,188,0,0.0,1,0,0.99,0,2024-06-16 14:28:00-05:00,M -59,1,160,273,0,2,125,0,0.0,1,1,0.67,1,2024-06-16 10:54:00-05:00,M -54,4,130,0,0,1,117,1,1.4,2,1,0.96,1,2024-06-16 07:27:00-05:00,M -55,3,136,245,1,1,131,1,1.2,2,1,0.83,1,2024-06-16 08:26:00-05:00,M -47,3,140,193,0,0,145,1,1.0,2,1,0.66,1,2024-06-16 04:25:00-05:00,M -42,2,150,268,0,0,136,0,0.0,1,0,1.0,0,2024-06-16 03:18:00-05:00,M -67,4,106,223,0,0,142,0,0.3,1,0,0.92,0,2024-06-16 19:03:00-05:00,F -54,3,110,214,0,0,158,0,1.6,2,0,0.94,0,2024-06-16 17:29:00-05:00,F -43,4,120,175,0,0,120,1,1.0,2,1,0.96,1,2024-06-16 00:44:00-05:00,M -50,4,120,0,0,1,156,1,0.0,1,1,0.73,1,2024-06-16 06:20:00-05:00,M -49,2,134,271,0,0,162,0,0.0,2,0,0.81,0,2024-06-16 18:45:00-05:00,F -64,4,134,273,0,0,102,1,4.0,3,1,0.98,1,2024-06-16 09:20:00-05:00,M -51,3,140,308,0,2,142,0,1.5,1,0,0.94,0,2024-06-16 14:21:00-05:00,F -67,4,125,254,1,0,163,0,0.2,2,1,0.9,1,2024-06-16 15:58:00-05:00,M -64,4,120,0,1,1,106,0,2.0,2,1,0.92,1,2024-06-16 07:18:00-05:00,M -45,4,142,309,0,2,147,1,0.0,2,1,0.71,1,2024-06-16 11:32:00-05:00,M -56,4,125,249,1,2,144,1,1.2,2,1,0.98,1,2024-06-16 12:34:00-05:00,M -38,3,100,0,0,0,179,0,-1.1,1,0,0.75,0,2024-06-16 05:33:00-05:00,M -47,4,120,205,0,0,98,1,2.0,2,1,0.79,1,2024-06-16 00:51:00-05:00,F -63,4,124,197,0,0,136,1,0.0,2,1,0.66,1,2024-06-16 19:41:00-05:00,F -61,4,105,0,1,0,110,1,1.5,1,1,0.84,1,2024-06-16 04:56:00-05:00,M -58,4,128,259,0,2,130,1,3.0,2,1,0.97,1,2024-06-16 13:00:00-05:00,M -41,2,110,235,0,0,153,0,0.0,1,0,0.95,0,2024-06-16 12:08:00-05:00,M -43,4,110,211,0,0,161,0,0.0,1,0,0.78,0,2024-06-16 13:01:00-05:00,M -74,2,120,269,0,2,121,1,0.2,1,0,0.88,0,2024-06-16 10:21:00-05:00,F -64,4,150,193,0,1,135,1,0.5,2,1,0.79,1,2024-06-16 09:33:00-05:00,M -55,2,160,292,1,0,143,1,2.0,2,1,0.64,1,2024-06-16 04:03:00-05:00,M -63,3,133,0,0,2,120,1,1.0,2,1,0.86,1,2024-06-16 07:10:00-05:00,M -70,4,130,322,0,2,109,0,2.4,2,1,0.9,1,2024-06-16 10:17:00-05:00,M -48,2,140,238,0,0,118,0,0.0,1,0,0.99,0,2024-06-16 02:48:00-05:00,M -58,3,140,179,0,0,160,0,0.0,1,0,0.82,0,2024-06-16 02:50:00-05:00,M -29,2,140,263,0,0,170,0,0.0,1,0,1.0,0,2024-06-16 03:40:00-05:00,M -38,3,138,175,0,0,173,0,0.0,1,0,1.0,0,2024-06-16 19:49:00-05:00,M -62,4,120,220,0,1,86,0,0.0,1,0,0.78,0,2024-06-16 07:12:00-05:00,M -53,2,120,0,0,0,95,0,0.0,2,0,0.55,1,2024-06-16 05:58:00-05:00,M -65,4,120,177,0,0,140,0,0.4,1,0,0.71,0,2024-06-16 15:38:00-05:00,M -44,3,108,141,0,0,175,0,0.6,2,0,0.86,0,2024-06-16 11:57:00-05:00,F -53,3,130,197,1,2,152,0,1.2,3,0,0.87,0,2024-06-16 15:36:00-05:00,M -36,2,120,166,0,0,180,0,0.0,1,0,1.0,0,2024-06-16 04:50:00-05:00,M -63,4,110,252,0,1,140,1,2.0,2,1,0.87,1,2024-06-16 08:54:00-05:00,M -49,3,131,142,0,0,127,1,1.5,2,1,0.86,1,2024-06-16 08:49:00-05:00,M -69,1,140,239,0,0,151,0,1.8,1,0,0.93,0,2024-06-16 15:17:00-05:00,F -57,4,110,0,1,1,131,1,1.4,1,1,0.73,1,2024-06-16 04:58:00-05:00,M -40,4,152,223,0,0,181,0,0.0,1,1,0.65,1,2024-06-16 19:15:00-05:00,M -50,4,150,243,0,2,128,0,2.6,2,1,0.91,1,2024-06-16 15:34:00-05:00,M -43,4,132,341,1,2,136,1,3.0,2,1,0.95,1,2024-06-16 16:40:00-05:00,F -38,4,110,190,0,0,150,1,1.0,2,1,0.76,1,2024-06-16 02:36:00-05:00,M -57,4,152,274,0,0,88,1,1.2,2,1,1.0,1,2024-06-16 17:42:00-05:00,M -68,3,180,274,1,2,150,1,1.6,2,1,0.88,1,2024-06-16 16:10:00-05:00,M -66,1,150,226,0,0,114,0,2.6,3,0,0.8,0,2024-06-16 12:15:00-05:00,F -28,2,130,132,0,2,185,0,0.0,1,0,0.9,0,2024-06-16 03:29:00-05:00,M -34,2,118,210,0,0,192,0,0.7,1,0,1.0,0,2024-06-16 12:35:00-05:00,F -43,2,142,207,0,0,138,0,0.0,1,0,0.99,0,2024-06-16 01:36:00-05:00,M -50,4,110,254,0,2,159,0,0.0,1,0,0.88,0,2024-06-16 18:07:00-05:00,F -56,3,130,167,0,0,114,0,0.0,1,0,1.0,0,2024-06-16 00:31:00-05:00,M -51,3,94,227,0,0,154,1,0.0,1,0,0.72,0,2024-06-16 13:54:00-05:00,M -65,4,135,254,0,2,127,0,2.8,2,1,0.97,1,2024-06-16 13:58:00-05:00,M -58,4,132,458,1,0,69,0,1.0,3,1,0.92,0,2024-06-16 08:17:00-05:00,M -68,3,120,211,0,2,115,0,1.5,2,0,0.61,0,2024-06-16 18:01:00-05:00,F -54,3,135,304,1,0,170,0,0.0,1,0,0.98,0,2024-06-16 12:10:00-05:00,F -54,3,160,201,0,0,163,0,0.0,1,0,0.99,0,2024-06-16 18:41:00-05:00,F -49,2,110,208,0,0,160,0,0.0,1,0,1.0,0,2024-06-16 02:43:00-05:00,F -51,3,110,190,0,0,120,0,0.0,1,0,1.0,0,2024-06-16 04:46:00-05:00,F -43,1,120,291,0,1,155,0,0.0,2,0,0.56,1,2024-06-16 01:28:00-05:00,M -64,4,130,258,1,2,130,0,0.0,2,1,0.77,1,2024-06-16 09:54:00-05:00,M -57,4,120,354,0,0,163,1,0.6,1,0,0.63,0,2024-06-16 14:54:00-05:00,F -43,2,120,266,0,0,118,0,0.0,1,0,0.99,0,2024-06-16 04:04:00-05:00,F -50,2,170,209,0,1,116,0,0.0,1,0,0.96,0,2024-06-16 03:12:00-05:00,M -52,2,128,205,1,0,184,0,0.0,1,0,1.0,0,2024-06-16 17:09:00-05:00,M -55,3,120,0,0,1,125,1,2.5,2,1,0.91,1,2024-06-16 07:47:00-05:00,M -62,3,130,231,0,0,146,0,1.8,2,0,0.62,0,2024-06-16 16:19:00-05:00,M -63,2,139,217,1,1,128,1,1.2,2,1,0.81,1,2024-06-16 08:06:00-05:00,M -61,4,150,0,0,0,117,1,2.0,2,1,0.99,1,2024-06-16 06:07:00-05:00,M -52,1,152,298,1,0,178,0,1.2,2,1,0.67,0,2024-06-16 13:42:00-05:00,M -67,4,140,219,0,1,122,1,2.0,2,1,1.0,1,2024-06-16 09:49:00-05:00,M -57,4,122,264,0,2,100,0,0.0,2,1,0.81,1,2024-06-16 09:57:00-05:00,M -43,4,132,247,1,2,143,1,0.1,2,1,0.87,1,2024-06-16 17:59:00-05:00,M -37,3,130,211,0,0,142,0,0.0,1,0,1.0,0,2024-06-16 00:10:00-05:00,F -65,1,138,282,1,2,174,0,1.4,2,1,0.77,1,2024-06-16 16:51:00-05:00,M -67,4,146,369,0,0,110,1,1.9,2,1,1.0,1,2024-06-16 09:38:00-05:00,M -57,3,128,229,0,2,150,0,0.4,2,1,0.82,1,2024-06-16 16:34:00-05:00,M -52,4,112,342,0,1,96,1,1.0,2,1,0.72,1,2024-06-16 00:59:00-05:00,M -44,4,112,290,0,2,153,0,0.0,1,1,0.87,1,2024-06-16 14:29:00-05:00,M -37,4,130,315,0,0,158,0,0.0,1,0,0.76,0,2024-06-16 03:51:00-05:00,M -60,4,140,281,0,1,118,1,1.5,2,1,0.98,1,2024-06-16 07:33:00-05:00,M -40,4,125,0,1,0,165,0,0.0,2,1,0.98,1,2024-06-16 07:38:00-05:00,M -57,4,120,354,0,0,163,1,0.6,1,0,0.63,0,2024-06-16 11:28:00-05:00,F -59,4,138,271,0,2,182,0,0.0,1,0,0.89,0,2024-06-16 18:26:00-05:00,M -60,4,130,206,0,2,132,1,2.4,2,1,0.99,1,2024-06-16 15:11:00-05:00,M -67,3,152,277,0,0,172,0,0.0,1,0,0.96,0,2024-06-16 12:33:00-05:00,F -45,2,128,308,0,2,170,0,0.0,1,0,0.99,0,2024-06-16 12:54:00-05:00,M -41,4,150,171,0,0,128,1,1.5,2,1,0.97,0,2024-06-16 09:27:00-05:00,M -45,4,142,309,0,2,147,1,0.0,2,1,0.71,1,2024-06-16 18:12:00-05:00,M -56,4,130,283,1,2,103,1,1.6,3,1,0.99,1,2024-06-16 12:16:00-05:00,M -47,4,112,204,0,0,143,0,0.1,1,0,0.93,0,2024-06-16 18:33:00-05:00,M -60,4,130,253,0,0,144,1,1.4,1,1,0.88,1,2024-06-16 15:41:00-05:00,M -66,4,160,228,0,2,138,0,2.3,1,0,0.74,0,2024-06-16 19:18:00-05:00,M -58,3,132,224,0,2,173,0,3.2,1,1,0.8,1,2024-06-16 15:10:00-05:00,M -59,3,131,0,0,0,128,1,2.0,3,1,0.76,1,2024-06-16 07:45:00-05:00,M -66,4,120,302,0,2,151,0,0.4,2,0,0.77,0,2024-06-16 11:53:00-05:00,M -65,4,120,177,0,0,140,0,0.4,1,0,0.71,0,2024-06-16 10:22:00-05:00,M -53,2,130,0,0,1,120,0,0.7,3,1,0.59,0,2024-06-16 05:08:00-05:00,M -58,4,115,0,1,0,138,0,0.5,1,1,0.71,1,2024-06-16 05:09:00-05:00,M -66,4,112,261,0,0,140,0,1.5,1,1,0.52,1,2024-06-16 09:09:00-05:00,M -45,4,138,236,0,2,152,1,0.2,2,0,0.93,0,2024-06-16 13:05:00-05:00,F -67,3,115,564,0,2,160,0,1.6,2,0,0.86,0,2024-06-16 10:18:00-05:00,F -47,3,138,257,0,2,156,0,0.0,1,0,0.86,0,2024-06-16 16:13:00-05:00,M -44,2,120,220,0,0,170,0,0.0,1,0,1.0,0,2024-06-16 13:56:00-05:00,M -60,3,180,0,0,1,140,1,1.5,2,1,0.79,0,2024-06-16 07:04:00-05:00,M -58,4,125,300,0,2,171,0,0.0,1,1,0.78,1,2024-06-16 13:38:00-05:00,M -61,3,150,243,1,0,137,1,1.0,2,0,0.65,0,2024-06-16 14:33:00-05:00,M -52,2,160,196,0,0,165,0,0.0,1,0,0.97,0,2024-06-16 01:23:00-05:00,M -59,4,125,0,1,0,119,1,0.9,1,1,0.8,1,2024-06-16 05:28:00-05:00,M -53,4,130,264,0,2,143,0,0.4,2,0,0.85,0,2024-06-16 16:08:00-05:00,F -45,3,130,236,0,0,144,0,0.1,1,0,0.98,0,2024-06-16 08:46:00-05:00,M -42,3,120,228,0,0,152,1,1.5,2,0,0.53,0,2024-06-16 02:09:00-05:00,M -58,2,120,284,0,2,160,0,1.8,2,1,0.82,1,2024-06-16 14:39:00-05:00,M -59,4,110,0,1,0,94,0,0.0,2,1,0.98,1,2024-06-16 06:49:00-05:00,M -60,4,117,230,1,0,160,1,1.4,1,1,0.9,1,2024-06-16 15:18:00-05:00,M -54,4,130,202,1,0,112,1,2.0,2,1,0.93,1,2024-06-16 08:59:00-05:00,M -50,4,144,349,0,2,120,1,1.0,1,1,0.7,1,2024-06-16 08:43:00-05:00,M -55,4,116,186,1,1,102,0,0.0,2,1,0.96,1,2024-06-16 08:53:00-05:00,M -53,4,126,0,0,0,106,0,0.0,2,1,0.95,1,2024-06-16 07:51:00-05:00,M -39,3,140,321,0,2,182,0,0.0,1,0,1.0,0,2024-06-16 14:18:00-05:00,M -45,4,115,260,0,2,185,0,0.0,1,0,0.81,0,2024-06-16 14:09:00-05:00,M -32,4,118,529,0,0,130,0,0.0,2,1,0.79,1,2024-06-16 01:16:00-05:00,M -51,3,100,222,0,0,143,1,1.2,2,0,0.84,0,2024-06-16 12:03:00-05:00,M -54,4,140,166,0,0,118,1,0.0,2,1,0.9,1,2024-06-16 02:18:00-05:00,M -51,4,120,0,1,0,127,1,1.5,1,1,0.61,1,2024-06-16 05:06:00-05:00,F -65,4,150,225,0,2,114,0,1.0,2,1,0.81,1,2024-06-16 11:06:00-05:00,F -54,4,140,239,0,0,160,0,1.2,1,0,0.8,0,2024-06-16 15:04:00-05:00,M -46,4,120,277,0,0,125,1,1.0,2,1,0.87,1,2024-06-16 01:03:00-05:00,M -50,4,140,341,0,1,125,1,2.5,2,1,0.98,1,2024-06-16 02:21:00-05:00,M -59,4,125,222,0,0,135,1,2.5,3,1,0.92,1,2024-06-16 08:55:00-05:00,M -52,1,152,298,1,0,178,0,1.2,2,1,0.67,0,2024-06-16 17:17:00-05:00,M -55,2,135,250,0,2,161,0,1.4,2,0,0.95,0,2024-06-16 13:32:00-05:00,F -43,4,150,247,0,0,130,1,2.0,2,1,0.96,1,2024-06-16 02:19:00-05:00,M -55,4,120,226,0,2,127,1,1.7,3,1,0.97,1,2024-06-16 09:59:00-05:00,M +61,4,160,0,1,1,145,0,1.0,2,1,0.94,1,2024-06-16 06:30:00,M +54,3,120,237,0,0,150,1,1.5,2,0,0.54,1,2024-06-16 04:07:00,M +55,2,122,320,0,0,155,0,0.0,1,0,0.97,0,2024-06-16 04:20:00,F +56,4,155,342,1,0,150,1,3.0,2,1,0.97,1,2024-06-16 02:35:00,M +60,4,150,258,0,2,157,0,2.6,2,1,0.85,1,2024-06-16 16:24:00,F +56,4,125,0,1,0,103,1,1.0,2,1,0.99,1,2024-06-16 06:53:00,M +43,3,130,315,0,0,162,0,1.9,1,0,0.97,0,2024-06-16 13:22:00,M +54,4,150,365,0,1,134,0,1.0,1,0,0.61,0,2024-06-16 00:58:00,M +46,3,142,177,0,2,160,1,1.4,3,0,0.82,0,2024-06-16 12:32:00,F +52,3,172,199,1,0,162,0,0.5,1,0,0.99,0,2024-06-16 15:01:00,M +60,3,120,178,1,0,96,0,0.0,1,0,0.98,0,2024-06-16 18:51:00,F +50,3,140,233,0,0,163,0,0.6,2,1,0.83,1,2024-06-16 12:39:00,M +64,3,140,313,0,0,133,0,0.2,1,0,0.94,0,2024-06-16 11:55:00,F +68,3,120,211,0,2,115,0,1.5,2,0,0.61,0,2024-06-16 13:10:00,F +50,4,150,243,0,2,128,0,2.6,2,1,0.91,1,2024-06-16 11:46:00,M +57,2,140,265,0,1,145,1,1.0,2,1,0.71,1,2024-06-16 01:10:00,M +61,4,120,282,0,1,135,1,4.0,3,1,0.91,1,2024-06-16 08:42:00,M +60,4,140,293,0,2,170,0,1.2,2,1,0.82,1,2024-06-16 17:25:00,M +54,2,192,283,0,2,195,0,0.0,1,0,0.93,1,2024-06-16 12:41:00,M +65,4,170,263,1,0,112,1,2.0,2,1,0.92,1,2024-06-16 01:26:00,M +70,3,160,269,0,0,112,1,2.9,2,1,0.93,1,2024-06-16 17:37:00,M +47,4,110,275,0,2,118,1,1.0,2,1,0.95,1,2024-06-16 18:54:00,M +51,3,135,160,0,0,150,0,2.0,2,1,0.58,1,2024-06-16 04:37:00,M +56,4,115,0,1,1,82,0,-1.0,1,1,0.67,1,2024-06-16 05:32:00,M +54,4,125,216,0,0,140,0,0.0,2,1,0.79,1,2024-06-16 03:30:00,M +52,4,160,331,0,0,94,1,2.5,2,1,0.99,1,2024-06-16 02:20:00,M +56,1,120,193,0,2,162,0,1.9,2,0,0.86,0,2024-06-16 18:22:00,M +58,4,128,259,0,2,130,1,3.0,2,1,0.97,1,2024-06-16 18:13:00,M +59,4,174,249,0,0,143,1,0.0,2,1,0.88,1,2024-06-16 11:15:00,F +74,1,145,216,1,0,116,1,1.8,2,1,0.69,1,2024-06-16 09:14:00,M +43,4,140,0,0,1,140,1,0.5,1,1,0.57,1,2024-06-16 05:52:00,M +60,3,140,185,0,2,155,0,3.0,2,1,0.9,1,2024-06-16 14:25:00,M +54,3,133,203,0,1,137,0,0.2,1,0,0.98,0,2024-06-16 09:21:00,M +54,4,130,0,1,0,110,1,3.0,2,1,0.94,1,2024-06-16 06:55:00,M +48,4,160,193,0,0,102,1,3.0,2,1,0.97,1,2024-06-16 04:00:00,M +66,3,120,0,0,1,120,0,-0.5,1,0,0.56,0,2024-06-16 07:02:00,M +44,2,150,288,0,0,150,1,3.0,2,1,0.67,1,2024-06-16 00:23:00,M +52,4,130,0,1,0,120,0,0.0,2,1,1.0,1,2024-06-16 05:44:00,M +41,4,110,289,0,0,170,0,0.0,2,1,0.81,1,2024-06-16 00:49:00,M +64,4,110,0,1,0,114,1,1.3,3,1,0.93,1,2024-06-16 05:42:00,M +35,2,110,257,0,0,140,0,0.0,2,1,0.52,1,2024-06-16 03:28:00,M +45,2,128,308,0,2,170,0,0.0,1,0,0.99,0,2024-06-16 17:15:00,M +38,4,105,0,1,0,166,0,2.8,1,1,0.56,1,2024-06-16 05:23:00,F +34,2,150,214,0,1,168,0,0.0,1,0,0.98,0,2024-06-16 01:47:00,M +42,4,136,315,0,0,125,1,1.8,2,1,0.99,1,2024-06-16 11:07:00,M +55,4,180,327,0,1,117,1,3.4,2,1,0.91,1,2024-06-16 12:07:00,F +57,4,130,311,1,1,148,1,2.0,2,1,0.94,1,2024-06-16 07:59:00,M +54,4,136,220,0,0,140,1,3.0,2,1,0.99,1,2024-06-16 08:10:00,M +58,4,130,0,0,1,100,1,1.0,2,1,0.91,1,2024-06-16 05:54:00,M +62,4,120,267,0,0,99,1,1.8,2,1,0.98,1,2024-06-16 15:59:00,M +63,4,170,177,0,0,84,1,2.5,3,1,0.99,1,2024-06-16 07:13:00,M +55,3,133,185,0,1,136,0,0.2,1,0,0.99,0,2024-06-16 09:58:00,M +47,4,160,0,0,0,124,1,0.0,2,1,0.97,1,2024-06-16 06:02:00,M +52,1,118,186,0,2,190,0,0.0,2,0,0.87,0,2024-06-16 16:39:00,M +55,4,158,217,0,0,110,1,2.5,2,1,0.99,1,2024-06-16 09:30:00,M +60,4,135,0,0,0,63,1,0.5,1,1,0.65,1,2024-06-16 06:11:00,M +57,4,128,303,0,2,159,0,0.0,1,0,0.73,0,2024-06-16 10:31:00,F +51,3,120,295,0,2,157,0,0.6,1,0,0.99,0,2024-06-16 19:00:00,F +47,3,138,257,0,2,156,0,0.0,1,0,0.86,0,2024-06-16 14:15:00,M +38,4,150,0,1,0,120,1,0.7,2,1,0.98,1,2024-06-16 06:34:00,M +52,4,130,180,0,0,140,1,1.5,2,1,0.88,0,2024-06-16 00:54:00,F +47,3,130,235,0,0,145,0,2.0,2,0,0.6,0,2024-06-16 04:30:00,F +54,3,120,217,0,0,137,0,0.0,1,0,1.0,0,2024-06-16 03:38:00,M +58,3,105,240,0,2,154,1,0.6,2,0,0.79,0,2024-06-16 17:11:00,M +31,4,120,270,0,0,153,1,1.5,2,1,0.78,1,2024-06-16 00:56:00,M +52,3,122,0,0,0,110,1,2.0,3,1,0.84,1,2024-06-16 07:31:00,M +50,2,120,244,0,0,162,0,1.1,1,0,0.98,0,2024-06-16 11:56:00,F +52,4,128,204,1,0,156,1,1.0,2,1,0.86,1,2024-06-16 19:13:00,M +67,4,100,299,0,2,125,1,0.9,2,1,0.91,1,2024-06-16 13:09:00,M +63,4,150,407,0,2,154,0,4.0,2,1,0.78,1,2024-06-16 10:26:00,F +56,2,120,236,0,0,178,0,0.8,1,0,0.96,0,2024-06-16 14:52:00,M +51,3,94,227,0,0,154,1,0.0,1,0,0.72,0,2024-06-16 16:58:00,M +64,4,143,306,1,1,115,1,1.8,2,1,0.99,1,2024-06-16 08:52:00,M +47,3,155,0,0,0,118,1,1.0,2,1,0.93,1,2024-06-16 06:17:00,M +51,4,140,299,0,0,173,1,1.6,1,1,0.82,1,2024-06-16 13:40:00,M +58,1,150,283,1,2,162,0,1.0,1,0,0.8,0,2024-06-16 15:08:00,F +57,4,110,201,0,0,126,1,1.5,2,0,0.64,0,2024-06-16 18:57:00,M +54,4,110,239,0,0,126,1,2.8,2,1,0.9,1,2024-06-16 16:54:00,M +53,4,120,246,0,0,116,1,0.0,2,1,0.94,1,2024-06-16 03:19:00,M +55,4,122,223,1,1,100,0,0.0,2,1,0.96,1,2024-06-16 10:14:00,M +38,2,140,297,0,0,150,0,0.0,1,0,0.99,0,2024-06-16 01:53:00,M +68,3,118,277,0,0,151,0,1.0,1,0,0.87,0,2024-06-16 17:26:00,M +39,2,190,241,0,0,106,0,0.0,1,0,0.95,0,2024-06-16 01:49:00,M +51,4,130,179,0,0,100,0,0.0,1,0,0.93,0,2024-06-16 01:41:00,M +49,3,140,187,0,0,172,0,0.0,1,0,0.99,0,2024-06-16 02:54:00,M +54,2,120,238,0,0,154,0,0.0,1,0,0.96,0,2024-06-16 01:21:00,M +59,2,140,221,0,0,164,1,0.0,1,0,0.95,0,2024-06-16 11:47:00,M +37,3,118,240,0,2,165,0,1.0,2,0,0.78,0,2024-06-16 09:48:00,M +56,3,155,0,0,1,99,0,0.0,2,1,0.7,1,2024-06-16 06:03:00,M +62,3,160,0,0,0,72,1,0.0,2,1,0.95,1,2024-06-16 06:01:00,M +38,2,120,275,0,0,129,0,0.0,1,0,0.98,0,2024-06-16 03:53:00,F +45,2,180,295,0,0,180,0,0.0,1,0,0.97,0,2024-06-16 04:35:00,F +67,4,160,286,0,2,108,1,1.5,2,1,1.0,1,2024-06-16 14:48:00,M +44,4,130,290,0,0,100,1,2.0,2,1,0.97,1,2024-06-16 02:38:00,M +48,2,133,308,0,1,156,0,2.0,1,0,0.94,0,2024-06-16 04:49:00,F +54,2,160,305,0,0,175,0,0.0,1,0,0.97,0,2024-06-16 04:26:00,M +54,2,120,230,1,0,140,0,0.0,1,0,1.0,0,2024-06-16 02:08:00,F +54,2,192,283,0,2,195,0,0.0,1,0,0.93,1,2024-06-16 17:55:00,M +61,4,130,0,1,0,77,0,2.5,2,1,0.98,1,2024-06-16 05:19:00,M +49,1,130,0,0,1,145,0,3.0,2,1,0.63,1,2024-06-16 08:13:00,M +44,2,130,219,0,2,188,0,0.0,1,0,0.99,0,2024-06-16 14:28:00,M +59,1,160,273,0,2,125,0,0.0,1,1,0.67,1,2024-06-16 10:54:00,M +54,4,130,0,0,1,117,1,1.4,2,1,0.96,1,2024-06-16 07:27:00,M +55,3,136,245,1,1,131,1,1.2,2,1,0.83,1,2024-06-16 08:26:00,M +47,3,140,193,0,0,145,1,1.0,2,1,0.66,1,2024-06-16 04:25:00,M +42,2,150,268,0,0,136,0,0.0,1,0,1.0,0,2024-06-16 03:18:00,M +67,4,106,223,0,0,142,0,0.3,1,0,0.92,0,2024-06-16 19:03:00,F +54,3,110,214,0,0,158,0,1.6,2,0,0.94,0,2024-06-16 17:29:00,F +43,4,120,175,0,0,120,1,1.0,2,1,0.96,1,2024-06-16 00:44:00,M +50,4,120,0,0,1,156,1,0.0,1,1,0.73,1,2024-06-16 06:20:00,M +49,2,134,271,0,0,162,0,0.0,2,0,0.81,0,2024-06-16 18:45:00,F +64,4,134,273,0,0,102,1,4.0,3,1,0.98,1,2024-06-16 09:20:00,M +51,3,140,308,0,2,142,0,1.5,1,0,0.94,0,2024-06-16 14:21:00,F +67,4,125,254,1,0,163,0,0.2,2,1,0.9,1,2024-06-16 15:58:00,M +64,4,120,0,1,1,106,0,2.0,2,1,0.92,1,2024-06-16 07:18:00,M +45,4,142,309,0,2,147,1,0.0,2,1,0.71,1,2024-06-16 11:32:00,M +56,4,125,249,1,2,144,1,1.2,2,1,0.98,1,2024-06-16 12:34:00,M +38,3,100,0,0,0,179,0,-1.1,1,0,0.75,0,2024-06-16 05:33:00,M +47,4,120,205,0,0,98,1,2.0,2,1,0.79,1,2024-06-16 00:51:00,F +63,4,124,197,0,0,136,1,0.0,2,1,0.66,1,2024-06-16 19:41:00,F +61,4,105,0,1,0,110,1,1.5,1,1,0.84,1,2024-06-16 04:56:00,M +58,4,128,259,0,2,130,1,3.0,2,1,0.97,1,2024-06-16 13:00:00,M +41,2,110,235,0,0,153,0,0.0,1,0,0.95,0,2024-06-16 12:08:00,M +43,4,110,211,0,0,161,0,0.0,1,0,0.78,0,2024-06-16 13:01:00,M +74,2,120,269,0,2,121,1,0.2,1,0,0.88,0,2024-06-16 10:21:00,F +64,4,150,193,0,1,135,1,0.5,2,1,0.79,1,2024-06-16 09:33:00,M +55,2,160,292,1,0,143,1,2.0,2,1,0.64,1,2024-06-16 04:03:00,M +63,3,133,0,0,2,120,1,1.0,2,1,0.86,1,2024-06-16 07:10:00,M +70,4,130,322,0,2,109,0,2.4,2,1,0.9,1,2024-06-16 10:17:00,M +48,2,140,238,0,0,118,0,0.0,1,0,0.99,0,2024-06-16 02:48:00,M +58,3,140,179,0,0,160,0,0.0,1,0,0.82,0,2024-06-16 02:50:00,M +29,2,140,263,0,0,170,0,0.0,1,0,1.0,0,2024-06-16 03:40:00,M +38,3,138,175,0,0,173,0,0.0,1,0,1.0,0,2024-06-16 19:49:00,M +62,4,120,220,0,1,86,0,0.0,1,0,0.78,0,2024-06-16 07:12:00,M +53,2,120,0,0,0,95,0,0.0,2,0,0.55,1,2024-06-16 05:58:00,M +65,4,120,177,0,0,140,0,0.4,1,0,0.71,0,2024-06-16 15:38:00,M +44,3,108,141,0,0,175,0,0.6,2,0,0.86,0,2024-06-16 11:57:00,F +53,3,130,197,1,2,152,0,1.2,3,0,0.87,0,2024-06-16 15:36:00,M +36,2,120,166,0,0,180,0,0.0,1,0,1.0,0,2024-06-16 04:50:00,M +63,4,110,252,0,1,140,1,2.0,2,1,0.87,1,2024-06-16 08:54:00,M +49,3,131,142,0,0,127,1,1.5,2,1,0.86,1,2024-06-16 08:49:00,M +69,1,140,239,0,0,151,0,1.8,1,0,0.93,0,2024-06-16 15:17:00,F +57,4,110,0,1,1,131,1,1.4,1,1,0.73,1,2024-06-16 04:58:00,M +40,4,152,223,0,0,181,0,0.0,1,1,0.65,1,2024-06-16 19:15:00,M +50,4,150,243,0,2,128,0,2.6,2,1,0.91,1,2024-06-16 15:34:00,M +43,4,132,341,1,2,136,1,3.0,2,1,0.95,1,2024-06-16 16:40:00,F +38,4,110,190,0,0,150,1,1.0,2,1,0.76,1,2024-06-16 02:36:00,M +57,4,152,274,0,0,88,1,1.2,2,1,1.0,1,2024-06-16 17:42:00,M +68,3,180,274,1,2,150,1,1.6,2,1,0.88,1,2024-06-16 16:10:00,M +66,1,150,226,0,0,114,0,2.6,3,0,0.8,0,2024-06-16 12:15:00,F +28,2,130,132,0,2,185,0,0.0,1,0,0.9,0,2024-06-16 03:29:00,M +34,2,118,210,0,0,192,0,0.7,1,0,1.0,0,2024-06-16 12:35:00,F +43,2,142,207,0,0,138,0,0.0,1,0,0.99,0,2024-06-16 01:36:00,M +50,4,110,254,0,2,159,0,0.0,1,0,0.88,0,2024-06-16 18:07:00,F +56,3,130,167,0,0,114,0,0.0,1,0,1.0,0,2024-06-16 00:31:00,M +51,3,94,227,0,0,154,1,0.0,1,0,0.72,0,2024-06-16 13:54:00,M +65,4,135,254,0,2,127,0,2.8,2,1,0.97,1,2024-06-16 13:58:00,M +58,4,132,458,1,0,69,0,1.0,3,1,0.92,0,2024-06-16 08:17:00,M +68,3,120,211,0,2,115,0,1.5,2,0,0.61,0,2024-06-16 18:01:00,F +54,3,135,304,1,0,170,0,0.0,1,0,0.98,0,2024-06-16 12:10:00,F +54,3,160,201,0,0,163,0,0.0,1,0,0.99,0,2024-06-16 18:41:00,F +49,2,110,208,0,0,160,0,0.0,1,0,1.0,0,2024-06-16 02:43:00,F +51,3,110,190,0,0,120,0,0.0,1,0,1.0,0,2024-06-16 04:46:00,F +43,1,120,291,0,1,155,0,0.0,2,0,0.56,1,2024-06-16 01:28:00,M +64,4,130,258,1,2,130,0,0.0,2,1,0.77,1,2024-06-16 09:54:00,M +57,4,120,354,0,0,163,1,0.6,1,0,0.63,0,2024-06-16 14:54:00,F +43,2,120,266,0,0,118,0,0.0,1,0,0.99,0,2024-06-16 04:04:00,F +50,2,170,209,0,1,116,0,0.0,1,0,0.96,0,2024-06-16 03:12:00,M +52,2,128,205,1,0,184,0,0.0,1,0,1.0,0,2024-06-16 17:09:00,M +55,3,120,0,0,1,125,1,2.5,2,1,0.91,1,2024-06-16 07:47:00,M +62,3,130,231,0,0,146,0,1.8,2,0,0.62,0,2024-06-16 16:19:00,M +63,2,139,217,1,1,128,1,1.2,2,1,0.81,1,2024-06-16 08:06:00,M +61,4,150,0,0,0,117,1,2.0,2,1,0.99,1,2024-06-16 06:07:00,M +52,1,152,298,1,0,178,0,1.2,2,1,0.67,0,2024-06-16 13:42:00,M +67,4,140,219,0,1,122,1,2.0,2,1,1.0,1,2024-06-16 09:49:00,M +57,4,122,264,0,2,100,0,0.0,2,1,0.81,1,2024-06-16 09:57:00,M +43,4,132,247,1,2,143,1,0.1,2,1,0.87,1,2024-06-16 17:59:00,M +37,3,130,211,0,0,142,0,0.0,1,0,1.0,0,2024-06-16 00:10:00,F +65,1,138,282,1,2,174,0,1.4,2,1,0.77,1,2024-06-16 16:51:00,M +67,4,146,369,0,0,110,1,1.9,2,1,1.0,1,2024-06-16 09:38:00,M +57,3,128,229,0,2,150,0,0.4,2,1,0.82,1,2024-06-16 16:34:00,M +52,4,112,342,0,1,96,1,1.0,2,1,0.72,1,2024-06-16 00:59:00,M +44,4,112,290,0,2,153,0,0.0,1,1,0.87,1,2024-06-16 14:29:00,M +37,4,130,315,0,0,158,0,0.0,1,0,0.76,0,2024-06-16 03:51:00,M +60,4,140,281,0,1,118,1,1.5,2,1,0.98,1,2024-06-16 07:33:00,M +40,4,125,0,1,0,165,0,0.0,2,1,0.98,1,2024-06-16 07:38:00,M +57,4,120,354,0,0,163,1,0.6,1,0,0.63,0,2024-06-16 11:28:00,F +59,4,138,271,0,2,182,0,0.0,1,0,0.89,0,2024-06-16 18:26:00,M +60,4,130,206,0,2,132,1,2.4,2,1,0.99,1,2024-06-16 15:11:00,M +67,3,152,277,0,0,172,0,0.0,1,0,0.96,0,2024-06-16 12:33:00,F +45,2,128,308,0,2,170,0,0.0,1,0,0.99,0,2024-06-16 12:54:00,M +41,4,150,171,0,0,128,1,1.5,2,1,0.97,0,2024-06-16 09:27:00,M +45,4,142,309,0,2,147,1,0.0,2,1,0.71,1,2024-06-16 18:12:00,M +56,4,130,283,1,2,103,1,1.6,3,1,0.99,1,2024-06-16 12:16:00,M +47,4,112,204,0,0,143,0,0.1,1,0,0.93,0,2024-06-16 18:33:00,M +60,4,130,253,0,0,144,1,1.4,1,1,0.88,1,2024-06-16 15:41:00,M +66,4,160,228,0,2,138,0,2.3,1,0,0.74,0,2024-06-16 19:18:00,M +58,3,132,224,0,2,173,0,3.2,1,1,0.8,1,2024-06-16 15:10:00,M +59,3,131,0,0,0,128,1,2.0,3,1,0.76,1,2024-06-16 07:45:00,M +66,4,120,302,0,2,151,0,0.4,2,0,0.77,0,2024-06-16 11:53:00,M +65,4,120,177,0,0,140,0,0.4,1,0,0.71,0,2024-06-16 10:22:00,M +53,2,130,0,0,1,120,0,0.7,3,1,0.59,0,2024-06-16 05:08:00,M +58,4,115,0,1,0,138,0,0.5,1,1,0.71,1,2024-06-16 05:09:00,M +66,4,112,261,0,0,140,0,1.5,1,1,0.52,1,2024-06-16 09:09:00,M +45,4,138,236,0,2,152,1,0.2,2,0,0.93,0,2024-06-16 13:05:00,F +67,3,115,564,0,2,160,0,1.6,2,0,0.86,0,2024-06-16 10:18:00,F +47,3,138,257,0,2,156,0,0.0,1,0,0.86,0,2024-06-16 16:13:00,M +44,2,120,220,0,0,170,0,0.0,1,0,1.0,0,2024-06-16 13:56:00,M +60,3,180,0,0,1,140,1,1.5,2,1,0.79,0,2024-06-16 07:04:00,M +58,4,125,300,0,2,171,0,0.0,1,1,0.78,1,2024-06-16 13:38:00,M +61,3,150,243,1,0,137,1,1.0,2,0,0.65,0,2024-06-16 14:33:00,M +52,2,160,196,0,0,165,0,0.0,1,0,0.97,0,2024-06-16 01:23:00,M +59,4,125,0,1,0,119,1,0.9,1,1,0.8,1,2024-06-16 05:28:00,M +53,4,130,264,0,2,143,0,0.4,2,0,0.85,0,2024-06-16 16:08:00,F +45,3,130,236,0,0,144,0,0.1,1,0,0.98,0,2024-06-16 08:46:00,M +42,3,120,228,0,0,152,1,1.5,2,0,0.53,0,2024-06-16 02:09:00,M +58,2,120,284,0,2,160,0,1.8,2,1,0.82,1,2024-06-16 14:39:00,M +59,4,110,0,1,0,94,0,0.0,2,1,0.98,1,2024-06-16 06:49:00,M +60,4,117,230,1,0,160,1,1.4,1,1,0.9,1,2024-06-16 15:18:00,M +54,4,130,202,1,0,112,1,2.0,2,1,0.93,1,2024-06-16 08:59:00,M +50,4,144,349,0,2,120,1,1.0,1,1,0.7,1,2024-06-16 08:43:00,M +55,4,116,186,1,1,102,0,0.0,2,1,0.96,1,2024-06-16 08:53:00,M +53,4,126,0,0,0,106,0,0.0,2,1,0.95,1,2024-06-16 07:51:00,M +39,3,140,321,0,2,182,0,0.0,1,0,1.0,0,2024-06-16 14:18:00,M +45,4,115,260,0,2,185,0,0.0,1,0,0.81,0,2024-06-16 14:09:00,M +32,4,118,529,0,0,130,0,0.0,2,1,0.79,1,2024-06-16 01:16:00,M +51,3,100,222,0,0,143,1,1.2,2,0,0.84,0,2024-06-16 12:03:00,M +54,4,140,166,0,0,118,1,0.0,2,1,0.9,1,2024-06-16 02:18:00,M +51,4,120,0,1,0,127,1,1.5,1,1,0.61,1,2024-06-16 05:06:00,F +65,4,150,225,0,2,114,0,1.0,2,1,0.81,1,2024-06-16 11:06:00,F +54,4,140,239,0,0,160,0,1.2,1,0,0.8,0,2024-06-16 15:04:00,M +46,4,120,277,0,0,125,1,1.0,2,1,0.87,1,2024-06-16 01:03:00,M +50,4,140,341,0,1,125,1,2.5,2,1,0.98,1,2024-06-16 02:21:00,M +59,4,125,222,0,0,135,1,2.5,3,1,0.92,1,2024-06-16 08:55:00,M +52,1,152,298,1,0,178,0,1.2,2,1,0.67,0,2024-06-16 17:17:00,M +55,2,135,250,0,2,161,0,1.4,2,0,0.95,0,2024-06-16 13:32:00,F +43,4,150,247,0,0,130,1,2.0,2,1,0.96,1,2024-06-16 02:19:00,M +55,4,120,226,0,2,127,1,1.7,3,1,0.97,1,2024-06-16 09:59:00,M diff --git a/spark/tests/resources/current/dataset_bool_missing.csv b/spark/tests/resources/current/dataset_bool_missing.csv index 99e46c5..9a90484 100644 --- a/spark/tests/resources/current/dataset_bool_missing.csv +++ b/spark/tests/resources/current/dataset_bool_missing.csv @@ -1,11 +1,11 @@ id,cat1,bool1,num1,num2,prediction,prediction_proba,target,datetime -1,A,true,1.0,1.4,1,0.9,1,2024-06-16 00:01:00-05:00 -2,B,true,1.5,100.0,0,0.95,0,2024-06-16 00:02:00-05:00 -3,A,false,3.0,123.0,1,1.0,1,2024-06-16 00:03:00-05:00 -4,B,true,0.5,,0,1.0,0,2024-06-16 00:04:00-05:00 -5,,true,0.5,,0,1.0,0,2024-06-16 00:05:00-05:00 -6,B,true,,200.0,0,0.8,1,2024-06-16 00:06:00-05:00 -7,C,,1.0,300.0,0,1.0,0,2024-06-16 00:07:00-05:00 -8,A,true,1.0,499.0,1,1.0,1,2024-06-16 00:08:00-05:00 -9,A,true,1.0,499.0,1,1.0,1,2024-06-16 00:09:00-05:00 -10,A,true,1.0,499.0,1,1.0,1,2024-06-16 00:10:00-05:00 \ No newline at end of file +1,A,true,1.0,1.4,1,0.9,1,2024-06-16 00:01:00 +2,B,true,1.5,100.0,0,0.95,0,2024-06-16 00:02:00 +3,A,false,3.0,123.0,1,1.0,1,2024-06-16 00:03:00 +4,B,true,0.5,,0,1.0,0,2024-06-16 00:04:00 +5,,true,0.5,,0,1.0,0,2024-06-16 00:05:00 +6,B,true,,200.0,0,0.8,1,2024-06-16 00:06:00 +7,C,,1.0,300.0,0,1.0,0,2024-06-16 00:07:00 +8,A,true,1.0,499.0,1,1.0,1,2024-06-16 00:08:00 +9,A,true,1.0,499.0,1,1.0,1,2024-06-16 00:09:00 +10,A,true,1.0,499.0,1,1.0,1,2024-06-16 00:10:00 \ No newline at end of file diff --git a/spark/tests/resources/current/dataset_cat_missing.csv b/spark/tests/resources/current/dataset_cat_missing.csv index 8200dac..804fe61 100644 --- a/spark/tests/resources/current/dataset_cat_missing.csv +++ b/spark/tests/resources/current/dataset_cat_missing.csv @@ -1,11 +1,11 @@ id,cat1,cat2,num1,num2,prediction,prediction_proba,target,datetime -1,A,X,1.0,1.4,1,0.9,1,2024-06-16 00:01:00-05:00 -2,B,X,1.5,100.0,0,0.95,0,2024-06-16 00:02:00-05:00 -3,A,Y,3.0,123.0,1,1.0,1,2024-06-16 00:03:00-05:00 -4,B,X,0.5,,0,1.0,0,2024-06-16 00:04:00-05:00 -5,,X,0.5,,0,1.0,0,2024-06-16 00:05:00-05:00 -6,B,X,,200.0,0,0.8,1,2024-06-16 00:06:00-05:00 -7,C,,1.0,300.0,0,1.0,0,2024-06-16 00:07:00-05:00 -8,A,X,1.0,499.0,1,1.0,1,2024-06-16 00:08:00-05:00 -9,A,X,1.0,499.0,1,1.0,1,2024-06-16 00:09:00-05:00 -10,A,X,1.0,499.0,1,1.0,1,2024-06-16 00:10:00-05:00 +1,A,X,1.0,1.4,1,0.9,1,2024-06-16 00:01:00 +2,B,X,1.5,100.0,0,0.95,0,2024-06-16 00:02:00 +3,A,Y,3.0,123.0,1,1.0,1,2024-06-16 00:03:00 +4,B,X,0.5,,0,1.0,0,2024-06-16 00:04:00 +5,,X,0.5,,0,1.0,0,2024-06-16 00:05:00 +6,B,X,,200.0,0,0.8,1,2024-06-16 00:06:00 +7,C,,1.0,300.0,0,1.0,0,2024-06-16 00:07:00 +8,A,X,1.0,499.0,1,1.0,1,2024-06-16 00:08:00 +9,A,X,1.0,499.0,1,1.0,1,2024-06-16 00:09:00 +10,A,X,1.0,499.0,1,1.0,1,2024-06-16 00:10:00 diff --git a/spark/tests/resources/current/easy_dataset.csv b/spark/tests/resources/current/easy_dataset.csv index 7c58f72..d6165bb 100644 --- a/spark/tests/resources/current/easy_dataset.csv +++ b/spark/tests/resources/current/easy_dataset.csv @@ -1,8 +1,8 @@ id,cat1,cat2,num1,num2,prediction,prediction_proba,target,datetime -1,A,A,1,100,1,1,1,2024-06-16 00:01:00-05:00 -2,B,B,1,100,1,1,1,2024-06-16 00:02:00-05:00 -3,C,C,1,100,1,1,1,2024-06-16 00:03:00-05:00 -4,D,D,1,100,1,1,1,2024-06-16 00:04:00-05:00 -5,E,E,1,100,1,1,1,2024-06-16 00:05:00-05:00 -6,F,F,1,100,0,1,1,2024-06-16 00:06:00-05:00 -7,G,G,1,100,1,1,0,2024-06-16 00:07:00-05:00 +1,A,A,1,100,1,1,1,2024-06-16 00:01:00 +2,B,B,1,100,1,1,1,2024-06-16 00:02:00 +3,C,C,1,100,1,1,1,2024-06-16 00:03:00 +4,D,D,1,100,1,1,1,2024-06-16 00:04:00 +5,E,E,1,100,1,1,1,2024-06-16 00:05:00 +6,F,F,1,100,0,1,1,2024-06-16 00:06:00 +7,G,G,1,100,1,1,0,2024-06-16 00:07:00 diff --git a/spark/tests/resources/current/easy_dataset_bucket_test.csv b/spark/tests/resources/current/easy_dataset_bucket_test.csv index e11968e..c79dd8c 100644 --- a/spark/tests/resources/current/easy_dataset_bucket_test.csv +++ b/spark/tests/resources/current/easy_dataset_bucket_test.csv @@ -1,8 +1,8 @@ id,cat1,cat2,num1,num2,prediction,prediction_proba,target,datetime -1,A,A,-0.00001,100,1,1,1,2024-06-16 00:01:00-05:00 -2,B,B,-0.00001,100,1,1,1,2024-06-16 00:02:00-05:00 -3,C,C,-0.00001,100,1,1,1,2024-06-16 00:03:00-05:00 -4,D,D,-0.00001,100,1,1,1,2024-06-16 00:04:00-05:00 -5,E,E,-0.00001,100,1,1,1,2024-06-16 00:05:00-05:00 -6,F,F,-0.00001,100,0,1,1,2024-06-16 00:06:00-05:00 -7,G,G,-0.00002,100.000001,1,1,0,2024-06-16 00:07:00-05:00 +1,A,A,-0.00001,100,1,1,1,2024-06-16 00:01:00 +2,B,B,-0.00001,100,1,1,1,2024-06-16 00:02:00 +3,C,C,-0.00001,100,1,1,1,2024-06-16 00:03:00 +4,D,D,-0.00001,100,1,1,1,2024-06-16 00:04:00 +5,E,E,-0.00001,100,1,1,1,2024-06-16 00:05:00 +6,F,F,-0.00001,100,0,1,1,2024-06-16 00:06:00 +7,G,G,-0.00002,100.000001,1,1,0,2024-06-16 00:07:00 diff --git a/spark/tests/resources/current/multiclass/dataset_for_hour.csv b/spark/tests/resources/current/multiclass/dataset_for_hour.csv new file mode 100644 index 0000000..953a649 --- /dev/null +++ b/spark/tests/resources/current/multiclass/dataset_for_hour.csv @@ -0,0 +1,11 @@ +cat1,cat2,num1,num2,prediction,target,datetime +A,X,1.0,1.4,CAT,CAT,2024-06-16 00:01:00 +B,X,1.5,100.0,DOG,DOG,2024-06-16 00:02:00 +A,Y,3.0,123.0,COW,COW,2024-06-16 01:03:00 +B,X,0.5,,CAT,COW,2024-06-16 01:04:00 +B,X,0.5,,DOG,CAT,2024-06-16 02:05:00 +B,X,,200.0,DOG,DOG,2024-06-16 02:06:00 +C,X,1.0,300.0,COW,CAT,2024-06-16 02:07:00 +A,X,1.0,499.0,COW,COW,2024-06-16 03:08:00 +A,X,1.0,499.0,CAT,CAT,2024-06-16 03:09:00 +A,X,1.0,499.0,CAT,CAT,2024-06-16 04:10:00 diff --git a/spark/tests/resources/current/multiclass/dataset_perfect_classes.csv b/spark/tests/resources/current/multiclass/dataset_perfect_classes.csv index a2b82dd..17e61f1 100644 --- a/spark/tests/resources/current/multiclass/dataset_perfect_classes.csv +++ b/spark/tests/resources/current/multiclass/dataset_perfect_classes.csv @@ -1,11 +1,11 @@ cat1,cat2,num1,num2,prediction,target,datetime -A,X,1.0,1.4,HEALTHY,HEALTHY,2024-06-16 00:01:00-05:00 -B,X,1.5,100.0,UNHEALTHY,UNHEALTHY,2024-06-16 00:02:00-05:00 -A,Y,3.0,123.0,HEALTHY,HEALTHY,2024-06-16 00:03:00-05:00 -B,X,0.5,,UNKNOWN,UNKNOWN,2024-06-16 00:04:00-05:00 -B,X,0.5,,ORPHAN,ORPHAN,2024-06-16 00:05:00-05:00 -B,X,,200.0,HEALTHY,HEALTHY,2024-06-16 00:06:00-05:00 -C,X,1.0,300.0,UNHEALTHY,UNHEALTHY,2024-06-16 00:07:00-05:00 -A,X,1.0,499.0,UNKNOWN,UNKNOWN,2024-06-16 00:08:00-05:00 -A,X,1.0,499.0,HEALTHY,HEALTHY,2024-06-16 00:09:00-05:00 -A,X,1.0,499.0,ORPHAN,ORPHAN,2024-06-16 00:10:00-05:00 \ No newline at end of file +A,X,1.0,1.4,HEALTHY,HEALTHY,2024-06-16 00:01:00 +B,X,1.5,100.0,UNHEALTHY,UNHEALTHY,2024-06-16 00:02:00 +A,Y,3.0,123.0,HEALTHY,HEALTHY,2024-06-16 00:03:00 +B,X,0.5,,UNKNOWN,UNKNOWN,2024-06-16 00:04:00 +B,X,0.5,,ORPHAN,ORPHAN,2024-06-16 00:05:00 +B,X,,200.0,HEALTHY,HEALTHY,2024-06-16 00:06:00 +C,X,1.0,300.0,UNHEALTHY,UNHEALTHY,2024-06-16 00:07:00 +A,X,1.0,499.0,UNKNOWN,UNKNOWN,2024-06-16 00:08:00 +A,X,1.0,499.0,HEALTHY,HEALTHY,2024-06-16 00:09:00 +A,X,1.0,499.0,ORPHAN,ORPHAN,2024-06-16 00:10:00 \ No newline at end of file diff --git a/spark/tests/resources/current/multiclass/dataset_target_int.csv b/spark/tests/resources/current/multiclass/dataset_target_int.csv index 6f231d8..8fafdf2 100644 --- a/spark/tests/resources/current/multiclass/dataset_target_int.csv +++ b/spark/tests/resources/current/multiclass/dataset_target_int.csv @@ -1,11 +1,11 @@ cat1,cat2,num1,num2,prediction,target,datetime -A,X,1.0,1.4,1,1,2024-06-16 00:01:00-05:00 -B,X,1.5,100.0,0,0,2024-06-16 00:02:00-05:00 -A,Y,3.0,123.0,1,1,2024-06-16 00:03:00-05:00 -B,X,0.5,,2,0,2024-06-16 00:04:00-05:00 -B,X,0.5,,3,2,2024-06-16 00:05:00-05:00 -B,X,,200.0,1,3,2024-06-16 00:06:00-05:00 -C,X,1.0,300.0,0,0,2024-06-16 00:07:00-05:00 -A,X,1.0,499.0,2,2,2024-06-16 00:08:00-05:00 -A,X,1.0,499.0,1,1,2024-06-16 00:09:00-05:00 -A,X,1.0,499.0,3,2,2024-06-16 00:10:00-05:00 \ No newline at end of file +A,X,1.0,1.4,1,1,2024-06-16 00:01:00 +B,X,1.5,100.0,0,0,2024-06-16 00:02:00 +A,Y,3.0,123.0,1,1,2024-06-16 00:03:00 +B,X,0.5,,2,0,2024-06-16 00:04:00 +B,X,0.5,,3,2,2024-06-16 00:05:00 +B,X,,200.0,1,3,2024-06-16 00:06:00 +C,X,1.0,300.0,0,0,2024-06-16 00:07:00 +A,X,1.0,499.0,2,2,2024-06-16 00:08:00 +A,X,1.0,499.0,1,1,2024-06-16 00:09:00 +A,X,1.0,499.0,3,2,2024-06-16 00:10:00 \ No newline at end of file diff --git a/spark/tests/resources/current/multiclass/dataset_target_string.csv b/spark/tests/resources/current/multiclass/dataset_target_string.csv index b3e618e..4d8b466 100644 --- a/spark/tests/resources/current/multiclass/dataset_target_string.csv +++ b/spark/tests/resources/current/multiclass/dataset_target_string.csv @@ -1,11 +1,11 @@ cat1,cat2,num1,num2,prediction,target,datetime -A,X,1.0,1.4,HEALTHY,HEALTHY,2024-06-16 00:01:00-05:00 -B,X,1.5,100.0,UNHEALTHY,UNHEALTHY,2024-06-16 00:02:00-05:00 -A,Y,3.0,123.0,HEALTHY,HEALTHY,2024-06-16 00:03:00-05:00 -B,X,0.5,,UNKNOWN,UNHEALTHY,2024-06-16 00:04:00-05:00 -B,X,0.5,,ORPHAN,UNKNOWN,2024-06-16 00:05:00-05:00 -B,X,,200.0,HEALTHY,ORPHAN,2024-06-16 00:06:00-05:00 -C,X,1.0,300.0,UNHEALTHY,UNHEALTHY,2024-06-16 00:07:00-05:00 -A,X,1.0,499.0,UNKNOWN,UNKNOWN,2024-06-16 00:08:00-05:00 -A,X,1.0,499.0,HEALTHY,HEALTHY,2024-06-16 00:09:00-05:00 -A,X,1.0,499.0,ORPHAN,UNKNOWN,2024-06-16 00:10:00-05:00 \ No newline at end of file +A,X,1.0,1.4,HEALTHY,HEALTHY,2024-06-16 00:01:00 +B,X,1.5,100.0,UNHEALTHY,UNHEALTHY,2024-06-16 00:02:00 +A,Y,3.0,123.0,HEALTHY,HEALTHY,2024-06-16 00:03:00 +B,X,0.5,,UNKNOWN,UNHEALTHY,2024-06-16 00:04:00 +B,X,0.5,,ORPHAN,UNKNOWN,2024-06-16 00:05:00 +B,X,,200.0,HEALTHY,ORPHAN,2024-06-16 00:06:00 +C,X,1.0,300.0,UNHEALTHY,UNHEALTHY,2024-06-16 00:07:00 +A,X,1.0,499.0,UNKNOWN,UNKNOWN,2024-06-16 00:08:00 +A,X,1.0,499.0,HEALTHY,HEALTHY,2024-06-16 00:09:00 +A,X,1.0,499.0,ORPHAN,UNKNOWN,2024-06-16 00:10:00 \ No newline at end of file diff --git a/spark/tests/resources/current/multiclass/dataset_target_string_missing_classes.csv b/spark/tests/resources/current/multiclass/dataset_target_string_missing_classes.csv new file mode 100644 index 0000000..8f1d269 --- /dev/null +++ b/spark/tests/resources/current/multiclass/dataset_target_string_missing_classes.csv @@ -0,0 +1,8 @@ +cat1,cat2,num1,num2,prediction,target,datetime +A,X,1.0,1.4,HEALTHY,HEALTHY,2024-06-16 00:01:00 +B,X,1.5,100.0,UNHEALTHY,UNHEALTHY,2024-06-16 00:02:00 +A,Y,3.0,123.0,HEALTHY,HEALTHY,2024-06-16 00:03:00 +B,X,0.5,,ORPHAN,UNHEALTHY,2024-06-16 00:04:00 +C,X,1.0,300.0,UNHEALTHY,UNHEALTHY,2024-06-16 00:07:00 +A,X,1.0,499.0,ORPHAN,UNKNOWN,2024-06-16 00:08:00 +A,X,1.0,499.0,HEALTHY,HEALTHY,2024-06-16 00:09:00 diff --git a/spark/tests/resources/reference/complete_dataset.csv b/spark/tests/resources/reference/complete_dataset.csv index c187969..8b2baad 100644 --- a/spark/tests/resources/reference/complete_dataset.csv +++ b/spark/tests/resources/reference/complete_dataset.csv @@ -1,8 +1,8 @@ id,cat1,cat2,num1,num2,prediction,prediction_proba,target,datetime -1,A,A,1,100,1,1,1,2024-06-16 00:01:00-05:00 -2,B,B,1,100,1,1,1,2024-06-16 00:02:00-05:00 -3,C,C,1,100,1,1,1,2024-06-16 00:03:00-05:00 -4,D,D,1,100,1,1,1,2024-06-16 00:04:00-05:00 -5,E,E,1,100,1,1,1,2024-06-16 00:05:00-05:00 -6,F,F,1,100,1,1,1,2024-06-16 00:06:00-05:00 -7,G,G,1,100,1,1,1,2024-06-16 00:07:00-05:00 \ No newline at end of file +1,A,A,1,100,1,1,1,2024-06-16 00:01:00 +2,B,B,1,100,1,1,1,2024-06-16 00:02:00 +3,C,C,1,100,1,1,1,2024-06-16 00:03:00 +4,D,D,1,100,1,1,1,2024-06-16 00:04:00 +5,E,E,1,100,1,1,1,2024-06-16 00:05:00 +6,F,F,1,100,1,1,1,2024-06-16 00:06:00 +7,G,G,1,100,1,1,1,2024-06-16 00:07:00 \ No newline at end of file diff --git a/spark/tests/resources/reference/dataset.csv b/spark/tests/resources/reference/dataset.csv index c8eae44..3788b87 100644 --- a/spark/tests/resources/reference/dataset.csv +++ b/spark/tests/resources/reference/dataset.csv @@ -1,11 +1,11 @@ cat1,cat2,num1,num2,prediction,prediction_proba,target,datetime -A,X,1.0,1.4,1,0.9,1,2024-06-16 00:01:00-05:00 -B,X,1.5,100.0,0,0.95,0,2024-06-16 00:02:00-05:00 -A,Y,3.0,123.0,1,1.0,1,2024-06-16 00:03:00-05:00 -B,X,0.5,,0,1.0,0,2024-06-16 00:04:00-05:00 -B,X,0.5,,0,1.0,0,2024-06-16 00:05:00-05:00 -B,X,,200.0,0,0.8,1,2024-06-16 00:06:00-05:00 -C,X,1.0,300.0,0,1.0,0,2024-06-16 00:07:00-05:00 -A,X,1.0,499.0,1,1.0,1,2024-06-16 00:08:00-05:00 -A,X,1.0,499.0,1,1.0,1,2024-06-16 00:09:00-05:00 -A,X,1.0,499.0,1,1.0,1,2024-06-16 00:10:00-05:00 +A,X,1.0,1.4,1,0.9,1,2024-06-16 00:01:00 +B,X,1.5,100.0,0,0.95,0,2024-06-16 00:02:00 +A,Y,3.0,123.0,1,1.0,1,2024-06-16 00:03:00 +B,X,0.5,,0,1.0,0,2024-06-16 00:04:00 +B,X,0.5,,0,1.0,0,2024-06-16 00:05:00 +B,X,,200.0,0,0.8,1,2024-06-16 00:06:00 +C,X,1.0,300.0,0,1.0,0,2024-06-16 00:07:00 +A,X,1.0,499.0,1,1.0,1,2024-06-16 00:08:00 +A,X,1.0,499.0,1,1.0,1,2024-06-16 00:09:00 +A,X,1.0,499.0,1,1.0,1,2024-06-16 00:10:00 diff --git a/spark/tests/resources/reference/dataset_bool_missing.csv b/spark/tests/resources/reference/dataset_bool_missing.csv index 99e46c5..9a90484 100644 --- a/spark/tests/resources/reference/dataset_bool_missing.csv +++ b/spark/tests/resources/reference/dataset_bool_missing.csv @@ -1,11 +1,11 @@ id,cat1,bool1,num1,num2,prediction,prediction_proba,target,datetime -1,A,true,1.0,1.4,1,0.9,1,2024-06-16 00:01:00-05:00 -2,B,true,1.5,100.0,0,0.95,0,2024-06-16 00:02:00-05:00 -3,A,false,3.0,123.0,1,1.0,1,2024-06-16 00:03:00-05:00 -4,B,true,0.5,,0,1.0,0,2024-06-16 00:04:00-05:00 -5,,true,0.5,,0,1.0,0,2024-06-16 00:05:00-05:00 -6,B,true,,200.0,0,0.8,1,2024-06-16 00:06:00-05:00 -7,C,,1.0,300.0,0,1.0,0,2024-06-16 00:07:00-05:00 -8,A,true,1.0,499.0,1,1.0,1,2024-06-16 00:08:00-05:00 -9,A,true,1.0,499.0,1,1.0,1,2024-06-16 00:09:00-05:00 -10,A,true,1.0,499.0,1,1.0,1,2024-06-16 00:10:00-05:00 \ No newline at end of file +1,A,true,1.0,1.4,1,0.9,1,2024-06-16 00:01:00 +2,B,true,1.5,100.0,0,0.95,0,2024-06-16 00:02:00 +3,A,false,3.0,123.0,1,1.0,1,2024-06-16 00:03:00 +4,B,true,0.5,,0,1.0,0,2024-06-16 00:04:00 +5,,true,0.5,,0,1.0,0,2024-06-16 00:05:00 +6,B,true,,200.0,0,0.8,1,2024-06-16 00:06:00 +7,C,,1.0,300.0,0,1.0,0,2024-06-16 00:07:00 +8,A,true,1.0,499.0,1,1.0,1,2024-06-16 00:08:00 +9,A,true,1.0,499.0,1,1.0,1,2024-06-16 00:09:00 +10,A,true,1.0,499.0,1,1.0,1,2024-06-16 00:10:00 \ No newline at end of file diff --git a/spark/tests/resources/reference/dataset_cat_missing.csv b/spark/tests/resources/reference/dataset_cat_missing.csv index 8200dac..804fe61 100644 --- a/spark/tests/resources/reference/dataset_cat_missing.csv +++ b/spark/tests/resources/reference/dataset_cat_missing.csv @@ -1,11 +1,11 @@ id,cat1,cat2,num1,num2,prediction,prediction_proba,target,datetime -1,A,X,1.0,1.4,1,0.9,1,2024-06-16 00:01:00-05:00 -2,B,X,1.5,100.0,0,0.95,0,2024-06-16 00:02:00-05:00 -3,A,Y,3.0,123.0,1,1.0,1,2024-06-16 00:03:00-05:00 -4,B,X,0.5,,0,1.0,0,2024-06-16 00:04:00-05:00 -5,,X,0.5,,0,1.0,0,2024-06-16 00:05:00-05:00 -6,B,X,,200.0,0,0.8,1,2024-06-16 00:06:00-05:00 -7,C,,1.0,300.0,0,1.0,0,2024-06-16 00:07:00-05:00 -8,A,X,1.0,499.0,1,1.0,1,2024-06-16 00:08:00-05:00 -9,A,X,1.0,499.0,1,1.0,1,2024-06-16 00:09:00-05:00 -10,A,X,1.0,499.0,1,1.0,1,2024-06-16 00:10:00-05:00 +1,A,X,1.0,1.4,1,0.9,1,2024-06-16 00:01:00 +2,B,X,1.5,100.0,0,0.95,0,2024-06-16 00:02:00 +3,A,Y,3.0,123.0,1,1.0,1,2024-06-16 00:03:00 +4,B,X,0.5,,0,1.0,0,2024-06-16 00:04:00 +5,,X,0.5,,0,1.0,0,2024-06-16 00:05:00 +6,B,X,,200.0,0,0.8,1,2024-06-16 00:06:00 +7,C,,1.0,300.0,0,1.0,0,2024-06-16 00:07:00 +8,A,X,1.0,499.0,1,1.0,1,2024-06-16 00:08:00 +9,A,X,1.0,499.0,1,1.0,1,2024-06-16 00:09:00 +10,A,X,1.0,499.0,1,1.0,1,2024-06-16 00:10:00 diff --git a/spark/tests/resources/reference/easy_dataset.csv b/spark/tests/resources/reference/easy_dataset.csv index 7c58f72..d6165bb 100644 --- a/spark/tests/resources/reference/easy_dataset.csv +++ b/spark/tests/resources/reference/easy_dataset.csv @@ -1,8 +1,8 @@ id,cat1,cat2,num1,num2,prediction,prediction_proba,target,datetime -1,A,A,1,100,1,1,1,2024-06-16 00:01:00-05:00 -2,B,B,1,100,1,1,1,2024-06-16 00:02:00-05:00 -3,C,C,1,100,1,1,1,2024-06-16 00:03:00-05:00 -4,D,D,1,100,1,1,1,2024-06-16 00:04:00-05:00 -5,E,E,1,100,1,1,1,2024-06-16 00:05:00-05:00 -6,F,F,1,100,0,1,1,2024-06-16 00:06:00-05:00 -7,G,G,1,100,1,1,0,2024-06-16 00:07:00-05:00 +1,A,A,1,100,1,1,1,2024-06-16 00:01:00 +2,B,B,1,100,1,1,1,2024-06-16 00:02:00 +3,C,C,1,100,1,1,1,2024-06-16 00:03:00 +4,D,D,1,100,1,1,1,2024-06-16 00:04:00 +5,E,E,1,100,1,1,1,2024-06-16 00:05:00 +6,F,F,1,100,0,1,1,2024-06-16 00:06:00 +7,G,G,1,100,1,1,0,2024-06-16 00:07:00 diff --git a/spark/tests/resources/reference/multiclass/dataset_for_hour.csv b/spark/tests/resources/reference/multiclass/dataset_for_hour.csv new file mode 100644 index 0000000..953a649 --- /dev/null +++ b/spark/tests/resources/reference/multiclass/dataset_for_hour.csv @@ -0,0 +1,11 @@ +cat1,cat2,num1,num2,prediction,target,datetime +A,X,1.0,1.4,CAT,CAT,2024-06-16 00:01:00 +B,X,1.5,100.0,DOG,DOG,2024-06-16 00:02:00 +A,Y,3.0,123.0,COW,COW,2024-06-16 01:03:00 +B,X,0.5,,CAT,COW,2024-06-16 01:04:00 +B,X,0.5,,DOG,CAT,2024-06-16 02:05:00 +B,X,,200.0,DOG,DOG,2024-06-16 02:06:00 +C,X,1.0,300.0,COW,CAT,2024-06-16 02:07:00 +A,X,1.0,499.0,COW,COW,2024-06-16 03:08:00 +A,X,1.0,499.0,CAT,CAT,2024-06-16 03:09:00 +A,X,1.0,499.0,CAT,CAT,2024-06-16 04:10:00 diff --git a/spark/tests/resources/reference/multiclass/dataset_perfect_classes.csv b/spark/tests/resources/reference/multiclass/dataset_perfect_classes.csv index a2b82dd..17e61f1 100644 --- a/spark/tests/resources/reference/multiclass/dataset_perfect_classes.csv +++ b/spark/tests/resources/reference/multiclass/dataset_perfect_classes.csv @@ -1,11 +1,11 @@ cat1,cat2,num1,num2,prediction,target,datetime -A,X,1.0,1.4,HEALTHY,HEALTHY,2024-06-16 00:01:00-05:00 -B,X,1.5,100.0,UNHEALTHY,UNHEALTHY,2024-06-16 00:02:00-05:00 -A,Y,3.0,123.0,HEALTHY,HEALTHY,2024-06-16 00:03:00-05:00 -B,X,0.5,,UNKNOWN,UNKNOWN,2024-06-16 00:04:00-05:00 -B,X,0.5,,ORPHAN,ORPHAN,2024-06-16 00:05:00-05:00 -B,X,,200.0,HEALTHY,HEALTHY,2024-06-16 00:06:00-05:00 -C,X,1.0,300.0,UNHEALTHY,UNHEALTHY,2024-06-16 00:07:00-05:00 -A,X,1.0,499.0,UNKNOWN,UNKNOWN,2024-06-16 00:08:00-05:00 -A,X,1.0,499.0,HEALTHY,HEALTHY,2024-06-16 00:09:00-05:00 -A,X,1.0,499.0,ORPHAN,ORPHAN,2024-06-16 00:10:00-05:00 \ No newline at end of file +A,X,1.0,1.4,HEALTHY,HEALTHY,2024-06-16 00:01:00 +B,X,1.5,100.0,UNHEALTHY,UNHEALTHY,2024-06-16 00:02:00 +A,Y,3.0,123.0,HEALTHY,HEALTHY,2024-06-16 00:03:00 +B,X,0.5,,UNKNOWN,UNKNOWN,2024-06-16 00:04:00 +B,X,0.5,,ORPHAN,ORPHAN,2024-06-16 00:05:00 +B,X,,200.0,HEALTHY,HEALTHY,2024-06-16 00:06:00 +C,X,1.0,300.0,UNHEALTHY,UNHEALTHY,2024-06-16 00:07:00 +A,X,1.0,499.0,UNKNOWN,UNKNOWN,2024-06-16 00:08:00 +A,X,1.0,499.0,HEALTHY,HEALTHY,2024-06-16 00:09:00 +A,X,1.0,499.0,ORPHAN,ORPHAN,2024-06-16 00:10:00 \ No newline at end of file diff --git a/spark/tests/resources/reference/multiclass/dataset_target_int.csv b/spark/tests/resources/reference/multiclass/dataset_target_int.csv index 6f231d8..8fafdf2 100644 --- a/spark/tests/resources/reference/multiclass/dataset_target_int.csv +++ b/spark/tests/resources/reference/multiclass/dataset_target_int.csv @@ -1,11 +1,11 @@ cat1,cat2,num1,num2,prediction,target,datetime -A,X,1.0,1.4,1,1,2024-06-16 00:01:00-05:00 -B,X,1.5,100.0,0,0,2024-06-16 00:02:00-05:00 -A,Y,3.0,123.0,1,1,2024-06-16 00:03:00-05:00 -B,X,0.5,,2,0,2024-06-16 00:04:00-05:00 -B,X,0.5,,3,2,2024-06-16 00:05:00-05:00 -B,X,,200.0,1,3,2024-06-16 00:06:00-05:00 -C,X,1.0,300.0,0,0,2024-06-16 00:07:00-05:00 -A,X,1.0,499.0,2,2,2024-06-16 00:08:00-05:00 -A,X,1.0,499.0,1,1,2024-06-16 00:09:00-05:00 -A,X,1.0,499.0,3,2,2024-06-16 00:10:00-05:00 \ No newline at end of file +A,X,1.0,1.4,1,1,2024-06-16 00:01:00 +B,X,1.5,100.0,0,0,2024-06-16 00:02:00 +A,Y,3.0,123.0,1,1,2024-06-16 00:03:00 +B,X,0.5,,2,0,2024-06-16 00:04:00 +B,X,0.5,,3,2,2024-06-16 00:05:00 +B,X,,200.0,1,3,2024-06-16 00:06:00 +C,X,1.0,300.0,0,0,2024-06-16 00:07:00 +A,X,1.0,499.0,2,2,2024-06-16 00:08:00 +A,X,1.0,499.0,1,1,2024-06-16 00:09:00 +A,X,1.0,499.0,3,2,2024-06-16 00:10:00 \ No newline at end of file diff --git a/spark/tests/resources/reference/multiclass/dataset_target_string.csv b/spark/tests/resources/reference/multiclass/dataset_target_string.csv index b3e618e..4d8b466 100644 --- a/spark/tests/resources/reference/multiclass/dataset_target_string.csv +++ b/spark/tests/resources/reference/multiclass/dataset_target_string.csv @@ -1,11 +1,11 @@ cat1,cat2,num1,num2,prediction,target,datetime -A,X,1.0,1.4,HEALTHY,HEALTHY,2024-06-16 00:01:00-05:00 -B,X,1.5,100.0,UNHEALTHY,UNHEALTHY,2024-06-16 00:02:00-05:00 -A,Y,3.0,123.0,HEALTHY,HEALTHY,2024-06-16 00:03:00-05:00 -B,X,0.5,,UNKNOWN,UNHEALTHY,2024-06-16 00:04:00-05:00 -B,X,0.5,,ORPHAN,UNKNOWN,2024-06-16 00:05:00-05:00 -B,X,,200.0,HEALTHY,ORPHAN,2024-06-16 00:06:00-05:00 -C,X,1.0,300.0,UNHEALTHY,UNHEALTHY,2024-06-16 00:07:00-05:00 -A,X,1.0,499.0,UNKNOWN,UNKNOWN,2024-06-16 00:08:00-05:00 -A,X,1.0,499.0,HEALTHY,HEALTHY,2024-06-16 00:09:00-05:00 -A,X,1.0,499.0,ORPHAN,UNKNOWN,2024-06-16 00:10:00-05:00 \ No newline at end of file +A,X,1.0,1.4,HEALTHY,HEALTHY,2024-06-16 00:01:00 +B,X,1.5,100.0,UNHEALTHY,UNHEALTHY,2024-06-16 00:02:00 +A,Y,3.0,123.0,HEALTHY,HEALTHY,2024-06-16 00:03:00 +B,X,0.5,,UNKNOWN,UNHEALTHY,2024-06-16 00:04:00 +B,X,0.5,,ORPHAN,UNKNOWN,2024-06-16 00:05:00 +B,X,,200.0,HEALTHY,ORPHAN,2024-06-16 00:06:00 +C,X,1.0,300.0,UNHEALTHY,UNHEALTHY,2024-06-16 00:07:00 +A,X,1.0,499.0,UNKNOWN,UNKNOWN,2024-06-16 00:08:00 +A,X,1.0,499.0,HEALTHY,HEALTHY,2024-06-16 00:09:00 +A,X,1.0,499.0,ORPHAN,UNKNOWN,2024-06-16 00:10:00 \ No newline at end of file diff --git a/spark/tests/resources/reference/reference_joined.csv b/spark/tests/resources/reference/reference_joined.csv index bd02f0e..2e6abfd 100644 --- a/spark/tests/resources/reference/reference_joined.csv +++ b/spark/tests/resources/reference/reference_joined.csv @@ -1,239 +1,239 @@ age,chest_pain_type,resting_blood_pressure,cholesterol,fasting_blood_sugar,resting_ecg,max_heart_rate_achieved,exercise_induced_angina,st_depression,st_slope,prediction,prediction_proba,target,datetime,sex -61,4,160,0,1,1,145,0,1.0,2,1,0.94,1,2024-06-16 06:30:00-05:00,M -54,3,120,237,0,0,150,1,1.5,2,0,0.54,1,2024-06-16 04:07:00-05:00,M -55,2,122,320,0,0,155,0,0.0,1,0,0.97,0,2024-06-16 04:20:00-05:00,F -56,4,155,342,1,0,150,1,3.0,2,1,0.97,1,2024-06-16 02:35:00-05:00,M -60,4,150,258,0,2,157,0,2.6,2,1,0.85,1,2024-06-16 16:24:00-05:00,F -56,4,125,0,1,0,103,1,1.0,2,1,0.99,1,2024-06-16 06:53:00-05:00,M -43,3,130,315,0,0,162,0,1.9,1,0,0.97,0,2024-06-16 13:22:00-05:00,M -54,4,150,365,0,1,134,0,1.0,1,0,0.61,0,2024-06-16 00:58:00-05:00,M -46,3,142,177,0,2,160,1,1.4,3,0,0.82,0,2024-06-16 12:32:00-05:00,F -52,3,172,199,1,0,162,0,0.5,1,0,0.99,0,2024-06-16 15:01:00-05:00,M -60,3,120,178,1,0,96,0,0.0,1,0,0.98,0,2024-06-16 18:51:00-05:00,F -50,3,140,233,0,0,163,0,0.6,2,1,0.83,1,2024-06-16 12:39:00-05:00,M -64,3,140,313,0,0,133,0,0.2,1,0,0.94,0,2024-06-16 11:55:00-05:00,F -68,3,120,211,0,2,115,0,1.5,2,0,0.61,0,2024-06-16 13:10:00-05:00,F -50,4,150,243,0,2,128,0,2.6,2,1,0.91,1,2024-06-16 11:46:00-05:00,M -57,2,140,265,0,1,145,1,1.0,2,1,0.71,1,2024-06-16 01:10:00-05:00,M -61,4,120,282,0,1,135,1,4.0,3,1,0.91,1,2024-06-16 08:42:00-05:00,M -60,4,140,293,0,2,170,0,1.2,2,1,0.82,1,2024-06-16 17:25:00-05:00,M -54,2,192,283,0,2,195,0,0.0,1,0,0.93,1,2024-06-16 12:41:00-05:00,M -65,4,170,263,1,0,112,1,2.0,2,1,0.92,1,2024-06-16 01:26:00-05:00,M -70,3,160,269,0,0,112,1,2.9,2,1,0.93,1,2024-06-16 17:37:00-05:00,M -47,4,110,275,0,2,118,1,1.0,2,1,0.95,1,2024-06-16 18:54:00-05:00,M -51,3,135,160,0,0,150,0,2.0,2,1,0.58,1,2024-06-16 04:37:00-05:00,M -56,4,115,0,1,1,82,0,-1.0,1,1,0.67,1,2024-06-16 05:32:00-05:00,M -54,4,125,216,0,0,140,0,0.0,2,1,0.79,1,2024-06-16 03:30:00-05:00,M -52,4,160,331,0,0,94,1,2.5,2,1,0.99,1,2024-06-16 02:20:00-05:00,M -56,1,120,193,0,2,162,0,1.9,2,0,0.86,0,2024-06-16 18:22:00-05:00,M -58,4,128,259,0,2,130,1,3.0,2,1,0.97,1,2024-06-16 18:13:00-05:00,M -59,4,174,249,0,0,143,1,0.0,2,1,0.88,1,2024-06-16 11:15:00-05:00,F -74,1,145,216,1,0,116,1,1.8,2,1,0.69,1,2024-06-16 09:14:00-05:00,M -43,4,140,0,0,1,140,1,0.5,1,1,0.57,1,2024-06-16 05:52:00-05:00,M -60,3,140,185,0,2,155,0,3.0,2,1,0.9,1,2024-06-16 14:25:00-05:00,M -54,3,133,203,0,1,137,0,0.2,1,0,0.98,0,2024-06-16 09:21:00-05:00,M -54,4,130,0,1,0,110,1,3.0,2,1,0.94,1,2024-06-16 06:55:00-05:00,M -48,4,160,193,0,0,102,1,3.0,2,1,0.97,1,2024-06-16 04:00:00-05:00,M -66,3,120,0,0,1,120,0,-0.5,1,0,0.56,0,2024-06-16 07:02:00-05:00,M -44,2,150,288,0,0,150,1,3.0,2,1,0.67,1,2024-06-16 00:23:00-05:00,M -52,4,130,0,1,0,120,0,0.0,2,1,1.0,1,2024-06-16 05:44:00-05:00,M -41,4,110,289,0,0,170,0,0.0,2,1,0.81,1,2024-06-16 00:49:00-05:00,M -64,4,110,0,1,0,114,1,1.3,3,1,0.93,1,2024-06-16 05:42:00-05:00,M -35,2,110,257,0,0,140,0,0.0,2,1,0.52,1,2024-06-16 03:28:00-05:00,M -45,2,128,308,0,2,170,0,0.0,1,0,0.99,0,2024-06-16 17:15:00-05:00,M -38,4,105,0,1,0,166,0,2.8,1,1,0.56,1,2024-06-16 05:23:00-05:00,F -34,2,150,214,0,1,168,0,0.0,1,0,0.98,0,2024-06-16 01:47:00-05:00,M -42,4,136,315,0,0,125,1,1.8,2,1,0.99,1,2024-06-16 11:07:00-05:00,M -55,4,180,327,0,1,117,1,3.4,2,1,0.91,1,2024-06-16 12:07:00-05:00,F -57,4,130,311,1,1,148,1,2.0,2,1,0.94,1,2024-06-16 07:59:00-05:00,M -54,4,136,220,0,0,140,1,3.0,2,1,0.99,1,2024-06-16 08:10:00-05:00,M -58,4,130,0,0,1,100,1,1.0,2,1,0.91,1,2024-06-16 05:54:00-05:00,M -62,4,120,267,0,0,99,1,1.8,2,1,0.98,1,2024-06-16 15:59:00-05:00,M -63,4,170,177,0,0,84,1,2.5,3,1,0.99,1,2024-06-16 07:13:00-05:00,M -55,3,133,185,0,1,136,0,0.2,1,0,0.99,0,2024-06-16 09:58:00-05:00,M -47,4,160,0,0,0,124,1,0.0,2,1,0.97,1,2024-06-16 06:02:00-05:00,M -52,1,118,186,0,2,190,0,0.0,2,0,0.87,0,2024-06-16 16:39:00-05:00,M -55,4,158,217,0,0,110,1,2.5,2,1,0.99,1,2024-06-16 09:30:00-05:00,M -60,4,135,0,0,0,63,1,0.5,1,1,0.65,1,2024-06-16 06:11:00-05:00,M -57,4,128,303,0,2,159,0,0.0,1,0,0.73,0,2024-06-16 10:31:00-05:00,F -51,3,120,295,0,2,157,0,0.6,1,0,0.99,0,2024-06-16 19:00:00-05:00,F -47,3,138,257,0,2,156,0,0.0,1,0,0.86,0,2024-06-16 14:15:00-05:00,M -38,4,150,0,1,0,120,1,0.7,2,1,0.98,1,2024-06-16 06:34:00-05:00,M -52,4,130,180,0,0,140,1,1.5,2,1,0.88,0,2024-06-16 00:54:00-05:00,F -47,3,130,235,0,0,145,0,2.0,2,0,0.6,0,2024-06-16 04:30:00-05:00,F -54,3,120,217,0,0,137,0,0.0,1,0,1.0,0,2024-06-16 03:38:00-05:00,M -58,3,105,240,0,2,154,1,0.6,2,0,0.79,0,2024-06-16 17:11:00-05:00,M -31,4,120,270,0,0,153,1,1.5,2,1,0.78,1,2024-06-16 00:56:00-05:00,M -52,3,122,0,0,0,110,1,2.0,3,1,0.84,1,2024-06-16 07:31:00-05:00,M -50,2,120,244,0,0,162,0,1.1,1,0,0.98,0,2024-06-16 11:56:00-05:00,F -52,4,128,204,1,0,156,1,1.0,2,1,0.86,1,2024-06-16 19:13:00-05:00,M -67,4,100,299,0,2,125,1,0.9,2,1,0.91,1,2024-06-16 13:09:00-05:00,M -63,4,150,407,0,2,154,0,4.0,2,1,0.78,1,2024-06-16 10:26:00-05:00,F -56,2,120,236,0,0,178,0,0.8,1,0,0.96,0,2024-06-16 14:52:00-05:00,M -51,3,94,227,0,0,154,1,0.0,1,0,0.72,0,2024-06-16 16:58:00-05:00,M -64,4,143,306,1,1,115,1,1.8,2,1,0.99,1,2024-06-16 08:52:00-05:00,M -47,3,155,0,0,0,118,1,1.0,2,1,0.93,1,2024-06-16 06:17:00-05:00,M -51,4,140,299,0,0,173,1,1.6,1,1,0.82,1,2024-06-16 13:40:00-05:00,M -58,1,150,283,1,2,162,0,1.0,1,0,0.8,0,2024-06-16 15:08:00-05:00,F -57,4,110,201,0,0,126,1,1.5,2,0,0.64,0,2024-06-16 18:57:00-05:00,M -54,4,110,239,0,0,126,1,2.8,2,1,0.9,1,2024-06-16 16:54:00-05:00,M -53,4,120,246,0,0,116,1,0.0,2,1,0.94,1,2024-06-16 03:19:00-05:00,M -55,4,122,223,1,1,100,0,0.0,2,1,0.96,1,2024-06-16 10:14:00-05:00,M -38,2,140,297,0,0,150,0,0.0,1,0,0.99,0,2024-06-16 01:53:00-05:00,M -68,3,118,277,0,0,151,0,1.0,1,0,0.87,0,2024-06-16 17:26:00-05:00,M -39,2,190,241,0,0,106,0,0.0,1,0,0.95,0,2024-06-16 01:49:00-05:00,M -51,4,130,179,0,0,100,0,0.0,1,0,0.93,0,2024-06-16 01:41:00-05:00,M -49,3,140,187,0,0,172,0,0.0,1,0,0.99,0,2024-06-16 02:54:00-05:00,M -54,2,120,238,0,0,154,0,0.0,1,0,0.96,0,2024-06-16 01:21:00-05:00,M -59,2,140,221,0,0,164,1,0.0,1,0,0.95,0,2024-06-16 11:47:00-05:00,M -37,3,118,240,0,2,165,0,1.0,2,0,0.78,0,2024-06-16 09:48:00-05:00,M -56,3,155,0,0,1,99,0,0.0,2,1,0.7,1,2024-06-16 06:03:00-05:00,M -62,3,160,0,0,0,72,1,0.0,2,1,0.95,1,2024-06-16 06:01:00-05:00,M -38,2,120,275,0,0,129,0,0.0,1,0,0.98,0,2024-06-16 03:53:00-05:00,F -45,2,180,295,0,0,180,0,0.0,1,0,0.97,0,2024-06-16 04:35:00-05:00,F -67,4,160,286,0,2,108,1,1.5,2,1,1.0,1,2024-06-16 14:48:00-05:00,M -44,4,130,290,0,0,100,1,2.0,2,1,0.97,1,2024-06-16 02:38:00-05:00,M -48,2,133,308,0,1,156,0,2.0,1,0,0.94,0,2024-06-16 04:49:00-05:00,F -54,2,160,305,0,0,175,0,0.0,1,0,0.97,0,2024-06-16 04:26:00-05:00,M -54,2,120,230,1,0,140,0,0.0,1,0,1.0,0,2024-06-16 02:08:00-05:00,F -54,2,192,283,0,2,195,0,0.0,1,0,0.93,1,2024-06-16 17:55:00-05:00,M -61,4,130,0,1,0,77,0,2.5,2,1,0.98,1,2024-06-16 05:19:00-05:00,M -49,1,130,0,0,1,145,0,3.0,2,1,0.63,1,2024-06-16 08:13:00-05:00,M -44,2,130,219,0,2,188,0,0.0,1,0,0.99,0,2024-06-16 14:28:00-05:00,M -59,1,160,273,0,2,125,0,0.0,1,1,0.67,1,2024-06-16 10:54:00-05:00,M -54,4,130,0,0,1,117,1,1.4,2,1,0.96,1,2024-06-16 07:27:00-05:00,M -55,3,136,245,1,1,131,1,1.2,2,1,0.83,1,2024-06-16 08:26:00-05:00,M -47,3,140,193,0,0,145,1,1.0,2,1,0.66,1,2024-06-16 04:25:00-05:00,M -42,2,150,268,0,0,136,0,0.0,1,0,1.0,0,2024-06-16 03:18:00-05:00,M -67,4,106,223,0,0,142,0,0.3,1,0,0.92,0,2024-06-16 19:03:00-05:00,F -54,3,110,214,0,0,158,0,1.6,2,0,0.94,0,2024-06-16 17:29:00-05:00,F -43,4,120,175,0,0,120,1,1.0,2,1,0.96,1,2024-06-16 00:44:00-05:00,M -50,4,120,0,0,1,156,1,0.0,1,1,0.73,1,2024-06-16 06:20:00-05:00,M -49,2,134,271,0,0,162,0,0.0,2,0,0.81,0,2024-06-16 18:45:00-05:00,F -64,4,134,273,0,0,102,1,4.0,3,1,0.98,1,2024-06-16 09:20:00-05:00,M -51,3,140,308,0,2,142,0,1.5,1,0,0.94,0,2024-06-16 14:21:00-05:00,F -67,4,125,254,1,0,163,0,0.2,2,1,0.9,1,2024-06-16 15:58:00-05:00,M -64,4,120,0,1,1,106,0,2.0,2,1,0.92,1,2024-06-16 07:18:00-05:00,M -45,4,142,309,0,2,147,1,0.0,2,1,0.71,1,2024-06-16 11:32:00-05:00,M -56,4,125,249,1,2,144,1,1.2,2,1,0.98,1,2024-06-16 12:34:00-05:00,M -38,3,100,0,0,0,179,0,-1.1,1,0,0.75,0,2024-06-16 05:33:00-05:00,M -47,4,120,205,0,0,98,1,2.0,2,1,0.79,1,2024-06-16 00:51:00-05:00,F -63,4,124,197,0,0,136,1,0.0,2,1,0.66,1,2024-06-16 19:41:00-05:00,F -61,4,105,0,1,0,110,1,1.5,1,1,0.84,1,2024-06-16 04:56:00-05:00,M -58,4,128,259,0,2,130,1,3.0,2,1,0.97,1,2024-06-16 13:00:00-05:00,M -41,2,110,235,0,0,153,0,0.0,1,0,0.95,0,2024-06-16 12:08:00-05:00,M -43,4,110,211,0,0,161,0,0.0,1,0,0.78,0,2024-06-16 13:01:00-05:00,M -74,2,120,269,0,2,121,1,0.2,1,0,0.88,0,2024-06-16 10:21:00-05:00,F -64,4,150,193,0,1,135,1,0.5,2,1,0.79,1,2024-06-16 09:33:00-05:00,M -55,2,160,292,1,0,143,1,2.0,2,1,0.64,1,2024-06-16 04:03:00-05:00,M -63,3,133,0,0,2,120,1,1.0,2,1,0.86,1,2024-06-16 07:10:00-05:00,M -70,4,130,322,0,2,109,0,2.4,2,1,0.9,1,2024-06-16 10:17:00-05:00,M -48,2,140,238,0,0,118,0,0.0,1,0,0.99,0,2024-06-16 02:48:00-05:00,M -58,3,140,179,0,0,160,0,0.0,1,0,0.82,0,2024-06-16 02:50:00-05:00,M -29,2,140,263,0,0,170,0,0.0,1,0,1.0,0,2024-06-16 03:40:00-05:00,M -38,3,138,175,0,0,173,0,0.0,1,0,1.0,0,2024-06-16 19:49:00-05:00,M -62,4,120,220,0,1,86,0,0.0,1,0,0.78,0,2024-06-16 07:12:00-05:00,M -53,2,120,0,0,0,95,0,0.0,2,0,0.55,1,2024-06-16 05:58:00-05:00,M -65,4,120,177,0,0,140,0,0.4,1,0,0.71,0,2024-06-16 15:38:00-05:00,M -44,3,108,141,0,0,175,0,0.6,2,0,0.86,0,2024-06-16 11:57:00-05:00,F -53,3,130,197,1,2,152,0,1.2,3,0,0.87,0,2024-06-16 15:36:00-05:00,M -36,2,120,166,0,0,180,0,0.0,1,0,1.0,0,2024-06-16 04:50:00-05:00,M -63,4,110,252,0,1,140,1,2.0,2,1,0.87,1,2024-06-16 08:54:00-05:00,M -49,3,131,142,0,0,127,1,1.5,2,1,0.86,1,2024-06-16 08:49:00-05:00,M -69,1,140,239,0,0,151,0,1.8,1,0,0.93,0,2024-06-16 15:17:00-05:00,F -57,4,110,0,1,1,131,1,1.4,1,1,0.73,1,2024-06-16 04:58:00-05:00,M -40,4,152,223,0,0,181,0,0.0,1,1,0.65,1,2024-06-16 19:15:00-05:00,M -50,4,150,243,0,2,128,0,2.6,2,1,0.91,1,2024-06-16 15:34:00-05:00,M -43,4,132,341,1,2,136,1,3.0,2,1,0.95,1,2024-06-16 16:40:00-05:00,F -38,4,110,190,0,0,150,1,1.0,2,1,0.76,1,2024-06-16 02:36:00-05:00,M -57,4,152,274,0,0,88,1,1.2,2,1,1.0,1,2024-06-16 17:42:00-05:00,M -68,3,180,274,1,2,150,1,1.6,2,1,0.88,1,2024-06-16 16:10:00-05:00,M -66,1,150,226,0,0,114,0,2.6,3,0,0.8,0,2024-06-16 12:15:00-05:00,F -28,2,130,132,0,2,185,0,0.0,1,0,0.9,0,2024-06-16 03:29:00-05:00,M -34,2,118,210,0,0,192,0,0.7,1,0,1.0,0,2024-06-16 12:35:00-05:00,F -43,2,142,207,0,0,138,0,0.0,1,0,0.99,0,2024-06-16 01:36:00-05:00,M -50,4,110,254,0,2,159,0,0.0,1,0,0.88,0,2024-06-16 18:07:00-05:00,F -56,3,130,167,0,0,114,0,0.0,1,0,1.0,0,2024-06-16 00:31:00-05:00,M -51,3,94,227,0,0,154,1,0.0,1,0,0.72,0,2024-06-16 13:54:00-05:00,M -65,4,135,254,0,2,127,0,2.8,2,1,0.97,1,2024-06-16 13:58:00-05:00,M -58,4,132,458,1,0,69,0,1.0,3,1,0.92,0,2024-06-16 08:17:00-05:00,M -68,3,120,211,0,2,115,0,1.5,2,0,0.61,0,2024-06-16 18:01:00-05:00,F -54,3,135,304,1,0,170,0,0.0,1,0,0.98,0,2024-06-16 12:10:00-05:00,F -54,3,160,201,0,0,163,0,0.0,1,0,0.99,0,2024-06-16 18:41:00-05:00,F -49,2,110,208,0,0,160,0,0.0,1,0,1.0,0,2024-06-16 02:43:00-05:00,F -51,3,110,190,0,0,120,0,0.0,1,0,1.0,0,2024-06-16 04:46:00-05:00,F -43,1,120,291,0,1,155,0,0.0,2,0,0.56,1,2024-06-16 01:28:00-05:00,M -64,4,130,258,1,2,130,0,0.0,2,1,0.77,1,2024-06-16 09:54:00-05:00,M -57,4,120,354,0,0,163,1,0.6,1,0,0.63,0,2024-06-16 14:54:00-05:00,F -43,2,120,266,0,0,118,0,0.0,1,0,0.99,0,2024-06-16 04:04:00-05:00,F -50,2,170,209,0,1,116,0,0.0,1,0,0.96,0,2024-06-16 03:12:00-05:00,M -52,2,128,205,1,0,184,0,0.0,1,0,1.0,0,2024-06-16 17:09:00-05:00,M -55,3,120,0,0,1,125,1,2.5,2,1,0.91,1,2024-06-16 07:47:00-05:00,M -62,3,130,231,0,0,146,0,1.8,2,0,0.62,0,2024-06-16 16:19:00-05:00,M -63,2,139,217,1,1,128,1,1.2,2,1,0.81,1,2024-06-16 08:06:00-05:00,M -61,4,150,0,0,0,117,1,2.0,2,1,0.99,1,2024-06-16 06:07:00-05:00,M -52,1,152,298,1,0,178,0,1.2,2,1,0.67,0,2024-06-16 13:42:00-05:00,M -67,4,140,219,0,1,122,1,2.0,2,1,1.0,1,2024-06-16 09:49:00-05:00,M -57,4,122,264,0,2,100,0,0.0,2,1,0.81,1,2024-06-16 09:57:00-05:00,M -43,4,132,247,1,2,143,1,0.1,2,1,0.87,1,2024-06-16 17:59:00-05:00,M -37,3,130,211,0,0,142,0,0.0,1,0,1.0,0,2024-06-16 00:10:00-05:00,F -65,1,138,282,1,2,174,0,1.4,2,1,0.77,1,2024-06-16 16:51:00-05:00,M -67,4,146,369,0,0,110,1,1.9,2,1,1.0,1,2024-06-16 09:38:00-05:00,M -57,3,128,229,0,2,150,0,0.4,2,1,0.82,1,2024-06-16 16:34:00-05:00,M -52,4,112,342,0,1,96,1,1.0,2,1,0.72,1,2024-06-16 00:59:00-05:00,M -44,4,112,290,0,2,153,0,0.0,1,1,0.87,1,2024-06-16 14:29:00-05:00,M -37,4,130,315,0,0,158,0,0.0,1,0,0.76,0,2024-06-16 03:51:00-05:00,M -60,4,140,281,0,1,118,1,1.5,2,1,0.98,1,2024-06-16 07:33:00-05:00,M -40,4,125,0,1,0,165,0,0.0,2,1,0.98,1,2024-06-16 07:38:00-05:00,M -57,4,120,354,0,0,163,1,0.6,1,0,0.63,0,2024-06-16 11:28:00-05:00,F -59,4,138,271,0,2,182,0,0.0,1,0,0.89,0,2024-06-16 18:26:00-05:00,M -60,4,130,206,0,2,132,1,2.4,2,1,0.99,1,2024-06-16 15:11:00-05:00,M -67,3,152,277,0,0,172,0,0.0,1,0,0.96,0,2024-06-16 12:33:00-05:00,F -45,2,128,308,0,2,170,0,0.0,1,0,0.99,0,2024-06-16 12:54:00-05:00,M -41,4,150,171,0,0,128,1,1.5,2,1,0.97,0,2024-06-16 09:27:00-05:00,M -45,4,142,309,0,2,147,1,0.0,2,1,0.71,1,2024-06-16 18:12:00-05:00,M -56,4,130,283,1,2,103,1,1.6,3,1,0.99,1,2024-06-16 12:16:00-05:00,M -47,4,112,204,0,0,143,0,0.1,1,0,0.93,0,2024-06-16 18:33:00-05:00,M -60,4,130,253,0,0,144,1,1.4,1,1,0.88,1,2024-06-16 15:41:00-05:00,M -66,4,160,228,0,2,138,0,2.3,1,0,0.74,0,2024-06-16 19:18:00-05:00,M -58,3,132,224,0,2,173,0,3.2,1,1,0.8,1,2024-06-16 15:10:00-05:00,M -59,3,131,0,0,0,128,1,2.0,3,1,0.76,1,2024-06-16 07:45:00-05:00,M -66,4,120,302,0,2,151,0,0.4,2,0,0.77,0,2024-06-16 11:53:00-05:00,M -65,4,120,177,0,0,140,0,0.4,1,0,0.71,0,2024-06-16 10:22:00-05:00,M -53,2,130,0,0,1,120,0,0.7,3,1,0.59,0,2024-06-16 05:08:00-05:00,M -58,4,115,0,1,0,138,0,0.5,1,1,0.71,1,2024-06-16 05:09:00-05:00,M -66,4,112,261,0,0,140,0,1.5,1,1,0.52,1,2024-06-16 09:09:00-05:00,M -45,4,138,236,0,2,152,1,0.2,2,0,0.93,0,2024-06-16 13:05:00-05:00,F -67,3,115,564,0,2,160,0,1.6,2,0,0.86,0,2024-06-16 10:18:00-05:00,F -47,3,138,257,0,2,156,0,0.0,1,0,0.86,0,2024-06-16 16:13:00-05:00,M -44,2,120,220,0,0,170,0,0.0,1,0,1.0,0,2024-06-16 13:56:00-05:00,M -60,3,180,0,0,1,140,1,1.5,2,1,0.79,0,2024-06-16 07:04:00-05:00,M -58,4,125,300,0,2,171,0,0.0,1,1,0.78,1,2024-06-16 13:38:00-05:00,M -61,3,150,243,1,0,137,1,1.0,2,0,0.65,0,2024-06-16 14:33:00-05:00,M -52,2,160,196,0,0,165,0,0.0,1,0,0.97,0,2024-06-16 01:23:00-05:00,M -59,4,125,0,1,0,119,1,0.9,1,1,0.8,1,2024-06-16 05:28:00-05:00,M -53,4,130,264,0,2,143,0,0.4,2,0,0.85,0,2024-06-16 16:08:00-05:00,F -45,3,130,236,0,0,144,0,0.1,1,0,0.98,0,2024-06-16 08:46:00-05:00,M -42,3,120,228,0,0,152,1,1.5,2,0,0.53,0,2024-06-16 02:09:00-05:00,M -58,2,120,284,0,2,160,0,1.8,2,1,0.82,1,2024-06-16 14:39:00-05:00,M -59,4,110,0,1,0,94,0,0.0,2,1,0.98,1,2024-06-16 06:49:00-05:00,M -60,4,117,230,1,0,160,1,1.4,1,1,0.9,1,2024-06-16 15:18:00-05:00,M -54,4,130,202,1,0,112,1,2.0,2,1,0.93,1,2024-06-16 08:59:00-05:00,M -50,4,144,349,0,2,120,1,1.0,1,1,0.7,1,2024-06-16 08:43:00-05:00,M -55,4,116,186,1,1,102,0,0.0,2,1,0.96,1,2024-06-16 08:53:00-05:00,M -53,4,126,0,0,0,106,0,0.0,2,1,0.95,1,2024-06-16 07:51:00-05:00,M -39,3,140,321,0,2,182,0,0.0,1,0,1.0,0,2024-06-16 14:18:00-05:00,M -45,4,115,260,0,2,185,0,0.0,1,0,0.81,0,2024-06-16 14:09:00-05:00,M -32,4,118,529,0,0,130,0,0.0,2,1,0.79,1,2024-06-16 01:16:00-05:00,M -51,3,100,222,0,0,143,1,1.2,2,0,0.84,0,2024-06-16 12:03:00-05:00,M -54,4,140,166,0,0,118,1,0.0,2,1,0.9,1,2024-06-16 02:18:00-05:00,M -51,4,120,0,1,0,127,1,1.5,1,1,0.61,1,2024-06-16 05:06:00-05:00,F -65,4,150,225,0,2,114,0,1.0,2,1,0.81,1,2024-06-16 11:06:00-05:00,F -54,4,140,239,0,0,160,0,1.2,1,0,0.8,0,2024-06-16 15:04:00-05:00,M -46,4,120,277,0,0,125,1,1.0,2,1,0.87,1,2024-06-16 01:03:00-05:00,M -50,4,140,341,0,1,125,1,2.5,2,1,0.98,1,2024-06-16 02:21:00-05:00,M -59,4,125,222,0,0,135,1,2.5,3,1,0.92,1,2024-06-16 08:55:00-05:00,M -52,1,152,298,1,0,178,0,1.2,2,1,0.67,0,2024-06-16 17:17:00-05:00,M -55,2,135,250,0,2,161,0,1.4,2,0,0.95,0,2024-06-16 13:32:00-05:00,F -43,4,150,247,0,0,130,1,2.0,2,1,0.96,1,2024-06-16 02:19:00-05:00,M -55,4,120,226,0,2,127,1,1.7,3,1,0.97,1,2024-06-16 09:59:00-05:00,M +61,4,160,0,1,1,145,0,1.0,2,1,0.94,1,2024-06-16 06:30:00,M +54,3,120,237,0,0,150,1,1.5,2,0,0.54,1,2024-06-16 04:07:00,M +55,2,122,320,0,0,155,0,0.0,1,0,0.97,0,2024-06-16 04:20:00,F +56,4,155,342,1,0,150,1,3.0,2,1,0.97,1,2024-06-16 02:35:00,M +60,4,150,258,0,2,157,0,2.6,2,1,0.85,1,2024-06-16 16:24:00,F +56,4,125,0,1,0,103,1,1.0,2,1,0.99,1,2024-06-16 06:53:00,M +43,3,130,315,0,0,162,0,1.9,1,0,0.97,0,2024-06-16 13:22:00,M +54,4,150,365,0,1,134,0,1.0,1,0,0.61,0,2024-06-16 00:58:00,M +46,3,142,177,0,2,160,1,1.4,3,0,0.82,0,2024-06-16 12:32:00,F +52,3,172,199,1,0,162,0,0.5,1,0,0.99,0,2024-06-16 15:01:00,M +60,3,120,178,1,0,96,0,0.0,1,0,0.98,0,2024-06-16 18:51:00,F +50,3,140,233,0,0,163,0,0.6,2,1,0.83,1,2024-06-16 12:39:00,M +64,3,140,313,0,0,133,0,0.2,1,0,0.94,0,2024-06-16 11:55:00,F +68,3,120,211,0,2,115,0,1.5,2,0,0.61,0,2024-06-16 13:10:00,F +50,4,150,243,0,2,128,0,2.6,2,1,0.91,1,2024-06-16 11:46:00,M +57,2,140,265,0,1,145,1,1.0,2,1,0.71,1,2024-06-16 01:10:00,M +61,4,120,282,0,1,135,1,4.0,3,1,0.91,1,2024-06-16 08:42:00,M +60,4,140,293,0,2,170,0,1.2,2,1,0.82,1,2024-06-16 17:25:00,M +54,2,192,283,0,2,195,0,0.0,1,0,0.93,1,2024-06-16 12:41:00,M +65,4,170,263,1,0,112,1,2.0,2,1,0.92,1,2024-06-16 01:26:00,M +70,3,160,269,0,0,112,1,2.9,2,1,0.93,1,2024-06-16 17:37:00,M +47,4,110,275,0,2,118,1,1.0,2,1,0.95,1,2024-06-16 18:54:00,M +51,3,135,160,0,0,150,0,2.0,2,1,0.58,1,2024-06-16 04:37:00,M +56,4,115,0,1,1,82,0,-1.0,1,1,0.67,1,2024-06-16 05:32:00,M +54,4,125,216,0,0,140,0,0.0,2,1,0.79,1,2024-06-16 03:30:00,M +52,4,160,331,0,0,94,1,2.5,2,1,0.99,1,2024-06-16 02:20:00,M +56,1,120,193,0,2,162,0,1.9,2,0,0.86,0,2024-06-16 18:22:00,M +58,4,128,259,0,2,130,1,3.0,2,1,0.97,1,2024-06-16 18:13:00,M +59,4,174,249,0,0,143,1,0.0,2,1,0.88,1,2024-06-16 11:15:00,F +74,1,145,216,1,0,116,1,1.8,2,1,0.69,1,2024-06-16 09:14:00,M +43,4,140,0,0,1,140,1,0.5,1,1,0.57,1,2024-06-16 05:52:00,M +60,3,140,185,0,2,155,0,3.0,2,1,0.9,1,2024-06-16 14:25:00,M +54,3,133,203,0,1,137,0,0.2,1,0,0.98,0,2024-06-16 09:21:00,M +54,4,130,0,1,0,110,1,3.0,2,1,0.94,1,2024-06-16 06:55:00,M +48,4,160,193,0,0,102,1,3.0,2,1,0.97,1,2024-06-16 04:00:00,M +66,3,120,0,0,1,120,0,-0.5,1,0,0.56,0,2024-06-16 07:02:00,M +44,2,150,288,0,0,150,1,3.0,2,1,0.67,1,2024-06-16 00:23:00,M +52,4,130,0,1,0,120,0,0.0,2,1,1.0,1,2024-06-16 05:44:00,M +41,4,110,289,0,0,170,0,0.0,2,1,0.81,1,2024-06-16 00:49:00,M +64,4,110,0,1,0,114,1,1.3,3,1,0.93,1,2024-06-16 05:42:00,M +35,2,110,257,0,0,140,0,0.0,2,1,0.52,1,2024-06-16 03:28:00,M +45,2,128,308,0,2,170,0,0.0,1,0,0.99,0,2024-06-16 17:15:00,M +38,4,105,0,1,0,166,0,2.8,1,1,0.56,1,2024-06-16 05:23:00,F +34,2,150,214,0,1,168,0,0.0,1,0,0.98,0,2024-06-16 01:47:00,M +42,4,136,315,0,0,125,1,1.8,2,1,0.99,1,2024-06-16 11:07:00,M +55,4,180,327,0,1,117,1,3.4,2,1,0.91,1,2024-06-16 12:07:00,F +57,4,130,311,1,1,148,1,2.0,2,1,0.94,1,2024-06-16 07:59:00,M +54,4,136,220,0,0,140,1,3.0,2,1,0.99,1,2024-06-16 08:10:00,M +58,4,130,0,0,1,100,1,1.0,2,1,0.91,1,2024-06-16 05:54:00,M +62,4,120,267,0,0,99,1,1.8,2,1,0.98,1,2024-06-16 15:59:00,M +63,4,170,177,0,0,84,1,2.5,3,1,0.99,1,2024-06-16 07:13:00,M +55,3,133,185,0,1,136,0,0.2,1,0,0.99,0,2024-06-16 09:58:00,M +47,4,160,0,0,0,124,1,0.0,2,1,0.97,1,2024-06-16 06:02:00,M +52,1,118,186,0,2,190,0,0.0,2,0,0.87,0,2024-06-16 16:39:00,M +55,4,158,217,0,0,110,1,2.5,2,1,0.99,1,2024-06-16 09:30:00,M +60,4,135,0,0,0,63,1,0.5,1,1,0.65,1,2024-06-16 06:11:00,M +57,4,128,303,0,2,159,0,0.0,1,0,0.73,0,2024-06-16 10:31:00,F +51,3,120,295,0,2,157,0,0.6,1,0,0.99,0,2024-06-16 19:00:00,F +47,3,138,257,0,2,156,0,0.0,1,0,0.86,0,2024-06-16 14:15:00,M +38,4,150,0,1,0,120,1,0.7,2,1,0.98,1,2024-06-16 06:34:00,M +52,4,130,180,0,0,140,1,1.5,2,1,0.88,0,2024-06-16 00:54:00,F +47,3,130,235,0,0,145,0,2.0,2,0,0.6,0,2024-06-16 04:30:00,F +54,3,120,217,0,0,137,0,0.0,1,0,1.0,0,2024-06-16 03:38:00,M +58,3,105,240,0,2,154,1,0.6,2,0,0.79,0,2024-06-16 17:11:00,M +31,4,120,270,0,0,153,1,1.5,2,1,0.78,1,2024-06-16 00:56:00,M +52,3,122,0,0,0,110,1,2.0,3,1,0.84,1,2024-06-16 07:31:00,M +50,2,120,244,0,0,162,0,1.1,1,0,0.98,0,2024-06-16 11:56:00,F +52,4,128,204,1,0,156,1,1.0,2,1,0.86,1,2024-06-16 19:13:00,M +67,4,100,299,0,2,125,1,0.9,2,1,0.91,1,2024-06-16 13:09:00,M +63,4,150,407,0,2,154,0,4.0,2,1,0.78,1,2024-06-16 10:26:00,F +56,2,120,236,0,0,178,0,0.8,1,0,0.96,0,2024-06-16 14:52:00,M +51,3,94,227,0,0,154,1,0.0,1,0,0.72,0,2024-06-16 16:58:00,M +64,4,143,306,1,1,115,1,1.8,2,1,0.99,1,2024-06-16 08:52:00,M +47,3,155,0,0,0,118,1,1.0,2,1,0.93,1,2024-06-16 06:17:00,M +51,4,140,299,0,0,173,1,1.6,1,1,0.82,1,2024-06-16 13:40:00,M +58,1,150,283,1,2,162,0,1.0,1,0,0.8,0,2024-06-16 15:08:00,F +57,4,110,201,0,0,126,1,1.5,2,0,0.64,0,2024-06-16 18:57:00,M +54,4,110,239,0,0,126,1,2.8,2,1,0.9,1,2024-06-16 16:54:00,M +53,4,120,246,0,0,116,1,0.0,2,1,0.94,1,2024-06-16 03:19:00,M +55,4,122,223,1,1,100,0,0.0,2,1,0.96,1,2024-06-16 10:14:00,M +38,2,140,297,0,0,150,0,0.0,1,0,0.99,0,2024-06-16 01:53:00,M +68,3,118,277,0,0,151,0,1.0,1,0,0.87,0,2024-06-16 17:26:00,M +39,2,190,241,0,0,106,0,0.0,1,0,0.95,0,2024-06-16 01:49:00,M +51,4,130,179,0,0,100,0,0.0,1,0,0.93,0,2024-06-16 01:41:00,M +49,3,140,187,0,0,172,0,0.0,1,0,0.99,0,2024-06-16 02:54:00,M +54,2,120,238,0,0,154,0,0.0,1,0,0.96,0,2024-06-16 01:21:00,M +59,2,140,221,0,0,164,1,0.0,1,0,0.95,0,2024-06-16 11:47:00,M +37,3,118,240,0,2,165,0,1.0,2,0,0.78,0,2024-06-16 09:48:00,M +56,3,155,0,0,1,99,0,0.0,2,1,0.7,1,2024-06-16 06:03:00,M +62,3,160,0,0,0,72,1,0.0,2,1,0.95,1,2024-06-16 06:01:00,M +38,2,120,275,0,0,129,0,0.0,1,0,0.98,0,2024-06-16 03:53:00,F +45,2,180,295,0,0,180,0,0.0,1,0,0.97,0,2024-06-16 04:35:00,F +67,4,160,286,0,2,108,1,1.5,2,1,1.0,1,2024-06-16 14:48:00,M +44,4,130,290,0,0,100,1,2.0,2,1,0.97,1,2024-06-16 02:38:00,M +48,2,133,308,0,1,156,0,2.0,1,0,0.94,0,2024-06-16 04:49:00,F +54,2,160,305,0,0,175,0,0.0,1,0,0.97,0,2024-06-16 04:26:00,M +54,2,120,230,1,0,140,0,0.0,1,0,1.0,0,2024-06-16 02:08:00,F +54,2,192,283,0,2,195,0,0.0,1,0,0.93,1,2024-06-16 17:55:00,M +61,4,130,0,1,0,77,0,2.5,2,1,0.98,1,2024-06-16 05:19:00,M +49,1,130,0,0,1,145,0,3.0,2,1,0.63,1,2024-06-16 08:13:00,M +44,2,130,219,0,2,188,0,0.0,1,0,0.99,0,2024-06-16 14:28:00,M +59,1,160,273,0,2,125,0,0.0,1,1,0.67,1,2024-06-16 10:54:00,M +54,4,130,0,0,1,117,1,1.4,2,1,0.96,1,2024-06-16 07:27:00,M +55,3,136,245,1,1,131,1,1.2,2,1,0.83,1,2024-06-16 08:26:00,M +47,3,140,193,0,0,145,1,1.0,2,1,0.66,1,2024-06-16 04:25:00,M +42,2,150,268,0,0,136,0,0.0,1,0,1.0,0,2024-06-16 03:18:00,M +67,4,106,223,0,0,142,0,0.3,1,0,0.92,0,2024-06-16 19:03:00,F +54,3,110,214,0,0,158,0,1.6,2,0,0.94,0,2024-06-16 17:29:00,F +43,4,120,175,0,0,120,1,1.0,2,1,0.96,1,2024-06-16 00:44:00,M +50,4,120,0,0,1,156,1,0.0,1,1,0.73,1,2024-06-16 06:20:00,M +49,2,134,271,0,0,162,0,0.0,2,0,0.81,0,2024-06-16 18:45:00,F +64,4,134,273,0,0,102,1,4.0,3,1,0.98,1,2024-06-16 09:20:00,M +51,3,140,308,0,2,142,0,1.5,1,0,0.94,0,2024-06-16 14:21:00,F +67,4,125,254,1,0,163,0,0.2,2,1,0.9,1,2024-06-16 15:58:00,M +64,4,120,0,1,1,106,0,2.0,2,1,0.92,1,2024-06-16 07:18:00,M +45,4,142,309,0,2,147,1,0.0,2,1,0.71,1,2024-06-16 11:32:00,M +56,4,125,249,1,2,144,1,1.2,2,1,0.98,1,2024-06-16 12:34:00,M +38,3,100,0,0,0,179,0,-1.1,1,0,0.75,0,2024-06-16 05:33:00,M +47,4,120,205,0,0,98,1,2.0,2,1,0.79,1,2024-06-16 00:51:00,F +63,4,124,197,0,0,136,1,0.0,2,1,0.66,1,2024-06-16 19:41:00,F +61,4,105,0,1,0,110,1,1.5,1,1,0.84,1,2024-06-16 04:56:00,M +58,4,128,259,0,2,130,1,3.0,2,1,0.97,1,2024-06-16 13:00:00,M +41,2,110,235,0,0,153,0,0.0,1,0,0.95,0,2024-06-16 12:08:00,M +43,4,110,211,0,0,161,0,0.0,1,0,0.78,0,2024-06-16 13:01:00,M +74,2,120,269,0,2,121,1,0.2,1,0,0.88,0,2024-06-16 10:21:00,F +64,4,150,193,0,1,135,1,0.5,2,1,0.79,1,2024-06-16 09:33:00,M +55,2,160,292,1,0,143,1,2.0,2,1,0.64,1,2024-06-16 04:03:00,M +63,3,133,0,0,2,120,1,1.0,2,1,0.86,1,2024-06-16 07:10:00,M +70,4,130,322,0,2,109,0,2.4,2,1,0.9,1,2024-06-16 10:17:00,M +48,2,140,238,0,0,118,0,0.0,1,0,0.99,0,2024-06-16 02:48:00,M +58,3,140,179,0,0,160,0,0.0,1,0,0.82,0,2024-06-16 02:50:00,M +29,2,140,263,0,0,170,0,0.0,1,0,1.0,0,2024-06-16 03:40:00,M +38,3,138,175,0,0,173,0,0.0,1,0,1.0,0,2024-06-16 19:49:00,M +62,4,120,220,0,1,86,0,0.0,1,0,0.78,0,2024-06-16 07:12:00,M +53,2,120,0,0,0,95,0,0.0,2,0,0.55,1,2024-06-16 05:58:00,M +65,4,120,177,0,0,140,0,0.4,1,0,0.71,0,2024-06-16 15:38:00,M +44,3,108,141,0,0,175,0,0.6,2,0,0.86,0,2024-06-16 11:57:00,F +53,3,130,197,1,2,152,0,1.2,3,0,0.87,0,2024-06-16 15:36:00,M +36,2,120,166,0,0,180,0,0.0,1,0,1.0,0,2024-06-16 04:50:00,M +63,4,110,252,0,1,140,1,2.0,2,1,0.87,1,2024-06-16 08:54:00,M +49,3,131,142,0,0,127,1,1.5,2,1,0.86,1,2024-06-16 08:49:00,M +69,1,140,239,0,0,151,0,1.8,1,0,0.93,0,2024-06-16 15:17:00,F +57,4,110,0,1,1,131,1,1.4,1,1,0.73,1,2024-06-16 04:58:00,M +40,4,152,223,0,0,181,0,0.0,1,1,0.65,1,2024-06-16 19:15:00,M +50,4,150,243,0,2,128,0,2.6,2,1,0.91,1,2024-06-16 15:34:00,M +43,4,132,341,1,2,136,1,3.0,2,1,0.95,1,2024-06-16 16:40:00,F +38,4,110,190,0,0,150,1,1.0,2,1,0.76,1,2024-06-16 02:36:00,M +57,4,152,274,0,0,88,1,1.2,2,1,1.0,1,2024-06-16 17:42:00,M +68,3,180,274,1,2,150,1,1.6,2,1,0.88,1,2024-06-16 16:10:00,M +66,1,150,226,0,0,114,0,2.6,3,0,0.8,0,2024-06-16 12:15:00,F +28,2,130,132,0,2,185,0,0.0,1,0,0.9,0,2024-06-16 03:29:00,M +34,2,118,210,0,0,192,0,0.7,1,0,1.0,0,2024-06-16 12:35:00,F +43,2,142,207,0,0,138,0,0.0,1,0,0.99,0,2024-06-16 01:36:00,M +50,4,110,254,0,2,159,0,0.0,1,0,0.88,0,2024-06-16 18:07:00,F +56,3,130,167,0,0,114,0,0.0,1,0,1.0,0,2024-06-16 00:31:00,M +51,3,94,227,0,0,154,1,0.0,1,0,0.72,0,2024-06-16 13:54:00,M +65,4,135,254,0,2,127,0,2.8,2,1,0.97,1,2024-06-16 13:58:00,M +58,4,132,458,1,0,69,0,1.0,3,1,0.92,0,2024-06-16 08:17:00,M +68,3,120,211,0,2,115,0,1.5,2,0,0.61,0,2024-06-16 18:01:00,F +54,3,135,304,1,0,170,0,0.0,1,0,0.98,0,2024-06-16 12:10:00,F +54,3,160,201,0,0,163,0,0.0,1,0,0.99,0,2024-06-16 18:41:00,F +49,2,110,208,0,0,160,0,0.0,1,0,1.0,0,2024-06-16 02:43:00,F +51,3,110,190,0,0,120,0,0.0,1,0,1.0,0,2024-06-16 04:46:00,F +43,1,120,291,0,1,155,0,0.0,2,0,0.56,1,2024-06-16 01:28:00,M +64,4,130,258,1,2,130,0,0.0,2,1,0.77,1,2024-06-16 09:54:00,M +57,4,120,354,0,0,163,1,0.6,1,0,0.63,0,2024-06-16 14:54:00,F +43,2,120,266,0,0,118,0,0.0,1,0,0.99,0,2024-06-16 04:04:00,F +50,2,170,209,0,1,116,0,0.0,1,0,0.96,0,2024-06-16 03:12:00,M +52,2,128,205,1,0,184,0,0.0,1,0,1.0,0,2024-06-16 17:09:00,M +55,3,120,0,0,1,125,1,2.5,2,1,0.91,1,2024-06-16 07:47:00,M +62,3,130,231,0,0,146,0,1.8,2,0,0.62,0,2024-06-16 16:19:00,M +63,2,139,217,1,1,128,1,1.2,2,1,0.81,1,2024-06-16 08:06:00,M +61,4,150,0,0,0,117,1,2.0,2,1,0.99,1,2024-06-16 06:07:00,M +52,1,152,298,1,0,178,0,1.2,2,1,0.67,0,2024-06-16 13:42:00,M +67,4,140,219,0,1,122,1,2.0,2,1,1.0,1,2024-06-16 09:49:00,M +57,4,122,264,0,2,100,0,0.0,2,1,0.81,1,2024-06-16 09:57:00,M +43,4,132,247,1,2,143,1,0.1,2,1,0.87,1,2024-06-16 17:59:00,M +37,3,130,211,0,0,142,0,0.0,1,0,1.0,0,2024-06-16 00:10:00,F +65,1,138,282,1,2,174,0,1.4,2,1,0.77,1,2024-06-16 16:51:00,M +67,4,146,369,0,0,110,1,1.9,2,1,1.0,1,2024-06-16 09:38:00,M +57,3,128,229,0,2,150,0,0.4,2,1,0.82,1,2024-06-16 16:34:00,M +52,4,112,342,0,1,96,1,1.0,2,1,0.72,1,2024-06-16 00:59:00,M +44,4,112,290,0,2,153,0,0.0,1,1,0.87,1,2024-06-16 14:29:00,M +37,4,130,315,0,0,158,0,0.0,1,0,0.76,0,2024-06-16 03:51:00,M +60,4,140,281,0,1,118,1,1.5,2,1,0.98,1,2024-06-16 07:33:00,M +40,4,125,0,1,0,165,0,0.0,2,1,0.98,1,2024-06-16 07:38:00,M +57,4,120,354,0,0,163,1,0.6,1,0,0.63,0,2024-06-16 11:28:00,F +59,4,138,271,0,2,182,0,0.0,1,0,0.89,0,2024-06-16 18:26:00,M +60,4,130,206,0,2,132,1,2.4,2,1,0.99,1,2024-06-16 15:11:00,M +67,3,152,277,0,0,172,0,0.0,1,0,0.96,0,2024-06-16 12:33:00,F +45,2,128,308,0,2,170,0,0.0,1,0,0.99,0,2024-06-16 12:54:00,M +41,4,150,171,0,0,128,1,1.5,2,1,0.97,0,2024-06-16 09:27:00,M +45,4,142,309,0,2,147,1,0.0,2,1,0.71,1,2024-06-16 18:12:00,M +56,4,130,283,1,2,103,1,1.6,3,1,0.99,1,2024-06-16 12:16:00,M +47,4,112,204,0,0,143,0,0.1,1,0,0.93,0,2024-06-16 18:33:00,M +60,4,130,253,0,0,144,1,1.4,1,1,0.88,1,2024-06-16 15:41:00,M +66,4,160,228,0,2,138,0,2.3,1,0,0.74,0,2024-06-16 19:18:00,M +58,3,132,224,0,2,173,0,3.2,1,1,0.8,1,2024-06-16 15:10:00,M +59,3,131,0,0,0,128,1,2.0,3,1,0.76,1,2024-06-16 07:45:00,M +66,4,120,302,0,2,151,0,0.4,2,0,0.77,0,2024-06-16 11:53:00,M +65,4,120,177,0,0,140,0,0.4,1,0,0.71,0,2024-06-16 10:22:00,M +53,2,130,0,0,1,120,0,0.7,3,1,0.59,0,2024-06-16 05:08:00,M +58,4,115,0,1,0,138,0,0.5,1,1,0.71,1,2024-06-16 05:09:00,M +66,4,112,261,0,0,140,0,1.5,1,1,0.52,1,2024-06-16 09:09:00,M +45,4,138,236,0,2,152,1,0.2,2,0,0.93,0,2024-06-16 13:05:00,F +67,3,115,564,0,2,160,0,1.6,2,0,0.86,0,2024-06-16 10:18:00,F +47,3,138,257,0,2,156,0,0.0,1,0,0.86,0,2024-06-16 16:13:00,M +44,2,120,220,0,0,170,0,0.0,1,0,1.0,0,2024-06-16 13:56:00,M +60,3,180,0,0,1,140,1,1.5,2,1,0.79,0,2024-06-16 07:04:00,M +58,4,125,300,0,2,171,0,0.0,1,1,0.78,1,2024-06-16 13:38:00,M +61,3,150,243,1,0,137,1,1.0,2,0,0.65,0,2024-06-16 14:33:00,M +52,2,160,196,0,0,165,0,0.0,1,0,0.97,0,2024-06-16 01:23:00,M +59,4,125,0,1,0,119,1,0.9,1,1,0.8,1,2024-06-16 05:28:00,M +53,4,130,264,0,2,143,0,0.4,2,0,0.85,0,2024-06-16 16:08:00,F +45,3,130,236,0,0,144,0,0.1,1,0,0.98,0,2024-06-16 08:46:00,M +42,3,120,228,0,0,152,1,1.5,2,0,0.53,0,2024-06-16 02:09:00,M +58,2,120,284,0,2,160,0,1.8,2,1,0.82,1,2024-06-16 14:39:00,M +59,4,110,0,1,0,94,0,0.0,2,1,0.98,1,2024-06-16 06:49:00,M +60,4,117,230,1,0,160,1,1.4,1,1,0.9,1,2024-06-16 15:18:00,M +54,4,130,202,1,0,112,1,2.0,2,1,0.93,1,2024-06-16 08:59:00,M +50,4,144,349,0,2,120,1,1.0,1,1,0.7,1,2024-06-16 08:43:00,M +55,4,116,186,1,1,102,0,0.0,2,1,0.96,1,2024-06-16 08:53:00,M +53,4,126,0,0,0,106,0,0.0,2,1,0.95,1,2024-06-16 07:51:00,M +39,3,140,321,0,2,182,0,0.0,1,0,1.0,0,2024-06-16 14:18:00,M +45,4,115,260,0,2,185,0,0.0,1,0,0.81,0,2024-06-16 14:09:00,M +32,4,118,529,0,0,130,0,0.0,2,1,0.79,1,2024-06-16 01:16:00,M +51,3,100,222,0,0,143,1,1.2,2,0,0.84,0,2024-06-16 12:03:00,M +54,4,140,166,0,0,118,1,0.0,2,1,0.9,1,2024-06-16 02:18:00,M +51,4,120,0,1,0,127,1,1.5,1,1,0.61,1,2024-06-16 05:06:00,F +65,4,150,225,0,2,114,0,1.0,2,1,0.81,1,2024-06-16 11:06:00,F +54,4,140,239,0,0,160,0,1.2,1,0,0.8,0,2024-06-16 15:04:00,M +46,4,120,277,0,0,125,1,1.0,2,1,0.87,1,2024-06-16 01:03:00,M +50,4,140,341,0,1,125,1,2.5,2,1,0.98,1,2024-06-16 02:21:00,M +59,4,125,222,0,0,135,1,2.5,3,1,0.92,1,2024-06-16 08:55:00,M +52,1,152,298,1,0,178,0,1.2,2,1,0.67,0,2024-06-16 17:17:00,M +55,2,135,250,0,2,161,0,1.4,2,0,0.95,0,2024-06-16 13:32:00,F +43,4,150,247,0,0,130,1,2.0,2,1,0.96,1,2024-06-16 02:19:00,M +55,4,120,226,0,2,127,1,1.7,3,1,0.97,1,2024-06-16 09:59:00,M