From 4232043717a2dca37fa9388ae97fbc7caea4e17f Mon Sep 17 00:00:00 2001 From: Mark Date: Wed, 13 Oct 2021 02:14:30 -0400 Subject: [PATCH] Chore: rename mmlspark to synapseml --- .chglog/CHANGELOG.tpl.md | 2 +- .chglog/config.yml | 2 +- .github/ISSUE_TEMPLATE/bug_report.md | 4 +- .github/config.yml | 2 +- CONTRIBUTING.md | 8 +- README.md | 94 +++--- build.sbt | 34 +-- .../ml}/cognitive/AzureSearchWriter.py | 4 +- .../ml}/cognitive/BingImageSearch.py | 8 +- .../ml}/cognitive/__init__.py | 0 .../ml}/cognitive/AnomalyDetection.scala | 12 +- .../cognitive/AnomalyDetectorSchemas.scala | 4 +- .../synapse/ml}/cognitive/AudioStreams.scala | 2 +- .../synapse/ml}/cognitive/AzureSearch.scala | 17 +- .../ml}/cognitive/AzureSearchAPI.scala | 6 +- .../ml}/cognitive/AzureSearchSchemas.scala | 4 +- .../ml}/cognitive/BingImageSearch.scala | 8 +- .../ml}/cognitive/CognitiveServiceBase.scala | 16 +- .../cognitive/CognitiveServiceSchemas.scala | 2 +- .../ml}/cognitive/ComputerVision.scala | 14 +- .../ml}/cognitive/ComputerVisionSchemas.scala | 4 +- .../ml}/cognitive/DocumentTranslator.scala | 14 +- .../synapse/ml}/cognitive/Face.scala | 4 +- .../synapse/ml}/cognitive/FaceSchemas.scala | 4 +- .../ml}/cognitive/FormRecognizer.scala | 6 +- .../ml}/cognitive/FormRecognizerSchemas.scala | 4 +- .../ml}/cognitive/ImageSearchSchemas.scala | 4 +- .../synapse/ml}/cognitive/OCRSchemas.scala | 4 +- .../synapse/ml}/cognitive/RESTHelpers.scala | 2 +- .../synapse/ml}/cognitive/SpeechAPI.scala | 4 +- .../synapse/ml}/cognitive/SpeechSchemas.scala | 4 +- .../synapse/ml}/cognitive/SpeechToText.scala | 4 +- .../ml}/cognitive/SpeechToTextSDK.scala | 24 +- .../synapse/ml}/cognitive/TextAnalytics.scala | 12 +- .../ml}/cognitive/TextAnalyticsSchemas.scala | 4 +- .../cognitive/TextAnalyticsSchemasV2.scala | 4 +- .../ml}/cognitive/TextTranslator.scala | 21 +- .../ml}/cognitive/TranslatorSchemas.scala | 4 +- .../cognitive/__init__.py | 0 .../cognitive/test_simple.py | 4 +- .../split1/AnamolyDetectionSuite.scala | 
12 +- .../split1/ComputerVisionSuite.scala | 12 +- .../ml}/cognitive/split1/FaceAPI.scala | 14 +- .../ml}/cognitive/split1/FaceSuite.scala | 11 +- .../split1/FormRecognizerSuite.scala | 24 +- .../cognitive/split1/ImageSearchSuite.scala | 8 +- .../cognitive/split1/TextAnalyticsSuite.scala | 12 +- .../cognitive/split1/TranslatorSuite.scala | 10 +- .../cognitive/split2/SearchWriterSuite.scala | 20 +- .../cognitive/split2/SpeechToTextSuite.scala | 12 +- .../split3/SpeechToTextSDKSuite.scala | 24 +- .../core/utils/utils/ModelEqualitySuite.scala | 18 +- .../utils/utils/SlicerFunctionsSuite.scala | 5 +- core/src/main/python/mmlspark/doc/scala.rst | 4 - .../python/{mmlspark => synapse}/README.txt | 4 +- .../python/{mmlspark => synapse}/__init__.py | 0 .../ml}/automl/BestModel.py | 2 +- .../ml}/automl/HyperparamBuilder.py | 12 +- .../ml}/automl/TuneHyperparametersModel.py | 2 +- .../ml}/automl/__init__.py | 0 .../{mmlspark => synapse/ml}/core/__init__.py | 0 .../ml}/core/schema/TypeConversionUtils.py | 0 .../ml}/core/schema/Utils.py | 6 +- .../ml}/core/schema/__init__.py | 0 .../ml}/core/serialize/__init__.py | 0 .../ml}/core/serialize/java_params_patch.py | 2 +- .../ml}/core/spark/FluentAPI.py | 0 .../ml}/core/spark/__init__.py | 0 .../ml}/cyber/__init__.py | 0 .../ml}/cyber/anomaly/__init__.py | 0 .../cyber/anomaly/collaborative_filtering.py | 6 +- .../ml}/cyber/anomaly/complement_access.py | 2 +- .../{mmlspark => synapse/ml}/cyber/dataset.py | 2 +- .../ml}/cyber/feature/__init__.py | 0 .../ml}/cyber/feature/indexers.py | 2 +- .../ml}/cyber/feature/scalers.py | 2 +- .../ml}/cyber/utils/__init__.py | 0 .../ml}/cyber/utils/spark_utils.py | 0 .../{mmlspark => synapse/ml}/doc/conf.py | 12 +- .../{mmlspark => synapse/ml}/doc/index.rst | 0 core/src/main/python/synapse/ml/doc/scala.rst | 4 + .../ml}/downloader/ModelDownloader.py | 6 +- .../ml}/downloader/__init__.py | 0 .../ml}/io/IOImplicits.py | 6 +- .../{mmlspark => synapse/ml}/io/__init__.py | 0 
.../ml}/io/binary/BinaryFileReader.py | 6 +- .../ml}/io/binary/__init__.py | 0 .../ml}/io/http/HTTPFunctions.py | 2 +- .../ml}/io/http/JSONOutputParser.py | 2 +- .../ml}/io/http/ServingFunctions.py | 2 +- .../ml}/io/http/SimpleHTTPTransformer.py | 2 +- .../ml}/io/http/__init__.py | 0 .../ml}/io/image/ImageUtils.py | 4 +- .../ml}/io/image/__init__.py | 0 .../ml}/io/powerbi/PowerBIWriter.py | 4 +- .../ml}/io/powerbi/__init__.py | 0 .../ml}/nn/ConditionalBallTree.py | 4 +- .../{mmlspark => synapse/ml}/nn/__init__.py | 0 .../{mmlspark => synapse/ml}/plot/__init__.py | 0 .../{mmlspark => synapse/ml}/plot/plot.py | 0 .../RankingTrainValidationSplit.py | 4 +- .../RankingTrainValidationSplitModel.py | 2 +- .../ml}/recommendation/SARModel.py | 4 +- .../ml}/recommendation/__init__.py | 12 +- .../ml}/stages/UDFTransformer.py | 8 +- .../ml}/stages/__init__.py | 0 .../ml}/automl/DefaultHyperparams.scala | 2 +- .../synapse/ml}/automl/EvaluationUtils.scala | 13 +- .../synapse/ml}/automl/FindBestModel.scala | 12 +- .../synapse/ml}/automl/FindBestModel.txt | 0 .../ml}/automl/HyperparamBuilder.scala | 2 +- .../synapse/ml}/automl/ParamSpace.scala | 2 +- .../ml}/automl/TuneHyperparameters.scala | 12 +- .../ml}/automl/TuneHyperparameters.txt | 0 .../synapse/ml}/codegen/CodeGen.scala | 34 +-- .../synapse/ml}/codegen/CodegenConfig.scala | 17 +- .../synapse/ml}/codegen/GenerationUtils.scala | 4 +- .../synapse/ml}/codegen/Wrappable.scala | 19 +- .../synapse/ml}/core/contracts/Metrics.scala | 4 +- .../synapse/ml}/core/contracts/Params.scala | 2 +- .../synapse/ml}/core/env/FileUtilities.scala | 2 +- .../synapse/ml}/core/env/NativeLoader.java | 2 +- .../ml}/core/env/StreamUtilities.scala | 2 +- .../ml}/core/metrics/MetricConstants.scala | 2 +- .../ml}/core/metrics/MetricUtils.scala | 6 +- .../ml}/core/schema/BinaryFileSchema.scala | 2 +- .../ml}/core/schema/Categoricals.scala | 4 +- .../ml}/core/schema/DatasetExtensions.scala | 2 +- .../ml}/core/schema/ImageSchemaUtils.scala | 2 +- 
.../ml}/core/schema/SchemaConstants.scala | 2 +- .../ml}/core/schema/SparkBindings.scala | 2 +- .../synapse/ml}/core/schema/SparkSchema.scala | 4 +- .../ml}/core/serialize/ComplexParam.scala | 2 +- .../synapse/ml}/core/spark/FluentAPI.scala | 2 +- .../synapse/ml}/core/utils/AsyncUtils.scala | 2 +- .../synapse/ml}/core/utils/BreezeUtils.scala | 2 +- .../ml}/core/utils/CastUtilities.scala | 2 +- .../synapse/ml}/core/utils/ClusterUtil.scala | 2 +- .../core/utils/ContextObjectInputStream.scala | 2 +- .../ml}/core/utils/FaultToleranceUtils.scala | 2 +- .../ml}/core/utils/JarLoadingUtils.scala | 13 +- .../ml}/core/utils/ModelEquality.scala | 2 +- .../synapse/ml}/core/utils/OsUtils.scala | 2 +- .../ml}/core/utils/SlicerFunctions.scala | 4 +- .../synapse/ml}/core/utils/StopWatch.scala | 2 +- .../synapse/ml}/explainers/FeatureStats.scala | 2 +- .../ml}/explainers/ImageExplainer.scala | 4 +- .../synapse/ml}/explainers/ImageLIME.scala | 8 +- .../synapse/ml}/explainers/ImageSHAP.scala | 8 +- .../ml}/explainers/KernelSHAPBase.scala | 11 +- .../ml}/explainers/KernelSHAPSampler.scala | 4 +- .../synapse/ml}/explainers/LIMEBase.scala | 10 +- .../synapse/ml}/explainers/LIMESampler.scala | 4 +- .../ml}/explainers/LassoRegression.scala | 5 +- .../explainers/LeastSquaresRegression.scala | 3 +- .../ml}/explainers/LocalExplainer.scala | 4 +- .../ml}/explainers/RegressionBase.scala | 2 +- .../synapse/ml}/explainers/RowUtils.scala | 2 +- .../synapse/ml}/explainers/Sampler.scala | 8 +- .../synapse/ml}/explainers/SharedParams.scala | 2 +- .../synapse/ml}/explainers/TabularLIME.scala | 4 +- .../synapse/ml}/explainers/TabularSHAP.scala | 4 +- .../ml}/explainers/TextExplainer.scala | 2 +- .../synapse/ml}/explainers/TextLIME.scala | 4 +- .../synapse/ml}/explainers/TextSHAP.scala | 4 +- .../synapse/ml}/explainers/VectorLIME.scala | 4 +- .../synapse/ml}/explainers/VectorSHAP.scala | 4 +- .../ml}/featurize/CleanMissingData.scala | 8 +- .../ml}/featurize/CleanMissingData.txt | 0 
.../synapse/ml}/featurize/CountSelector.scala | 8 +- .../ml}/featurize/DataConversion.scala | 9 +- .../synapse/ml}/featurize/DataConversion.txt | 0 .../synapse/ml}/featurize/Featurize.scala | 17 +- .../synapse/ml}/featurize/Featurize.txt | 0 .../synapse/ml}/featurize/IndexToValue.scala | 12 +- .../synapse/ml}/featurize/IndexToValue.txt | 0 .../synapse/ml}/featurize/ValueIndexer.scala | 11 +- .../synapse/ml}/featurize/ValueIndexer.txt | 0 .../ml}/featurize/ValueIndexerModel.txt | 0 .../ml}/featurize/text/MultiNGram.scala | 11 +- .../ml}/featurize/text/PageSplitter.scala | 8 +- .../ml}/featurize/text/TextFeaturizer.scala | 11 +- .../ml}/featurize/text/TextFeaturizer.txt | 0 .../ml}/image/ResizeImageTransformer.scala | 19 +- .../synapse/ml}/image/UnrollImage.scala | 13 +- .../synapse/ml}/image/UnrollImage.txt | 0 .../synapse/ml}/io/IOImplicits.scala | 6 +- .../synapse/ml}/io/binary/Binary.scala | 4 +- .../ml}/io/binary/BinaryFileFormat.scala | 13 +- .../ml}/io/binary/BinaryFileReader.scala | 10 +- .../io/binary/KeyValueReaderIterator.scala | 2 +- .../synapse/ml}/io/http/Clients.scala | 4 +- .../synapse/ml}/io/http/HTTPClients.scala | 2 +- .../synapse/ml}/io/http/HTTPSchema.scala | 12 +- .../synapse/ml}/io/http/HTTPTransformer.scala | 13 +- .../synapse/ml}/io/http/Parsers.scala | 18 +- .../synapse/ml}/io/http/PortForwarding.scala | 2 +- .../synapse/ml}/io/http/SharedVariable.scala | 2 +- .../ml}/io/http/SimpleHTTPTransformer.scala | 12 +- .../synapse/ml}/io/image/ImageUtils.scala | 18 +- .../ml}/io/powerbi/PowerBIWriter.scala | 8 +- .../ml}/isolationforest/IsolationForest.scala | 10 +- .../synapse/ml}/lime/BreezeUtils.scala | 2 +- .../synapse/ml}/lime/LIME.scala | 26 +- .../synapse/ml}/lime/Superpixel.scala | 8 +- .../ml}/lime/SuperpixelTransformer.scala | 10 +- .../synapse/ml}/lime/TextLIME.scala | 12 +- .../synapse/ml}/logging/BasicLogging.scala | 4 +- .../synapse/ml}/nn/BallTree.scala | 4 +- .../synapse/ml}/nn/BoundedPriorityQueue.scala | 2 +- 
.../synapse/ml}/nn/ConditionalKNN.scala | 6 +- .../spark => azure/synapse/ml}/nn/KNN.scala | 8 +- .../synapse/ml}/nn/Schemas.scala | 2 +- .../ml}/recommendation/RankingAdapter.scala | 8 +- .../ml}/recommendation/RankingEvaluator.scala | 4 +- .../RankingTrainValidationSplit.scala | 6 +- .../RecommendationIndexer.scala | 6 +- .../synapse/ml}/recommendation/SAR.scala | 10 +- .../synapse/ml}/recommendation/SARModel.scala | 6 +- .../ml}/recommendation/recommendation.txt | 0 .../synapse/ml}/stages/Batchers.scala | 2 +- .../synapse/ml}/stages/Cacher.scala | 6 +- .../synapse/ml}/stages/ClassBalancer.scala | 8 +- .../synapse/ml}/stages/DropColumns.scala | 6 +- .../synapse/ml}/stages/EnsembleByKey.scala | 6 +- .../synapse/ml}/stages/EnsembleByKey.txt | 0 .../synapse/ml}/stages/Explode.scala | 10 +- .../synapse/ml}/stages/Lambda.scala | 6 +- .../ml}/stages/MiniBatchTransformer.scala | 6 +- .../ml}/stages/MultiColumnAdapter.scala | 8 +- .../synapse/ml}/stages/MultiColumnAdapter.txt | 0 .../ml}/stages/PartitionConsolidator.scala | 11 +- .../synapse/ml}/stages/RenameColumn.scala | 8 +- .../synapse/ml}/stages/Repartition.scala | 6 +- .../synapse/ml}/stages/Repartition.txt | 0 .../synapse/ml}/stages/SelectColumns.scala | 6 +- .../synapse/ml}/stages/SelectColumns.txt | 2 +- .../ml}/stages/StratifiedRepartition.scala | 8 +- .../synapse/ml}/stages/SummarizeData.scala | 6 +- .../synapse/ml}/stages/SummarizeData.txt | 0 .../synapse/ml}/stages/TextPreprocessor.scala | 12 +- .../synapse/ml}/stages/Timer.scala | 6 +- .../synapse/ml}/stages/UDFTransformer.scala | 10 +- .../synapse/ml}/stages/UnicodeNormalize.scala | 14 +- .../synapse/ml}/stages/udfs.scala | 2 +- .../synapse/ml}/train/AutoTrainedModel.scala | 6 +- .../synapse/ml}/train/AutoTrainer.scala | 6 +- .../ml}/train/ComputeModelStatistics.scala | 15 +- .../ml}/train/ComputeModelStatistics.txt | 0 .../train/ComputePerInstanceStatistics.scala | 12 +- .../train/ComputePerInstanceStatistics.txt | 0 
.../synapse/ml}/train/TrainClassifier.scala | 15 +- .../synapse/ml}/train/TrainClassifier.txt | 0 .../synapse/ml}/train/TrainRegressor.scala | 15 +- .../synapse/ml}/train/TrainRegressor.txt | 2 +- .../spark/ml/ComplexParamsSerializer.scala | 2 +- .../spark/ml/LimeNamespaceInjections.scala | 2 +- .../scala/org/apache/spark/ml/Ranker.scala | 5 +- .../org/apache/spark/ml/Serializer.scala | 6 +- .../spark/ml/param/ArrayParamMapParam.scala | 2 +- .../apache/spark/ml/param/BallTreeParam.scala | 4 +- .../spark/ml/param/ByteArrayParam.scala | 4 +- .../spark/ml/param/DataFrameParam.scala | 6 +- .../apache/spark/ml/param/DataTypeParam.scala | 2 +- .../spark/ml/param/EstimatorArrayParam.scala | 2 +- .../spark/ml/param/EstimatorParam.scala | 6 +- .../spark/ml/param/EvaluatorParam.scala | 4 +- .../spark/ml/param/ParamSpaceParam.scala | 4 +- .../spark/ml/param/PipelineStageParam.scala | 4 +- .../ml/param/TransformerArrayParam.scala | 2 +- .../spark/ml/param/TransformerParam.scala | 4 +- .../org/apache/spark/ml/param/UDFParam.scala | 4 +- .../apache/spark/ml/param/UDPyFParam.scala | 2 +- .../recommendation/RecommendationHelper.scala | 2 +- .../source/image/PatchedImageFileFormat.scala | 7 +- .../streaming/DistributedHTTPSource.scala | 15 +- .../sql/execution/streaming/HTTPSource.scala | 5 +- .../sql/execution/streaming/ServingUDFs.scala | 4 +- .../streaming/continuous/HTTPSinkV2.scala | 4 +- .../streaming/continuous/HTTPSourceV2.scala | 7 +- .../injections/OptimizedCKNNFitting.scala | 4 +- core/src/test/R/testthat.R | 4 +- core/src/test/python/setup.py | 6 +- .../__init__.py | 0 .../cyber/__init__.py | 0 .../cyber/anamoly/__init__.py | 0 .../anamoly/test_collaborative_filtering.py | 10 +- .../cyber/anamoly/test_complement_access.py | 6 +- .../cyber/explain_tester.py | 2 +- .../cyber/feature/__init__.py | 0 .../cyber/feature/test_indexers.py | 6 +- .../cyber/feature/test_scalers.py | 6 +- .../cyber/utils/__init__.py | 0 .../cyber/utils/test_spark_utils.py | 4 +- .../nn/__init__.py 
| 0 .../nn/test_ball_tree.py | 4 +- .../recommendation/__init__.py | 0 .../recommendation/test_ranking.py | 12 +- .../spark => azure/synapse/ml}/Secrets.scala | 2 +- .../ml}/automl/VerifyFindBestModel.scala | 12 +- .../automl/VerifyTuneHyperparameters.scala | 11 +- .../synapse/ml}/codegen/TestGen.scala | 24 +- .../synapse/ml}/core/ml/HashingTFSpec.scala | 6 +- .../synapse/ml}/core/ml/IDFSpec.scala | 6 +- .../synapse/ml}/core/ml/NGramSpec.scala | 4 +- .../ml}/core/ml/OneHotEncoderSpec.scala | 6 +- .../synapse/ml}/core/ml/Word2VecSpec.scala | 6 +- .../ml}/core/schema/SparkBindingsTest.scala | 4 +- .../ml}/core/schema/TestCategoricals.scala | 4 +- .../schema/VerifyFastVectorAssembler.scala | 4 +- .../ml}/core/schema/VerifySparkSchema.scala | 4 +- .../ValidateComplexParamSerializer.scala | 6 +- .../core/test/base/SparkSessionFactory.scala | 2 +- .../synapse/ml}/core/test/base/TestBase.scala | 4 +- .../ml}/core/test/benchmarks/Benchmarks.scala | 11 +- .../ml}/core/test/fuzzing/Fuzzing.scala | 20 +- .../ml}/core/utils/VerifyClusterUtil.scala | 4 +- .../split1/HasExplainTargetSuite.scala | 9 +- .../KernelSHAPSamplerSupportSuite.scala | 6 +- .../split1/LassoRegressionSuite.scala | 6 +- .../split1/LeastSquaresRegressionSuite.scala | 6 +- .../ml}/explainers/split1/SamplerSuite.scala | 22 +- .../split1/TabularLIMEExplainerSuite.scala | 13 +- .../split1/TabularSHAPExplainerSuite.scala | 13 +- .../split1/TextExplainersSuite.scala | 15 +- .../split1/VectorLIMEExplainerSuite.scala | 13 +- .../split1/VectorSHAPExplainerSuite.scala | 15 +- .../featurize/VerifyCleanMissingData.scala | 10 +- .../ml}/featurize/VerifyCountSelector.scala | 6 +- .../ml}/featurize/VerifyDataConversion.scala | 8 +- .../ml}/featurize/VerifyFeaturize.scala | 16 +- .../ml}/featurize/VerifyValueIndexer.scala | 8 +- .../ml}/featurize/text/MultiNGramSpec.scala | 6 +- .../ml}/featurize/text/PageSplitterSpec.scala | 4 +- .../featurize/text/TextFeaturizerSpec.scala | 6 +- .../flaky/PartitionConsolidatorSuite.scala 
| 8 +- .../synapse/ml}/image/ImageTestUtils.scala | 12 +- .../ml}/io/split1/BinaryFileReaderSuite.scala | 21 +- .../ml}/io/split1/HTTPTransformerSuite.scala | 18 +- .../ml}/io/split1/ImageReaderSuite.scala | 14 +- .../synapse/ml}/io/split1/ParserSuite.scala | 6 +- .../synapse/ml}/io/split1/PowerBiSuite.scala | 10 +- .../split1/SimpleHTTPTransformerSuite.scala | 6 +- .../ml}/io/split2/ContinuousHTTPSuite.scala | 11 +- .../ml}/io/split2/DistributedHTTPSuite.scala | 25 +- .../synapse/ml}/io/split2/HTTPSuite.scala | 8 +- .../synapse/ml}/io/split2/HTTPv2Suite.scala | 11 +- .../VerifyIsolationForest.scala | 22 +- .../synapse/ml}/lime/LIMESuite.scala | 12 +- .../synapse/ml}/lime/SuperpixelSuite.scala | 9 +- .../ml}/lime/SuperpixelTransformerSuite.scala | 8 +- .../synapse/ml}/lime/TextLIMESuite.scala | 11 +- .../synapse/ml}/nbtest/DatabricksTests.scala | 6 +- .../ml}/nbtest/DatabricksUtilities.scala | 22 +- .../synapse/ml}/nbtest/SprayUtilities.scala | 2 +- .../synapse/ml}/nbtest/SynapseTests.scala | 10 +- .../synapse/ml}/nbtest/SynapseUtilities.scala | 12 +- .../synapse/ml}/nn/BallTreeTest.scala | 4 +- .../ml}/nn/ConditionalBallTreeTest.scala | 4 +- .../synapse/ml}/nn/KNNTest.scala | 4 +- .../recommendation/RankingAdapterSpec.scala | 4 +- .../recommendation/RankingEvaluatorSpec.scala | 4 +- .../ml}/recommendation/RankingTestBase.scala | 4 +- .../RankingTrainValidationSpec.scala | 5 +- .../RecommendationIndexerSpec.scala | 4 +- .../synapse/ml}/recommendation/SARSpec.scala | 4 +- .../ml}/stages/BatchIteratorSuite.scala | 4 +- .../synapse/ml}/stages/CacherSuite.scala | 4 +- .../ml}/stages/ClassBalancerSuite.scala | 4 +- .../synapse/ml}/stages/DropColumnsSuite.scala | 6 +- .../ml}/stages/EnsembleByKeySuite.scala | 6 +- .../synapse/ml}/stages/ExplodeSuite.scala | 4 +- .../synapse/ml}/stages/LambdaSuite.scala | 6 +- .../stages/MiniBatchTransformerSuite.scala | 11 +- .../ml}/stages/MultiColumnAdapterSpec.scala | 8 +- .../ml}/stages/RenameColumnSuite.scala | 6 +- 
.../synapse/ml}/stages/RepartitionSuite.scala | 6 +- .../ml}/stages/SelectColumnsSuite.scala | 6 +- .../stages/StratifiedRepartitionSuite.scala | 6 +- .../ml}/stages/SummarizeDataSuite.scala | 6 +- .../ml}/stages/TextPreprocessorSuite.scala | 6 +- .../synapse/ml}/stages/TimerSuite.scala | 4 +- .../synapse/ml}/stages/UDFSuite.scala | 4 +- .../ml}/stages/UDFTransformerSuite.scala | 6 +- .../ml}/stages/UnicodeNormalizeSuite.scala | 6 +- .../train/VerifyComputeModelStatistics.scala | 22 +- .../VerifyComputePerInstanceStatistics.scala | 12 +- .../ml}/train/VerifyTrainClassifier.scala | 20 +- .../ml}/train/VerifyTrainRegressor.scala | 11 +- deep-learning/src/main/R/model_downloader.R | 2 +- .../ml}/cntk/CNTKModel.py | 6 +- .../ml}/cntk/ImageFeaturizer.py | 2 +- .../{mmlspark => synapse/ml}/cntk/__init__.py | 0 .../ml}/onnx/ONNXModel.py | 2 +- .../{mmlspark => synapse/ml}/onnx/__init__.py | 0 .../microsoft/CNTK/SerializableFunction.scala | 2 +- .../synapse/ml}/SharedParams.scala | 2 +- .../synapse/ml}/cntk/CNTKFunctionParam.scala | 11 +- .../synapse/ml}/cntk/CNTKModel.scala | 22 +- .../synapse/ml}/cntk/ConversionUtils.scala | 2 +- .../synapse/ml}/cntk/ImageFeaturizer.scala | 14 +- .../synapse/ml}/cntk/ImageFeaturizer.txt | 0 .../synapse/ml}/cntk/_CNTKModel.txt | 0 .../ml}/downloader/ModelDownloader.scala | 18 +- .../synapse/ml}/downloader/Schema.scala | 6 +- .../synapse/ml}/onnx/ONNXModel.scala | 20 +- .../synapse/ml}/cntk/CNTKBindingSuite.scala | 9 +- .../synapse/ml}/cntk/CNTKModelSuite.scala | 17 +- .../ml}/cntk/ImageFeaturizerSuite.scala | 28 +- .../ml}/downloader/DownloaderSuite.scala | 8 +- .../ml}/explainers/ImageExplainersSuite.scala | 14 +- .../split2/ImageLIMEExplainerSuite.scala | 23 +- .../split3/ImageSHAPExplainerSuite.scala | 17 +- .../synapse/ml}/lime/ImageLIMESuite.scala | 28 +- .../synapse/ml}/onnx/ONNXModelSuite.scala | 16 +- docs/R-setup.md | 18 +- docs/cogsvc.md | 42 +-- docs/cyber.md | 34 +-- docs/datasets.md | 2 +- docs/developer-readme.md | 22 +- 
docs/docker.md | 38 +-- docs/http.md | 8 +- docs/lightgbm.md | 4 +- docs/mmlspark-serving.md | 12 +- docs/onnx.md | 6 +- docs/vagrant.md | 4 +- docs/vw.md | 8 +- docs/your-first-model.md | 18 +- environment.yaml | 3 +- .../lightgbm/LightGBMClassificationModel.py | 10 +- .../ml}/lightgbm/LightGBMRankerModel.py | 10 +- .../ml}/lightgbm/LightGBMRegressionModel.py | 10 +- .../ml}/lightgbm/__init__.py | 0 .../ml}/lightgbm/mixin.py | 2 +- .../synapse/ml}/lightgbm/LightGBMBase.scala | 26 +- .../ml}/lightgbm/LightGBMClassifier.scala | 16 +- .../ml}/lightgbm/LightGBMClassifier.txt | 0 .../ml}/lightgbm/LightGBMConstants.scala | 2 +- .../ml}/lightgbm/LightGBMDelegate.scala | 6 +- .../ml}/lightgbm/LightGBMModelMethods.scala | 4 +- .../synapse/ml}/lightgbm/LightGBMRanker.scala | 15 +- .../synapse/ml}/lightgbm/LightGBMRanker.txt | 0 .../ml}/lightgbm/LightGBMRegressor.scala | 16 +- .../ml}/lightgbm/LightGBMRegressor.txt | 0 .../synapse/ml}/lightgbm/LightGBMUtils.scala | 9 +- .../synapse/ml}/lightgbm/SharedState.scala | 13 +- .../ml}/lightgbm/TaskTrainingMethods.scala | 6 +- .../synapse/ml}/lightgbm/TrainUtils.scala | 20 +- .../lightgbm/booster/LightGBMBooster.scala | 8 +- .../lightgbm/dataset/DatasetAggregator.scala | 11 +- .../ml}/lightgbm/dataset/DatasetUtils.scala | 6 +- .../lightgbm/dataset/LightGBMDataset.scala | 6 +- .../ml}/lightgbm/params/FObjParam.scala | 4 +- .../ml}/lightgbm/params/FObjTrait.scala | 4 +- .../params/LightGBMBoosterParam.scala | 6 +- .../ml}/lightgbm/params/LightGBMParams.scala | 12 +- .../ml}/lightgbm/params/TrainParams.scala | 4 +- .../synapse/ml}/lightgbm/swig/SwigUtils.scala | 2 +- .../split1/VerifyLightGBMClassifier.scala | 27 +- .../split2/VerifyLightGBMRanker.scala | 26 +- .../split2/VerifyLightGBMRegressor.scala | 17 +- .../AzureSearchIndex - Met Artworks.ipynb | 8 +- ...on - Adult Census with Vowpal Wabbit.ipynb | 10 +- notebooks/Classification - Adult Census.ipynb | 6 +- ...fication - Before and After MMLSpark.ipynb | 14 +- ...Twitter Sentiment 
with Vowpal Wabbit.ipynb | 12 +- ...eServices - Celebrity Quote Analysis.ipynb | 6 +- notebooks/CognitiveServices - Overview.ipynb | 78 ++--- ...iveServices - Predictive Maintenance.ipynb | 2 +- ...lKNN - Exploring Art Across Cultures.ipynb | 2 +- ...CyberML - Anomalous Access Detection.ipynb | 6 +- ...g - BiLSTM Medical Entity Extraction.ipynb | 6 +- ...ning - CIFAR10 Convolutional Network.ipynb | 4 +- ...arning - Flower Image Classification.ipynb | 10 +- .../DeepLearning - Transfer Learning.ipynb | 8 +- ...rk - Working with Arbitrary Web APIs.ipynb | 2 +- ...meterTuning - Fighting Breast Cancer.ipynb | 10 +- .../Interpretability - Image Explainers.ipynb | 10 +- ...pretability - Tabular SHAP explainer.ipynb | 2 +- .../Interpretability - Text Explainers.ipynb | 6 +- notebooks/LightGBM - Overview.ipynb | 12 +- ...erpretation - Snow Leopard Detection.ipynb | 14 +- notebooks/ONNX - Inference on Spark.ipynb | 4 +- ...nCV - Pipeline Image Transformations.ipynb | 14 +- notebooks/Regression - Auto Imports.ipynb | 12 +- ...on - Flight Delays with DataCleaning.ipynb | 8 +- notebooks/Regression - Flight Delays.ipynb | 8 +- ...it vs. LightGBM vs. Linear Regressor.ipynb | 10 +- ...parkServing - Deploying a Classifier.ipynb | 8 +- ... 
- Amazon Book Reviews with Word2Vec.ipynb | 6 +- .../TextAnalytics - Amazon Book Reviews.ipynb | 8 +- notebooks/Vowpal Wabbit - Overview.ipynb | 22 +- .../ml}/opencv/ImageTransformer.py | 2 +- .../ml}/opencv/__init__.py | 0 .../ml}/opencv/ImageSetAugmenter.scala | 8 +- .../synapse/ml}/opencv/ImageTransformer.scala | 12 +- .../synapse/ml}/opencv/OpenCVUtils.scala | 8 +- .../image/ResizeImageTransformerSuite.scala | 16 +- .../ml}/opencv/ImageSetAugmenterSuite.scala | 10 +- .../ml}/opencv/ImageTransformerSuite.scala | 12 +- pipeline.yaml | 20 +- project/BlobMavenPlugin.scala | 4 +- project/CodegenPlugin.scala | 38 ++- project/CondaPlugin.scala | 2 +- scalastyle-config.xml | 4 +- scalastyle-test-config.xml | 4 +- .../ml/core/test/fuzzing/FuzzingTest.scala | 280 +++++++++++++++++ .../spark/core/test/fuzzing/FuzzingTest.scala | 281 ------------------ tools/docker/demo/init_notebook.py | 4 +- tools/docker/developer/Dockerfile | 4 +- tools/helm/zeppelin/Dockerfile | 4 +- tools/helm/zeppelin/mini.Dockerfile | 6 +- .../classification_mmlspark_2E3REACQR.zpln | 8 +- .../helm/zeppelin/mmlsparkExamples/serving.py | 2 +- .../simplification_mmlspark.zpln | 2 +- .../sparkserving_2DZFNGU8A.zpln | 4 +- .../mmlsparkExamples/submitjob_2DZ7DHX6E.zpln | 4 +- tools/helm/zeppelin/zeppelin-env.sh | 2 +- tools/misc/get-stats | 2 +- tools/pytest/run_all_tests.py | 2 +- tools/vagrant/Vagrantfile | 6 +- .../ml}/vw/VowpalWabbitClassificationModel.py | 2 +- .../ml}/vw/VowpalWabbitClassifier.py | 2 +- .../ml}/vw/VowpalWabbitContextualBandit.py | 2 +- .../vw/VowpalWabbitContextualBanditModel.py | 2 +- .../ml}/vw/VowpalWabbitRegressionModel.py | 2 +- .../ml}/vw/VowpalWabbitRegressor.py | 2 +- .../{mmlspark => synapse/ml}/vw/__init__.py | 0 .../synapse/ml}/vw/HasNumBits.scala | 2 +- .../synapse/ml}/vw/HasSumcollisions.scala | 2 +- .../synapse/ml}/vw/VectorUtils.scala | 2 +- .../synapse/ml}/vw/VectorZipper.scala | 8 +- .../synapse/ml}/vw/VowpalWabbitBase.scala | 14 +- 
.../ml}/vw/VowpalWabbitBaseModel.scala | 8 +- .../ml}/vw/VowpalWabbitClassifier.scala | 8 +- .../ml}/vw/VowpalWabbitContextualBandit.scala | 6 +- .../ml}/vw/VowpalWabbitFeaturizer.scala | 20 +- .../ml}/vw/VowpalWabbitInteractions.scala | 18 +- .../ml}/vw/VowpalWabbitMurmurWithPrefix.scala | 2 +- .../ml}/vw/VowpalWabbitRegressor.scala | 6 +- .../synapse/ml}/vw/VowpalWabbitUtil.scala | 2 +- .../ml}/vw/featurizer/BooleanFeaturizer.scala | 2 +- .../ml}/vw/featurizer/ElementFeaturizer.scala | 2 +- .../ml}/vw/featurizer/Featurizer.scala | 4 +- .../ml}/vw/featurizer/MapFeaturizer.scala | 2 +- .../vw/featurizer/MapStringFeaturizer.scala | 2 +- .../ml}/vw/featurizer/NumericFeaturizer.scala | 2 +- .../ml}/vw/featurizer/SeqFeaturizer.scala | 2 +- .../ml}/vw/featurizer/StringFeaturizer.scala | 2 +- .../vw/featurizer/StringSplitFeaturizer.scala | 2 +- .../ml}/vw/featurizer/StructFeaturizer.scala | 2 +- .../ml}/vw/featurizer/VectorFeaturizer.scala | 2 +- .../vw/__init__.py | 0 .../vw/test_vw.py | 8 +- .../vw/test_vw_cb.py | 8 +- .../ml}/vw/VWContextualBandidSpec.scala | 12 +- .../synapse/ml}/vw/VerifyVectorZipper.scala | 4 +- .../ml}/vw/VerifyVowpalWabbitClassifier.scala | 17 +- .../ml}/vw/VerifyVowpalWabbitFeaturizer.scala | 6 +- .../vw/VerifyVowpalWabbitInteractions.scala | 6 +- .../VerifyVowpalWabbitMurmurWithPrefix.scala | 6 +- .../ml}/vw/VerifyVowpalWabbitRegressor.scala | 6 +- 562 files changed, 2304 insertions(+), 2328 deletions(-) rename cognitive/src/main/python/{mmlspark => synapse/ml}/cognitive/AzureSearchWriter.py (82%) rename cognitive/src/main/python/{mmlspark => synapse/ml}/cognitive/BingImageSearch.py (86%) rename cognitive/src/main/python/{mmlspark => synapse/ml}/cognitive/__init__.py (100%) rename cognitive/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/AnomalyDetection.scala (96%) rename cognitive/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/AnomalyDetectorSchemas.scala (95%) rename 
cognitive/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/AudioStreams.scala (98%) rename cognitive/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/AzureSearch.scala (96%) rename cognitive/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/AzureSearchAPI.scala (97%) rename cognitive/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/AzureSearchSchemas.scala (95%) rename cognitive/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/BingImageSearch.scala (98%) rename cognitive/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/CognitiveServiceBase.scala (96%) rename cognitive/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/CognitiveServiceSchemas.scala (84%) rename cognitive/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/ComputerVision.scala (97%) rename cognitive/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/ComputerVisionSchemas.scala (96%) rename cognitive/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/DocumentTranslator.scala (93%) rename cognitive/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/Face.scala (99%) rename cognitive/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/FaceSchemas.scala (96%) rename cognitive/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/FormRecognizer.scala (98%) rename cognitive/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/FormRecognizerSchemas.scala (97%) rename cognitive/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/ImageSearchSchemas.scala (96%) rename cognitive/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/OCRSchemas.scala (93%) rename cognitive/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/RESTHelpers.scala (98%) rename 
cognitive/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/SpeechAPI.scala (93%) rename cognitive/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/SpeechSchemas.scala (95%) rename cognitive/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/SpeechToText.scala (97%) rename cognitive/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/SpeechToTextSDK.scala (96%) rename cognitive/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/TextAnalytics.scala (97%) rename cognitive/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/TextAnalyticsSchemas.scala (97%) rename cognitive/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/TextAnalyticsSchemasV2.scala (95%) rename cognitive/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/TextTranslator.scala (97%) rename cognitive/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/TranslatorSchemas.scala (97%) rename cognitive/src/test/python/{mmlsparktest => synapsemltest}/cognitive/__init__.py (100%) rename cognitive/src/test/python/{mmlsparktest => synapsemltest}/cognitive/test_simple.py (93%) rename cognitive/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/split1/AnamolyDetectionSuite.scala (95%) rename cognitive/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/split1/ComputerVisionSuite.scala (97%) rename cognitive/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/split1/FaceAPI.scala (95%) rename cognitive/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/split1/FaceSuite.scala (97%) rename cognitive/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/split1/FormRecognizerSuite.scala (97%) rename cognitive/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/split1/ImageSearchSuite.scala (94%) rename 
cognitive/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/split1/TextAnalyticsSuite.scala (97%) rename cognitive/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/split1/TranslatorSuite.scala (98%) rename cognitive/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/split2/SearchWriterSuite.scala (95%) rename cognitive/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/split2/SpeechToTextSuite.scala (90%) rename cognitive/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cognitive/split3/SpeechToTextSDKSuite.scala (96%) rename cognitive/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/utils/utils/ModelEqualitySuite.scala (68%) rename cognitive/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/utils/utils/SlicerFunctionsSuite.scala (89%) delete mode 100644 core/src/main/python/mmlspark/doc/scala.rst rename core/src/main/python/{mmlspark => synapse}/README.txt (64%) rename core/src/main/python/{mmlspark => synapse}/__init__.py (100%) rename core/src/main/python/{mmlspark => synapse/ml}/automl/BestModel.py (97%) rename core/src/main/python/{mmlspark => synapse/ml}/automl/HyperparamBuilder.py (79%) rename core/src/main/python/{mmlspark => synapse/ml}/automl/TuneHyperparametersModel.py (89%) rename core/src/main/python/{mmlspark => synapse/ml}/automl/__init__.py (100%) rename core/src/main/python/{mmlspark => synapse/ml}/core/__init__.py (100%) rename core/src/main/python/{mmlspark => synapse/ml}/core/schema/TypeConversionUtils.py (100%) rename core/src/main/python/{mmlspark => synapse/ml}/core/schema/Utils.py (89%) rename core/src/main/python/{mmlspark => synapse/ml}/core/schema/__init__.py (100%) rename core/src/main/python/{mmlspark => synapse/ml}/core/serialize/__init__.py (100%) rename core/src/main/python/{mmlspark => synapse/ml}/core/serialize/java_params_patch.py (97%) rename core/src/main/python/{mmlspark => synapse/ml}/core/spark/FluentAPI.py 
(100%) rename core/src/main/python/{mmlspark => synapse/ml}/core/spark/__init__.py (100%) rename core/src/main/python/{mmlspark => synapse/ml}/cyber/__init__.py (100%) rename core/src/main/python/{mmlspark => synapse/ml}/cyber/anomaly/__init__.py (100%) rename core/src/main/python/{mmlspark => synapse/ml}/cyber/anomaly/collaborative_filtering.py (99%) rename core/src/main/python/{mmlspark => synapse/ml}/cyber/anomaly/complement_access.py (98%) rename core/src/main/python/{mmlspark => synapse/ml}/cyber/dataset.py (98%) rename core/src/main/python/{mmlspark => synapse/ml}/cyber/feature/__init__.py (100%) rename core/src/main/python/{mmlspark => synapse/ml}/cyber/feature/indexers.py (97%) rename core/src/main/python/{mmlspark => synapse/ml}/cyber/feature/scalers.py (99%) rename core/src/main/python/{mmlspark => synapse/ml}/cyber/utils/__init__.py (100%) rename core/src/main/python/{mmlspark => synapse/ml}/cyber/utils/spark_utils.py (100%) rename core/src/main/python/{mmlspark => synapse/ml}/doc/conf.py (93%) rename core/src/main/python/{mmlspark => synapse/ml}/doc/index.rst (100%) create mode 100644 core/src/main/python/synapse/ml/doc/scala.rst rename core/src/main/python/{mmlspark => synapse/ml}/downloader/ModelDownloader.py (94%) rename core/src/main/python/{mmlspark => synapse/ml}/downloader/__init__.py (100%) rename core/src/main/python/{mmlspark => synapse/ml}/io/IOImplicits.py (94%) rename core/src/main/python/{mmlspark => synapse/ml}/io/__init__.py (100%) rename core/src/main/python/{mmlspark => synapse/ml}/io/binary/BinaryFileReader.py (92%) rename core/src/main/python/{mmlspark => synapse/ml}/io/binary/__init__.py (100%) rename core/src/main/python/{mmlspark => synapse/ml}/io/http/HTTPFunctions.py (95%) rename core/src/main/python/{mmlspark => synapse/ml}/io/http/JSONOutputParser.py (91%) rename core/src/main/python/{mmlspark => synapse/ml}/io/http/ServingFunctions.py (90%) rename core/src/main/python/{mmlspark => synapse/ml}/io/http/SimpleHTTPTransformer.py 
(84%) rename core/src/main/python/{mmlspark => synapse/ml}/io/http/__init__.py (100%) rename core/src/main/python/{mmlspark => synapse/ml}/io/image/ImageUtils.py (91%) rename core/src/main/python/{mmlspark => synapse/ml}/io/image/__init__.py (100%) rename core/src/main/python/{mmlspark => synapse/ml}/io/powerbi/PowerBIWriter.py (83%) rename core/src/main/python/{mmlspark => synapse/ml}/io/powerbi/__init__.py (100%) rename core/src/main/python/{mmlspark => synapse/ml}/nn/ConditionalBallTree.py (91%) rename core/src/main/python/{mmlspark => synapse/ml}/nn/__init__.py (100%) rename core/src/main/python/{mmlspark => synapse/ml}/plot/__init__.py (100%) rename core/src/main/python/{mmlspark => synapse/ml}/plot/plot.py (100%) rename core/src/main/python/{mmlspark => synapse/ml}/recommendation/RankingTrainValidationSplit.py (85%) rename core/src/main/python/{mmlspark => synapse/ml}/recommendation/RankingTrainValidationSplitModel.py (84%) rename core/src/main/python/{mmlspark => synapse/ml}/recommendation/SARModel.py (77%) rename core/src/main/python/{mmlspark => synapse/ml}/recommendation/__init__.py (50%) rename core/src/main/python/{mmlspark => synapse/ml}/stages/UDFTransformer.py (93%) rename core/src/main/python/{mmlspark => synapse/ml}/stages/__init__.py (100%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/automl/DefaultHyperparams.scala (98%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/automl/EvaluationUtils.scala (91%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/automl/FindBestModel.scala (94%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/automl/FindBestModel.txt (100%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/automl/HyperparamBuilder.scala (98%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/automl/ParamSpace.scala (96%) rename core/src/main/scala/com/microsoft/{ml/spark => 
azure/synapse/ml}/automl/TuneHyperparameters.scala (96%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/automl/TuneHyperparameters.txt (100%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/codegen/CodeGen.scala (84%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/codegen/CodegenConfig.scala (80%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/codegen/GenerationUtils.scala (93%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/codegen/Wrappable.scala (96%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/contracts/Metrics.scala (92%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/contracts/Params.scala (99%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/env/FileUtilities.scala (98%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/env/NativeLoader.java (99%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/env/StreamUtilities.scala (98%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/metrics/MetricConstants.scala (98%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/metrics/MetricUtils.scala (92%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/schema/BinaryFileSchema.scala (94%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/schema/Categoricals.scala (99%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/schema/DatasetExtensions.scala (97%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/schema/ImageSchemaUtils.scala (95%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/schema/SchemaConstants.scala (97%) rename core/src/main/scala/com/microsoft/{ml/spark => 
azure/synapse/ml}/core/schema/SparkBindings.scala (97%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/schema/SparkSchema.scala (98%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/serialize/ComplexParam.scala (95%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/spark/FluentAPI.scala (94%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/utils/AsyncUtils.scala (98%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/utils/BreezeUtils.scala (95%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/utils/CastUtilities.scala (92%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/utils/ClusterUtil.scala (99%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/utils/ContextObjectInputStream.scala (91%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/utils/FaultToleranceUtils.scala (94%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/utils/JarLoadingUtils.scala (88%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/utils/ModelEquality.scala (97%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/utils/OsUtils.scala (83%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/utils/SlicerFunctions.scala (96%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/utils/StopWatch.scala (91%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/explainers/FeatureStats.scala (97%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/explainers/ImageExplainer.scala (84%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/explainers/ImageLIME.scala (93%) rename core/src/main/scala/com/microsoft/{ml/spark => 
azure/synapse/ml}/explainers/ImageSHAP.scala (93%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/explainers/KernelSHAPBase.scala (94%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/explainers/KernelSHAPSampler.scala (97%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/explainers/LIMEBase.scala (94%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/explainers/LIMESampler.scala (93%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/explainers/LassoRegression.scala (94%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/explainers/LeastSquaresRegression.scala (93%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/explainers/LocalExplainer.scala (96%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/explainers/RegressionBase.scala (98%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/explainers/RowUtils.scala (93%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/explainers/Sampler.scala (97%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/explainers/SharedParams.scala (98%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/explainers/TabularLIME.scala (97%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/explainers/TabularSHAP.scala (96%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/explainers/TextExplainer.scala (92%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/explainers/TextLIME.scala (96%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/explainers/TextSHAP.scala (96%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/explainers/VectorLIME.scala (96%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/explainers/VectorSHAP.scala (96%) rename 
core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/featurize/CleanMissingData.scala (96%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/featurize/CleanMissingData.txt (100%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/featurize/CountSelector.scala (91%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/featurize/DataConversion.scala (96%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/featurize/DataConversion.txt (100%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/featurize/Featurize.scala (94%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/featurize/Featurize.txt (100%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/featurize/IndexToValue.scala (88%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/featurize/IndexToValue.txt (100%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/featurize/ValueIndexer.scala (96%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/featurize/ValueIndexer.txt (100%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/featurize/ValueIndexerModel.txt (100%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/featurize/text/MultiNGram.scala (87%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/featurize/text/PageSplitter.scala (93%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/featurize/text/TextFeaturizer.scala (97%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/featurize/text/TextFeaturizer.txt (100%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/image/ResizeImageTransformer.scala (90%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/image/UnrollImage.scala (95%) rename core/src/main/scala/com/microsoft/{ml/spark 
=> azure/synapse/ml}/image/UnrollImage.txt (100%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/io/IOImplicits.scala (97%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/io/binary/Binary.scala (91%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/io/binary/BinaryFileFormat.scala (94%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/io/binary/BinaryFileReader.scala (93%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/io/binary/KeyValueReaderIterator.scala (97%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/io/http/Clients.scala (94%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/io/http/HTTPClients.scala (99%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/io/http/HTTPSchema.scala (97%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/io/http/HTTPTransformer.scala (93%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/io/http/Parsers.scala (95%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/io/http/PortForwarding.scala (98%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/io/http/SharedVariable.scala (97%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/io/http/SimpleHTTPTransformer.scala (92%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/io/image/ImageUtils.scala (96%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/io/powerbi/PowerBIWriter.scala (92%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/isolationforest/IsolationForest.scala (86%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/lime/BreezeUtils.scala (98%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/lime/LIME.scala (92%) rename 
core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/lime/Superpixel.scala (98%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/lime/SuperpixelTransformer.scala (87%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/lime/TextLIME.scala (89%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/logging/BasicLogging.scala (94%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/nn/BallTree.scala (98%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/nn/BoundedPriorityQueue.scala (97%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/nn/ConditionalKNN.scala (96%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/nn/KNN.scala (93%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/nn/Schemas.scala (96%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/recommendation/RankingAdapter.scala (95%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/recommendation/RankingEvaluator.scala (97%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/recommendation/RankingTrainValidationSplit.scala (98%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/recommendation/RecommendationIndexer.scala (97%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/recommendation/SAR.scala (97%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/recommendation/SARModel.scala (97%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/recommendation/recommendation.txt (100%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/Batchers.scala (98%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/Cacher.scala (88%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/ClassBalancer.scala 
(93%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/DropColumns.scala (92%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/EnsembleByKey.scala (96%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/EnsembleByKey.txt (100%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/Explode.scala (83%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/Lambda.scala (92%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/MiniBatchTransformer.scala (97%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/MultiColumnAdapter.scala (96%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/MultiColumnAdapter.txt (100%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/PartitionConsolidator.scala (92%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/RenameColumn.scala (87%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/Repartition.scala (92%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/Repartition.txt (100%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/SelectColumns.scala (93%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/SelectColumns.txt (93%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/StratifiedRepartition.scala (94%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/SummarizeData.scala (98%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/SummarizeData.txt (100%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/TextPreprocessor.scala (95%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/Timer.scala 
(96%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/UDFTransformer.scala (92%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/UnicodeNormalize.scala (91%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/udfs.scala (95%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/train/AutoTrainedModel.scala (90%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/train/AutoTrainer.scala (87%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/train/ComputeModelStatistics.scala (97%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/train/ComputeModelStatistics.txt (100%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/train/ComputePerInstanceStatistics.scala (91%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/train/ComputePerInstanceStatistics.txt (100%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/train/TrainClassifier.scala (97%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/train/TrainClassifier.txt (100%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/train/TrainRegressor.scala (95%) rename core/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/train/TrainRegressor.txt (91%) rename core/src/test/python/{mmlsparktest => synapsemltest}/__init__.py (100%) rename core/src/test/python/{mmlsparktest => synapsemltest}/cyber/__init__.py (100%) rename core/src/test/python/{mmlsparktest => synapsemltest}/cyber/anamoly/__init__.py (100%) rename core/src/test/python/{mmlsparktest => synapsemltest}/cyber/anamoly/test_collaborative_filtering.py (99%) rename core/src/test/python/{mmlsparktest => synapsemltest}/cyber/anamoly/test_complement_access.py (95%) rename core/src/test/python/{mmlsparktest => synapsemltest}/cyber/explain_tester.py (98%) rename 
core/src/test/python/{mmlsparktest => synapsemltest}/cyber/feature/__init__.py (100%) rename core/src/test/python/{mmlsparktest => synapsemltest}/cyber/feature/test_indexers.py (97%) rename core/src/test/python/{mmlsparktest => synapsemltest}/cyber/feature/test_scalers.py (96%) rename core/src/test/python/{mmlsparktest => synapsemltest}/cyber/utils/__init__.py (100%) rename core/src/test/python/{mmlsparktest => synapsemltest}/cyber/utils/test_spark_utils.py (96%) rename core/src/test/python/{mmlsparktest => synapsemltest}/nn/__init__.py (100%) rename core/src/test/python/{mmlsparktest => synapsemltest}/nn/test_ball_tree.py (88%) rename core/src/test/python/{mmlsparktest => synapsemltest}/recommendation/__init__.py (100%) rename core/src/test/python/{mmlsparktest => synapsemltest}/recommendation/test_ranking.py (92%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/Secrets.scala (98%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/automl/VerifyFindBestModel.scala (93%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/automl/VerifyTuneHyperparameters.scala (95%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/codegen/TestGen.scala (76%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/ml/HashingTFSpec.scala (94%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/ml/IDFSpec.scala (96%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/ml/NGramSpec.scala (96%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/ml/OneHotEncoderSpec.scala (95%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/ml/Word2VecSpec.scala (95%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/schema/SparkBindingsTest.scala (89%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/schema/TestCategoricals.scala (95%) 
rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/schema/VerifyFastVectorAssembler.scala (97%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/schema/VerifySparkSchema.scala (95%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/serialize/ValidateComplexParamSerializer.scala (97%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/test/base/SparkSessionFactory.scala (97%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/test/base/TestBase.scala (98%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/test/benchmarks/Benchmarks.scala (94%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/test/fuzzing/Fuzzing.scala (94%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/core/utils/VerifyClusterUtil.scala (84%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/explainers/split1/HasExplainTargetSuite.scala (80%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/explainers/split1/KernelSHAPSamplerSupportSuite.scala (93%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/explainers/split1/LassoRegressionSuite.scala (96%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/explainers/split1/LeastSquaresRegressionSuite.scala (96%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/explainers/split1/SamplerSuite.scala (94%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/explainers/split1/TabularLIMEExplainerSuite.scala (93%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/explainers/split1/TabularSHAPExplainerSuite.scala (87%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/explainers/split1/TextExplainersSuite.scala (88%) rename core/src/test/scala/com/microsoft/{ml/spark => 
azure/synapse/ml}/explainers/split1/VectorLIMEExplainerSuite.scala (82%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/explainers/split1/VectorSHAPExplainerSuite.scala (90%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/featurize/VerifyCleanMissingData.scala (97%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/featurize/VerifyCountSelector.scala (88%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/featurize/VerifyDataConversion.scala (98%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/featurize/VerifyFeaturize.scala (98%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/featurize/VerifyValueIndexer.scala (94%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/featurize/text/MultiNGramSpec.scala (88%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/featurize/text/PageSplitterSpec.scala (90%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/featurize/text/TextFeaturizerSpec.scala (93%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/flaky/PartitionConsolidatorSuite.scala (89%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/image/ImageTestUtils.scala (93%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/io/split1/BinaryFileReaderSuite.scala (90%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/io/split1/HTTPTransformerSuite.scala (90%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/io/split1/ImageReaderSuite.scala (95%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/io/split1/ParserSuite.scala (94%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/io/split1/PowerBiSuite.scala (91%) rename core/src/test/scala/com/microsoft/{ml/spark => 
azure/synapse/ml}/io/split1/SimpleHTTPTransformerSuite.scala (90%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/io/split2/ContinuousHTTPSuite.scala (94%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/io/split2/DistributedHTTPSuite.scala (96%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/io/split2/HTTPSuite.scala (89%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/io/split2/HTTPv2Suite.scala (98%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/isolationforest/VerifyIsolationForest.scala (84%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/lime/LIMESuite.scala (80%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/lime/SuperpixelSuite.scala (94%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/lime/SuperpixelTransformerSuite.scala (76%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/lime/TextLIMESuite.scala (89%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/nbtest/DatabricksTests.scala (94%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/nbtest/DatabricksUtilities.scala (95%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/nbtest/SprayUtilities.scala (97%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/nbtest/SynapseTests.scala (87%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/nbtest/SynapseUtilities.scala (96%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/nn/BallTreeTest.scala (97%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/nn/ConditionalBallTreeTest.scala (97%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/nn/KNNTest.scala (96%) rename core/src/test/scala/com/microsoft/{ml/spark => 
azure/synapse/ml}/recommendation/RankingAdapterSpec.scala (83%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/recommendation/RankingEvaluatorSpec.scala (94%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/recommendation/RankingTestBase.scala (96%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/recommendation/RankingTrainValidationSpec.scala (89%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/recommendation/RecommendationIndexerSpec.scala (89%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/recommendation/SARSpec.scala (98%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/BatchIteratorSuite.scala (95%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/CacherSuite.scala (88%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/ClassBalancerSuite.scala (91%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/DropColumnsSuite.scala (88%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/EnsembleByKeySuite.scala (94%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/ExplodeSuite.scala (87%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/LambdaSuite.scala (83%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/MiniBatchTransformerSuite.scala (92%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/MultiColumnAdapterSpec.scala (88%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/RenameColumnSuite.scala (84%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/RepartitionSuite.scala (88%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/SelectColumnsSuite.scala (90%) rename 
core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/StratifiedRepartitionSuite.scala (94%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/SummarizeDataSuite.scala (92%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/TextPreprocessorSuite.scala (95%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/TimerSuite.scala (95%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/UDFSuite.scala (84%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/UDFTransformerSuite.scala (95%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/stages/UnicodeNormalizeSuite.scala (89%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/train/VerifyComputeModelStatistics.scala (95%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/train/VerifyComputePerInstanceStatistics.scala (93%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/train/VerifyTrainClassifier.scala (96%) rename core/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/train/VerifyTrainRegressor.scala (96%) rename deep-learning/src/main/python/{mmlspark => synapse/ml}/cntk/CNTKModel.py (93%) rename deep-learning/src/main/python/{mmlspark => synapse/ml}/cntk/ImageFeaturizer.py (93%) rename deep-learning/src/main/python/{mmlspark => synapse/ml}/cntk/__init__.py (100%) rename deep-learning/src/main/python/{mmlspark => synapse/ml}/onnx/ONNXModel.py (98%) rename deep-learning/src/main/python/{mmlspark => synapse/ml}/onnx/__init__.py (100%) rename deep-learning/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/SharedParams.scala (96%) rename deep-learning/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cntk/CNTKFunctionParam.scala (81%) rename deep-learning/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cntk/CNTKModel.scala (96%) rename 
deep-learning/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cntk/ConversionUtils.scala (98%) rename deep-learning/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cntk/ImageFeaturizer.scala (93%) rename deep-learning/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cntk/ImageFeaturizer.txt (100%) rename deep-learning/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cntk/_CNTKModel.txt (100%) rename deep-learning/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/downloader/ModelDownloader.scala (94%) rename deep-learning/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/downloader/Schema.scala (95%) rename deep-learning/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/onnx/ONNXModel.scala (97%) rename deep-learning/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cntk/CNTKBindingSuite.scala (94%) rename deep-learning/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cntk/CNTKModelSuite.scala (94%) rename deep-learning/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/cntk/ImageFeaturizerSuite.scala (89%) rename deep-learning/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/downloader/DownloaderSuite.scala (93%) rename deep-learning/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/explainers/ImageExplainersSuite.scala (78%) rename deep-learning/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/explainers/split2/ImageLIMEExplainerSuite.scala (75%) rename deep-learning/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/explainers/split3/ImageSHAPExplainerSuite.scala (72%) rename deep-learning/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/lime/ImageLIMESuite.scala (86%) rename deep-learning/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/onnx/ONNXModelSuite.scala (95%) rename lightgbm/src/main/python/{mmlspark => synapse/ml}/lightgbm/LightGBMClassificationModel.py (74%) rename 
lightgbm/src/main/python/{mmlspark => synapse/ml}/lightgbm/LightGBMRankerModel.py (76%) rename lightgbm/src/main/python/{mmlspark => synapse/ml}/lightgbm/LightGBMRegressionModel.py (73%) rename lightgbm/src/main/python/{mmlspark => synapse/ml}/lightgbm/__init__.py (100%) rename lightgbm/src/main/python/{mmlspark => synapse/ml}/lightgbm/mixin.py (97%) rename lightgbm/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/lightgbm/LightGBMBase.scala (96%) rename lightgbm/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/lightgbm/LightGBMClassifier.scala (96%) rename lightgbm/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/lightgbm/LightGBMClassifier.txt (100%) rename lightgbm/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/lightgbm/LightGBMConstants.scala (97%) rename lightgbm/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/lightgbm/LightGBMDelegate.scala (93%) rename lightgbm/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/lightgbm/LightGBMModelMethods.scala (97%) rename lightgbm/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/lightgbm/LightGBMRanker.scala (95%) rename lightgbm/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/lightgbm/LightGBMRanker.txt (100%) rename lightgbm/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/lightgbm/LightGBMRegressor.scala (95%) rename lightgbm/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/lightgbm/LightGBMRegressor.txt (100%) rename lightgbm/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/lightgbm/LightGBMUtils.scala (95%) rename lightgbm/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/lightgbm/SharedState.scala (92%) rename lightgbm/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/lightgbm/TaskTrainingMethods.scala (90%) rename lightgbm/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/lightgbm/TrainUtils.scala (97%) rename lightgbm/src/main/scala/com/microsoft/{ml/spark => 
azure/synapse/ml}/lightgbm/booster/LightGBMBooster.scala (98%) rename lightgbm/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/lightgbm/dataset/DatasetAggregator.scala (98%) rename lightgbm/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/lightgbm/dataset/DatasetUtils.scala (95%) rename lightgbm/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/lightgbm/dataset/LightGBMDataset.scala (97%) rename lightgbm/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/lightgbm/params/FObjParam.scala (82%) rename lightgbm/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/lightgbm/params/FObjTrait.scala (81%) rename lightgbm/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/lightgbm/params/LightGBMBoosterParam.scala (75%) rename lightgbm/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/lightgbm/params/LightGBMParams.scala (97%) rename lightgbm/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/lightgbm/params/TrainParams.scala (98%) rename lightgbm/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/lightgbm/swig/SwigUtils.scala (98%) rename lightgbm/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/lightgbm/split1/VerifyLightGBMClassifier.scala (98%) rename lightgbm/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/lightgbm/split2/VerifyLightGBMRanker.scala (85%) rename lightgbm/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/lightgbm/split2/VerifyLightGBMRegressor.scala (94%) rename opencv/src/main/python/{mmlspark => synapse/ml}/opencv/ImageTransformer.py (98%) rename opencv/src/main/python/{mmlspark => synapse/ml}/opencv/__init__.py (100%) rename opencv/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/opencv/ImageSetAugmenter.scala (91%) rename opencv/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/opencv/ImageTransformer.scala (98%) rename opencv/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/opencv/OpenCVUtils.scala 
(80%) rename opencv/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/image/ResizeImageTransformerSuite.scala (91%) rename opencv/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/opencv/ImageSetAugmenterSuite.scala (78%) rename opencv/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/opencv/ImageTransformerSuite.scala (97%) create mode 100644 src/test/scala/com/microsoft/azure/synapse/ml/core/test/fuzzing/FuzzingTest.scala delete mode 100644 src/test/scala/com/microsoft/ml/spark/core/test/fuzzing/FuzzingTest.scala rename vw/src/main/python/{mmlspark => synapse/ml}/vw/VowpalWabbitClassificationModel.py (91%) rename vw/src/main/python/{mmlspark => synapse/ml}/vw/VowpalWabbitClassifier.py (85%) rename vw/src/main/python/{mmlspark => synapse/ml}/vw/VowpalWabbitContextualBandit.py (93%) rename vw/src/main/python/{mmlspark => synapse/ml}/vw/VowpalWabbitContextualBanditModel.py (90%) rename vw/src/main/python/{mmlspark => synapse/ml}/vw/VowpalWabbitRegressionModel.py (91%) rename vw/src/main/python/{mmlspark => synapse/ml}/vw/VowpalWabbitRegressor.py (86%) rename vw/src/main/python/{mmlspark => synapse/ml}/vw/__init__.py (100%) rename vw/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/vw/HasNumBits.scala (95%) rename vw/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/vw/HasSumcollisions.scala (93%) rename vw/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/vw/VectorUtils.scala (97%) rename vw/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/vw/VectorZipper.scala (85%) rename vw/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/vw/VowpalWabbitBase.scala (98%) rename vw/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/vw/VowpalWabbitBaseModel.scala (94%) rename vw/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/vw/VowpalWabbitClassifier.scala (95%) rename vw/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/vw/VowpalWabbitContextualBandit.scala (98%) 
rename vw/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/vw/VowpalWabbitFeaturizer.scala (97%) rename vw/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/vw/VowpalWabbitInteractions.scala (93%) rename vw/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/vw/VowpalWabbitMurmurWithPrefix.scala (98%) rename vw/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/vw/VowpalWabbitRegressor.scala (93%) rename vw/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/vw/VowpalWabbitUtil.scala (98%) rename vw/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/vw/featurizer/BooleanFeaturizer.scala (97%) rename vw/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/vw/featurizer/ElementFeaturizer.scala (87%) rename vw/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/vw/featurizer/Featurizer.scala (88%) rename vw/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/vw/featurizer/MapFeaturizer.scala (96%) rename vw/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/vw/featurizer/MapStringFeaturizer.scala (96%) rename vw/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/vw/featurizer/NumericFeaturizer.scala (97%) rename vw/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/vw/featurizer/SeqFeaturizer.scala (96%) rename vw/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/vw/featurizer/StringFeaturizer.scala (96%) rename vw/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/vw/featurizer/StringSplitFeaturizer.scala (97%) rename vw/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/vw/featurizer/StructFeaturizer.scala (97%) rename vw/src/main/scala/com/microsoft/{ml/spark => azure/synapse/ml}/vw/featurizer/VectorFeaturizer.scala (96%) rename vw/src/test/python/{mmlsparktest => synapsemltest}/vw/__init__.py (100%) rename vw/src/test/python/{mmlsparktest => synapsemltest}/vw/test_vw.py (86%) rename vw/src/test/python/{mmlsparktest => 
synapsemltest}/vw/test_vw_cb.py (97%) rename vw/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/vw/VWContextualBandidSpec.scala (98%) rename vw/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/vw/VerifyVectorZipper.scala (89%) rename vw/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/vw/VerifyVowpalWabbitClassifier.scala (97%) rename vw/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/vw/VerifyVowpalWabbitFeaturizer.scala (98%) rename vw/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/vw/VerifyVowpalWabbitInteractions.scala (91%) rename vw/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/vw/VerifyVowpalWabbitMurmurWithPrefix.scala (95%) rename vw/src/test/scala/com/microsoft/{ml/spark => azure/synapse/ml}/vw/VerifyVowpalWabbitRegressor.scala (96%) diff --git a/.chglog/CHANGELOG.tpl.md b/.chglog/CHANGELOG.tpl.md index 55681a54d2..bbe9499ab0 100644 --- a/.chglog/CHANGELOG.tpl.md +++ b/.chglog/CHANGELOG.tpl.md @@ -27,7 +27,7 @@ {{ end -}} ## Acknowledgements -We would like to acknowledge the developers and contributors, both internal and external who helped create this version of MMLSpark.\n +We would like to acknowledge the developers and contributors, both internal and external who helped create this version of SynapseML.\n {{ end -}} diff --git a/.chglog/config.yml b/.chglog/config.yml index 3ec3535b0e..2ca9ab8a7e 100644 --- a/.chglog/config.yml +++ b/.chglog/config.yml @@ -2,7 +2,7 @@ style: github template: CHANGELOG.tpl.md info: title: CHANGELOG - repository_url: https://github.com/Azure/mmlspark + repository_url: https://github.com/Microsoft/SynapseML options: commit_groups: title_maps: diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index ae80dc38c3..1ba366c52d 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -17,7 +17,7 @@ Steps to reproduce the behavior, code snippets encouraged A clear and concise 
description of what you expected to happen. **Info (please complete the following information):** - - MMLSpark Version: [e.g. v0.17] + - SynapseML Version: [e.g. v0.17] - Spark Version [e.g. 2.4.3] - Spark Platform [e.g. Databricks] @@ -26,7 +26,7 @@ A clear and concise description of what you expected to happen. Please post the stacktrace here if applicable ``` -If the bug pertains to a specific feature please tag the appropriate [CODEOWNER](https://github.com/Azure/mmlspark/blob/master/CODEOWNERS) for better visibility +If the bug pertains to a specific feature please tag the appropriate [CODEOWNER](https://github.com/Microsoft/SynapseML/blob/master/CODEOWNERS) for better visibility **Additional context** Add any other context about the problem here. diff --git a/.github/config.yml b/.github/config.yml index 6f5ed1d450..80a174db86 100644 --- a/.github/config.yml +++ b/.github/config.yml @@ -24,7 +24,7 @@ newPRWelcomeComment: > - `style: Remove nulls from CNTKModel` - `test: Add test coverage for CNTKModel` - Make sure to check out the [developer guide](https://github.com/Azure/mmlspark/blob/master/CONTRIBUTING.md) for guidance on testing your change. + Make sure to check out the [developer guide](https://github.com/Microsoft/SynapseML/blob/master/CONTRIBUTING.md) for guidance on testing your change. # Configuration for first-pr-merge - https://github.com/behaviorbot/first-pr-merge diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 739b6065c4..85ef889def 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,4 +1,4 @@ -## Interested in contributing to MMLSpark? We're excited to work with you. +## Interested in contributing to SynapseML? We're excited to work with you. ### You can contribute in many ways: @@ -32,7 +32,7 @@ this process: #### Implement your contribution -- Fork the MMLSpark repository. +- Fork the SynapseML repository. - Implement your algorithm in Scala, using our wrapper generation mechanism to produce PySpark bindings. 
- Use SparkML `PipelineStage`s so your algorithm can be used as a part of @@ -41,7 +41,7 @@ this process: - Implement model saving and loading by extending SparkML `MLReadable`. - Use good Scala style. - Binary dependencies should be on Maven Central. -- See this [pull request](https://github.com/Azure/mmlspark/pull/22) for an +- See this [pull request](https://github.com/Microsoft/SynapseML/pull/22) for an example contribution. #### Implement tests @@ -65,7 +65,7 @@ this process: - In most cases, you should squash your commits into one. - Open a pull request, and link it to the discussion issue you created earlier. -An MMLSpark core team member will trigger a build to test your changes. +A SynapseML core team member will trigger a build to test your changes. - Fix any build failures. (The pull request will have comments from the build with useful links.) - Wait for code reviews from core team members and others. diff --git a/README.md b/README.md index ba71da6e1a..48bdb37f0d 100644 --- a/README.md +++ b/README.md @@ -1,28 +1,28 @@ -![MMLSpark](https://mmlspark.azureedge.net/icons/mmlspark.svg) +![SynapseML](https://mmlspark.azureedge.net/icons/mmlspark.svg) # Microsoft Machine Learning for Apache Spark -[![Build Status](https://msdata.visualstudio.com/A365/_apis/build/status/microsoft.SynapseML?branchName=master)](https://msdata.visualstudio.com/A365/_build/latest?definitionId=17563&branchName=master) [![codecov](https://codecov.io/gh/Azure/mmlspark/branch/master/graph/badge.svg)](https://codecov.io/gh/Azure/mmlspark) [![Gitter](https://badges.gitter.im/Microsoft/MMLSpark.svg)](https://gitter.im/Microsoft/MMLSpark?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) +[![Build Status](https://msdata.visualstudio.com/A365/_apis/build/status/microsoft.SynapseML?branchName=master)](https://msdata.visualstudio.com/A365/_build/latest?definitionId=17563&branchName=master) 
[![codecov](https://codecov.io/gh/Microsoft/SynapseML/branch/master/graph/badge.svg)](https://codecov.io/gh/Microsoft/SynapseML) [![Gitter](https://badges.gitter.im/Microsoft/MMLSpark.svg)](https://gitter.im/Microsoft/MMLSpark?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) -[![Release Notes](https://img.shields.io/badge/release-notes-blue)](https://github.com/Azure/mmlspark/releases) [![Scala Docs](https://img.shields.io/static/v1?label=api%20docs&message=scala&color=blue&logo=scala)](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/index.html#package) [![PySpark Docs](https://img.shields.io/static/v1?label=api%20docs&message=python&color=blue&logo=python)](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/index.html) [![Academic Paper](https://img.shields.io/badge/academic-paper-7fdcf7)](https://arxiv.org/abs/1810.08744) +[![Release Notes](https://img.shields.io/badge/release-notes-blue)](https://github.com/Microsoft/SynapseML/releases) [![Scala Docs](https://img.shields.io/static/v1?label=api%20docs&message=scala&color=blue&logo=scala)](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/index.html#package) [![PySpark Docs](https://img.shields.io/static/v1?label=api%20docs&message=python&color=blue&logo=python)](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/index.html) [![Academic Paper](https://img.shields.io/badge/academic-paper-7fdcf7)](https://arxiv.org/abs/1810.08744) -[![Version](https://img.shields.io/badge/version-1.0.0--rc4-blue)](https://github.com/Azure/mmlspark/releases) [![Snapshot Version](https://mmlspark.blob.core.windows.net/icons/badges/master_version3.svg)](#sbt) +[![Version](https://img.shields.io/badge/version-1.0.0--rc4-blue)](https://github.com/Microsoft/SynapseML/releases) [![Snapshot Version](https://mmlspark.blob.core.windows.net/icons/badges/master_version3.svg)](#sbt) -MMLSpark is an ecosystem of tools aimed towards expanding the distributed computing framework +SynapseML is an 
ecosystem of tools aimed towards expanding the distributed computing framework [Apache Spark](https://github.com/apache/spark) in several new directions. -MMLSpark adds many deep learning and data science tools to the Spark ecosystem, +SynapseML adds many deep learning and data science tools to the Spark ecosystem, including seamless integration of Spark Machine Learning pipelines with [Microsoft Cognitive Toolkit (CNTK)](https://github.com/Microsoft/CNTK), [LightGBM](https://github.com/Microsoft/LightGBM) and [OpenCV](http://www.opencv.org/). These tools enable powerful and highly-scalable predictive and analytical models for a variety of datasources. -MMLSpark also brings new networking capabilities to the Spark Ecosystem. With the HTTP on Spark project, users -can embed **any** web service into their SparkML models. In this vein, MMLSpark provides easy to use +SynapseML also brings new networking capabilities to the Spark Ecosystem. With the HTTP on Spark project, users +can embed **any** web service into their SparkML models. In this vein, SynapseML provides easy to use SparkML transformers for a wide variety of [Microsoft Cognitive Services](https://azure.microsoft.com/en-us/services/cognitive-services/). For production grade deployment, the Spark Serving project enables high throughput, sub-millisecond latency web services, backed by your Spark cluster. -MMLSpark requires Scala 2.12, Spark 3.0+, and Python 3.6+. +SynapseML requires Scala 2.12, Spark 3.0+, and Python 3.6+. See the API documentation [for Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/index.html#package) and [for PySpark](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/index.html). 
@@ -60,7 +60,7 @@ PySpark](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/index.htm | | | | |:--:|:--:|:--:| -| **Isolation Forest on Spark** | [**CyberML**](https://github.com/Azure/mmlspark/blob/master/notebooks/CyberML%20-%20Anomalous%20Access%20Detection.ipynb) | **Conditional KNN** | +| **Isolation Forest on Spark** | [**CyberML**](https://github.com/Microsoft/SynapseML/blob/master/notebooks/CyberML%20-%20Anomalous%20Access%20Detection.ipynb) | **Conditional KNN** | | Distributed Nonlinear Outlier Detection | Machine Learning Tools for Cyber Security | Scalable KNN Models with Conditional Queries | @@ -71,7 +71,7 @@ PySpark](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/index.htm - Fit a LightGBM classification or regression model on a biochemical dataset ([example 3]), to learn more check out the [LightGBM documentation page](docs/lightgbm.md). -- Deploy a deep network as a distributed web service with [MMLSpark +- Deploy a deep network as a distributed web service with [SynapseML Serving](docs/mmlspark-serving.md) - Use web services in Spark with [HTTP on Apache Spark](docs/http.md) - Use Bi-directional LSTMs from Keras for medical entity extraction @@ -97,7 +97,7 @@ See our [notebooks](notebooks/) for all examples. [example 4]: notebooks/TextAnalytics%20-%20Amazon%20Book%20Reviews.ipynb "Amazon Book Reviews - TextFeaturizer" -[example 5]: notebooks/HyperParameterTuning%20-%20Fighting%20Breast%20Cancer.ipynb "Hyperparameter Tuning with MMLSpark" +[example 5]: notebooks/HyperParameterTuning%20-%20Fighting%20Breast%20Cancer.ipynb "Hyperparameter Tuning with SynapseML" [example 6]: notebooks/DeepLearning%20-%20CIFAR10%20Convolutional%20Network.ipynb "CIFAR10 CNTK CNN Evaluation" @@ -124,9 +124,9 @@ classify images in the CIFAR-10 dataset. View the whole source code in notebook ```python ... 
-import mmlspark +import synapse.ml # Initialize CNTKModel and define input and output columns -cntkModel = mmlspark.cntk.CNTKModel() \ +cntkModel = synapse.ml.cntk.CNTKModel() \ .setInputCol("images").setOutputCol("output") \ .setModelLocation(modelFile) # Train on dataset with internal spark pipeline @@ -134,7 +134,7 @@ scoredImages = cntkModel.transform(imagesWithLabels) ... ``` -See [other sample notebooks](notebooks/) as well as the MMLSpark +See [other sample notebooks](notebooks/) as well as the SynapseML documentation for [Scala](http://mmlspark.azureedge.net/docs/scala/) and [PySpark](http://mmlspark.azureedge.net/docs/pyspark/). @@ -142,17 +142,17 @@ documentation for [Scala](http://mmlspark.azureedge.net/docs/scala/) and ### Python -To try out MMLSpark on a Python (or Conda) installation you can get Spark +To try out SynapseML on a Python (or Conda) installation you can get Spark installed via pip with `pip install pyspark`. You can then use `pyspark` as in the above example, or from python: ```python import pyspark spark = pyspark.sql.SparkSession.builder.appName("MyApp") \ - .config("spark.jars.packages", "com.microsoft.ml.spark:mmlspark:1.0.0-rc4") \ + .config("spark.jars.packages", "com.microsoft.azure:synapseml:1.0.0-rc4") \ .config("spark.jars.repositories", "https://mmlspark.azureedge.net/maven") \ .getOrCreate() -import mmlspark +import synapse.ml ``` ### SBT @@ -161,70 +161,70 @@ If you are building a Spark application in Scala, add the following lines to your `build.sbt`: ```scala -resolvers += "MMLSpark" at "https://mmlspark.azureedge.net/maven" -libraryDependencies += "com.microsoft.ml.spark" %% "mmlspark" % "1.0.0-rc4" +resolvers += "SynapseML" at "https://mmlspark.azureedge.net/maven" +libraryDependencies += "com.microsoft.azure" %% "synapseml" % "1.0.0-rc4" ``` ### Spark package -MMLSpark can be conveniently installed on existing Spark clusters via the +SynapseML can be conveniently installed on existing Spark clusters via the `--packages` 
option, examples: ```bash -spark-shell --packages com.microsoft.ml.spark:mmlspark:1.0.0-rc4 -pyspark --packages com.microsoft.ml.spark:mmlspark:1.0.0-rc4 -spark-submit --packages com.microsoft.ml.spark:mmlspark:1.0.0-rc4 MyApp.jar +spark-shell --packages com.microsoft.azure:synapseml:1.0.0-rc4 +pyspark --packages com.microsoft.azure:synapseml:1.0.0-rc4 +spark-submit --packages com.microsoft.azure:synapseml:1.0.0-rc4 MyApp.jar ``` -This can be used in other Spark contexts too. For example, you can use MMLSpark +This can be used in other Spark contexts too. For example, you can use SynapseML in [AZTK](https://github.com/Azure/aztk/) by [adding it to the `.aztk/spark-defaults.conf` file](https://github.com/Azure/aztk/wiki/PySpark-on-Azure-with-AZTK#optional-set-up-mmlspark). ### Databricks -To install MMLSpark on the [Databricks +To install SynapseML on the [Databricks cloud](http://community.cloud.databricks.com), create a new [library from Maven coordinates](https://docs.databricks.com/user-guide/libraries.html#libraries-from-maven-pypi-or-spark-packages) in your workspace. -For the coordinates use: `com.microsoft.ml.spark:mmlspark:1.0.0-rc4` +For the coordinates use: `com.microsoft.azure:synapseml:1.0.0-rc4` with the resolver: `https://mmlspark.azureedge.net/maven`. Ensure this library is attached to your target cluster(s). Finally, ensure that your Spark cluster has at least Spark 3.12 and Scala 2.12. -You can use MMLSpark in both your Scala and PySpark notebooks. To get started with our example notebooks import the following databricks archive: +You can use SynapseML in both your Scala and PySpark notebooks. 
To get started with our example notebooks import the following databricks archive: -`https://mmlspark.blob.core.windows.net/dbcs/MMLSparkExamplesv1.0.0-rc4.dbc` +`https://mmlspark.blob.core.windows.net/dbcs/SynapseMLExamplesv1.0.0-rc4.dbc` ### Apache Livy and HDInsight -To install MMLSpark from within a Jupyter notebook served by Apache Livy the following configure magic can be used. You will need to start a new session after this configure cell is executed. +To install SynapseML from within a Jupyter notebook served by Apache Livy the following configure magic can be used. You will need to start a new session after this configure cell is executed. Excluding certain packages from the library may be necessary due to current issues with Livy 0.5 ``` %%configure -f { - "name": "mmlspark", + "name": "synapseml", "conf": { - "spark.jars.packages": "com.microsoft.ml.spark:mmlspark:1.0.0-rc4", + "spark.jars.packages": "com.microsoft.azure:synapseml:1.0.0-rc4", "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12" } } ``` -In Azure Synapse, "spark.yarn.user.classpath.first" should be set to "true" to override the existing MMLSpark packages +In Azure Synapse, "spark.yarn.user.classpath.first" should be set to "true" to override the existing SynapseML packages ``` %%configure -f { - "name": "mmlspark", + "name": "synapseml", "conf": { - "spark.jars.packages": "com.microsoft.ml.spark:mmlspark:1.0.0-rc4", + "spark.jars.packages": "com.microsoft.azure:synapseml:1.0.0-rc4", "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12", "spark.yarn.user.classpath.first": "true" @@ -234,7 +234,7 @@ In Azure Synapse, "spark.yarn.user.classpath.first" should be set to "true" to o 
### Docker -The easiest way to evaluate MMLSpark is via our pre-built Docker container. To +The easiest way to evaluate SynapseML is via our pre-built Docker container. To do so, run the following command: ```bash @@ -249,7 +249,7 @@ notebooks. See the [documentation](docs/docker.md) for more on Docker use. ### GPU VM Setup -MMLSpark can be used to train deep learning models on GPU nodes from a Spark +SynapseML can be used to train deep learning models on GPU nodes from a Spark application. See the instructions for [setting up an Azure GPU VM](docs/gpu-setup.md). @@ -257,17 +257,17 @@ VM](docs/gpu-setup.md). ### Building from source -MMLSpark has recently transitioned to a new build infrastructure. +SynapseML has recently transitioned to a new build infrastructure. For detailed developer docs please see the [Developer Readme](docs/developer-readme.md) -If you are an existing mmlspark developer, you will need to reconfigure your +If you are an existing synapseml developer, you will need to reconfigure your development setup. We now support platform independent development and better integrate with intellij and SBT. If you encounter issues please reach out to our support email! ### R (Beta) -To try out MMLSpark using the R autogenerated wrappers [see our +To try out SynapseML using the R autogenerated wrappers [see our instructions](docs/R-setup.md). Note: This feature is still under development and some necessary custom wrappers may be missing. @@ -277,9 +277,9 @@ and some necessary custom wrappers may be missing. 
- [Conditional Image Retrieval](https://arxiv.org/abs/2007.07177) -- [MMLSpark: Unifying Machine Learning Ecosystems at Massive Scales](https://arxiv.org/abs/1810.08744) +- [SynapseML: Unifying Machine Learning Ecosystems at Massive Scales](https://arxiv.org/abs/1810.08744) -- [Flexible and Scalable Deep Learning with MMLSpark](https://arxiv.org/abs/1804.04031) +- [Flexible and Scalable Deep Learning with SynapseML](https://arxiv.org/abs/1804.04031) ## Learn More @@ -287,7 +287,7 @@ and some necessary custom wrappers may be missing. - Watch our keynote demos at [the Spark+AI Summit 2019], [the Spark+AI European Summit 2018], and [the Spark+AI Summit 2018]. -- See how MMLSpark is used to [help endangered species]. +- See how SynapseML is used to [help endangered species]. - Explore generative adversarial artwork in [our collaboration with The MET and MIT]. @@ -301,7 +301,7 @@ and some necessary custom wrappers may be missing. [the Spark+AI European Summit 2018]: https://youtu.be/N3ozCZXeOeU?t=472 -[our paper]: https://arxiv.org/abs/1804.04031 "Flexible and Scalable Deep Learning with MMLSpark" +[our paper]: https://arxiv.org/abs/1804.04031 "Flexible and Scalable Deep Learning with SynapseML" [help endangered species]: https://www.microsoft.com/en-us/ai/ai-lab-stories?activetab=pivot1:primaryr3 "Identifying snow leopards with AI" @@ -309,9 +309,9 @@ and some necessary custom wrappers may be missing. 
[our collaboration with Apache Spark]: https://blogs.technet.microsoft.com/machinelearning/2018/03/05/image-data-support-in-apache-spark/ "Image Data Support in Apache Spark" -[MMLSpark in Azure Machine Learning]: https://docs.microsoft.com/en-us/azure/machine-learning/preview/how-to-use-mmlspark "How to Use Microsoft Machine Learning Library for Apache Spark" +[SynapseML in Azure Machine Learning]: https://docs.microsoft.com/en-us/azure/machine-learning/preview/how-to-use-mmlspark "How to Use Microsoft Machine Learning Library for Apache Spark" -[MMLSpark at the Spark Summit]: https://databricks.com/session/mmlspark-lessons-from-building-a-sparkml-compatible-machine-learning-library-for-apache-spark "MMLSpark: Lessons from Building a SparkML-Compatible Machine Learning Library for Apache Spark" +[SynapseML at the Spark Summit]: https://databricks.com/session/mmlspark-lessons-from-building-a-sparkml-compatible-machine-learning-library-for-apache-spark "MMLSpark: Lessons from Building a SparkML-Compatible Machine Learning Library for Apache Spark" ## Contributing & feedback @@ -338,7 +338,7 @@ Issue](https://help.github.com/articles/creating-an-issue/). 
- [Recommenders](https://github.com/Microsoft/Recommenders) -- [JPMML-SparkML plugin for converting MMLSpark LightGBM models to +- [JPMML-SparkML plugin for converting SynapseML LightGBM models to PMML](https://github.com/alipay/jpmml-sparkml-lightgbm) - [Microsoft Cognitive Toolkit](https://github.com/Microsoft/CNTK) diff --git a/build.sbt b/build.sbt index 0ba5596e9b..e700ae718b 100644 --- a/build.sbt +++ b/build.sbt @@ -8,10 +8,10 @@ import scala.xml.transform.{RewriteRule, RuleTransformer} import BuildUtils._ import xerial.sbt.Sonatype._ -val condaEnvName = "mmlspark" +val condaEnvName = "synapseml" val sparkVersion = "3.1.2" -name := "mmlspark" -ThisBuild / organization := "com.microsoft.ml.spark" +name := "synapseml" +ThisBuild / organization := "com.microsoft.azure" ThisBuild / scalaVersion := "2.12.10" val scalaMajorVersion = 2.12 @@ -118,7 +118,7 @@ generatePythonDoc := { ).value val targetDir = artifactPath.in(packageBin).in(Compile).in(root).value.getParentFile val codegenDir = join(targetDir, "generated") - val dir = join(codegenDir, "src", "python", "mmlspark") + val dir = join(codegenDir, "src", "python", "synapse","ml") join(dir, "__init__.py").createNewFile() runCmd(activateCondaEnv.value ++ Seq("sphinx-apidoc", "-f", "-o", "doc", "."), dir) runCmd(activateCondaEnv.value ++ Seq("sphinx-build", "-b", "html", "doc", "../../../doc/pyspark"), dir) @@ -145,7 +145,7 @@ publishDocs := { uploadToBlob(unifiedDocDir.toString, version.value, "docs") } -val release = TaskKey[Unit]("release", "publish the library to mmlspark blob") +val release = TaskKey[Unit]("release", "publish the library to synapseml blob") release := Def.taskDyn { val v = isSnapshot.value if (!v) { @@ -159,7 +159,7 @@ release := Def.taskDyn { } } -val publishBadges = TaskKey[Unit]("publishBadges", "publish badges to mmlspark blob") +val publishBadges = TaskKey[Unit]("publishBadges", "publish badges to synapseml blob") publishBadges := { def enc(s: String): String = { 
s.replaceAllLiterally("_", "__").replaceAllLiterally(" ", "_").replaceAllLiterally("-", "--") @@ -205,8 +205,8 @@ lazy val core = (project in file("core")) sbtVersion, baseDirectory ), - name := "mmlspark-core", - buildInfoPackage := "com.microsoft.ml.spark.build", + name := "synapseml-core", + buildInfoPackage := "com.microsoft.azure.synapse.ml.build", ): _*) lazy val deepLearning = (project in file("deep-learning")) @@ -217,7 +217,7 @@ lazy val deepLearning = (project in file("deep-learning")) "com.microsoft.cntk" % "cntk" % "2.4", "com.microsoft.onnxruntime" % "onnxruntime_gpu" % "1.8.1" ), - name := "mmlspark-deep-learning", + name := "synapseml-deep-learning", ): _*) lazy val lightgbm = (project in file("lightgbm")) @@ -225,7 +225,7 @@ lazy val lightgbm = (project in file("lightgbm")) .dependsOn(core % "test->test;compile->compile") .settings(settings ++ Seq( libraryDependencies += ("com.microsoft.ml.lightgbm" % "lightgbmlib" % "3.2.110"), - name := "mmlspark-lightgbm" + name := "synapseml-lightgbm" ): _*) lazy val vw = (project in file("vw")) @@ -233,7 +233,7 @@ lazy val vw = (project in file("vw")) .dependsOn(core % "test->test;compile->compile") .settings(settings ++ Seq( libraryDependencies += ("com.github.vowpalwabbit" % "vw-jni" % "8.9.1"), - name := "mmlspark-vw" + name := "synapseml-vw" ): _*) lazy val cognitive = (project in file("cognitive")) @@ -242,7 +242,7 @@ lazy val cognitive = (project in file("cognitive")) .settings(settings ++ Seq( libraryDependencies += ("com.microsoft.cognitiveservices.speech" % "client-sdk" % "1.14.0"), resolvers += speechResolver, - name := "mmlspark-cognitive" + name := "synapseml-cognitive" ): _*) lazy val opencv = (project in file("opencv")) @@ -250,7 +250,7 @@ lazy val opencv = (project in file("opencv")) .dependsOn(core % "test->test;compile->compile") .settings(settings ++ Seq( libraryDependencies += ("org.openpnp" % "opencv" % "3.2.0-1"), - name := "mmlspark-opencv" + name := "synapseml-opencv" ): _*) lazy val root 
= (project in file(".")) @@ -265,7 +265,7 @@ lazy val root = (project in file(".")) .enablePlugins(ScalaUnidocPlugin && SbtPlugin) .disablePlugins(CodegenPlugin) .settings(settings ++ Seq( - name := "mmlspark", + name := "synapseml", )) val setupTask = TaskKey[Unit]("setup", "set up library for intellij") @@ -278,8 +278,8 @@ setupTask := { } sonatypeProjectHosting := Some( - GitHubHosting("Azure", "MMLSpark", "mmlspark-support@microsot.com")) -homepage := Some(url("https://github.com/Azure/mmlspark")) + GitHubHosting("Azure", "SynapseML", "mmlspark-support@microsoft.com")) +homepage := Some(url("https://github.com/Microsoft/SynapseML")) developers := List( Developer("mhamilton723", "Mark Hamilton", "mmlspark-support@microsoft.com", url("https://github.com/mhamilton723")), @@ -289,7 +289,7 @@ developers := List( "mmlspark-support@microsoft.com", url("https://github.com/drdarshan")) ) -licenses += ("MIT", url("https://github.com/Azure/mmlspark/blob/master/LICENSE")) +licenses += ("MIT", url("https://github.com/Microsoft/SynapseML/blob/master/LICENSE")) credentials += Credentials("Sonatype Nexus Repository Manager", "oss.sonatype.org", diff --git a/cognitive/src/main/python/mmlspark/cognitive/AzureSearchWriter.py b/cognitive/src/main/python/synapse/ml/cognitive/AzureSearchWriter.py similarity index 82% rename from cognitive/src/main/python/mmlspark/cognitive/AzureSearchWriter.py rename to cognitive/src/main/python/synapse/ml/cognitive/AzureSearchWriter.py index e7fbc5a72d..8723ca793d 100644 --- a/cognitive/src/main/python/mmlspark/cognitive/AzureSearchWriter.py +++ b/cognitive/src/main/python/synapse/ml/cognitive/AzureSearchWriter.py @@ -14,14 +14,14 @@ def streamToAzureSearch(df, **options): jvm = SparkContext.getOrCreate()._jvm - writer = jvm.com.microsoft.ml.spark.cognitive.AzureSearchWriter + writer = jvm.com.microsoft.azure.synapse.ml.cognitive.AzureSearchWriter return writer.stream(df._jdf, options) setattr(pyspark.sql.DataFrame, 'streamToAzureSearch', 
streamToAzureSearch) def writeToAzureSearch(df, **options): jvm = SparkContext.getOrCreate()._jvm - writer = jvm.com.microsoft.ml.spark.cognitive.AzureSearchWriter + writer = jvm.com.microsoft.azure.synapse.ml.cognitive.AzureSearchWriter writer.write(df._jdf, options) setattr(pyspark.sql.DataFrame, 'writeToAzureSearch', writeToAzureSearch) diff --git a/cognitive/src/main/python/mmlspark/cognitive/BingImageSearch.py b/cognitive/src/main/python/synapse/ml/cognitive/BingImageSearch.py similarity index 86% rename from cognitive/src/main/python/mmlspark/cognitive/BingImageSearch.py rename to cognitive/src/main/python/synapse/ml/cognitive/BingImageSearch.py index 751d8f8e7f..08440ece6a 100644 --- a/cognitive/src/main/python/mmlspark/cognitive/BingImageSearch.py +++ b/cognitive/src/main/python/synapse/ml/cognitive/BingImageSearch.py @@ -6,8 +6,8 @@ if sys.version >= '3': basestring = str -from mmlspark.cognitive._BingImageSearch import _BingImageSearch -from mmlspark.stages import Lambda +from synapse.ml.cognitive._BingImageSearch import _BingImageSearch +from synapse.ml.stages import Lambda from pyspark.ml.common import inherit_doc from pyspark.sql import SparkSession @@ -32,10 +32,10 @@ def setMarketCol(self, value): @staticmethod def getUrlTransformer(imageCol, urlCol): - bis = SparkSession.builder.getOrCreate()._jvm.com.microsoft.ml.spark.cognitive.BingImageSearch + bis = SparkSession.builder.getOrCreate()._jvm.com.microsoft.azure.synapse.ml.cognitive.BingImageSearch return Lambda._from_java(bis.getUrlTransformer(imageCol,urlCol)) @staticmethod def downloadFromUrls(pathCol, bytesCol, concurrency, timeout): - bis = SparkSession.builder.getOrCreate()._jvm.com.microsoft.ml.spark.cognitive.BingImageSearch + bis = SparkSession.builder.getOrCreate()._jvm.com.microsoft.azure.synapse.ml.cognitive.BingImageSearch return Lambda._from_java(bis.downloadFromUrls(pathCol, bytesCol, concurrency, timeout)) diff --git a/cognitive/src/main/python/mmlspark/cognitive/__init__.py 
b/cognitive/src/main/python/synapse/ml/cognitive/__init__.py similarity index 100% rename from cognitive/src/main/python/mmlspark/cognitive/__init__.py rename to cognitive/src/main/python/synapse/ml/cognitive/__init__.py diff --git a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/AnomalyDetection.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/AnomalyDetection.scala similarity index 96% rename from cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/AnomalyDetection.scala rename to cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/AnomalyDetection.scala index 5e321a48ca..4f9dc2112c 100644 --- a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/AnomalyDetection.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/AnomalyDetection.scala @@ -1,13 +1,13 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.cognitive +package com.microsoft.azure.synapse.ml.cognitive -import com.microsoft.ml.spark.cognitive.AnomalyDetectorProtocol._ -import com.microsoft.ml.spark.core.contracts.HasOutputCol -import com.microsoft.ml.spark.core.schema.DatasetExtensions -import com.microsoft.ml.spark.io.http.ErrorUtils -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.core.contracts.HasOutputCol +import com.microsoft.azure.synapse.ml.core.schema.DatasetExtensions +import com.microsoft.azure.synapse.ml.io.http.ErrorUtils +import com.microsoft.azure.synapse.ml.logging.BasicLogging +import AnomalyDetectorProtocol._ import org.apache.http.entity.{AbstractHttpEntity, StringEntity} import org.apache.spark.injections.UDFUtils import org.apache.spark.ml.ComplexParamsReadable diff --git a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/AnomalyDetectorSchemas.scala 
b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/AnomalyDetectorSchemas.scala similarity index 95% rename from cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/AnomalyDetectorSchemas.scala rename to cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/AnomalyDetectorSchemas.scala index 43c9062361..46ecd86fae 100644 --- a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/AnomalyDetectorSchemas.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/AnomalyDetectorSchemas.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.cognitive +package com.microsoft.azure.synapse.ml.cognitive -import com.microsoft.ml.spark.core.schema.SparkBindings +import com.microsoft.azure.synapse.ml.core.schema.SparkBindings import spray.json.DefaultJsonProtocol._ import spray.json.RootJsonFormat diff --git a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/AudioStreams.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/AudioStreams.scala similarity index 98% rename from cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/AudioStreams.scala rename to cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/AudioStreams.scala index 8bbbb236fa..152c665b62 100644 --- a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/AudioStreams.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/AudioStreams.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark +package com.microsoft.azure.synapse.ml.cognitive import java.io.InputStream diff --git a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/AzureSearch.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/AzureSearch.scala similarity index 96% rename from cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/AzureSearch.scala rename to cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/AzureSearch.scala index b405bb13b0..f7b4c742d6 100644 --- a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/AzureSearch.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/AzureSearch.scala @@ -1,27 +1,26 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.cognitive +package com.microsoft.azure.synapse.ml.cognitive -import com.microsoft.ml.spark.io.http.{ErrorUtils, SimpleHTTPTransformer} -import com.microsoft.ml.spark.io.powerbi.StreamMaterializer -import com.microsoft.ml.spark.stages.{FixedMiniBatchTransformer, HasBatchSize, Lambda} +import com.microsoft.azure.synapse.ml.cognitive.AzureSearchProtocol._ +import com.microsoft.azure.synapse.ml.io.http.{ErrorUtils, SimpleHTTPTransformer} +import com.microsoft.azure.synapse.ml.io.powerbi.StreamMaterializer +import com.microsoft.azure.synapse.ml.logging.BasicLogging +import com.microsoft.azure.synapse.ml.stages.{FixedMiniBatchTransformer, HasBatchSize, Lambda} import org.apache.http.Consts import org.apache.http.entity.{AbstractHttpEntity, ContentType, StringEntity} import org.apache.log4j.{LogManager, Logger} +import org.apache.spark.injections.UDFUtils import org.apache.spark.internal.{Logging => SLogging} import org.apache.spark.ml.param._ import org.apache.spark.ml.util._ import org.apache.spark.ml.{ComplexParamsReadable, NamespaceInjections, PipelineModel} -import 
org.apache.spark.sql.functions.{col, expr, struct, to_json, udf} +import org.apache.spark.sql.functions.{col, expr, struct, to_json} import org.apache.spark.sql.streaming.DataStreamWriter import org.apache.spark.sql.types._ import org.apache.spark.sql.{DataFrame, Dataset, Row} -import com.microsoft.ml.spark.cognitive.AzureSearchProtocol._ import spray.json._ -import DefaultJsonProtocol._ -import com.microsoft.ml.spark.logging.BasicLogging -import org.apache.spark.injections.UDFUtils import scala.collection.JavaConverters._ diff --git a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/AzureSearchAPI.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/AzureSearchAPI.scala similarity index 97% rename from cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/AzureSearchAPI.scala rename to cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/AzureSearchAPI.scala index 9e66cd056b..9a8b478793 100644 --- a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/AzureSearchAPI.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/AzureSearchAPI.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.cognitive +package com.microsoft.azure.synapse.ml.cognitive -import com.microsoft.ml.spark.cognitive.AzureSearchProtocol._ -import com.microsoft.ml.spark.cognitive.RESTHelpers._ +import AzureSearchProtocol._ +import RESTHelpers._ import org.apache.commons.io.IOUtils import org.apache.http.client.methods.{HttpGet, HttpPost} import org.apache.http.entity.StringEntity diff --git a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/AzureSearchSchemas.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/AzureSearchSchemas.scala similarity index 95% rename from cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/AzureSearchSchemas.scala rename to cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/AzureSearchSchemas.scala index af31ea7f1d..adb61926d2 100644 --- a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/AzureSearchSchemas.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/AzureSearchSchemas.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.cognitive +package com.microsoft.azure.synapse.ml.cognitive -import com.microsoft.ml.spark.core.schema.SparkBindings +import com.microsoft.azure.synapse.ml.core.schema.SparkBindings import spray.json.DefaultJsonProtocol._ import spray.json.{JsonFormat, RootJsonFormat} diff --git a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/BingImageSearch.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/BingImageSearch.scala similarity index 98% rename from cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/BingImageSearch.scala rename to cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/BingImageSearch.scala index 60e2aa00a3..f6cbf239e0 100644 --- a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/BingImageSearch.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/BingImageSearch.scala @@ -1,11 +1,11 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.cognitive +package com.microsoft.azure.synapse.ml.cognitive -import com.microsoft.ml.spark.core.utils.AsyncUtils -import com.microsoft.ml.spark.logging.BasicLogging -import com.microsoft.ml.spark.stages.Lambda +import com.microsoft.azure.synapse.ml.core.utils.AsyncUtils +import com.microsoft.azure.synapse.ml.logging.BasicLogging +import com.microsoft.azure.synapse.ml.stages.Lambda import org.apache.commons.io.IOUtils import org.apache.http.client.methods.{HttpGet, HttpRequestBase} import org.apache.http.entity.AbstractHttpEntity diff --git a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/CognitiveServiceBase.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/CognitiveServiceBase.scala similarity index 96% rename from cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/CognitiveServiceBase.scala rename to cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/CognitiveServiceBase.scala index 56f87eba4f..4f684687bd 100644 --- a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/CognitiveServiceBase.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/CognitiveServiceBase.scala @@ -1,14 +1,14 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.cognitive - -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.contracts.HasOutputCol -import com.microsoft.ml.spark.core.schema.DatasetExtensions -import com.microsoft.ml.spark.io.http._ -import com.microsoft.ml.spark.logging.BasicLogging -import com.microsoft.ml.spark.stages.{DropColumns, Lambda} +package com.microsoft.azure.synapse.ml.cognitive + +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts.HasOutputCol +import com.microsoft.azure.synapse.ml.core.schema.DatasetExtensions +import com.microsoft.azure.synapse.ml.io.http._ +import com.microsoft.azure.synapse.ml.logging.BasicLogging +import com.microsoft.azure.synapse.ml.stages.{DropColumns, Lambda} import org.apache.http.NameValuePair import org.apache.http.client.methods.{HttpEntityEnclosingRequestBase, HttpPost, HttpRequestBase} import org.apache.http.client.utils.URLEncodedUtils diff --git a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/CognitiveServiceSchemas.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/CognitiveServiceSchemas.scala similarity index 84% rename from cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/CognitiveServiceSchemas.scala rename to cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/CognitiveServiceSchemas.scala index faf1735976..786cf2a31b 100644 --- a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/CognitiveServiceSchemas.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/CognitiveServiceSchemas.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.cognitive +package com.microsoft.azure.synapse.ml.cognitive case class Rectangle(left: Int, top: Int, width: Int, height: Int) diff --git a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/ComputerVision.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/ComputerVision.scala similarity index 97% rename from cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/ComputerVision.scala rename to cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/ComputerVision.scala index 238242825a..f990e82a8e 100644 --- a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/ComputerVision.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/ComputerVision.scala @@ -1,13 +1,13 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.cognitive +package com.microsoft.azure.synapse.ml.cognitive -import com.microsoft.ml.spark.build.BuildInfo -import com.microsoft.ml.spark.io.http.HandlingUtils._ -import com.microsoft.ml.spark.io.http._ -import com.microsoft.ml.spark.logging.BasicLogging -import com.microsoft.ml.spark.stages.UDFTransformer +import com.microsoft.azure.synapse.ml.io.http.HandlingUtils._ +import com.microsoft.azure.synapse.ml.io.http._ +import com.microsoft.azure.synapse.ml.logging.BasicLogging +import com.microsoft.azure.synapse.ml.stages.UDFTransformer +import com.microsoft.azure.synapse.ml.build.BuildInfo import org.apache.commons.io.IOUtils import org.apache.http.client.methods.{HttpEntityEnclosingRequestBase, HttpGet, HttpRequestBase} import org.apache.http.entity.{AbstractHttpEntity, ByteArrayEntity, ContentType, StringEntity} @@ -216,7 +216,7 @@ trait BasicAsyncReply extends HasAsyncReply { val get = new HttpGet() get.setURI(location) key.foreach(get.setHeader("Ocp-Apim-Subscription-Key", _)) - get.setHeader("User-Agent", 
s"mmlspark/${BuildInfo.version}${HeaderValues.PlatformInfo}") + get.setHeader("User-Agent", s"synapseml/${BuildInfo.version}${HeaderValues.PlatformInfo}") val resp = convertAndClose(sendWithRetries(client, get, getBackoffs)) get.releaseConnection() val status = IOUtils.toString(resp.entity.get.content, "UTF-8") diff --git a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/ComputerVisionSchemas.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/ComputerVisionSchemas.scala similarity index 96% rename from cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/ComputerVisionSchemas.scala rename to cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/ComputerVisionSchemas.scala index 6213eddd41..afe6a278f8 100644 --- a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/ComputerVisionSchemas.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/ComputerVisionSchemas.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.cognitive +package com.microsoft.azure.synapse.ml.cognitive -import com.microsoft.ml.spark.core.schema.SparkBindings +import com.microsoft.azure.synapse.ml.core.schema.SparkBindings case class DSIRResponse(requestId: String, metadata: DSIRMetadata, diff --git a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/DocumentTranslator.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/DocumentTranslator.scala similarity index 93% rename from cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/DocumentTranslator.scala rename to cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/DocumentTranslator.scala index 76ede8bb5a..3730772891 100644 --- a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/DocumentTranslator.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/DocumentTranslator.scala @@ -1,13 +1,13 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.cognitive +package com.microsoft.azure.synapse.ml.cognitive -import com.microsoft.ml.spark.build.BuildInfo -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.io.http.HandlingUtils.{convertAndClose, sendWithRetries} -import com.microsoft.ml.spark.io.http.{HTTPResponseData, HeaderValues} -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.io.http.HandlingUtils.{convertAndClose, sendWithRetries} +import com.microsoft.azure.synapse.ml.io.http.{HTTPResponseData, HeaderValues} +import com.microsoft.azure.synapse.ml.logging.BasicLogging +import com.microsoft.azure.synapse.ml.build.BuildInfo import org.apache.commons.io.IOUtils import org.apache.http.client.methods.HttpGet import org.apache.http.entity.{AbstractHttpEntity, ContentType, StringEntity} @@ -32,7 +32,7 @@ trait DocumentTranslatorAsyncReply extends BasicAsyncReply { val get = new HttpGet() get.setURI(location) key.foreach(get.setHeader("Ocp-Apim-Subscription-Key", _)) - get.setHeader("User-Agent", s"mmlspark/${BuildInfo.version}${HeaderValues.PlatformInfo}") + get.setHeader("User-Agent", s"synapseml/${BuildInfo.version}${HeaderValues.PlatformInfo}") val resp = convertAndClose(sendWithRetries(client, get, getBackoffs)) get.releaseConnection() val status = IOUtils.toString(resp.entity.get.content, "UTF-8") diff --git a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/Face.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/Face.scala similarity index 99% rename from cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/Face.scala rename to cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/Face.scala index d9262ea06f..6abc712935 100644 --- a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/Face.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/Face.scala @@ -1,9 +1,9 @@ // 
Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.cognitive +package com.microsoft.azure.synapse.ml.cognitive -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.http.entity.{AbstractHttpEntity, StringEntity} import org.apache.spark.ml.ComplexParamsReadable import org.apache.spark.ml.param.ServiceParam diff --git a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/FaceSchemas.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/FaceSchemas.scala similarity index 96% rename from cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/FaceSchemas.scala rename to cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/FaceSchemas.scala index be94bb99fc..d37f1eccce 100644 --- a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/FaceSchemas.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/FaceSchemas.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.cognitive +package com.microsoft.azure.synapse.ml.cognitive -import com.microsoft.ml.spark.core.schema.SparkBindings +import com.microsoft.azure.synapse.ml.core.schema.SparkBindings case class Face(faceId: String, faceRectangle: Rectangle, diff --git a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/FormRecognizer.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/FormRecognizer.scala similarity index 98% rename from cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/FormRecognizer.scala rename to cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/FormRecognizer.scala index 4adf873660..0df36440f0 100644 --- a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/FormRecognizer.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/FormRecognizer.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.cognitive +package com.microsoft.azure.synapse.ml.cognitive -import com.microsoft.ml.spark.logging.BasicLogging -import com.microsoft.ml.spark.stages.UDFTransformer +import com.microsoft.azure.synapse.ml.logging.BasicLogging +import com.microsoft.azure.synapse.ml.stages.UDFTransformer import org.apache.http.client.methods.{HttpGet, HttpRequestBase} import org.apache.http.entity.{AbstractHttpEntity, ByteArrayEntity, ContentType, StringEntity} import org.apache.spark.injections.UDFUtils diff --git a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/FormRecognizerSchemas.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/FormRecognizerSchemas.scala similarity index 97% rename from cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/FormRecognizerSchemas.scala rename to cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/FormRecognizerSchemas.scala index 63f0d78104..35d1a32448 100644 --- a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/FormRecognizerSchemas.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/FormRecognizerSchemas.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.cognitive +package com.microsoft.azure.synapse.ml.cognitive -import com.microsoft.ml.spark.core.schema.SparkBindings +import com.microsoft.azure.synapse.ml.core.schema.SparkBindings import spray.json.{DefaultJsonProtocol, RootJsonFormat} object AnalyzeResponse extends SparkBindings[AnalyzeResponse] diff --git a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/ImageSearchSchemas.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/ImageSearchSchemas.scala similarity index 96% rename from cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/ImageSearchSchemas.scala rename to cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/ImageSearchSchemas.scala index 25759e132b..5c5c453ed7 100644 --- a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/ImageSearchSchemas.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/ImageSearchSchemas.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.cognitive +package com.microsoft.azure.synapse.ml.cognitive -import com.microsoft.ml.spark.core.schema.SparkBindings +import com.microsoft.azure.synapse.ml.core.schema.SparkBindings // Bing Schema /*case class BingImagesResponse(`_type`: String, diff --git a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/OCRSchemas.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/OCRSchemas.scala similarity index 93% rename from cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/OCRSchemas.scala rename to cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/OCRSchemas.scala index f29e09bc71..dadb69a6d9 100644 --- a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/OCRSchemas.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/OCRSchemas.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.cognitive +package com.microsoft.azure.synapse.ml.cognitive -import com.microsoft.ml.spark.core.schema.SparkBindings +import com.microsoft.azure.synapse.ml.core.schema.SparkBindings case class OCRResponse(language: String, textAngle: Option[Double], diff --git a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/RESTHelpers.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/RESTHelpers.scala similarity index 98% rename from cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/RESTHelpers.scala rename to cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/RESTHelpers.scala index 01de211a8e..8ee427f911 100644 --- a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/RESTHelpers.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/RESTHelpers.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
See LICENSE in project root for information. -package com.microsoft.ml.spark.cognitive +package com.microsoft.azure.synapse.ml.cognitive import org.apache.commons.io.IOUtils import org.apache.http.client.config.RequestConfig diff --git a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/SpeechAPI.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/SpeechAPI.scala similarity index 93% rename from cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/SpeechAPI.scala rename to cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/SpeechAPI.scala index b240da1a95..376bb814ab 100644 --- a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/SpeechAPI.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/SpeechAPI.scala @@ -1,11 +1,11 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.cognitive +package com.microsoft.azure.synapse.ml.cognitive import java.io.File -import com.microsoft.ml.spark.core.env.StreamUtilities.using +import com.microsoft.azure.synapse.ml.core.env.StreamUtilities.using import org.apache.commons.io.IOUtils import org.apache.http.client.methods.{HttpEntityEnclosingRequestBase, RequestBuilder} import org.apache.http.entity.mime.content.FileBody diff --git a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/SpeechSchemas.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/SpeechSchemas.scala similarity index 95% rename from cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/SpeechSchemas.scala rename to cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/SpeechSchemas.scala index b4c7d661fb..4e8d591114 100644 --- a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/SpeechSchemas.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/SpeechSchemas.scala @@ -1,9 +1,9 @@ // 
Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.cognitive +package com.microsoft.azure.synapse.ml.cognitive -import com.microsoft.ml.spark.core.schema.SparkBindings +import com.microsoft.azure.synapse.ml.core.schema.SparkBindings import spray.json.{DefaultJsonProtocol, RootJsonFormat} case class DetailedSpeechResponse(Confidence: Double, diff --git a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/SpeechToText.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/SpeechToText.scala similarity index 97% rename from cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/SpeechToText.scala rename to cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/SpeechToText.scala index 52706fba99..4f97b9ddf8 100644 --- a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/SpeechToText.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/SpeechToText.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.cognitive +package com.microsoft.azure.synapse.ml.cognitive -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.http.entity.{AbstractHttpEntity, ByteArrayEntity} import org.apache.spark.ml.ComplexParamsReadable import org.apache.spark.ml.param.ServiceParam diff --git a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/SpeechToTextSDK.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/SpeechToTextSDK.scala similarity index 96% rename from cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/SpeechToTextSDK.scala rename to cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/SpeechToTextSDK.scala index af86ef1f9d..7d559bb6a9 100644 --- a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/SpeechToTextSDK.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/SpeechToTextSDK.scala @@ -1,21 +1,20 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.cognitive - +package com.microsoft.azure.synapse.ml.cognitive + +import com.microsoft.azure.synapse.ml.cognitive.SpeechFormat._ +import com.microsoft.azure.synapse.ml.core.contracts.HasOutputCol +import com.microsoft.azure.synapse.ml.core.schema.{DatasetExtensions, SparkBindings} +import com.microsoft.azure.synapse.ml.core.utils.OsUtils +import com.microsoft.azure.synapse.ml.io.http.HasURL +import com.microsoft.azure.synapse.ml.logging.BasicLogging import com.microsoft.cognitiveservices.speech._ import com.microsoft.cognitiveservices.speech.audio._ import com.microsoft.cognitiveservices.speech.transcription.{ Conversation, ConversationTranscriber, ConversationTranscriptionEventArgs, Participant} import com.microsoft.cognitiveservices.speech.util.EventHandler -import com.microsoft.ml.spark.build.BuildInfo -import com.microsoft.ml.spark.cognitive.SpeechFormat._ -import com.microsoft.ml.spark.core.contracts.HasOutputCol -import com.microsoft.ml.spark.core.schema.{DatasetExtensions, SparkBindings} -import com.microsoft.ml.spark.core.utils.OsUtils -import com.microsoft.ml.spark.io.http.HasURL -import com.microsoft.ml.spark.logging.BasicLogging -import com.microsoft.ml.spark.{CompressedStream, WavStream} +import com.microsoft.azure.synapse.ml.build.BuildInfo import org.apache.commons.io.FilenameUtils import org.apache.hadoop.fs.Path import org.apache.spark.broadcast.Broadcast @@ -36,7 +35,6 @@ import java.util.UUID import java.util.concurrent.{LinkedBlockingQueue, TimeUnit} import scala.concurrent.{ExecutionContext, Future, blocking} import scala.language.existentials -import scala.reflect.internal.util.ScalaClassLoader object SpeechToTextSDK extends ComplexParamsReadable[SpeechToTextSDK] @@ -445,7 +443,7 @@ class SpeechToTextSDK(override val uid: String) extends SpeechSDKBase with Basic val recognizer = new SpeechRecognizer(speechConfig, audioConfig) val connection = Connection.fromRecognizer(recognizer) 
connection.setMessageProperty("speech.config", "application", - s"""{"name":"mmlspark", "version": "${BuildInfo.version}"}""") + s"""{"name":"synapseml", "version": "${BuildInfo.version}"}""") val queue = new LinkedBlockingQueue[Option[String]]() def recognizedHandler(s: Any, e: SpeechRecognitionEventArgs): Unit = { @@ -534,7 +532,7 @@ class ConversationTranscription(override val uid: String) extends SpeechSDKBase transcriber.joinConversationAsync(conversation).get() val connection = Connection.fromRecognizer(transcriber) connection.setMessageProperty("speech.config", "application", - s"""{"name":"mmlspark", "version": "${BuildInfo.version}"}""") + s"""{"name":"synapseml", "version": "${BuildInfo.version}"}""") val queue = new LinkedBlockingQueue[Option[String]]() def cleanUp(): Unit = { diff --git a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/TextAnalytics.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/TextAnalytics.scala similarity index 97% rename from cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/TextAnalytics.scala rename to cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/TextAnalytics.scala index 30344728f8..57e9185b9d 100644 --- a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/TextAnalytics.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/TextAnalytics.scala @@ -1,12 +1,12 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.cognitive +package com.microsoft.azure.synapse.ml.cognitive -import com.microsoft.ml.spark.core.schema.DatasetExtensions -import com.microsoft.ml.spark.io.http.SimpleHTTPTransformer -import com.microsoft.ml.spark.logging.BasicLogging -import com.microsoft.ml.spark.stages.{DropColumns, Lambda, UDFTransformer} +import com.microsoft.azure.synapse.ml.core.schema.DatasetExtensions +import com.microsoft.azure.synapse.ml.io.http.SimpleHTTPTransformer +import com.microsoft.azure.synapse.ml.logging.BasicLogging +import com.microsoft.azure.synapse.ml.stages.{DropColumns, Lambda, UDFTransformer} import org.apache.http.client.methods.{HttpPost, HttpRequestBase} import org.apache.http.entity.{AbstractHttpEntity, StringEntity} import org.apache.spark.injections.UDFUtils @@ -66,7 +66,7 @@ abstract class TextAnalyticsBase(override val uid: String) extends CognitiveServ } else if (getValue(row, text).forall(Option(_).isEmpty)) { None } else { - import com.microsoft.ml.spark.cognitive.TAJSONFormat._ + import TAJSONFormat._ val post = new HttpPost(getUrl) getValueOpt(row, subscriptionKey).foreach(post.setHeader("Ocp-Apim-Subscription-Key", _)) post.setHeader("Content-Type", "application/json") diff --git a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/TextAnalyticsSchemas.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/TextAnalyticsSchemas.scala similarity index 97% rename from cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/TextAnalyticsSchemas.scala rename to cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/TextAnalyticsSchemas.scala index 5c0bc3a464..78bfd6b4ec 100644 --- a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/TextAnalyticsSchemas.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/TextAnalyticsSchemas.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
See LICENSE in project root for information. -package com.microsoft.ml.spark.cognitive +package com.microsoft.azure.synapse.ml.cognitive -import com.microsoft.ml.spark.core.schema.SparkBindings +import com.microsoft.azure.synapse.ml.core.schema.SparkBindings import spray.json.RootJsonFormat // General Text Analytics Schemas diff --git a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/TextAnalyticsSchemasV2.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/TextAnalyticsSchemasV2.scala similarity index 95% rename from cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/TextAnalyticsSchemasV2.scala rename to cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/TextAnalyticsSchemasV2.scala index d909f3d9c2..9389fda9c9 100644 --- a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/TextAnalyticsSchemasV2.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/TextAnalyticsSchemasV2.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.cognitive +package com.microsoft.azure.synapse.ml.cognitive -import com.microsoft.ml.spark.core.schema.SparkBindings +import com.microsoft.azure.synapse.ml.core.schema.SparkBindings // Sentiment schemas diff --git a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/TextTranslator.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/TextTranslator.scala similarity index 97% rename from cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/TextTranslator.scala rename to cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/TextTranslator.scala index 2eb1f6adf6..798a95eb09 100644 --- a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/TextTranslator.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/TextTranslator.scala @@ -1,26 +1,23 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.cognitive - -import com.microsoft.ml.spark.core.schema.DatasetExtensions -import com.microsoft.ml.spark.io.http.SimpleHTTPTransformer -import com.microsoft.ml.spark.logging.BasicLogging -import com.microsoft.ml.spark.stages.{DropColumns, Lambda, UDFTransformer} -import org.apache.http.client.methods.{HttpEntityEnclosingRequestBase, HttpPost, HttpRequestBase} -import org.apache.http.entity.{AbstractHttpEntity, ContentType, StringEntity} -import org.apache.spark.injections.UDFUtils -import org.apache.spark.ml.{ComplexParamsReadable, NamespaceInjections, PipelineModel, Transformer} +package com.microsoft.azure.synapse.ml.cognitive + +import com.microsoft.azure.synapse.ml.core.schema.DatasetExtensions +import com.microsoft.azure.synapse.ml.io.http.SimpleHTTPTransformer +import com.microsoft.azure.synapse.ml.logging.BasicLogging +import com.microsoft.azure.synapse.ml.stages.{DropColumns, Lambda} +import org.apache.http.client.methods.{HttpPost, 
HttpRequestBase} +import org.apache.http.entity.{AbstractHttpEntity, StringEntity} import org.apache.spark.ml.param.ServiceParam import org.apache.spark.ml.util.Identifiable +import org.apache.spark.ml.{ComplexParamsReadable, NamespaceInjections, PipelineModel, Transformer} import org.apache.spark.sql.Row import org.apache.spark.sql.functions.{array, col, lit, struct} import org.apache.spark.sql.types.{ArrayType, DataType, StringType, StructType} import spray.json.DefaultJsonProtocol._ import spray.json._ -import java.net.URI - trait HasSubscriptionRegion extends HasServiceParams { val subscriptionRegion = new ServiceParam[String]( this, "subscriptionRegion", "the API region to use") diff --git a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/TranslatorSchemas.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/TranslatorSchemas.scala similarity index 97% rename from cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/TranslatorSchemas.scala rename to cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/TranslatorSchemas.scala index 324e6d25da..51c82eb2f5 100644 --- a/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/TranslatorSchemas.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/TranslatorSchemas.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.cognitive +package com.microsoft.azure.synapse.ml.cognitive -import com.microsoft.ml.spark.core.schema.SparkBindings +import com.microsoft.azure.synapse.ml.core.schema.SparkBindings import spray.json._ object TranslateResponse extends SparkBindings[TranslateResponse] diff --git a/cognitive/src/test/python/mmlsparktest/cognitive/__init__.py b/cognitive/src/test/python/synapsemltest/cognitive/__init__.py similarity index 100% rename from cognitive/src/test/python/mmlsparktest/cognitive/__init__.py rename to cognitive/src/test/python/synapsemltest/cognitive/__init__.py diff --git a/cognitive/src/test/python/mmlsparktest/cognitive/test_simple.py b/cognitive/src/test/python/synapsemltest/cognitive/test_simple.py similarity index 93% rename from cognitive/src/test/python/mmlsparktest/cognitive/test_simple.py rename to cognitive/src/test/python/synapsemltest/cognitive/test_simple.py index cde36abc99..f361d1b546 100644 --- a/cognitive/src/test/python/mmlsparktest/cognitive/test_simple.py +++ b/cognitive/src/test/python/synapsemltest/cognitive/test_simple.py @@ -4,8 +4,8 @@ # Prepare training and test data. 
import unittest -from mmlspark.io.http import * -from mmlsparktest.spark import * +from synapse.ml.io.http import * +from synapsemltest.spark import * from pyspark.sql.functions import struct from pyspark.sql.types import * diff --git a/cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/AnamolyDetectionSuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/AnamolyDetectionSuite.scala similarity index 95% rename from cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/AnamolyDetectionSuite.scala rename to cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/AnamolyDetectionSuite.scala index d3d3b556f5..465a530ce1 100644 --- a/cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/AnamolyDetectionSuite.scala +++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/AnamolyDetectionSuite.scala @@ -1,15 +1,15 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.cognitive.split1 +package com.microsoft.azure.synapse.ml.cognitive.split1 -import com.microsoft.ml.spark.Secrets -import com.microsoft.ml.spark.cognitive._ -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.Secrets +import com.microsoft.azure.synapse.ml.cognitive._ +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} import org.apache.spark.ml.util.MLReadable +import org.apache.spark.sql.functions._ import org.apache.spark.sql.{DataFrame, Row} -import org.apache.spark.sql.functions.{col, collect_list, lit, sort_array, struct} trait AnomalyKey { lazy val anomalyKey = sys.env.getOrElse("ANOMALY_API_KEY", Secrets.AnomalyApiKey) diff --git a/cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/ComputerVisionSuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/ComputerVisionSuite.scala similarity index 97% rename from cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/ComputerVisionSuite.scala rename to cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/ComputerVisionSuite.scala index acaf07db6c..974dd0bb00 100644 --- a/cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/ComputerVisionSuite.scala +++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/ComputerVisionSuite.scala @@ -1,18 +1,18 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.cognitive.split1 +package com.microsoft.azure.synapse.ml.cognitive.split1 -import com.microsoft.ml.spark.Secrets -import com.microsoft.ml.spark.cognitive._ -import com.microsoft.ml.spark.core.test.base.{Flaky, TestBase} -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.Secrets +import com.microsoft.azure.synapse.ml.cognitive._ +import com.microsoft.azure.synapse.ml.core.spark.FluentAPI._ +import com.microsoft.azure.synapse.ml.core.test.base.{Flaky, TestBase} +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} import org.apache.spark.ml.NamespaceInjections.pipelineModel import org.apache.spark.ml.util.MLReadable import org.apache.spark.sql.functions.typedLit import org.apache.spark.sql.{DataFrame, Dataset, Row} import org.scalactic.Equality -import com.microsoft.ml.spark.FluentAPI._ trait CognitiveKey { lazy val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", Secrets.CognitiveApiKey) diff --git a/cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/FaceAPI.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/FaceAPI.scala similarity index 95% rename from cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/FaceAPI.scala rename to cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/FaceAPI.scala index 3b1744c63f..16441642b6 100644 --- a/cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/FaceAPI.scala +++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/FaceAPI.scala @@ -1,21 +1,21 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.cognitive.split1 +package com.microsoft.azure.synapse.ml.cognitive.split1 -import java.net.URI - -import com.microsoft.ml.spark.cognitive.{RESTHelpers, URLEncodingUtils, _} -import com.microsoft.ml.spark.core.env.StreamUtilities.using +import com.microsoft.azure.synapse.ml.cognitive._ +import com.microsoft.azure.synapse.ml.core.env.StreamUtilities.using import org.apache.commons.io.IOUtils import org.apache.http.client.methods._ import org.apache.http.entity.StringEntity import spray.json.DefaultJsonProtocol._ import spray.json._ +import java.net.URI + object FaceUtils extends CognitiveKey { - import RESTHelpers._ + import com.microsoft.azure.synapse.ml.cognitive.RESTHelpers._ val BaseURL = "https://eastus.api.cognitive.microsoft.com/face/v1.0/" @@ -78,7 +78,7 @@ object FaceUtils extends CognitiveKey { } } -import com.microsoft.ml.spark.cognitive.split1.FaceUtils._ +import com.microsoft.azure.synapse.ml.cognitive.split1.FaceUtils._ object FaceListProtocol { implicit val PfiEnc = jsonFormat2(PersistedFaceInfo.apply) diff --git a/cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/FaceSuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/FaceSuite.scala similarity index 97% rename from cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/FaceSuite.scala rename to cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/FaceSuite.scala index 35dd109531..80ae2dda51 100644 --- a/cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/FaceSuite.scala +++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/FaceSuite.scala @@ -1,17 +1,16 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.cognitive.split1 +package com.microsoft.azure.synapse.ml.cognitive.split1 -import java.util.UUID - -import com.microsoft.ml.spark.cognitive._ -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.cognitive._ +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} import org.apache.spark.ml.util.MLReadable import org.apache.spark.sql.functions.{col, explode, lit} import org.apache.spark.sql.{DataFrame, Row} import org.scalactic.Equality -import org.scalatest.Assertion + +import java.util.UUID class DetectFaceSuite extends TransformerFuzzing[DetectFace] with CognitiveKey { diff --git a/cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/FormRecognizerSuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/FormRecognizerSuite.scala similarity index 97% rename from cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/FormRecognizerSuite.scala rename to cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/FormRecognizerSuite.scala index 6814ef2c37..a0ae026b46 100644 --- a/cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/FormRecognizerSuite.scala +++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/FormRecognizerSuite.scala @@ -1,16 +1,16 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.cognitive.split1 - -import com.microsoft.ml.spark.FluentAPI._ -import com.microsoft.ml.spark.cognitive.FormsFlatteners._ -import com.microsoft.ml.spark.cognitive.RESTHelpers.retry -import com.microsoft.ml.spark.cognitive._ -import com.microsoft.ml.spark.core.env.StreamUtilities.using -import com.microsoft.ml.spark.core.test.base.{Flaky, TestBase} -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} -import com.microsoft.ml.spark.stages.UDFTransformer +package com.microsoft.azure.synapse.ml.cognitive.split1 + +import com.microsoft.azure.synapse.ml.cognitive.FormsFlatteners._ +import com.microsoft.azure.synapse.ml.cognitive.RESTHelpers.retry +import com.microsoft.azure.synapse.ml.cognitive._ +import com.microsoft.azure.synapse.ml.core.env.StreamUtilities.using +import com.microsoft.azure.synapse.ml.core.spark.FluentAPI._ +import com.microsoft.azure.synapse.ml.core.test.base.{Flaky, TestBase} +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.stages.UDFTransformer import org.apache.commons.io.IOUtils import org.apache.http.client.methods._ import org.apache.http.entity.StringEntity @@ -28,7 +28,7 @@ object TrainCustomModelProtocol extends DefaultJsonProtocol { implicit val TrainCustomModelEnc: RootJsonFormat[TrainCustomModelSchema] = jsonFormat3(TrainCustomModelSchema) } -import com.microsoft.ml.spark.cognitive.split1.TrainCustomModelProtocol._ +import com.microsoft.azure.synapse.ml.cognitive.split1.TrainCustomModelProtocol._ case class TrainCustomModelSchema(source: String, sourceFilter: SourceFilter, useLabelFile: Boolean) @@ -36,7 +36,7 @@ case class SourceFilter(prefix: String, includeSubFolders: Boolean) object FormRecognizerUtils extends CognitiveKey { - import RESTHelpers._ + import com.microsoft.azure.synapse.ml.cognitive.RESTHelpers._ val PollingDelay = 1000 diff --git 
a/cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/ImageSearchSuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/ImageSearchSuite.scala similarity index 94% rename from cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/ImageSearchSuite.scala rename to cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/ImageSearchSuite.scala index 77829f15c5..28f95bed76 100644 --- a/cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/ImageSearchSuite.scala +++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/ImageSearchSuite.scala @@ -1,11 +1,11 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.cognitive.split1 +package com.microsoft.azure.synapse.ml.cognitive.split1 -import com.microsoft.ml.spark.Secrets -import com.microsoft.ml.spark.cognitive.BingImageSearch -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.Secrets +import com.microsoft.azure.synapse.ml.cognitive.BingImageSearch +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} import org.apache.spark.ml.NamespaceInjections.pipelineModel import org.apache.spark.ml.util.MLReadable import org.apache.spark.sql.{DataFrame, Row} diff --git a/cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/TextAnalyticsSuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/TextAnalyticsSuite.scala similarity index 97% rename from cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/TextAnalyticsSuite.scala rename to cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/TextAnalyticsSuite.scala index c700910ee9..ea9b76ca7c 100644 --- 
a/cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/TextAnalyticsSuite.scala +++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/TextAnalyticsSuite.scala @@ -1,13 +1,13 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.cognitive.split1 +package com.microsoft.azure.synapse.ml.cognitive.split1 -import com.microsoft.ml.spark.Secrets -import com.microsoft.ml.spark.cognitive._ -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} -import com.microsoft.ml.spark.stages.FixedMiniBatchTransformer +import com.microsoft.azure.synapse.ml.Secrets +import com.microsoft.azure.synapse.ml.cognitive._ +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.stages.FixedMiniBatchTransformer import org.apache.spark.ml.util.MLReadable import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema import org.apache.spark.sql.functions.col diff --git a/cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/TranslatorSuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/TranslatorSuite.scala similarity index 98% rename from cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/TranslatorSuite.scala rename to cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/TranslatorSuite.scala index 29e780ac7f..9ef46e99a5 100644 --- a/cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/TranslatorSuite.scala +++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/TranslatorSuite.scala @@ -1,12 +1,12 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
See LICENSE in project root for information. -package com.microsoft.ml.spark.cognitive.split1 +package com.microsoft.azure.synapse.ml.cognitive.split1 -import com.microsoft.ml.spark.Secrets -import com.microsoft.ml.spark.cognitive._ -import com.microsoft.ml.spark.core.test.base.{Flaky, TestBase} -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.Secrets +import com.microsoft.azure.synapse.ml.cognitive._ +import com.microsoft.azure.synapse.ml.core.test.base.{Flaky, TestBase} +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} import org.apache.spark.ml.util.MLReadable import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions.{col, flatten} diff --git a/cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split2/SearchWriterSuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split2/SearchWriterSuite.scala similarity index 95% rename from cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split2/SearchWriterSuite.scala rename to cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split2/SearchWriterSuite.scala index 9b8d91af8a..45d4e33aae 100644 --- a/cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split2/SearchWriterSuite.scala +++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split2/SearchWriterSuite.scala @@ -1,21 +1,19 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.cognitive.split2 - -import java.util.UUID - -import com.microsoft.ml.spark.Secrets -import com.microsoft.ml.spark.cognitive.RESTHelpers._ -import com.microsoft.ml.spark.cognitive._ -import com.microsoft.ml.spark.cognitive.split1.CognitiveKey -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} +package com.microsoft.azure.synapse.ml.cognitive.split2 + +import com.microsoft.azure.synapse.ml.Secrets +import com.microsoft.azure.synapse.ml.cognitive.RESTHelpers._ +import com.microsoft.azure.synapse.ml.cognitive.split1.CognitiveKey +import com.microsoft.azure.synapse.ml.cognitive._ +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} import org.apache.http.client.methods.HttpDelete import org.apache.spark.ml.util.MLReadable import org.apache.spark.sql.DataFrame -import org.apache.spark.sql.functions.{col, lit, split, udf} +import java.util.UUID import scala.collection.mutable import scala.concurrent.blocking diff --git a/cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split2/SpeechToTextSuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split2/SpeechToTextSuite.scala similarity index 90% rename from cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split2/SpeechToTextSuite.scala rename to cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split2/SpeechToTextSuite.scala index 65949bf096..b2f5f16ad6 100644 --- a/cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split2/SpeechToTextSuite.scala +++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split2/SpeechToTextSuite.scala @@ -1,18 +1,18 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.cognitive.split2 +package com.microsoft.azure.synapse.ml.cognitive.split2 -import java.net.{URI, URL} - -import com.microsoft.ml.spark.cognitive.split1.CognitiveKey -import com.microsoft.ml.spark.cognitive.{SpeechResponse, SpeechToText} -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.cognitive.split1.CognitiveKey +import com.microsoft.azure.synapse.ml.cognitive.{SpeechResponse, SpeechToText} +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} import org.apache.commons.compress.utils.IOUtils import org.apache.spark.ml.util.MLReadable import org.apache.spark.sql.{DataFrame, Row} import org.scalactic.Equality +import java.net.{URI, URL} + class SpeechToTextSuite extends TransformerFuzzing[SpeechToText] with CognitiveKey { diff --git a/cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split3/SpeechToTextSDKSuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split3/SpeechToTextSDKSuite.scala similarity index 96% rename from cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split3/SpeechToTextSDKSuite.scala rename to cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split3/SpeechToTextSDKSuite.scala index 53503db1b1..fb34d2ba7d 100644 --- a/cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split3/SpeechToTextSDKSuite.scala +++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split3/SpeechToTextSDKSuite.scala @@ -1,25 +1,25 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.cognitive.split3 - -import java.io.{ByteArrayInputStream, File, FileInputStream} -import java.net.URI - -import com.microsoft.ml.spark.Secrets -import com.microsoft.ml.spark.cognitive._ -import com.microsoft.ml.spark.cognitive.split1.CognitiveKey -import com.microsoft.ml.spark.core.env.StreamUtilities -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} +package com.microsoft.azure.synapse.ml.cognitive.split3 + +import com.microsoft.azure.synapse.ml.Secrets +import com.microsoft.azure.synapse.ml.cognitive.split1.CognitiveKey +import com.microsoft.azure.synapse.ml.cognitive._ +import com.microsoft.azure.synapse.ml.core.env.StreamUtilities +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} import org.apache.commons.compress.utils.IOUtils import org.apache.commons.io.FileUtils import org.apache.spark.ml.util.MLReadable -import org.apache.spark.sql.functions.{col, lit, to_json} +import org.apache.spark.sql.functions.{col, to_json} import org.apache.spark.sql.{DataFrame, Row} import org.scalactic.Equality import org.scalatest.Assertion +import java.io.{ByteArrayInputStream, File, FileInputStream} +import java.net.URI + trait CustomSpeechKey { lazy val customSpeechKey = sys.env.getOrElse("CUSTOM_SPEECH_API_KEY", Secrets.CustomSpeechApiKey) } diff --git a/cognitive/src/test/scala/com/microsoft/ml/spark/core/utils/utils/ModelEqualitySuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/core/utils/utils/ModelEqualitySuite.scala similarity index 68% rename from cognitive/src/test/scala/com/microsoft/ml/spark/core/utils/utils/ModelEqualitySuite.scala rename to cognitive/src/test/scala/com/microsoft/azure/synapse/ml/core/utils/utils/ModelEqualitySuite.scala index d88d70d63a..3505cc858e 100644 --- 
a/cognitive/src/test/scala/com/microsoft/ml/spark/core/utils/utils/ModelEqualitySuite.scala +++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/core/utils/utils/ModelEqualitySuite.scala @@ -1,13 +1,13 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.core.utils.utils +package com.microsoft.azure.synapse.ml.core.utils.utils -import com.microsoft.ml.spark.cognitive.TextSentiment -import com.microsoft.ml.spark.core.env.FileUtilities.join -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.utils.ModelEquality -import com.microsoft.ml.spark.stages.DropColumns +import com.microsoft.azure.synapse.ml.cognitive.TextSentiment +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.utils.ModelEquality +import com.microsoft.azure.synapse.ml.core.env.FileUtilities.join +import com.microsoft.azure.synapse.ml.stages.DropColumns class ModelEqualitySuite extends TestBase { spark @@ -21,7 +21,7 @@ class ModelEqualitySuite extends TestBase { val p2 = join(tmpDir.toString, "1_m2.model").toString m1.write.overwrite().save(p1) m2.write.overwrite().save(p2) - ModelEquality.assertEqual("com.microsoft.ml.spark.cognitive.TextSentiment", p1, p2) + ModelEquality.assertEqual("com.microsoft.azure.synapse.ml.cognitive.TextSentiment", p1, p2) } test("Basic equality") { @@ -33,7 +33,7 @@ class ModelEqualitySuite extends TestBase { val p2 = join(tmpDir.toString, "2_m2.model").toString m1.write.overwrite().save(p1) m2.write.overwrite().save(p2) - ModelEquality.assertEqual("com.microsoft.ml.spark.stages.DropColumns", p1, p2) + ModelEquality.assertEqual("com.microsoft.azure.synapse.ml.stages.DropColumns", p1, p2) } test("Basic non equality") { @@ -46,7 +46,7 @@ class ModelEqualitySuite extends TestBase { m1.write.overwrite().save(p1) m2.write.overwrite().save(p2) 
assertThrows[AssertionError]( - ModelEquality.assertEqual("com.microsoft.ml.spark.stages.DropColumns", p1, p2)) + ModelEquality.assertEqual("com.microsoft.azure.synapse.ml.stages.DropColumns", p1, p2)) } } diff --git a/cognitive/src/test/scala/com/microsoft/ml/spark/core/utils/utils/SlicerFunctionsSuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/core/utils/utils/SlicerFunctionsSuite.scala similarity index 89% rename from cognitive/src/test/scala/com/microsoft/ml/spark/core/utils/utils/SlicerFunctionsSuite.scala rename to cognitive/src/test/scala/com/microsoft/azure/synapse/ml/core/utils/utils/SlicerFunctionsSuite.scala index 1696727b05..1268d7fdc4 100644 --- a/cognitive/src/test/scala/com/microsoft/ml/spark/core/utils/utils/SlicerFunctionsSuite.scala +++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/core/utils/utils/SlicerFunctionsSuite.scala @@ -1,9 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.core.utils +package com.microsoft.azure.synapse.ml.core.utils.utils -import com.microsoft.ml.spark.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.utils.SlicerFunctions import org.apache.spark.SparkException import org.apache.spark.ml.linalg.{Vector, Vectors} import org.apache.spark.sql.functions.{col, lit} diff --git a/core/src/main/python/mmlspark/doc/scala.rst b/core/src/main/python/mmlspark/doc/scala.rst deleted file mode 100644 index 01f3c15694..0000000000 --- a/core/src/main/python/mmlspark/doc/scala.rst +++ /dev/null @@ -1,4 +0,0 @@ -Scala API Docs -=============== - -See the `Scala API documentation for MMLSpark <../scala/index.html>`_. 
diff --git a/core/src/main/python/mmlspark/README.txt b/core/src/main/python/synapse/README.txt similarity index 64% rename from core/src/main/python/mmlspark/README.txt rename to core/src/main/python/synapse/README.txt index b73c4462e6..f5b3c9bca7 100644 --- a/core/src/main/python/mmlspark/README.txt +++ b/core/src/main/python/synapse/README.txt @@ -1,8 +1,8 @@ Microsoft ML for Apache Spark ============================= -This package contains the PySpark library for MMLSpark. +This package contains the PySpark library for SynapseML. This library provides spark estimators, transformers, and utility functions for machine learning on Spark. For more complete documentation, refer to -the MMLSpark repo: https://github.com/Azure/mmlspark . +the SynapseML repo: https://github.com/Microsoft/SynapseML . diff --git a/core/src/main/python/mmlspark/__init__.py b/core/src/main/python/synapse/__init__.py similarity index 100% rename from core/src/main/python/mmlspark/__init__.py rename to core/src/main/python/synapse/__init__.py diff --git a/core/src/main/python/mmlspark/automl/BestModel.py b/core/src/main/python/synapse/ml/automl/BestModel.py similarity index 97% rename from core/src/main/python/mmlspark/automl/BestModel.py rename to core/src/main/python/synapse/ml/automl/BestModel.py index ad7e5bb779..9830ebd09f 100644 --- a/core/src/main/python/mmlspark/automl/BestModel.py +++ b/core/src/main/python/synapse/ml/automl/BestModel.py @@ -6,7 +6,7 @@ if sys.version >= '3': basestring = str -from mmlspark.automl._BestModel import _BestModel +from synapse.ml.automl._BestModel import _BestModel from pyspark import SparkContext, SQLContext from pyspark.ml.wrapper import JavaParams from pyspark.ml.common import inherit_doc diff --git a/core/src/main/python/mmlspark/automl/HyperparamBuilder.py b/core/src/main/python/synapse/ml/automl/HyperparamBuilder.py similarity index 79% rename from core/src/main/python/mmlspark/automl/HyperparamBuilder.py rename to 
core/src/main/python/synapse/ml/automl/HyperparamBuilder.py index a9faa6bacf..bb0642a31c 100644 --- a/core/src/main/python/mmlspark/automl/HyperparamBuilder.py +++ b/core/src/main/python/synapse/ml/automl/HyperparamBuilder.py @@ -46,7 +46,7 @@ class DiscreteHyperParam(object): def __init__(self, values, seed=0): ctx = SparkContext.getOrCreate() self.jvm = ctx.getOrCreate()._jvm - self.hyperParam = self.jvm.com.microsoft.ml.spark.automl.HyperParamUtils.getDiscreteHyperParam(values, seed) + self.hyperParam = self.jvm.com.microsoft.azure.synapse.ml.automl.HyperParamUtils.getDiscreteHyperParam(values, seed) def get(self): return self.hyperParam @@ -58,7 +58,7 @@ class RangeHyperParam(object): def __init__(self, min, max, seed=0): ctx = SparkContext.getOrCreate() self.jvm = ctx.getOrCreate()._jvm - self.rangeParam = self.jvm.com.microsoft.ml.spark.automl.HyperParamUtils.getRangeHyperParam(min, max, seed) + self.rangeParam = self.jvm.com.microsoft.azure.synapse.ml.automl.HyperParamUtils.getRangeHyperParam(min, max, seed) def get(self): return self.rangeParam @@ -70,11 +70,11 @@ class GridSpace(object): def __init__(self, paramValues): ctx = SparkContext.getOrCreate() self.jvm = ctx.getOrCreate()._jvm - hyperparamBuilder = self.jvm.com.microsoft.ml.spark.automl.HyperparamBuilder() + hyperparamBuilder = self.jvm.com.microsoft.azure.synapse.ml.automl.HyperparamBuilder() for k, (est, hyperparam) in paramValues: javaParam = est._java_obj.getParam(k.name) hyperparamBuilder.addHyperparam(javaParam, hyperparam.get()) - self.gridSpace = self.jvm.com.microsoft.ml.spark.automl.GridSpace(hyperparamBuilder.build()) + self.gridSpace = self.jvm.com.microsoft.azure.synapse.ml.automl.GridSpace(hyperparamBuilder.build()) def space(self): return self.gridSpace @@ -86,11 +86,11 @@ class RandomSpace(object): def __init__(self, paramDistributions): ctx = SparkContext.getOrCreate() self.jvm = ctx.getOrCreate()._jvm - hyperparamBuilder = 
self.jvm.com.microsoft.ml.spark.automl.HyperparamBuilder() + hyperparamBuilder = self.jvm.com.microsoft.azure.synapse.ml.automl.HyperparamBuilder() for k, (est, hyperparam) in paramDistributions: javaParam = est._java_obj.getParam(k.name) hyperparamBuilder.addHyperparam(javaParam, hyperparam.get()) - self.paramSpace = self.jvm.com.microsoft.ml.spark.automl.RandomSpace(hyperparamBuilder.build()) + self.paramSpace = self.jvm.com.microsoft.azure.synapse.ml.automl.RandomSpace(hyperparamBuilder.build()) def space(self): return self.paramSpace diff --git a/core/src/main/python/mmlspark/automl/TuneHyperparametersModel.py b/core/src/main/python/synapse/ml/automl/TuneHyperparametersModel.py similarity index 89% rename from core/src/main/python/mmlspark/automl/TuneHyperparametersModel.py rename to core/src/main/python/synapse/ml/automl/TuneHyperparametersModel.py index 2fca48d869..02203fb1fd 100644 --- a/core/src/main/python/mmlspark/automl/TuneHyperparametersModel.py +++ b/core/src/main/python/synapse/ml/automl/TuneHyperparametersModel.py @@ -8,7 +8,7 @@ if sys.version >= '3': basestring = str -from mmlspark.automl._TuneHyperparametersModel import _TuneHyperparametersModel +from synapse.ml.automl._TuneHyperparametersModel import _TuneHyperparametersModel from pyspark.ml.wrapper import JavaParams from pyspark.ml.common import inherit_doc diff --git a/core/src/main/python/mmlspark/automl/__init__.py b/core/src/main/python/synapse/ml/automl/__init__.py similarity index 100% rename from core/src/main/python/mmlspark/automl/__init__.py rename to core/src/main/python/synapse/ml/automl/__init__.py diff --git a/core/src/main/python/mmlspark/core/__init__.py b/core/src/main/python/synapse/ml/core/__init__.py similarity index 100% rename from core/src/main/python/mmlspark/core/__init__.py rename to core/src/main/python/synapse/ml/core/__init__.py diff --git a/core/src/main/python/mmlspark/core/schema/TypeConversionUtils.py 
b/core/src/main/python/synapse/ml/core/schema/TypeConversionUtils.py similarity index 100% rename from core/src/main/python/mmlspark/core/schema/TypeConversionUtils.py rename to core/src/main/python/synapse/ml/core/schema/TypeConversionUtils.py diff --git a/core/src/main/python/mmlspark/core/schema/Utils.py b/core/src/main/python/synapse/ml/core/schema/Utils.py similarity index 89% rename from core/src/main/python/mmlspark/core/schema/Utils.py rename to core/src/main/python/synapse/ml/core/schema/Utils.py index f30d75636e..1df935b9ca 100644 --- a/core/src/main/python/mmlspark/core/schema/Utils.py +++ b/core/src/main/python/synapse/ml/core/schema/Utils.py @@ -70,14 +70,14 @@ def read(cls): class ComplexParamsMixin(MLReadable): def _transfer_params_from_java(self): """ - Transforms the embedded com.microsoft.ml.spark.core.serialize.params from the companion Java object. + Transforms the embedded com.microsoft.azure.synapse.ml.core.serialize.params from the companion Java object. """ sc = SparkContext._active_spark_context for param in self.params: if self._java_obj.hasParam(param.name): java_param = self._java_obj.getParam(param.name) - # SPARK-14931: Only check set com.microsoft.ml.spark.core.serialize.params back to avoid default com.microsoft.ml.spark.core.serialize.params mismatch. - complex_param_class = sc._gateway.jvm.com.microsoft.ml.spark.core.serialize.ComplexParam._java_lang_class + # SPARK-14931: Only check set com.microsoft.azure.synapse.ml.core.serialize.params back to avoid default com.microsoft.azure.synapse.ml.core.serialize.params mismatch. 
+ complex_param_class = sc._gateway.jvm.com.microsoft.azure.synapse.ml.core.serialize.ComplexParam._java_lang_class is_complex_param = complex_param_class.isAssignableFrom(java_param.getClass()) if self._java_obj.isSet(java_param): if is_complex_param: diff --git a/core/src/main/python/mmlspark/core/schema/__init__.py b/core/src/main/python/synapse/ml/core/schema/__init__.py similarity index 100% rename from core/src/main/python/mmlspark/core/schema/__init__.py rename to core/src/main/python/synapse/ml/core/schema/__init__.py diff --git a/core/src/main/python/mmlspark/core/serialize/__init__.py b/core/src/main/python/synapse/ml/core/serialize/__init__.py similarity index 100% rename from core/src/main/python/mmlspark/core/serialize/__init__.py rename to core/src/main/python/synapse/ml/core/serialize/__init__.py diff --git a/core/src/main/python/mmlspark/core/serialize/java_params_patch.py b/core/src/main/python/synapse/ml/core/serialize/java_params_patch.py similarity index 97% rename from core/src/main/python/mmlspark/core/serialize/java_params_patch.py rename to core/src/main/python/synapse/ml/core/serialize/java_params_patch.py index 0bd07d4751..e8be3d3ed9 100644 --- a/core/src/main/python/mmlspark/core/serialize/java_params_patch.py +++ b/core/src/main/python/synapse/ml/core/serialize/java_params_patch.py @@ -30,7 +30,7 @@ def __get_class(clazz): return m stage_name = java_stage.getClass().getName().replace("org.apache.spark", "pyspark") - stage_name = stage_name.replace("com.microsoft.ml.spark", "mmlspark") + stage_name = stage_name.replace("com.microsoft.azure.synapse.ml", "synapseml") # Generate a default new instance from the stage_name class. 
py_type = __get_class(stage_name) if issubclass(py_type, JavaParams): diff --git a/core/src/main/python/mmlspark/core/spark/FluentAPI.py b/core/src/main/python/synapse/ml/core/spark/FluentAPI.py similarity index 100% rename from core/src/main/python/mmlspark/core/spark/FluentAPI.py rename to core/src/main/python/synapse/ml/core/spark/FluentAPI.py diff --git a/core/src/main/python/mmlspark/core/spark/__init__.py b/core/src/main/python/synapse/ml/core/spark/__init__.py similarity index 100% rename from core/src/main/python/mmlspark/core/spark/__init__.py rename to core/src/main/python/synapse/ml/core/spark/__init__.py diff --git a/core/src/main/python/mmlspark/cyber/__init__.py b/core/src/main/python/synapse/ml/cyber/__init__.py similarity index 100% rename from core/src/main/python/mmlspark/cyber/__init__.py rename to core/src/main/python/synapse/ml/cyber/__init__.py diff --git a/core/src/main/python/mmlspark/cyber/anomaly/__init__.py b/core/src/main/python/synapse/ml/cyber/anomaly/__init__.py similarity index 100% rename from core/src/main/python/mmlspark/cyber/anomaly/__init__.py rename to core/src/main/python/synapse/ml/cyber/anomaly/__init__.py diff --git a/core/src/main/python/mmlspark/cyber/anomaly/collaborative_filtering.py b/core/src/main/python/synapse/ml/cyber/anomaly/collaborative_filtering.py similarity index 99% rename from core/src/main/python/mmlspark/cyber/anomaly/collaborative_filtering.py rename to core/src/main/python/synapse/ml/cyber/anomaly/collaborative_filtering.py index bd45fcc58e..d4e58e1f7e 100644 --- a/core/src/main/python/mmlspark/cyber/anomaly/collaborative_filtering.py +++ b/core/src/main/python/synapse/ml/cyber/anomaly/collaborative_filtering.py @@ -3,9 +3,9 @@ import os from typing import List, Optional, Tuple -from mmlspark.cyber.anomaly.complement_access import ComplementAccessTransformer -from mmlspark.cyber.feature import indexers, scalers -from mmlspark.cyber.utils import spark_utils +from 
synapse.ml.cyber.anomaly.complement_access import ComplementAccessTransformer +from synapse.ml.cyber.feature import indexers, scalers +from synapse.ml.cyber.utils import spark_utils import numpy as np diff --git a/core/src/main/python/mmlspark/cyber/anomaly/complement_access.py b/core/src/main/python/synapse/ml/cyber/anomaly/complement_access.py similarity index 98% rename from core/src/main/python/mmlspark/cyber/anomaly/complement_access.py rename to core/src/main/python/synapse/ml/cyber/anomaly/complement_access.py index 73394427e2..40f4e070aa 100644 --- a/core/src/main/python/mmlspark/cyber/anomaly/complement_access.py +++ b/core/src/main/python/synapse/ml/cyber/anomaly/complement_access.py @@ -2,7 +2,7 @@ from typing import List, Optional -from mmlspark.cyber.utils.spark_utils import DataFrameUtils, ExplainBuilder +from synapse.ml.cyber.utils.spark_utils import DataFrameUtils, ExplainBuilder from pyspark.ml import Transformer from pyspark.ml.param.shared import Param, Params diff --git a/core/src/main/python/mmlspark/cyber/dataset.py b/core/src/main/python/synapse/ml/cyber/dataset.py similarity index 98% rename from core/src/main/python/mmlspark/cyber/dataset.py rename to core/src/main/python/synapse/ml/cyber/dataset.py index 52759fe8e8..937f1eff5f 100644 --- a/core/src/main/python/mmlspark/cyber/dataset.py +++ b/core/src/main/python/synapse/ml/cyber/dataset.py @@ -5,7 +5,7 @@ import pandas as pd import random -from mmlspark.cyber.anomaly.collaborative_filtering import AccessAnomalyConfig +from synapse.ml.cyber.anomaly.collaborative_filtering import AccessAnomalyConfig class DataFactory: diff --git a/core/src/main/python/mmlspark/cyber/feature/__init__.py b/core/src/main/python/synapse/ml/cyber/feature/__init__.py similarity index 100% rename from core/src/main/python/mmlspark/cyber/feature/__init__.py rename to core/src/main/python/synapse/ml/cyber/feature/__init__.py diff --git a/core/src/main/python/mmlspark/cyber/feature/indexers.py 
b/core/src/main/python/synapse/ml/cyber/feature/indexers.py similarity index 97% rename from core/src/main/python/mmlspark/cyber/feature/indexers.py rename to core/src/main/python/synapse/ml/cyber/feature/indexers.py index 6482aa1531..6d80be2e46 100644 --- a/core/src/main/python/mmlspark/cyber/feature/indexers.py +++ b/core/src/main/python/synapse/ml/cyber/feature/indexers.py @@ -2,7 +2,7 @@ from typing import List -from mmlspark.cyber.utils.spark_utils import DataFrameUtils, ExplainBuilder, HasSetInputCol, HasSetOutputCol +from synapse.ml.cyber.utils.spark_utils import DataFrameUtils, ExplainBuilder, HasSetInputCol, HasSetOutputCol from pyspark.ml import Estimator, Transformer from pyspark.ml.param.shared import HasInputCol, HasOutputCol, Param, Params diff --git a/core/src/main/python/mmlspark/cyber/feature/scalers.py b/core/src/main/python/synapse/ml/cyber/feature/scalers.py similarity index 99% rename from core/src/main/python/mmlspark/cyber/feature/scalers.py rename to core/src/main/python/synapse/ml/cyber/feature/scalers.py index 1b55f7e607..00a12d029e 100644 --- a/core/src/main/python/mmlspark/cyber/feature/scalers.py +++ b/core/src/main/python/synapse/ml/cyber/feature/scalers.py @@ -3,7 +3,7 @@ from abc import ABC, abstractmethod from typing import Callable, Dict, List, Optional, Union -from mmlspark.cyber.utils.spark_utils import ExplainBuilder, HasSetInputCol, HasSetOutputCol +from synapse.ml.cyber.utils.spark_utils import ExplainBuilder, HasSetInputCol, HasSetOutputCol from pyspark.ml import Estimator, Transformer from pyspark.ml.param.shared import HasInputCol, HasOutputCol, Param, Params diff --git a/core/src/main/python/mmlspark/cyber/utils/__init__.py b/core/src/main/python/synapse/ml/cyber/utils/__init__.py similarity index 100% rename from core/src/main/python/mmlspark/cyber/utils/__init__.py rename to core/src/main/python/synapse/ml/cyber/utils/__init__.py diff --git a/core/src/main/python/mmlspark/cyber/utils/spark_utils.py 
b/core/src/main/python/synapse/ml/cyber/utils/spark_utils.py similarity index 100% rename from core/src/main/python/mmlspark/cyber/utils/spark_utils.py rename to core/src/main/python/synapse/ml/cyber/utils/spark_utils.py diff --git a/core/src/main/python/mmlspark/doc/conf.py b/core/src/main/python/synapse/ml/doc/conf.py similarity index 93% rename from core/src/main/python/mmlspark/doc/conf.py rename to core/src/main/python/synapse/ml/doc/conf.py index 2ae8764bd7..c0f46cd45b 100644 --- a/core/src/main/python/mmlspark/doc/conf.py +++ b/core/src/main/python/synapse/ml/doc/conf.py @@ -76,14 +76,14 @@ # -- Options for HTMLHelp output ------------------------------------------ # Output file base name for HTML help builder. -htmlhelp_basename = "MMLSparkdoc" +htmlhelp_basename = "SynapseMLdoc" # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ - (master_doc, "mmlspark", "MMLSpark Documentation", [author], 1) + (master_doc, "synapseml", "SynapseML Documentation", [author], 1) ] @@ -93,8 +93,8 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, "MMLSpark", "MMLSpark Documentation", author, - "MMLSpark", "One line description of project.", "Miscellaneous"), + (master_doc, "SynapseML", "SynapseML Documentation", author, + "SynapseML", "One line description of project.", "Miscellaneous"), ] @@ -122,11 +122,11 @@ def __getattr__(cls, name): # (note: currently, this requires pip-installing "sphinx==1.5.6" because of an # obscure bug, see rtfd/recommonmark#73 and sphinx-doc/sphinx#3800) # from recommonmark.transform import AutoStructify -# def mmlspark_doc_resolver(path): +# def synapseml_doc_resolver(path): # return path # github_doc_root + url # def setup(app): # app.add_config_value("recommonmark_config", { -# "url_resolver": mmlspark_doc_resolver, +# 
"url_resolver": synapseml_doc_resolver, # "auto_toc_tree_section": "Contents", # "enable_eval_rst": True, # "enable_auto_doc_ref": True, diff --git a/core/src/main/python/mmlspark/doc/index.rst b/core/src/main/python/synapse/ml/doc/index.rst similarity index 100% rename from core/src/main/python/mmlspark/doc/index.rst rename to core/src/main/python/synapse/ml/doc/index.rst diff --git a/core/src/main/python/synapse/ml/doc/scala.rst b/core/src/main/python/synapse/ml/doc/scala.rst new file mode 100644 index 0000000000..8b511eae12 --- /dev/null +++ b/core/src/main/python/synapse/ml/doc/scala.rst @@ -0,0 +1,4 @@ +Scala API Docs +=============== + +See the `Scala API documentation for SynapseML <../scala/index.html>`_. diff --git a/core/src/main/python/mmlspark/downloader/ModelDownloader.py b/core/src/main/python/synapse/ml/downloader/ModelDownloader.py similarity index 94% rename from core/src/main/python/mmlspark/downloader/ModelDownloader.py rename to core/src/main/python/synapse/ml/downloader/ModelDownloader.py index 52bb852cfe..2790c8617b 100644 --- a/core/src/main/python/mmlspark/downloader/ModelDownloader.py +++ b/core/src/main/python/synapse/ml/downloader/ModelDownloader.py @@ -7,7 +7,7 @@ basestring = str from pyspark.ml.param.shared import * -from mmlspark.core.schema.Utils import * +from synapse.ml.core.schema.Utils import * DEFAULT_URL = "https://mmlspark.azureedge.net/datasets/CNTKModels/" @@ -48,7 +48,7 @@ def __repr__(self): def toJava(self, sparkSession): ctx = sparkSession.sparkContext uri = ctx._jvm.java.net.URI(self.uri) - return ctx._jvm.com.microsoft.ml.spark.downloader.ModelSchema( + return ctx._jvm.com.microsoft.azure.synapse.ml.downloader.ModelSchema( self.name, self.dataset, self.modelType, uri, self.hash, self.size, self.inputNode, self.numLayers, self.layerNames) @@ -78,7 +78,7 @@ def __init__(self, sparkSession, localPath, serverURL=DEFAULT_URL): self._sparkSession = sparkSession self._ctx = sparkSession.sparkContext - self._model_downloader = 
self._ctx._jvm.com.microsoft.ml.spark.downloader.ModelDownloader( + self._model_downloader = self._ctx._jvm.com.microsoft.azure.synapse.ml.downloader.ModelDownloader( sparkSession._jsparkSession, localPath, serverURL) def _wrap(self, iter): diff --git a/core/src/main/python/mmlspark/downloader/__init__.py b/core/src/main/python/synapse/ml/downloader/__init__.py similarity index 100% rename from core/src/main/python/mmlspark/downloader/__init__.py rename to core/src/main/python/synapse/ml/downloader/__init__.py diff --git a/core/src/main/python/mmlspark/io/IOImplicits.py b/core/src/main/python/synapse/ml/io/IOImplicits.py similarity index 94% rename from core/src/main/python/mmlspark/io/IOImplicits.py rename to core/src/main/python/synapse/ml/io/IOImplicits.py index 9a6cb37af1..30679775e6 100644 --- a/core/src/main/python/mmlspark/io/IOImplicits.py +++ b/core/src/main/python/synapse/ml/io/IOImplicits.py @@ -2,7 +2,7 @@ from pyspark import SparkContext from pyspark.sql import DataFrame from pyspark.sql.types import StructType, StructField -from mmlspark.opencv.ImageTransformer import ImageSchema +from synapse.ml.opencv.ImageTransformer import ImageSchema image_source = "org.apache.spark.ml.source.image.PatchedImageFileFormat" @@ -62,7 +62,7 @@ def _parseRequest(self, apiName, schema, idCol="id", requestCol="request", parsingCheck = "none"): ctx = SparkContext.getOrCreate() jvm = ctx._jvm - extended = jvm.com.microsoft.ml.spark.io.DataFrameExtensions(self._jdf) + extended = jvm.com.microsoft.azure.synapse.ml.io.DataFrameExtensions(self._jdf) dt = jvm.org.apache.spark.sql.types.DataType jResult = extended.parseRequest(apiName, dt.fromJson(schema.json()), idCol, requestCol, parsingCheck) sql_ctx = pyspark.SQLContext.getOrCreate(ctx) @@ -73,7 +73,7 @@ def _parseRequest(self, apiName, schema, def _makeReply(self, replyCol, name="reply"): ctx = SparkContext.getOrCreate() jvm = ctx._jvm - extended = jvm.com.microsoft.ml.spark.io.DataFrameExtensions(self._jdf) + extended = 
jvm.com.microsoft.azure.synapse.ml.io.DataFrameExtensions(self._jdf) jResult = extended.makeReply(replyCol, name) sql_ctx = pyspark.SQLContext.getOrCreate(ctx) return DataFrame(jResult, sql_ctx) diff --git a/core/src/main/python/mmlspark/io/__init__.py b/core/src/main/python/synapse/ml/io/__init__.py similarity index 100% rename from core/src/main/python/mmlspark/io/__init__.py rename to core/src/main/python/synapse/ml/io/__init__.py diff --git a/core/src/main/python/mmlspark/io/binary/BinaryFileReader.py b/core/src/main/python/synapse/ml/io/binary/BinaryFileReader.py similarity index 92% rename from core/src/main/python/mmlspark/io/binary/BinaryFileReader.py rename to core/src/main/python/synapse/ml/io/binary/BinaryFileReader.py index 5e46dab923..0117b3d263 100644 --- a/core/src/main/python/mmlspark/io/binary/BinaryFileReader.py +++ b/core/src/main/python/synapse/ml/io/binary/BinaryFileReader.py @@ -47,7 +47,7 @@ def readBinaryFiles(self, path, recursive = False, sampleRatio = 1.0, inspectZip """ ctx = SparkContext.getOrCreate() - reader = ctx._jvm.com.microsoft.ml.spark.io.binary.BinaryFileReader + reader = ctx._jvm.com.microsoft.azure.synapse.ml.io.binary.BinaryFileReader sql_ctx = pyspark.SQLContext.getOrCreate(ctx) jsession = sql_ctx.sparkSession._jsparkSession jresult = reader.read(path, recursive, jsession, float(sampleRatio), inspectZip, seed) @@ -72,7 +72,7 @@ def streamBinaryFiles(self, path, sampleRatio = 1.0, inspectZip = True, seed=0): """ ctx = SparkContext.getOrCreate() - reader = ctx._jvm.com.microsoft.ml.spark.io.binary.BinaryFileReader + reader = ctx._jvm.com.microsoft.azure.synapse.ml.io.binary.BinaryFileReader sql_ctx = pyspark.SQLContext.getOrCreate(ctx) jsession = sql_ctx.sparkSession._jsparkSession jresult = reader.stream(path, jsession, float(sampleRatio), inspectZip, seed) @@ -93,5 +93,5 @@ def isBinaryFile(df, column): """ ctx = SparkContext.getOrCreate() - schema = ctx._jvm.com.microsoft.ml.spark.core.schema.BinaryFileSchema + schema = 
ctx._jvm.com.microsoft.azure.synapse.ml.core.schema.BinaryFileSchema return schema.isBinaryFile(df._jdf, column) diff --git a/core/src/main/python/mmlspark/io/binary/__init__.py b/core/src/main/python/synapse/ml/io/binary/__init__.py similarity index 100% rename from core/src/main/python/mmlspark/io/binary/__init__.py rename to core/src/main/python/synapse/ml/io/binary/__init__.py diff --git a/core/src/main/python/mmlspark/io/http/HTTPFunctions.py b/core/src/main/python/synapse/ml/io/http/HTTPFunctions.py similarity index 95% rename from core/src/main/python/mmlspark/io/http/HTTPFunctions.py rename to core/src/main/python/synapse/ml/io/http/HTTPFunctions.py index e01ccee64a..559f75e80c 100644 --- a/core/src/main/python/mmlspark/io/http/HTTPFunctions.py +++ b/core/src/main/python/synapse/ml/io/http/HTTPFunctions.py @@ -19,7 +19,7 @@ def requests_to_spark(p): } -# SparkContext._active_spark_context._jvm.com.microsoft.ml.spark.io.http.HTTPRequestData.schema().json() +# SparkContext._active_spark_context._jvm.com.microsoft.azure.synapse.ml.io.http.HTTPRequestData.schema().json() # TODO figure out why we cannot just grab from SparkContext on databricks HTTPRequestDataType = StructType().fromJson(json.loads( '{"type":"struct","fields":[{"name":"requestLine","type":{"type":"struct","fields":[{"name":"method",' diff --git a/core/src/main/python/mmlspark/io/http/JSONOutputParser.py b/core/src/main/python/synapse/ml/io/http/JSONOutputParser.py similarity index 91% rename from core/src/main/python/mmlspark/io/http/JSONOutputParser.py rename to core/src/main/python/synapse/ml/io/http/JSONOutputParser.py index 33a049f3ea..941b123d09 100644 --- a/core/src/main/python/mmlspark/io/http/JSONOutputParser.py +++ b/core/src/main/python/synapse/ml/io/http/JSONOutputParser.py @@ -6,7 +6,7 @@ if sys.version >= '3': basestring = str -from mmlspark.io.http._JSONOutputParser import _JSONOutputParser +from synapse.ml.io.http._JSONOutputParser import _JSONOutputParser from pyspark.ml.common 
import inherit_doc from pyspark import SparkContext import json diff --git a/core/src/main/python/mmlspark/io/http/ServingFunctions.py b/core/src/main/python/synapse/ml/io/http/ServingFunctions.py similarity index 90% rename from core/src/main/python/mmlspark/io/http/ServingFunctions.py rename to core/src/main/python/synapse/ml/io/http/ServingFunctions.py index 8100835b43..78cadd7bcd 100644 --- a/core/src/main/python/mmlspark/io/http/ServingFunctions.py +++ b/core/src/main/python/synapse/ml/io/http/ServingFunctions.py @@ -3,7 +3,7 @@ from pyspark.sql.column import Column def _http_schema(): - return SparkContext._active_spark_context._jvm.com.microsoft.ml.spark.io.http.HTTPSchema + return SparkContext._active_spark_context._jvm.com.microsoft.azure.synapse.ml.io.http.HTTPSchema def string_to_response(c): return Column(_http_schema().string_to_response(c._jc)) diff --git a/core/src/main/python/mmlspark/io/http/SimpleHTTPTransformer.py b/core/src/main/python/synapse/ml/io/http/SimpleHTTPTransformer.py similarity index 84% rename from core/src/main/python/mmlspark/io/http/SimpleHTTPTransformer.py rename to core/src/main/python/synapse/ml/io/http/SimpleHTTPTransformer.py index 10c2336e4e..9f82eab980 100644 --- a/core/src/main/python/mmlspark/io/http/SimpleHTTPTransformer.py +++ b/core/src/main/python/synapse/ml/io/http/SimpleHTTPTransformer.py @@ -6,7 +6,7 @@ if sys.version >= '3': basestring = str -from mmlspark.io.http._SimpleHTTPTransformer import _SimpleHTTPTransformer +from synapse.ml.io.http._SimpleHTTPTransformer import _SimpleHTTPTransformer from pyspark.ml.common import inherit_doc @inherit_doc diff --git a/core/src/main/python/mmlspark/io/http/__init__.py b/core/src/main/python/synapse/ml/io/http/__init__.py similarity index 100% rename from core/src/main/python/mmlspark/io/http/__init__.py rename to core/src/main/python/synapse/ml/io/http/__init__.py diff --git a/core/src/main/python/mmlspark/io/image/ImageUtils.py 
b/core/src/main/python/synapse/ml/io/image/ImageUtils.py similarity index 91% rename from core/src/main/python/mmlspark/io/image/ImageUtils.py rename to core/src/main/python/synapse/ml/io/image/ImageUtils.py index 43a2e7dcdb..a8c5a6ee0f 100644 --- a/core/src/main/python/mmlspark/io/image/ImageUtils.py +++ b/core/src/main/python/synapse/ml/io/image/ImageUtils.py @@ -27,7 +27,7 @@ def readFromPaths(df, pathCol, imageCol="image"): """ ctx = SparkContext.getOrCreate() jvm = ctx.getOrCreate()._jvm - reader = jvm.com.microsoft.ml.spark.io.image.ImageUtils + reader = jvm.com.microsoft.azure.synapse.ml.io.image.ImageUtils jresult = reader.readFromPaths(df._jdf, pathCol, imageCol) sql_ctx = pyspark.SQLContext.getOrCreate(ctx) return DataFrame(jresult, sql_ctx) @@ -47,7 +47,7 @@ def readFromStrings(df, bytesCol, imageCol="image", dropPrefix=False): """ ctx = SparkContext.getOrCreate() jvm = ctx.getOrCreate()._jvm - reader = jvm.com.microsoft.ml.spark.io.image.ImageUtils + reader = jvm.com.microsoft.azure.synapse.ml.io.image.ImageUtils jresult = reader.readFromStrings(df._jdf, bytesCol, imageCol, dropPrefix) sql_ctx = pyspark.SQLContext.getOrCreate(ctx) return DataFrame(jresult, sql_ctx) diff --git a/core/src/main/python/mmlspark/io/image/__init__.py b/core/src/main/python/synapse/ml/io/image/__init__.py similarity index 100% rename from core/src/main/python/mmlspark/io/image/__init__.py rename to core/src/main/python/synapse/ml/io/image/__init__.py diff --git a/core/src/main/python/mmlspark/io/powerbi/PowerBIWriter.py b/core/src/main/python/synapse/ml/io/powerbi/PowerBIWriter.py similarity index 83% rename from core/src/main/python/mmlspark/io/powerbi/PowerBIWriter.py rename to core/src/main/python/synapse/ml/io/powerbi/PowerBIWriter.py index 59c814c4c4..326e90cb1b 100644 --- a/core/src/main/python/mmlspark/io/powerbi/PowerBIWriter.py +++ b/core/src/main/python/synapse/ml/io/powerbi/PowerBIWriter.py @@ -15,14 +15,14 @@ def streamToPowerBI(df, url, options=dict()): jvm = 
SparkContext.getOrCreate()._jvm - writer = jvm.com.microsoft.ml.spark.io.powerbi.PowerBIWriter + writer = jvm.com.microsoft.azure.synapse.ml.io.powerbi.PowerBIWriter return writer.stream(df.drop("label")._jdf, url, options) setattr(pyspark.sql.DataFrame, 'streamToPowerBI', streamToPowerBI) def writeToPowerBI(df, url, options=dict()): jvm = SparkContext.getOrCreate()._jvm - writer = jvm.com.microsoft.ml.spark.io.powerbi.PowerBIWriter + writer = jvm.com.microsoft.azure.synapse.ml.io.powerbi.PowerBIWriter writer.write(df._jdf, url, options) setattr(pyspark.sql.DataFrame, 'writeToPowerBI', writeToPowerBI) diff --git a/core/src/main/python/mmlspark/io/powerbi/__init__.py b/core/src/main/python/synapse/ml/io/powerbi/__init__.py similarity index 100% rename from core/src/main/python/mmlspark/io/powerbi/__init__.py rename to core/src/main/python/synapse/ml/io/powerbi/__init__.py diff --git a/core/src/main/python/mmlspark/nn/ConditionalBallTree.py b/core/src/main/python/synapse/ml/nn/ConditionalBallTree.py similarity index 91% rename from core/src/main/python/mmlspark/nn/ConditionalBallTree.py rename to core/src/main/python/synapse/ml/nn/ConditionalBallTree.py index bdabc5e2c0..55b9bc1f71 100644 --- a/core/src/main/python/mmlspark/nn/ConditionalBallTree.py +++ b/core/src/main/python/synapse/ml/nn/ConditionalBallTree.py @@ -20,7 +20,7 @@ def __init__(self, keys, values, labels, leafSize, java_obj=None): """ if java_obj is None: self._jconditional_balltree = SparkContext._active_spark_context._jvm \ - .com.microsoft.ml.spark.nn.ConditionalBallTree \ + .com.microsoft.azure.synapse.ml.nn.ConditionalBallTree \ .apply(keys, values, labels, leafSize) else: self._jconditional_balltree = java_obj @@ -42,5 +42,5 @@ def save(self, filename): @staticmethod def load(filename): java_obj = SparkContext._active_spark_context._jvm \ - .com.microsoft.ml.spark.nn.ConditionalBallTree.load(filename) + .com.microsoft.azure.synapse.ml.nn.ConditionalBallTree.load(filename) return 
ConditionalBallTree(None, None, None, None, java_obj=java_obj) diff --git a/core/src/main/python/mmlspark/nn/__init__.py b/core/src/main/python/synapse/ml/nn/__init__.py similarity index 100% rename from core/src/main/python/mmlspark/nn/__init__.py rename to core/src/main/python/synapse/ml/nn/__init__.py diff --git a/core/src/main/python/mmlspark/plot/__init__.py b/core/src/main/python/synapse/ml/plot/__init__.py similarity index 100% rename from core/src/main/python/mmlspark/plot/__init__.py rename to core/src/main/python/synapse/ml/plot/__init__.py diff --git a/core/src/main/python/mmlspark/plot/plot.py b/core/src/main/python/synapse/ml/plot/plot.py similarity index 100% rename from core/src/main/python/mmlspark/plot/plot.py rename to core/src/main/python/synapse/ml/plot/plot.py diff --git a/core/src/main/python/mmlspark/recommendation/RankingTrainValidationSplit.py b/core/src/main/python/synapse/ml/recommendation/RankingTrainValidationSplit.py similarity index 85% rename from core/src/main/python/mmlspark/recommendation/RankingTrainValidationSplit.py rename to core/src/main/python/synapse/ml/recommendation/RankingTrainValidationSplit.py index 0084998677..f90484813f 100644 --- a/core/src/main/python/mmlspark/recommendation/RankingTrainValidationSplit.py +++ b/core/src/main/python/synapse/ml/recommendation/RankingTrainValidationSplit.py @@ -8,7 +8,7 @@ from pyspark.ml.tuning import _ValidatorParams from pyspark.ml.wrapper import JavaParams -from mmlspark.recommendation._RankingTrainValidationSplit import _RankingTrainValidationSplit +from synapse.ml.recommendation._RankingTrainValidationSplit import _RankingTrainValidationSplit class RankingTrainValidationSplit(_ValidatorParams, _RankingTrainValidationSplit): @@ -19,7 +19,7 @@ def _to_java(self): estimator, epms, evaluator = _ValidatorParams._to_java_impl(self) _java_obj = JavaParams._new_java_obj( - "com.microsoft.ml.spark.recommendation.RankingTrainValidationSplit", self.uid + 
"com.microsoft.azure.synapse.ml.recommendation.RankingTrainValidationSplit", self.uid ) _java_obj.setEstimatorParamMaps(epms) _java_obj.setEvaluator(evaluator) diff --git a/core/src/main/python/mmlspark/recommendation/RankingTrainValidationSplitModel.py b/core/src/main/python/synapse/ml/recommendation/RankingTrainValidationSplitModel.py similarity index 84% rename from core/src/main/python/mmlspark/recommendation/RankingTrainValidationSplitModel.py rename to core/src/main/python/synapse/ml/recommendation/RankingTrainValidationSplitModel.py index 1ae15cf1a6..8c3f8eec2b 100644 --- a/core/src/main/python/mmlspark/recommendation/RankingTrainValidationSplitModel.py +++ b/core/src/main/python/synapse/ml/recommendation/RankingTrainValidationSplitModel.py @@ -6,7 +6,7 @@ if sys.version >= "3": basestring = str -from mmlspark.recommendation._RankingTrainValidationSplitModel import _RankingTrainValidationSplitModel +from synapse.ml.recommendation._RankingTrainValidationSplitModel import _RankingTrainValidationSplitModel from pyspark.ml.util import * diff --git a/core/src/main/python/mmlspark/recommendation/SARModel.py b/core/src/main/python/synapse/ml/recommendation/SARModel.py similarity index 77% rename from core/src/main/python/mmlspark/recommendation/SARModel.py rename to core/src/main/python/synapse/ml/recommendation/SARModel.py index 844e2cfc07..03162cbcc2 100644 --- a/core/src/main/python/mmlspark/recommendation/SARModel.py +++ b/core/src/main/python/synapse/ml/recommendation/SARModel.py @@ -7,8 +7,8 @@ if sys.version >= "3": basestring = str -from mmlspark.core.schema.Utils import * -from mmlspark.recommendation._SARModel import _SARModel +from synapse.ml.core.schema.Utils import * +from synapse.ml.recommendation._SARModel import _SARModel @inherit_doc diff --git a/core/src/main/python/mmlspark/recommendation/__init__.py b/core/src/main/python/synapse/ml/recommendation/__init__.py similarity index 50% rename from 
core/src/main/python/mmlspark/recommendation/__init__.py rename to core/src/main/python/synapse/ml/recommendation/__init__.py index 81081c17f0..de240c578e 100644 --- a/core/src/main/python/mmlspark/recommendation/__init__.py +++ b/core/src/main/python/synapse/ml/recommendation/__init__.py @@ -6,12 +6,12 @@ This module contains general functions for Azure Managed Application publication. """ -from mmlspark.recommendation.RankingAdapter import RankingAdapter -from mmlspark.recommendation.RankingEvaluator import RankingEvaluator -from mmlspark.recommendation.RankingTrainValidationSplit import RankingTrainValidationSplit -from mmlspark.recommendation.RecommendationIndexer import RecommendationIndexer -from mmlspark.recommendation.SAR import SAR -from mmlspark.recommendation.SARModel import SARModel +from synapse.ml.recommendation.RankingAdapter import RankingAdapter +from synapse.ml.recommendation.RankingEvaluator import RankingEvaluator +from synapse.ml.recommendation.RankingTrainValidationSplit import RankingTrainValidationSplit +from synapse.ml.recommendation.RecommendationIndexer import RecommendationIndexer +from synapse.ml.recommendation.SAR import SAR +from synapse.ml.recommendation.SARModel import SARModel __all__ = [ diff --git a/core/src/main/python/mmlspark/stages/UDFTransformer.py b/core/src/main/python/synapse/ml/stages/UDFTransformer.py similarity index 93% rename from core/src/main/python/mmlspark/stages/UDFTransformer.py rename to core/src/main/python/synapse/ml/stages/UDFTransformer.py index beb9b20651..f5fb1bc5f6 100644 --- a/core/src/main/python/mmlspark/stages/UDFTransformer.py +++ b/core/src/main/python/synapse/ml/stages/UDFTransformer.py @@ -6,7 +6,7 @@ basestring = str from pyspark.ml.param.shared import * -from mmlspark.stages._UDFTransformer import _UDFTransformer +from synapse.ml.stages._UDFTransformer import _UDFTransformer from pyspark import keyword_only from pyspark.ml.util import JavaMLReadable, JavaMLWritable @@ -15,7 +15,7 @@ from 
pyspark.ml.common import inherit_doc from pyspark.sql.functions import UserDefinedFunction from pyspark.ml.common import inherit_doc -from mmlspark.core.schema.Utils import * +from synapse.ml.core.schema.Utils import * @inherit_doc class UDFTransformer(ComplexParamsMixin, JavaMLReadable, JavaMLWritable, JavaTransformer): @@ -31,7 +31,7 @@ class UDFTransformer(ComplexParamsMixin, JavaMLReadable, JavaMLWritable, JavaTra @keyword_only def __init__(self, inputCol=None, inputCols=None, outputCol=None, udf=None): super(UDFTransformer, self).__init__() - self._java_obj = self._new_java_obj("com.microsoft.ml.spark.stages.UDFTransformer") + self._java_obj = self._new_java_obj("com.microsoft.azure.synapse.ml.stages.UDFTransformer") self.inputCol = Param(self, "inputCol", "inputCol: The name of the input column (default: )") self.inputCols = Param(self, "inputCols", "inputCols: The names of the input columns (default: )") self.outputCol = Param(self, "outputCol", "outputCol: The name of the output column") @@ -128,7 +128,7 @@ def read(cls): @staticmethod def getJavaPackage(): """ Returns package name String. 
""" - return "com.microsoft.ml.spark.stages.UDFTransformer" + return "com.microsoft.azure.synapse.ml.stages.UDFTransformer" @staticmethod def _from_java(java_stage): diff --git a/core/src/main/python/mmlspark/stages/__init__.py b/core/src/main/python/synapse/ml/stages/__init__.py similarity index 100% rename from core/src/main/python/mmlspark/stages/__init__.py rename to core/src/main/python/synapse/ml/stages/__init__.py diff --git a/core/src/main/scala/com/microsoft/ml/spark/automl/DefaultHyperparams.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/automl/DefaultHyperparams.scala similarity index 98% rename from core/src/main/scala/com/microsoft/ml/spark/automl/DefaultHyperparams.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/automl/DefaultHyperparams.scala index e254d084f1..5bdd04dab5 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/automl/DefaultHyperparams.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/automl/DefaultHyperparams.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.automl +package com.microsoft.azure.synapse.ml.automl import org.apache.spark.ml.classification._ import org.apache.spark.ml.param.Param diff --git a/core/src/main/scala/com/microsoft/ml/spark/automl/EvaluationUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/automl/EvaluationUtils.scala similarity index 91% rename from core/src/main/scala/com/microsoft/ml/spark/automl/EvaluationUtils.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/automl/EvaluationUtils.scala index ac01a268a6..d82776ee40 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/automl/EvaluationUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/automl/EvaluationUtils.scala @@ -1,16 +1,17 @@ // Copyright (C) Microsoft Corporation. All rights reserved. 
// Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.automl +package com.microsoft.azure.synapse.ml.automl -import com.microsoft.ml.spark.core.metrics.MetricConstants -import com.microsoft.ml.spark.core.schema.SchemaConstants -import com.microsoft.ml.spark.train.{TrainClassifier, TrainRegressor, TrainedClassifierModel, TrainedRegressorModel} +import com.microsoft.azure.synapse.ml.core.metrics.MetricConstants +import com.microsoft.azure.synapse.ml.core.schema.SchemaConstants +import com.microsoft.azure.synapse.ml.train.{ + TrainClassifier, TrainRegressor, TrainedClassifierModel, TrainedRegressorModel} import org.apache.spark.injections.RegressionUtils import org.apache.spark.ml.classification.{ClassificationModel, Classifier} -import org.apache.spark.ml.{PipelineStage, Transformer} -import org.apache.spark.ml.param.{Param, ParamMap} +import org.apache.spark.ml.param.ParamMap import org.apache.spark.ml.regression._ +import org.apache.spark.ml.{PipelineStage, Transformer} object EvaluationUtils { val ModelTypeUnsupportedErr = "Model type not supported for evaluation" diff --git a/core/src/main/scala/com/microsoft/ml/spark/automl/FindBestModel.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/automl/FindBestModel.scala similarity index 94% rename from core/src/main/scala/com/microsoft/ml/spark/automl/FindBestModel.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/automl/FindBestModel.scala index a2f3e83101..cbaa250799 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/automl/FindBestModel.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/automl/FindBestModel.scala @@ -1,13 +1,13 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.automl +package com.microsoft.azure.synapse.ml.automl -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.contracts.HasEvaluationMetric -import com.microsoft.ml.spark.core.metrics.MetricConstants -import com.microsoft.ml.spark.logging.BasicLogging -import com.microsoft.ml.spark.train.ComputeModelStatistics +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts.HasEvaluationMetric +import com.microsoft.azure.synapse.ml.core.metrics.MetricConstants +import com.microsoft.azure.synapse.ml.logging.BasicLogging +import com.microsoft.azure.synapse.ml.train.ComputeModelStatistics import org.apache.spark.ml._ import org.apache.spark.ml.param.{DataFrameParam, ParamMap, Params, TransformerArrayParam, TransformerParam} import org.apache.spark.ml.util._ diff --git a/core/src/main/scala/com/microsoft/ml/spark/automl/FindBestModel.txt b/core/src/main/scala/com/microsoft/azure/synapse/ml/automl/FindBestModel.txt similarity index 100% rename from core/src/main/scala/com/microsoft/ml/spark/automl/FindBestModel.txt rename to core/src/main/scala/com/microsoft/azure/synapse/ml/automl/FindBestModel.txt diff --git a/core/src/main/scala/com/microsoft/ml/spark/automl/HyperparamBuilder.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/automl/HyperparamBuilder.scala similarity index 98% rename from core/src/main/scala/com/microsoft/ml/spark/automl/HyperparamBuilder.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/automl/HyperparamBuilder.scala index bd31252599..1025535230 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/automl/HyperparamBuilder.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/automl/HyperparamBuilder.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.automl +package com.microsoft.azure.synapse.ml.automl import org.apache.spark.ml.param._ diff --git a/core/src/main/scala/com/microsoft/ml/spark/automl/ParamSpace.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/automl/ParamSpace.scala similarity index 96% rename from core/src/main/scala/com/microsoft/ml/spark/automl/ParamSpace.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/automl/ParamSpace.scala index 89c9b42b57..f0b121c7e6 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/automl/ParamSpace.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/automl/ParamSpace.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.automl +package com.microsoft.azure.synapse.ml.automl import org.apache.spark.ml.param.{Param, ParamMap, ParamPair, ParamSpace} diff --git a/core/src/main/scala/com/microsoft/ml/spark/automl/TuneHyperparameters.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/automl/TuneHyperparameters.scala similarity index 96% rename from core/src/main/scala/com/microsoft/ml/spark/automl/TuneHyperparameters.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/automl/TuneHyperparameters.scala index 1d74928780..5e75a6cd22 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/automl/TuneHyperparameters.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/automl/TuneHyperparameters.scala @@ -1,15 +1,15 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.automl +package com.microsoft.azure.synapse.ml.automl import java.util.concurrent._ import com.google.common.util.concurrent.{MoreExecutors, ThreadFactoryBuilder} -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.contracts.HasEvaluationMetric -import com.microsoft.ml.spark.core.metrics.MetricConstants -import com.microsoft.ml.spark.logging.BasicLogging -import com.microsoft.ml.spark.train.{ComputeModelStatistics, TrainedClassifierModel, TrainedRegressorModel} +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts.HasEvaluationMetric +import com.microsoft.azure.synapse.ml.core.metrics.MetricConstants +import com.microsoft.azure.synapse.ml.logging.BasicLogging +import com.microsoft.azure.synapse.ml.train.{ComputeModelStatistics, TrainedClassifierModel, TrainedRegressorModel} import org.apache.spark.SparkException import org.apache.spark.annotation.DeveloperApi import org.apache.spark.ml.param._ diff --git a/core/src/main/scala/com/microsoft/ml/spark/automl/TuneHyperparameters.txt b/core/src/main/scala/com/microsoft/azure/synapse/ml/automl/TuneHyperparameters.txt similarity index 100% rename from core/src/main/scala/com/microsoft/ml/spark/automl/TuneHyperparameters.txt rename to core/src/main/scala/com/microsoft/azure/synapse/ml/automl/TuneHyperparameters.txt diff --git a/core/src/main/scala/com/microsoft/ml/spark/codegen/CodeGen.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/CodeGen.scala similarity index 84% rename from core/src/main/scala/com/microsoft/ml/spark/codegen/CodeGen.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/CodeGen.scala index 581b5a431e..ceba45d8c0 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/codegen/CodeGen.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/CodeGen.scala @@ -1,14 +1,14 @@ // Copyright (C) Microsoft Corporation. All rights reserved. 
// Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.codegen +package com.microsoft.azure.synapse.ml.codegen import java.io.File -import com.microsoft.ml.spark.codegen.CodegenConfigProtocol._ -import com.microsoft.ml.spark.core.env.FileUtilities._ +import CodegenConfigProtocol._ +import com.microsoft.azure.synapse.ml.core.env.FileUtilities._ import org.apache.commons.io.FileUtils import org.apache.commons.io.FilenameUtils._ -import com.microsoft.ml.spark.core.utils.JarLoadingUtils.instantiateServices +import com.microsoft.azure.synapse.ml.core.utils.JarLoadingUtils.instantiateServices import org.apache.spark.ml.{Estimator, Model, PipelineStage} import spray.json._ @@ -42,13 +42,13 @@ object CodeGen { } private def makeInitFiles(conf: CodegenConfig, packageFolder: String = ""): Unit = { - val dir = new File(new File(conf.pySrcDir, "mmlspark"), packageFolder) + val dir = join(conf.pySrcDir, "synapse", "ml", packageFolder) val packageString = if (packageFolder != "") packageFolder.replace("/", ".") else "" val importStrings = dir.listFiles.filter(_.isFile).sorted .map(_.getName) .filter(name => name.endsWith(".py") && !name.startsWith("_") && !name.startsWith("test")) - .map(name => s"from mmlspark$packageString.${getBaseName(name)} import *\n").mkString("") + .map(name => s"from synapse.ml$packageString.${getBaseName(name)} import *\n").mkString("") val initFile = new File(dir, "__init__.py") if (packageFolder != "") { writeFile(initFile, conf.packageHelp(importStrings)) @@ -68,15 +68,15 @@ object CodeGen { conf.rSrcDir.mkdirs() writeFile(new File(conf.rSrcDir.getParentFile, "DESCRIPTION"), - s"""|Package: ${conf.name.replace("-",".")} - |Title: Access to MMLSpark via R - |Description: Provides an interface to MMLSpark. + s"""|Package: ${conf.name.replace("-", ".")} + |Title: Access to SynapseML via R + |Description: Provides an interface to SynapseML. 
|Version: ${conf.rVersion} |Date: $today |Author: Microsoft Corporation - |Maintainer: MMLSpark Team - |URL: https://github.com/Azure/mmlspark - |BugReports: https://github.com/Azure/mmlspark/issues + |Maintainer: SynapseML Team + |URL: https://github.com/Microsoft/SynapseML + |BugReports: https://github.com/Microsoft/SynapseML/issues |Depends: | R (>= 2.12.0) |Imports: @@ -93,7 +93,7 @@ object CodeGen { | spark_dependency( | jars = c(), | packages = c( - | "com.microsoft.ml.spark:${conf.name}:${conf.version}" + | "com.microsoft.azure:${conf.name}:${conf.version}" | ), | repositories = c("https://mmlspark.azureedge.net/maven") | ) @@ -105,7 +105,7 @@ object CodeGen { |} |""".stripMargin) - writeFile(new File(conf.rSrcDir.getParentFile, "mmlspark.Rproj"), + writeFile(new File(conf.rSrcDir.getParentFile, "synapseml.Rproj"), """ |Version: 1.0 | @@ -151,8 +151,8 @@ object CodeGen { | long_description="Microsoft ML for Apache Spark contains Microsoft's open source " | + "contributions to the Apache Spark ecosystem", | license="MIT", - | packages=find_namespace_packages(include=['mmlspark.*']), - | url="https://github.com/Azure/mmlspark", + | packages=find_namespace_packages(include=['synapse.ml.*']), + | url="https://github.com/Microsoft/SynapseML", | author="Microsoft", | author_email="mmlspark-support@microsoft.com", | classifiers=[ @@ -165,7 +165,7 @@ object CodeGen { | "Programming Language :: Python :: 3", | ], | zip_safe=True, - | package_data={"mmlspark": ["../LICENSE.txt", "../README.txt"]}, + | package_data={"synapseml": ["../LICENSE.txt", "../README.txt"]}, |) | |""".stripMargin) diff --git a/core/src/main/scala/com/microsoft/ml/spark/codegen/CodegenConfig.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/CodegenConfig.scala similarity index 80% rename from core/src/main/scala/com/microsoft/ml/spark/codegen/CodegenConfig.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/CodegenConfig.scala index 049eb1bb8f..345ac85bc4 
100644 --- a/core/src/main/scala/com/microsoft/ml/spark/codegen/CodegenConfig.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/CodegenConfig.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.codegen +package com.microsoft.azure.synapse.ml.codegen import java.io.File @@ -32,13 +32,12 @@ case class CodegenConfig(name: String, //R Codegen Constants def rSrcRoot: File = new File(srcDir, "R") - def rSrcDir: File = new File(rSrcRoot, "mmlspark/R") + def rSrcDir: File = new File(rSrcRoot, "synapseml/R") def rPackageDir: File = new File(packageDir, "R") - def rTestDir: File = new File(rSrcRoot, "mmlspark/tests") + def rTestDir: File = new File(rSrcRoot, "synapseml/tests") def rTestOverrideDir: File = new File(topDir, "src/test/R") def rSrcOverrideDir: File = new File(topDir, "src/main/R") - //val rPackageFile = new File(rPackageDir, s"mmlspark-$mmlVer.zip") def internalPrefix: String = "_" def scopeDepth: String = " " * 4 @@ -52,19 +51,19 @@ case class CodegenConfig(name: String, s"""|$copyrightLines | |"\"" - |MMLSpark is an ecosystem of tools aimed towards expanding the distributed computing framework - |Apache Spark in several new directions. MMLSpark adds many deep learning and data science tools to the Spark + |SynapseML is an ecosystem of tools aimed towards expanding the distributed computing framework + |Apache Spark in several new directions. SynapseML adds many deep learning and data science tools to the Spark |ecosystem, including seamless integration of Spark Machine Learning pipelines with |Microsoft Cognitive Toolkit (CNTK), LightGBM and OpenCV. These tools enable powerful and |highly-scalable predictive and analytical models for a variety of datasources. | - |MMLSpark also brings new networking capabilities to the Spark Ecosystem. 
With the HTTP on Spark project, - |users can embed any web service into their SparkML models. In this vein, MMLSpark provides easy to use SparkML + |SynapseML also brings new networking capabilities to the Spark Ecosystem. With the HTTP on Spark project, + |users can embed any web service into their SparkML models. In this vein, SynapseML provides easy to use SparkML |transformers for a wide variety of Microsoft Cognitive Services. For production grade deployment, |the Spark Serving project enables high throughput, sub-millisecond latency web services, |backed by your Spark cluster. | - |MMLSpark requires Scala 2.11, Spark 2.4+, and Python 3.5+. + |SynapseML requires Scala 2.12, Spark 3.0+, and Python 3.6+. |"\"" | |__version__ = "$pythonizedVersion" diff --git a/core/src/main/scala/com/microsoft/ml/spark/codegen/GenerationUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/GenerationUtils.scala similarity index 93% rename from core/src/main/scala/com/microsoft/ml/spark/codegen/GenerationUtils.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/GenerationUtils.scala index f6835f7f64..ec1e6cbd88 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/codegen/GenerationUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/GenerationUtils.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.codegen +package com.microsoft.azure.synapse.ml.codegen -import com.microsoft.ml.spark.core.serialize.ComplexParam +import com.microsoft.azure.synapse.ml.core.serialize.ComplexParam import org.apache.spark.ml.param.{Param, ParamPair, PythonWrappableParam} object GenerationUtils { diff --git a/core/src/main/scala/com/microsoft/ml/spark/codegen/Wrappable.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/Wrappable.scala similarity index 96% rename from core/src/main/scala/com/microsoft/ml/spark/codegen/Wrappable.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/Wrappable.scala index 7e99e86f9a..b3f2045ed6 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/codegen/Wrappable.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/Wrappable.scala @@ -1,13 +1,14 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.codegen +package com.microsoft.azure.synapse.ml.codegen + +import com.microsoft.azure.synapse.ml.core.env.FileUtilities +import com.microsoft.azure.synapse.ml.core.serialize.ComplexParam import java.lang.reflect.ParameterizedType import java.nio.charset.StandardCharsets import java.nio.file.Files -import com.microsoft.ml.spark.core.env.FileUtilities -import com.microsoft.ml.spark.core.serialize.ComplexParam import org.apache.spark.ml.evaluation.Evaluator import org.apache.spark.ml.param._ import org.apache.spark.ml.{Estimator, Model, Transformer} @@ -230,7 +231,7 @@ trait PythonWrappable extends BaseWrappable { this match { case _: Estimator[_] => val companionModelImport = companionModelClassName - .replaceAllLiterally("com.microsoft.ml.spark", "mmlspark") + .replaceAllLiterally("com.microsoft.azure.synapse.ml", "synapseml") .replaceAllLiterally("org.apache.spark", "pyspark") .split(".".toCharArray) val path = if (companionModelImport.head == "pyspark") { @@ -320,13 +321,13 @@ trait PythonWrappable extends BaseWrappable { |from pyspark.ml.param.shared import * |from pyspark import keyword_only |from pyspark.ml.util import JavaMLReadable, JavaMLWritable - |from mmlspark.core.serialize.java_params_patch import * + |from synapse.ml.core.serialize.java_params_patch import * |from pyspark.ml.wrapper import JavaTransformer, JavaEstimator, JavaModel |from pyspark.ml.evaluation import JavaEvaluator |from pyspark.ml.common import inherit_doc - |from mmlspark.core.schema.Utils import * + |from synapse.ml.core.schema.Utils import * |from pyspark.ml.param import TypeConverters - |from mmlspark.core.schema.TypeConversionUtils import generateTypeConverter, complexTypeConverter + |from synapse.ml.core.schema.TypeConversionUtils import generateTypeConverter, complexTypeConverter |$pyExtraEstimatorImports | |@inherit_doc @@ -380,8 +381,8 @@ trait PythonWrappable extends BaseWrappable { def makePyFile(conf: CodegenConfig): Unit = { val importPath = 
this.getClass.getName.split(".".toCharArray).dropRight(1) val srcFolders = importPath.mkString(".") - .replaceAllLiterally("com.microsoft.ml.spark", "mmlspark").split(".".toCharArray) - val srcDir = FileUtilities.join((Seq(conf.pySrcDir.toString) ++ srcFolders.toSeq): _*) + .replaceAllLiterally("com.microsoft.azure.synapse.ml.", "").split(".".toCharArray) + val srcDir = FileUtilities.join((Seq(conf.pySrcDir.toString, "synapse", "ml") ++ srcFolders.toSeq): _*) srcDir.mkdirs() Files.write( FileUtilities.join(srcDir, pyClassName + ".py").toPath, diff --git a/core/src/main/scala/com/microsoft/ml/spark/core/contracts/Metrics.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/contracts/Metrics.scala similarity index 92% rename from core/src/main/scala/com/microsoft/ml/spark/core/contracts/Metrics.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/core/contracts/Metrics.scala index d74ff8d3f4..1758675cdd 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/core/contracts/Metrics.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/contracts/Metrics.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.core.contracts +package com.microsoft.azure.synapse.ml.core.contracts // Case class matching sealed abstract class Metric @@ -11,7 +11,7 @@ object ConvenienceTypes { type UniqueName = String type MetricTable = Map[UniqueName, Seq[Metric]] } -import com.microsoft.ml.spark.core.contracts.ConvenienceTypes._ +import ConvenienceTypes._ // One option case class TypedMetric[T](name: UniqueName, value: T) extends Metric diff --git a/core/src/main/scala/com/microsoft/ml/spark/core/contracts/Params.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/contracts/Params.scala similarity index 99% rename from core/src/main/scala/com/microsoft/ml/spark/core/contracts/Params.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/core/contracts/Params.scala index 8af7c81a7b..18d32ce59f 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/core/contracts/Params.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/contracts/Params.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.core.contracts +package com.microsoft.azure.synapse.ml.core.contracts import org.apache.spark.ml.param._ diff --git a/core/src/main/scala/com/microsoft/ml/spark/core/env/FileUtilities.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/env/FileUtilities.scala similarity index 98% rename from core/src/main/scala/com/microsoft/ml/spark/core/env/FileUtilities.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/core/env/FileUtilities.scala index 1ff3d32d13..f2b2907f9e 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/core/env/FileUtilities.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/env/FileUtilities.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
See LICENSE in project root for information. -package com.microsoft.ml.spark.core.env +package com.microsoft.azure.synapse.ml.core.env import java.io.File import java.nio.file.{Files, StandardCopyOption} diff --git a/core/src/main/scala/com/microsoft/ml/spark/core/env/NativeLoader.java b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/env/NativeLoader.java similarity index 99% rename from core/src/main/scala/com/microsoft/ml/spark/core/env/NativeLoader.java rename to core/src/main/scala/com/microsoft/azure/synapse/ml/core/env/NativeLoader.java index bb8c5df7ea..f7d3d7ccce 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/core/env/NativeLoader.java +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/env/NativeLoader.java @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.core.env; +package com.microsoft.azure.synapse.ml.core.env; import java.io.*; import java.nio.file.Files; diff --git a/core/src/main/scala/com/microsoft/ml/spark/core/env/StreamUtilities.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/env/StreamUtilities.scala similarity index 98% rename from core/src/main/scala/com/microsoft/ml/spark/core/env/StreamUtilities.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/core/env/StreamUtilities.scala index e96c9aeb7b..af57a38e6c 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/core/env/StreamUtilities.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/env/StreamUtilities.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.core.env +package com.microsoft.azure.synapse.ml.core.env import java.io.{ByteArrayOutputStream, InputStream} import java.util.zip.ZipInputStream diff --git a/core/src/main/scala/com/microsoft/ml/spark/core/metrics/MetricConstants.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/metrics/MetricConstants.scala similarity index 98% rename from core/src/main/scala/com/microsoft/ml/spark/core/metrics/MetricConstants.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/core/metrics/MetricConstants.scala index 2d2acc44ff..8047f53557 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/core/metrics/MetricConstants.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/metrics/MetricConstants.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.core.metrics +package com.microsoft.azure.synapse.ml.core.metrics /** Contains constants used by modules for metrics. */ object MetricConstants { diff --git a/core/src/main/scala/com/microsoft/ml/spark/core/metrics/MetricUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/metrics/MetricUtils.scala similarity index 92% rename from core/src/main/scala/com/microsoft/ml/spark/core/metrics/MetricUtils.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/core/metrics/MetricUtils.scala index 7edc6246b0..3c5bd28df7 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/core/metrics/MetricUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/metrics/MetricUtils.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.core.metrics +package com.microsoft.azure.synapse.ml.core.metrics -import com.microsoft.ml.spark.core.schema.{SchemaConstants, SparkSchema} -import com.microsoft.ml.spark.core.schema.SchemaConstants.MMLTag +import com.microsoft.azure.synapse.ml.core.schema.{SchemaConstants, SparkSchema} +import com.microsoft.azure.synapse.ml.core.schema.SchemaConstants.MMLTag import org.apache.spark.sql.types.injections.MetadataUtilities import org.apache.spark.sql.types.{Metadata, StructField, StructType} diff --git a/core/src/main/scala/com/microsoft/ml/spark/core/schema/BinaryFileSchema.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/schema/BinaryFileSchema.scala similarity index 94% rename from core/src/main/scala/com/microsoft/ml/spark/core/schema/BinaryFileSchema.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/core/schema/BinaryFileSchema.scala index ac10e4b5cf..5b7bf029be 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/core/schema/BinaryFileSchema.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/schema/BinaryFileSchema.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.core.schema +package com.microsoft.azure.synapse.ml.core.schema import org.apache.spark.sql.Row import org.apache.spark.sql.types._ diff --git a/core/src/main/scala/com/microsoft/ml/spark/core/schema/Categoricals.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/schema/Categoricals.scala similarity index 99% rename from core/src/main/scala/com/microsoft/ml/spark/core/schema/Categoricals.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/core/schema/Categoricals.scala index de63dbfefc..412f4034f7 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/core/schema/Categoricals.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/schema/Categoricals.scala @@ -1,11 +1,11 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.core.schema +package com.microsoft.azure.synapse.ml.core.schema /** Contains objects and functions to manipulate Categoricals */ -import com.microsoft.ml.spark.core.schema.SchemaConstants._ +import SchemaConstants._ import javassist.bytecode.DuplicateMemberException import org.apache.spark.ml.attribute._ import org.apache.spark.sql.DataFrame diff --git a/core/src/main/scala/com/microsoft/ml/spark/core/schema/DatasetExtensions.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/schema/DatasetExtensions.scala similarity index 97% rename from core/src/main/scala/com/microsoft/ml/spark/core/schema/DatasetExtensions.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/core/schema/DatasetExtensions.scala index 8ad909b8b4..c0175db061 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/core/schema/DatasetExtensions.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/schema/DatasetExtensions.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
See LICENSE in project root for information. -package com.microsoft.ml.spark.core.schema +package com.microsoft.azure.synapse.ml.core.schema import org.apache.spark.ml.linalg.{DenseVector, SparseVector} import org.apache.spark.sql.Dataset diff --git a/core/src/main/scala/com/microsoft/ml/spark/core/schema/ImageSchemaUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/schema/ImageSchemaUtils.scala similarity index 95% rename from core/src/main/scala/com/microsoft/ml/spark/core/schema/ImageSchemaUtils.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/core/schema/ImageSchemaUtils.scala index 3139f97e5f..7ff1daeb9b 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/core/schema/ImageSchemaUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/schema/ImageSchemaUtils.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.core.schema +package com.microsoft.azure.synapse.ml.core.schema import org.apache.spark.ml.image.ImageSchema import org.apache.spark.sql.types._ diff --git a/core/src/main/scala/com/microsoft/ml/spark/core/schema/SchemaConstants.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/schema/SchemaConstants.scala similarity index 97% rename from core/src/main/scala/com/microsoft/ml/spark/core/schema/SchemaConstants.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/core/schema/SchemaConstants.scala index 2f839267fc..d916b258a3 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/core/schema/SchemaConstants.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/schema/SchemaConstants.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.core.schema +package com.microsoft.azure.synapse.ml.core.schema /** Contains constants used by modules for schema. */ object SchemaConstants { diff --git a/core/src/main/scala/com/microsoft/ml/spark/core/schema/SparkBindings.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/schema/SparkBindings.scala similarity index 97% rename from core/src/main/scala/com/microsoft/ml/spark/core/schema/SparkBindings.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/core/schema/SparkBindings.scala index e48286bf51..2fb39424ab 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/core/schema/SparkBindings.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/schema/SparkBindings.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.core.schema +package com.microsoft.azure.synapse.ml.core.schema import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.InternalRow diff --git a/core/src/main/scala/com/microsoft/ml/spark/core/schema/SparkSchema.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/schema/SparkSchema.scala similarity index 98% rename from core/src/main/scala/com/microsoft/ml/spark/core/schema/SparkSchema.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/core/schema/SparkSchema.scala index facc6e95b8..516fde3c6f 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/core/schema/SparkSchema.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/schema/SparkSchema.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.core.schema +package com.microsoft.azure.synapse.ml.core.schema -import com.microsoft.ml.spark.core.schema.SchemaConstants._ +import SchemaConstants._ import org.apache.spark.sql.DataFrame import org.apache.spark.sql.types._ diff --git a/core/src/main/scala/com/microsoft/ml/spark/core/serialize/ComplexParam.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/serialize/ComplexParam.scala similarity index 95% rename from core/src/main/scala/com/microsoft/ml/spark/core/serialize/ComplexParam.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/core/serialize/ComplexParam.scala index 8772d49a2e..3e104d18d6 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/core/serialize/ComplexParam.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/serialize/ComplexParam.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.core.serialize +package com.microsoft.azure.synapse.ml.core.serialize import org.apache.hadoop.fs.Path import org.apache.spark.ml.Serializer diff --git a/core/src/main/scala/com/microsoft/ml/spark/core/spark/FluentAPI.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/spark/FluentAPI.scala similarity index 94% rename from core/src/main/scala/com/microsoft/ml/spark/core/spark/FluentAPI.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/core/spark/FluentAPI.scala index 528c75c5e2..1678bc2056 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/core/spark/FluentAPI.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/spark/FluentAPI.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark +package com.microsoft.azure.synapse.ml.core.spark import org.apache.spark.ml.{Estimator, Model, NamespaceInjections, Transformer} import org.apache.spark.sql.DataFrame diff --git a/core/src/main/scala/com/microsoft/ml/spark/core/utils/AsyncUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/utils/AsyncUtils.scala similarity index 98% rename from core/src/main/scala/com/microsoft/ml/spark/core/utils/AsyncUtils.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/core/utils/AsyncUtils.scala index fe0121b262..5e54e9ac51 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/core/utils/AsyncUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/utils/AsyncUtils.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.core.utils +package com.microsoft.azure.synapse.ml.core.utils import scala.concurrent.duration.Duration import scala.concurrent.{Await, ExecutionContext, Future} diff --git a/core/src/main/scala/com/microsoft/ml/spark/core/utils/BreezeUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/utils/BreezeUtils.scala similarity index 95% rename from core/src/main/scala/com/microsoft/ml/spark/core/utils/BreezeUtils.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/core/utils/BreezeUtils.scala index ca68b2c08a..a96dfaa23e 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/core/utils/BreezeUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/utils/BreezeUtils.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.core.utils +package com.microsoft.azure.synapse.ml.core.utils import org.apache.spark.ml.linalg.{Matrices, Matrix, Vector, Vectors} import breeze.linalg.{DenseVector => BDV, DenseMatrix => BDM, SparseVector => BSV, Vector => BV} diff --git a/core/src/main/scala/com/microsoft/ml/spark/core/utils/CastUtilities.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/utils/CastUtilities.scala similarity index 92% rename from core/src/main/scala/com/microsoft/ml/spark/core/utils/CastUtilities.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/core/utils/CastUtilities.scala index 8395c6bb79..d2b9ba2578 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/core/utils/CastUtilities.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/utils/CastUtilities.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.core.utils +package com.microsoft.azure.synapse.ml.core.utils import org.apache.spark.sql.catalyst.expressions.{Cast, Literal} import org.apache.spark.sql.types.DataType diff --git a/core/src/main/scala/com/microsoft/ml/spark/core/utils/ClusterUtil.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/utils/ClusterUtil.scala similarity index 99% rename from core/src/main/scala/com/microsoft/ml/spark/core/utils/ClusterUtil.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/core/utils/ClusterUtil.scala index fff0f1f941..25ad5db4ec 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/core/utils/ClusterUtil.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/utils/ClusterUtil.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.core.utils +package com.microsoft.azure.synapse.ml.core.utils import java.net.InetAddress diff --git a/core/src/main/scala/com/microsoft/ml/spark/core/utils/ContextObjectInputStream.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/utils/ContextObjectInputStream.scala similarity index 91% rename from core/src/main/scala/com/microsoft/ml/spark/core/utils/ContextObjectInputStream.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/core/utils/ContextObjectInputStream.scala index baff4885bd..b97e245fa3 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/core/utils/ContextObjectInputStream.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/utils/ContextObjectInputStream.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.core.utils +package com.microsoft.azure.synapse.ml.core.utils import java.io.{InputStream, ObjectInputStream, ObjectStreamClass} diff --git a/core/src/main/scala/com/microsoft/ml/spark/core/utils/FaultToleranceUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/utils/FaultToleranceUtils.scala similarity index 94% rename from core/src/main/scala/com/microsoft/ml/spark/core/utils/FaultToleranceUtils.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/core/utils/FaultToleranceUtils.scala index 7bbe1b1d0c..53c15aff13 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/core/utils/FaultToleranceUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/utils/FaultToleranceUtils.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.core.utils +package com.microsoft.azure.synapse.ml.core.utils import scala.concurrent.duration.Duration import scala.concurrent.{Await, ExecutionContext, Future} diff --git a/core/src/main/scala/com/microsoft/ml/spark/core/utils/JarLoadingUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/utils/JarLoadingUtils.scala similarity index 88% rename from core/src/main/scala/com/microsoft/ml/spark/core/utils/JarLoadingUtils.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/core/utils/JarLoadingUtils.scala index 478631f620..bf5a98efb4 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/core/utils/JarLoadingUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/utils/JarLoadingUtils.scala @@ -1,12 +1,13 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.core.utils +package com.microsoft.azure.synapse.ml.core.utils -import java.lang.reflect.Modifier +import com.microsoft.azure.synapse.ml.codegen.Wrappable -import com.microsoft.ml.spark.codegen.Wrappable +import java.lang.reflect.Modifier import org.sparkproject.guava.reflect.ClassPath + import scala.collection.JavaConverters._ import scala.reflect.{ClassTag, classTag} @@ -22,11 +23,11 @@ object JarLoadingUtils { } } - private[spark] val AllClasses = { + private[ml] val AllClasses = { ClassPath.from(getClass.getClassLoader) .getResources.asScala.toList .map(ri => className(ri.getResourceName)) - .filter(_.startsWith("com.microsoft.ml")) + .filter(_.startsWith("com.microsoft.azure.synapse")) .flatMap { cn => try { Some(Class.forName(cn)) @@ -36,7 +37,7 @@ object JarLoadingUtils { } } - private[spark] val WrappableClasses = { + private[ml] val WrappableClasses = { AllClasses.filter(classOf[Wrappable].isAssignableFrom(_)) } diff --git a/core/src/main/scala/com/microsoft/ml/spark/core/utils/ModelEquality.scala 
b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/utils/ModelEquality.scala similarity index 97% rename from core/src/main/scala/com/microsoft/ml/spark/core/utils/ModelEquality.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/core/utils/ModelEquality.scala index 746bc2796f..131ca5c0f3 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/core/utils/ModelEquality.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/utils/ModelEquality.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.core.utils +package com.microsoft.azure.synapse.ml.core.utils import org.apache.spark.ml.PipelineStage import org.apache.spark.ml.param.Param diff --git a/core/src/main/scala/com/microsoft/ml/spark/core/utils/OsUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/utils/OsUtils.scala similarity index 83% rename from core/src/main/scala/com/microsoft/ml/spark/core/utils/OsUtils.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/core/utils/OsUtils.scala index 80c4560fe8..ea001ae7b7 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/core/utils/OsUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/utils/OsUtils.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.core.utils +package com.microsoft.azure.synapse.ml.core.utils object OsUtils { val IsWindows: Boolean = System.getProperty("os.name").toLowerCase().indexOf("win") >= 0 diff --git a/core/src/main/scala/com/microsoft/ml/spark/core/utils/SlicerFunctions.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/utils/SlicerFunctions.scala similarity index 96% rename from core/src/main/scala/com/microsoft/ml/spark/core/utils/SlicerFunctions.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/core/utils/SlicerFunctions.scala index 37b7200b48..38615d7638 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/core/utils/SlicerFunctions.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/utils/SlicerFunctions.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.core.utils +package com.microsoft.azure.synapse.ml.core.utils import org.apache.spark.injections.UDFUtils import org.apache.spark.ml.linalg.SQLDataTypes.VectorType @@ -9,7 +9,7 @@ import org.apache.spark.ml.linalg.{Vector, Vectors} import org.apache.spark.sql.expressions.UserDefinedFunction import org.apache.spark.sql.types._ -private[spark] object SlicerFunctions { +private[ml] object SlicerFunctions { private def slice[T](values: Int => T, indices: Seq[Int])(implicit num: Numeric[_]): Vector = { val n = num.asInstanceOf[Numeric[T]] Vectors.dense(indices.map(values.apply).map(n.toDouble).toArray) diff --git a/core/src/main/scala/com/microsoft/ml/spark/core/utils/StopWatch.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/utils/StopWatch.scala similarity index 91% rename from core/src/main/scala/com/microsoft/ml/spark/core/utils/StopWatch.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/core/utils/StopWatch.scala index 42274815c2..4713d816d5 100644 --- 
a/core/src/main/scala/com/microsoft/ml/spark/core/utils/StopWatch.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/utils/StopWatch.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.core.utils +package com.microsoft.azure.synapse.ml.core.utils class StopWatch { private var elapsedTime: Long = 0L diff --git a/core/src/main/scala/com/microsoft/ml/spark/explainers/FeatureStats.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/FeatureStats.scala similarity index 97% rename from core/src/main/scala/com/microsoft/ml/spark/explainers/FeatureStats.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/FeatureStats.scala index 4c58869ecd..214f0624c7 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/explainers/FeatureStats.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/FeatureStats.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.explainers +package com.microsoft.azure.synapse.ml.explainers import breeze.numerics.abs import breeze.stats.distributions.{RandBasis, Uniform} diff --git a/core/src/main/scala/com/microsoft/ml/spark/explainers/ImageExplainer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ImageExplainer.scala similarity index 84% rename from core/src/main/scala/com/microsoft/ml/spark/explainers/ImageExplainer.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ImageExplainer.scala index 8adfab8269..2100d19df8 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/explainers/ImageExplainer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ImageExplainer.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. 
All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.explainers +package com.microsoft.azure.synapse.ml.explainers -import com.microsoft.ml.spark.lime.{HasCellSize, HasModifier, SuperpixelTransformer} +import com.microsoft.azure.synapse.ml.lime.{HasCellSize, HasModifier, SuperpixelTransformer} import org.apache.spark.ml.param.shared.HasInputCol import org.apache.spark.sql.DataFrame diff --git a/core/src/main/scala/com/microsoft/ml/spark/explainers/ImageLIME.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ImageLIME.scala similarity index 93% rename from core/src/main/scala/com/microsoft/ml/spark/explainers/ImageLIME.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ImageLIME.scala index 761630174c..05da95cbf5 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/explainers/ImageLIME.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ImageLIME.scala @@ -1,12 +1,12 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.explainers +package com.microsoft.azure.synapse.ml.explainers import breeze.stats.distributions.RandBasis -import com.microsoft.ml.spark.core.schema.{DatasetExtensions, ImageSchemaUtils} -import com.microsoft.ml.spark.io.image.ImageUtils -import com.microsoft.ml.spark.lime.{HasCellSize, HasModifier, SuperpixelData} +import com.microsoft.azure.synapse.ml.core.schema.{DatasetExtensions, ImageSchemaUtils} +import com.microsoft.azure.synapse.ml.io.image.ImageUtils +import com.microsoft.azure.synapse.ml.lime.{HasCellSize, HasModifier, SuperpixelData} import org.apache.spark.injections.UDFUtils import org.apache.spark.ml.ComplexParamsReadable import org.apache.spark.ml.image.ImageSchema diff --git a/core/src/main/scala/com/microsoft/ml/spark/explainers/ImageSHAP.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ImageSHAP.scala similarity index 93% rename from core/src/main/scala/com/microsoft/ml/spark/explainers/ImageSHAP.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ImageSHAP.scala index 4542bc6d8e..f663eb49d3 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/explainers/ImageSHAP.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ImageSHAP.scala @@ -1,11 +1,11 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.explainers +package com.microsoft.azure.synapse.ml.explainers -import com.microsoft.ml.spark.core.schema.{DatasetExtensions, ImageSchemaUtils} -import com.microsoft.ml.spark.io.image.ImageUtils -import com.microsoft.ml.spark.lime._ +import com.microsoft.azure.synapse.ml.core.schema.{DatasetExtensions, ImageSchemaUtils} +import com.microsoft.azure.synapse.ml.io.image.ImageUtils +import com.microsoft.azure.synapse.ml.lime.{HasCellSize, HasModifier, SuperpixelData} import org.apache.spark.injections.UDFUtils import org.apache.spark.ml.ComplexParamsReadable import org.apache.spark.ml.image.ImageSchema diff --git a/core/src/main/scala/com/microsoft/ml/spark/explainers/KernelSHAPBase.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/KernelSHAPBase.scala similarity index 94% rename from core/src/main/scala/com/microsoft/ml/spark/explainers/KernelSHAPBase.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/KernelSHAPBase.scala index 19838ad00e..9ae74e7138 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/explainers/KernelSHAPBase.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/KernelSHAPBase.scala @@ -1,12 +1,13 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.explainers +package com.microsoft.azure.synapse.ml.explainers + import breeze.linalg.{*, DenseMatrix => BDM, DenseVector => BDV} -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.schema.DatasetExtensions -import com.microsoft.ml.spark.core.utils.BreezeUtils._ -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.schema.DatasetExtensions +import com.microsoft.azure.synapse.ml.core.utils.BreezeUtils._ +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.ml.Transformer import org.apache.spark.ml.linalg.SQLDataTypes.VectorType import org.apache.spark.ml.linalg.{Vector, Vectors} diff --git a/core/src/main/scala/com/microsoft/ml/spark/explainers/KernelSHAPSampler.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/KernelSHAPSampler.scala similarity index 97% rename from core/src/main/scala/com/microsoft/ml/spark/explainers/KernelSHAPSampler.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/KernelSHAPSampler.scala index 412682fe52..9694eb0334 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/explainers/KernelSHAPSampler.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/KernelSHAPSampler.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.explainers +package com.microsoft.azure.synapse.ml.explainers import breeze.linalg.{sum, DenseVector => BDV} -import com.microsoft.ml.spark.core.utils.BreezeUtils._ +import com.microsoft.azure.synapse.ml.core.utils.BreezeUtils._ import org.apache.commons.math3.util.CombinatoricsUtils.{binomialCoefficientDouble => comb} import org.apache.spark.ml.linalg.Vector diff --git a/core/src/main/scala/com/microsoft/ml/spark/explainers/LIMEBase.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/LIMEBase.scala similarity index 94% rename from core/src/main/scala/com/microsoft/ml/spark/explainers/LIMEBase.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/LIMEBase.scala index d2ca2f1bbd..67afb5c103 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/explainers/LIMEBase.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/LIMEBase.scala @@ -1,13 +1,13 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.explainers +package com.microsoft.azure.synapse.ml.explainers import breeze.linalg.{*, DenseMatrix => BDM, DenseVector => BDV} -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.schema.DatasetExtensions -import com.microsoft.ml.spark.core.utils.BreezeUtils._ -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.schema.DatasetExtensions +import com.microsoft.azure.synapse.ml.core.utils.BreezeUtils._ +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.injections.UDFUtils import org.apache.spark.ml.Transformer import org.apache.spark.ml.linalg.SQLDataTypes.VectorType diff --git a/core/src/main/scala/com/microsoft/ml/spark/explainers/LIMESampler.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/LIMESampler.scala similarity index 93% rename from core/src/main/scala/com/microsoft/ml/spark/explainers/LIMESampler.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/LIMESampler.scala index 07ff855a93..daa2450043 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/explainers/LIMESampler.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/LIMESampler.scala @@ -1,12 +1,12 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.explainers +package com.microsoft.azure.synapse.ml.explainers import breeze.linalg.{BitVector, axpy, norm, DenseVector => BDV} import breeze.stats.distributions.RandBasis import org.apache.spark.ml.linalg.Vector -import com.microsoft.ml.spark.core.utils.BreezeUtils._ +import com.microsoft.azure.synapse.ml.core.utils.BreezeUtils._ private[explainers] trait LIMESampler[TObservation] extends Sampler[TObservation, Vector] { def sample: (TObservation, Vector, Double) = { diff --git a/core/src/main/scala/com/microsoft/ml/spark/explainers/LassoRegression.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/LassoRegression.scala similarity index 94% rename from core/src/main/scala/com/microsoft/ml/spark/explainers/LassoRegression.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/LassoRegression.scala index 8a11899794..5633088774 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/explainers/LassoRegression.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/LassoRegression.scala @@ -1,13 +1,14 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.explainers +package com.microsoft.azure.synapse.ml.explainers + import breeze.linalg.{isClose, sum, DenseMatrix => BDM, DenseVector => BDV} import breeze.numerics.abs import scala.annotation.tailrec -private[spark] case class CoordinateDescentLasso(alpha: Double, maxIterations: Int, tol: Double) { +private[ml] case class CoordinateDescentLasso(alpha: Double, maxIterations: Int, tol: Double) { require(maxIterations >= 1) require(tol >= 0) diff --git a/core/src/main/scala/com/microsoft/ml/spark/explainers/LeastSquaresRegression.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/LeastSquaresRegression.scala similarity index 93% rename from core/src/main/scala/com/microsoft/ml/spark/explainers/LeastSquaresRegression.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/LeastSquaresRegression.scala index d3da8f17ed..a7f3b365e4 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/explainers/LeastSquaresRegression.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/LeastSquaresRegression.scala @@ -1,7 +1,8 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.explainers +package com.microsoft.azure.synapse.ml.explainers + import breeze.linalg.{DenseMatrix, DenseVector, sum} import breeze.stats.regression.leastSquares diff --git a/core/src/main/scala/com/microsoft/ml/spark/explainers/LocalExplainer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/LocalExplainer.scala similarity index 96% rename from core/src/main/scala/com/microsoft/ml/spark/explainers/LocalExplainer.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/LocalExplainer.scala index dd1578fb75..b2f8fec717 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/explainers/LocalExplainer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/LocalExplainer.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.explainers +package com.microsoft.azure.synapse.ml.explainers -import com.microsoft.ml.spark.core.utils.SlicerFunctions +import com.microsoft.azure.synapse.ml.core.utils.SlicerFunctions import org.apache.spark.injections.UDFUtils import org.apache.spark.ml.linalg.SQLDataTypes.{MatrixType, VectorType} import org.apache.spark.ml.linalg.Vectors diff --git a/core/src/main/scala/com/microsoft/ml/spark/explainers/RegressionBase.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/RegressionBase.scala similarity index 98% rename from core/src/main/scala/com/microsoft/ml/spark/explainers/RegressionBase.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/RegressionBase.scala index d3095b3fea..d07601195b 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/explainers/RegressionBase.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/RegressionBase.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
See LICENSE in project root for information. -package com.microsoft.ml.spark.explainers +package com.microsoft.azure.synapse.ml.explainers import breeze.linalg.{*, sum, DenseMatrix => BDM, DenseVector => BDV, Vector => BV, Matrix => BM} import breeze.numerics.sqrt diff --git a/core/src/main/scala/com/microsoft/ml/spark/explainers/RowUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/RowUtils.scala similarity index 93% rename from core/src/main/scala/com/microsoft/ml/spark/explainers/RowUtils.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/RowUtils.scala index ba95a294a8..7c583361b8 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/explainers/RowUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/RowUtils.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.explainers +package com.microsoft.azure.synapse.ml.explainers import org.apache.spark.sql.Row diff --git a/core/src/main/scala/com/microsoft/ml/spark/explainers/Sampler.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/Sampler.scala similarity index 97% rename from core/src/main/scala/com/microsoft/ml/spark/explainers/Sampler.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/Sampler.scala index 25b51b6867..783fc24b87 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/explainers/Sampler.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/Sampler.scala @@ -1,13 +1,13 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.explainers +package com.microsoft.azure.synapse.ml.explainers import breeze.linalg.{norm, DenseVector => BDV} import breeze.stats.distributions.RandBasis -import com.microsoft.ml.spark.core.utils.BreezeUtils._ -import com.microsoft.ml.spark.explainers.RowUtils.RowCanGetAsDouble -import com.microsoft.ml.spark.lime.{Superpixel, SuperpixelData} +import com.microsoft.azure.synapse.ml.core.utils.BreezeUtils._ +import RowUtils.RowCanGetAsDouble +import com.microsoft.azure.synapse.ml.lime.{Superpixel, SuperpixelData} import org.apache.spark.ml.linalg.{Vector, Vectors} import org.apache.spark.sql.Row diff --git a/core/src/main/scala/com/microsoft/ml/spark/explainers/SharedParams.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/SharedParams.scala similarity index 98% rename from core/src/main/scala/com/microsoft/ml/spark/explainers/SharedParams.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/SharedParams.scala index 5502cfa35b..d246901d0d 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/explainers/SharedParams.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/SharedParams.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.explainers +package com.microsoft.azure.synapse.ml.explainers import org.apache.spark.ml.Transformer import org.apache.spark.ml.param._ diff --git a/core/src/main/scala/com/microsoft/ml/spark/explainers/TabularLIME.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/TabularLIME.scala similarity index 97% rename from core/src/main/scala/com/microsoft/ml/spark/explainers/TabularLIME.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/TabularLIME.scala index 8294b04477..ef58a4b7bc 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/explainers/TabularLIME.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/TabularLIME.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.explainers +package com.microsoft.azure.synapse.ml.explainers import breeze.stats.distributions.RandBasis -import com.microsoft.ml.spark.core.schema.DatasetExtensions +import com.microsoft.azure.synapse.ml.core.schema.DatasetExtensions import org.apache.spark.injections.UDFUtils import org.apache.spark.ml.param.StringArrayParam import org.apache.spark.ml.param.shared.HasInputCols diff --git a/core/src/main/scala/com/microsoft/ml/spark/explainers/TabularSHAP.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/TabularSHAP.scala similarity index 96% rename from core/src/main/scala/com/microsoft/ml/spark/explainers/TabularSHAP.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/TabularSHAP.scala index adfc28f04e..4581c3111e 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/explainers/TabularSHAP.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/TabularSHAP.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
See LICENSE in project root for information. -package com.microsoft.ml.spark.explainers +package com.microsoft.azure.synapse.ml.explainers import breeze.stats.distributions.RandBasis -import com.microsoft.ml.spark.core.schema.DatasetExtensions +import com.microsoft.azure.synapse.ml.core.schema.DatasetExtensions import org.apache.spark.injections.UDFUtils import org.apache.spark.ml.ComplexParamsReadable import org.apache.spark.ml.param.shared.HasInputCols diff --git a/core/src/main/scala/com/microsoft/ml/spark/explainers/TextExplainer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/TextExplainer.scala similarity index 92% rename from core/src/main/scala/com/microsoft/ml/spark/explainers/TextExplainer.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/TextExplainer.scala index 5a7726f87c..41f56c23de 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/explainers/TextExplainer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/TextExplainer.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.explainers +package com.microsoft.azure.synapse.ml.explainers import org.apache.spark.ml.feature.Tokenizer import org.apache.spark.ml.param.shared.HasInputCol diff --git a/core/src/main/scala/com/microsoft/ml/spark/explainers/TextLIME.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/TextLIME.scala similarity index 96% rename from core/src/main/scala/com/microsoft/ml/spark/explainers/TextLIME.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/TextLIME.scala index b214545395..e499317189 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/explainers/TextLIME.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/TextLIME.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. 
All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.explainers +package com.microsoft.azure.synapse.ml.explainers import breeze.stats.distributions.RandBasis -import com.microsoft.ml.spark.core.schema.DatasetExtensions +import com.microsoft.azure.synapse.ml.core.schema.DatasetExtensions import org.apache.spark.injections.UDFUtils import org.apache.spark.ml.ComplexParamsReadable import org.apache.spark.ml.linalg.SQLDataTypes.VectorType diff --git a/core/src/main/scala/com/microsoft/ml/spark/explainers/TextSHAP.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/TextSHAP.scala similarity index 96% rename from core/src/main/scala/com/microsoft/ml/spark/explainers/TextSHAP.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/TextSHAP.scala index 234e6bd3cd..b54b81b050 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/explainers/TextSHAP.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/TextSHAP.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.explainers +package com.microsoft.azure.synapse.ml.explainers -import com.microsoft.ml.spark.core.schema.DatasetExtensions +import com.microsoft.azure.synapse.ml.core.schema.DatasetExtensions import org.apache.spark.injections.UDFUtils import org.apache.spark.ml.ComplexParamsReadable import org.apache.spark.ml.linalg.SQLDataTypes.VectorType diff --git a/core/src/main/scala/com/microsoft/ml/spark/explainers/VectorLIME.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/VectorLIME.scala similarity index 96% rename from core/src/main/scala/com/microsoft/ml/spark/explainers/VectorLIME.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/VectorLIME.scala index 2900d6b337..32255813d8 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/explainers/VectorLIME.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/VectorLIME.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.explainers +package com.microsoft.azure.synapse.ml.explainers import breeze.stats.distributions.RandBasis -import com.microsoft.ml.spark.core.schema.DatasetExtensions +import com.microsoft.azure.synapse.ml.core.schema.DatasetExtensions import org.apache.spark.injections.UDFUtils import org.apache.spark.ml.ComplexParamsReadable import org.apache.spark.ml.linalg.SQLDataTypes.VectorType diff --git a/core/src/main/scala/com/microsoft/ml/spark/explainers/VectorSHAP.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/VectorSHAP.scala similarity index 96% rename from core/src/main/scala/com/microsoft/ml/spark/explainers/VectorSHAP.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/VectorSHAP.scala index 08da77c543..f443b4fdb0 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/explainers/VectorSHAP.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/VectorSHAP.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.explainers +package com.microsoft.azure.synapse.ml.explainers import breeze.stats.distributions.RandBasis -import com.microsoft.ml.spark.core.schema.DatasetExtensions +import com.microsoft.azure.synapse.ml.core.schema.DatasetExtensions import org.apache.spark.injections.UDFUtils import org.apache.spark.ml.ComplexParamsReadable import org.apache.spark.ml.linalg.SQLDataTypes.VectorType diff --git a/core/src/main/scala/com/microsoft/ml/spark/featurize/CleanMissingData.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/CleanMissingData.scala similarity index 96% rename from core/src/main/scala/com/microsoft/ml/spark/featurize/CleanMissingData.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/CleanMissingData.scala index 8dcd19350e..02936edc10 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/featurize/CleanMissingData.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/CleanMissingData.scala @@ -1,11 +1,11 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.featurize +package com.microsoft.azure.synapse.ml.featurize -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.contracts.{HasInputCols, HasOutputCols} -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts.{HasInputCols, HasOutputCols} +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.annotation.DeveloperApi import org.apache.spark.ml._ import org.apache.spark.ml.param._ diff --git a/core/src/main/scala/com/microsoft/ml/spark/featurize/CleanMissingData.txt b/core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/CleanMissingData.txt similarity index 100% rename from core/src/main/scala/com/microsoft/ml/spark/featurize/CleanMissingData.txt rename to core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/CleanMissingData.txt diff --git a/core/src/main/scala/com/microsoft/ml/spark/featurize/CountSelector.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/CountSelector.scala similarity index 91% rename from core/src/main/scala/com/microsoft/ml/spark/featurize/CountSelector.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/CountSelector.scala index ff982133f3..bea3774451 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/featurize/CountSelector.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/CountSelector.scala @@ -1,11 +1,11 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.featurize +package com.microsoft.azure.synapse.ml.featurize -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.contracts.{HasInputCol, HasOutputCol} -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts.{HasInputCol, HasOutputCol} +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.ml.feature._ import org.apache.spark.ml.linalg.SQLDataTypes.VectorType import org.apache.spark.ml.linalg.Vector diff --git a/core/src/main/scala/com/microsoft/ml/spark/featurize/DataConversion.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/DataConversion.scala similarity index 96% rename from core/src/main/scala/com/microsoft/ml/spark/featurize/DataConversion.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/DataConversion.scala index a00bcd297c..990af2fd4f 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/featurize/DataConversion.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/DataConversion.scala @@ -1,11 +1,12 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.featurize +package com.microsoft.azure.synapse.ml.featurize + +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.logging.BasicLogging import java.sql.Timestamp -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.logging.BasicLogging import org.apache.spark.ml.Transformer import org.apache.spark.ml.param.{Param, ParamMap, StringArrayParam} import org.apache.spark.ml.util.{DefaultParamsWritable, Identifiable} @@ -111,7 +112,7 @@ class DataConversion(override val uid: String) extends Transformer schema } - /** Copy the class, with extra com.microsoft.ml.spark.core.serialize.params + /** Copy the class, with extra com.microsoft.azure.synapse.ml.core.serialize.params * @param extra * @return */ diff --git a/core/src/main/scala/com/microsoft/ml/spark/featurize/DataConversion.txt b/core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/DataConversion.txt similarity index 100% rename from core/src/main/scala/com/microsoft/ml/spark/featurize/DataConversion.txt rename to core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/DataConversion.txt diff --git a/core/src/main/scala/com/microsoft/ml/spark/featurize/Featurize.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/Featurize.scala similarity index 94% rename from core/src/main/scala/com/microsoft/ml/spark/featurize/Featurize.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/Featurize.scala index 58d9eb61b3..7d5b7bb3c4 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/featurize/Featurize.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/Featurize.scala @@ -1,16 +1,17 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.featurize +package com.microsoft.azure.synapse.ml.featurize + +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts.{HasInputCols, HasOutputCol} +import com.microsoft.azure.synapse.ml.featurize.text.TextFeaturizer import java.sql.{Date, Timestamp} import java.time.temporal.ChronoField -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.contracts.{HasInputCols, HasOutputCol} -import com.microsoft.ml.spark.core.schema.DatasetExtensions._ -import com.microsoft.ml.spark.featurize.text.TextFeaturizer -import com.microsoft.ml.spark.logging.BasicLogging -import com.microsoft.ml.spark.stages.{DropColumns, Lambda, UDFTransformer} +import com.microsoft.azure.synapse.ml.core.schema.DatasetExtensions._ +import com.microsoft.azure.synapse.ml.logging.BasicLogging +import com.microsoft.azure.synapse.ml.stages.{DropColumns, Lambda, UDFTransformer} import org.apache.spark.ml.feature.{Imputer, OneHotEncoder, SQLTransformer, VectorAssembler} import org.apache.spark.ml.linalg.SQLDataTypes.VectorType import org.apache.spark.ml.linalg.Vectors @@ -23,7 +24,7 @@ import org.apache.spark.sql.types._ import scala.collection.mutable -private[spark] object FeaturizeUtilities { +private[ml] object FeaturizeUtilities { // 2^18 features by default val NumFeaturesDefault = 262144 // 2^12 features for tree-based or NN-based learners diff --git a/core/src/main/scala/com/microsoft/ml/spark/featurize/Featurize.txt b/core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/Featurize.txt similarity index 100% rename from core/src/main/scala/com/microsoft/ml/spark/featurize/Featurize.txt rename to core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/Featurize.txt diff --git a/core/src/main/scala/com/microsoft/ml/spark/featurize/IndexToValue.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/IndexToValue.scala similarity index 88% rename from 
core/src/main/scala/com/microsoft/ml/spark/featurize/IndexToValue.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/IndexToValue.scala index 5da9937966..5efeaf2b42 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/featurize/IndexToValue.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/IndexToValue.scala @@ -1,11 +1,11 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.featurize +package com.microsoft.azure.synapse.ml.featurize -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.contracts.{HasInputCol, HasOutputCol} -import com.microsoft.ml.spark.core.schema.{CategoricalColumnInfo, CategoricalUtilities} +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts.{HasInputCol, HasOutputCol} +import com.microsoft.azure.synapse.ml.core.schema.{CategoricalColumnInfo, CategoricalUtilities} import org.apache.spark.sql.{DataFrame, Dataset} import org.apache.spark.ml.Transformer import org.apache.spark.ml.param._ @@ -13,8 +13,8 @@ import org.apache.spark.ml.util._ import org.apache.spark.sql.expressions.UserDefinedFunction import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ -import com.microsoft.ml.spark.core.schema.SchemaConstants._ -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.core.schema.SchemaConstants._ +import com.microsoft.azure.synapse.ml.logging.BasicLogging import scala.reflect.ClassTag import reflect.runtime.universe.TypeTag diff --git a/core/src/main/scala/com/microsoft/ml/spark/featurize/IndexToValue.txt b/core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/IndexToValue.txt similarity index 100% rename from core/src/main/scala/com/microsoft/ml/spark/featurize/IndexToValue.txt rename to 
core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/IndexToValue.txt diff --git a/core/src/main/scala/com/microsoft/ml/spark/featurize/ValueIndexer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/ValueIndexer.scala similarity index 96% rename from core/src/main/scala/com/microsoft/ml/spark/featurize/ValueIndexer.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/ValueIndexer.scala index 78b662b31c..b7e07fd956 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/featurize/ValueIndexer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/ValueIndexer.scala @@ -1,13 +1,14 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.featurize +package com.microsoft.azure.synapse.ml.featurize + +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts.{HasInputCol, HasOutputCol} +import com.microsoft.azure.synapse.ml.core.schema.CategoricalMap +import com.microsoft.azure.synapse.ml.logging.BasicLogging import java.lang.{Boolean => JBoolean, Double => JDouble, Integer => JInt, Long => JLong} -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.contracts.{HasInputCol, HasOutputCol} -import com.microsoft.ml.spark.core.schema.CategoricalMap -import com.microsoft.ml.spark.logging.BasicLogging import org.apache.spark.annotation.DeveloperApi import org.apache.spark.ml._ import org.apache.spark.ml.attribute.NominalAttribute diff --git a/core/src/main/scala/com/microsoft/ml/spark/featurize/ValueIndexer.txt b/core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/ValueIndexer.txt similarity index 100% rename from core/src/main/scala/com/microsoft/ml/spark/featurize/ValueIndexer.txt rename to core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/ValueIndexer.txt diff --git 
a/core/src/main/scala/com/microsoft/ml/spark/featurize/ValueIndexerModel.txt b/core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/ValueIndexerModel.txt similarity index 100% rename from core/src/main/scala/com/microsoft/ml/spark/featurize/ValueIndexerModel.txt rename to core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/ValueIndexerModel.txt diff --git a/core/src/main/scala/com/microsoft/ml/spark/featurize/text/MultiNGram.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/text/MultiNGram.scala similarity index 87% rename from core/src/main/scala/com/microsoft/ml/spark/featurize/text/MultiNGram.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/text/MultiNGram.scala index 283c8b67dc..d2b2b98ac1 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/featurize/text/MultiNGram.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/text/MultiNGram.scala @@ -1,12 +1,12 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.featurize.text +package com.microsoft.azure.synapse.ml.featurize.text -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.contracts.{HasInputCol, HasOutputCol} -import com.microsoft.ml.spark.core.schema.DatasetExtensions -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts.{HasInputCol, HasOutputCol} +import com.microsoft.azure.synapse.ml.core.schema.DatasetExtensions +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.ml._ import org.apache.spark.ml.feature._ import org.apache.spark.ml.param._ @@ -14,7 +14,6 @@ import org.apache.spark.ml.util._ import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.types._ import org.apache.spark.sql.{DataFrame, Dataset, Row} - import spray.json.DefaultJsonProtocol._ object MultiNGram extends DefaultParamsReadable[MultiNGram] diff --git a/core/src/main/scala/com/microsoft/ml/spark/featurize/text/PageSplitter.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/text/PageSplitter.scala similarity index 93% rename from core/src/main/scala/com/microsoft/ml/spark/featurize/text/PageSplitter.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/text/PageSplitter.scala index 814d642fed..7b653c8887 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/featurize/text/PageSplitter.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/text/PageSplitter.scala @@ -1,11 +1,11 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.featurize.text +package com.microsoft.azure.synapse.ml.featurize.text -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.contracts.{HasInputCol, HasOutputCol} -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts.{HasInputCol, HasOutputCol} +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.injections.UDFUtils import org.apache.spark.ml._ import org.apache.spark.ml.param._ diff --git a/core/src/main/scala/com/microsoft/ml/spark/featurize/text/TextFeaturizer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/text/TextFeaturizer.scala similarity index 97% rename from core/src/main/scala/com/microsoft/ml/spark/featurize/text/TextFeaturizer.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/text/TextFeaturizer.scala index 3edd320621..7ecd4a48a6 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/featurize/text/TextFeaturizer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/text/TextFeaturizer.scala @@ -1,13 +1,14 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.featurize.text +package com.microsoft.azure.synapse.ml.featurize.text + +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts.{HasInputCol, HasOutputCol} +import com.microsoft.azure.synapse.ml.logging.BasicLogging +import com.microsoft.azure.synapse.ml.stages.DropColumns import java.util.NoSuchElementException -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.contracts.{HasInputCol, HasOutputCol} -import com.microsoft.ml.spark.logging.BasicLogging -import com.microsoft.ml.spark.stages.DropColumns import org.apache.spark.ml.{Pipeline, _} import org.apache.spark.ml.attribute.AttributeGroup import org.apache.spark.ml.feature._ diff --git a/core/src/main/scala/com/microsoft/ml/spark/featurize/text/TextFeaturizer.txt b/core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/text/TextFeaturizer.txt similarity index 100% rename from core/src/main/scala/com/microsoft/ml/spark/featurize/text/TextFeaturizer.txt rename to core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/text/TextFeaturizer.txt diff --git a/core/src/main/scala/com/microsoft/ml/spark/image/ResizeImageTransformer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/image/ResizeImageTransformer.scala similarity index 90% rename from core/src/main/scala/com/microsoft/ml/spark/image/ResizeImageTransformer.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/image/ResizeImageTransformer.scala index 821789c733..2588ee664d 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/image/ResizeImageTransformer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/image/ResizeImageTransformer.scala @@ -1,24 +1,25 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.image +package com.microsoft.azure.synapse.ml.image -import java.awt.image.BufferedImage -import java.awt.{Image => JImage} -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.contracts.{HasInputCol, HasOutputCol} -import com.microsoft.ml.spark.core.schema.ImageSchemaUtils -import com.microsoft.ml.spark.io.image.ImageUtils -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts.{HasInputCol, HasOutputCol} +import com.microsoft.azure.synapse.ml.core.schema.ImageSchemaUtils +import com.microsoft.azure.synapse.ml.io.image.ImageUtils +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.injections.UDFUtils import org.apache.spark.ml.Transformer import org.apache.spark.ml.image.ImageSchema import org.apache.spark.ml.param.{IntParam, ParamMap} import org.apache.spark.ml.util.{DefaultParamsReadable, DefaultParamsWritable, Identifiable} -import org.apache.spark.sql.functions.{col, udf} +import org.apache.spark.sql.functions.col import org.apache.spark.sql.types.{BinaryType, StructType} import org.apache.spark.sql.{DataFrame, Dataset, Row} +import java.awt.image.BufferedImage +import java.awt.{Image => JImage} + object ResizeUtils { def resizeBufferedImage(width: Int, height: Int, channels: Option[Int])(image: BufferedImage): BufferedImage = { diff --git a/core/src/main/scala/com/microsoft/ml/spark/image/UnrollImage.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/image/UnrollImage.scala similarity index 95% rename from core/src/main/scala/com/microsoft/ml/spark/image/UnrollImage.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/image/UnrollImage.scala index 032ad80d79..06556ea1de 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/image/UnrollImage.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/image/UnrollImage.scala @@ -1,16 
+1,17 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.image +package com.microsoft.azure.synapse.ml.image + +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts.{HasInputCol, HasOutputCol} +import com.microsoft.azure.synapse.ml.core.schema.ImageSchemaUtils +import com.microsoft.azure.synapse.ml.io.image.ImageUtils +import com.microsoft.azure.synapse.ml.logging.BasicLogging import java.awt.Color import java.awt.color.ColorSpace import java.awt.image.BufferedImage -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.contracts.{HasInputCol, HasOutputCol} -import com.microsoft.ml.spark.core.schema.ImageSchemaUtils -import com.microsoft.ml.spark.io.image.ImageUtils -import com.microsoft.ml.spark.logging.BasicLogging import org.apache.spark.injections.UDFUtils import org.apache.spark.ml.Transformer import org.apache.spark.ml.linalg.SQLDataTypes.VectorType diff --git a/core/src/main/scala/com/microsoft/ml/spark/image/UnrollImage.txt b/core/src/main/scala/com/microsoft/azure/synapse/ml/image/UnrollImage.txt similarity index 100% rename from core/src/main/scala/com/microsoft/ml/spark/image/UnrollImage.txt rename to core/src/main/scala/com/microsoft/azure/synapse/ml/image/UnrollImage.txt diff --git a/core/src/main/scala/com/microsoft/ml/spark/io/IOImplicits.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/io/IOImplicits.scala similarity index 97% rename from core/src/main/scala/com/microsoft/ml/spark/io/IOImplicits.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/io/IOImplicits.scala index cb512aca81..4f6ab18a75 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/io/IOImplicits.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/io/IOImplicits.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. 
All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.io +package com.microsoft.azure.synapse.ml.io -import com.microsoft.ml.spark.io.http.{HTTPRequestData, HTTPSchema} -import org.apache.spark.binary.BinaryFileFormat +import com.microsoft.azure.synapse.ml.io.binary.BinaryFileFormat +import com.microsoft.azure.synapse.ml.io.http.{HTTPRequestData, HTTPSchema} import org.apache.spark.injections.UDFUtils import org.apache.spark.ml.image.ImageSchema import org.apache.spark.ml.source.image.PatchedImageFileFormat diff --git a/core/src/main/scala/com/microsoft/ml/spark/io/binary/Binary.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/io/binary/Binary.scala similarity index 91% rename from core/src/main/scala/com/microsoft/ml/spark/io/binary/Binary.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/io/binary/Binary.scala index 54b3f0c712..1c37923906 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/io/binary/Binary.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/io/binary/Binary.scala @@ -1,14 +1,14 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark +package com.microsoft.azure.synapse.ml.io.binary import org.apache.spark.sql.{DataFrame, SparkSession} import scala.language.implicitConversions /** Implicit conversion allows sparkSession.readImages(...) 
syntax * Example: - * import com.microsoft.ml.spark.Readers.implicits._ + * import com.microsoft.azure.synapse.ml.Readers.implicits._ * sparkSession.readImages(path, recursive = false) */ object Binary { diff --git a/core/src/main/scala/com/microsoft/ml/spark/io/binary/BinaryFileFormat.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/io/binary/BinaryFileFormat.scala similarity index 94% rename from core/src/main/scala/com/microsoft/ml/spark/io/binary/BinaryFileFormat.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/io/binary/BinaryFileFormat.scala index 9b5684ad2b..9b57c35f34 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/io/binary/BinaryFileFormat.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/io/binary/BinaryFileFormat.scala @@ -1,13 +1,14 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package org.apache.spark.binary +package com.microsoft.azure.synapse.ml.io.binary + +import com.microsoft.azure.synapse.ml.core.env.StreamUtilities.ZipIterator +import com.microsoft.azure.synapse.ml.core.schema.BinaryFileSchema import java.io.{Closeable, InputStream} import java.net.URI - -import com.microsoft.ml.spark.core.env.StreamUtilities.ZipIterator -import com.microsoft.ml.spark.core.schema.BinaryFileSchema +import com.microsoft.azure.synapse.ml.core.env.StreamUtilities.ZipIterator import org.apache.commons.io.{FilenameUtils, IOUtils} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileStatus, Path} @@ -31,7 +32,7 @@ import scala.util.Random * @param subsample what ratio to subsample * @param inspectZip whether to inspect zip files */ -private[spark] class BinaryRecordReader(val subsample: Double, val inspectZip: Boolean, val seed: Long) +private[ml] class BinaryRecordReader(val subsample: Double, val inspectZip: Boolean, val seed: Long) extends RecordReader[String, Array[Byte]] { private var done: 
Boolean = false @@ -186,7 +187,7 @@ class BinaryFileFormat extends TextBasedFileFormat with DataSourceRegister { } /** Thin wrapper class analogous to others in the spark ecosystem */ -private[spark] class HadoopFileReader(file: PartitionedFile, +private[ml] class HadoopFileReader(file: PartitionedFile, conf: Configuration, subsample: Double, inspectZip: Boolean, diff --git a/core/src/main/scala/com/microsoft/ml/spark/io/binary/BinaryFileReader.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/io/binary/BinaryFileReader.scala similarity index 93% rename from core/src/main/scala/com/microsoft/ml/spark/io/binary/BinaryFileReader.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/io/binary/BinaryFileReader.scala index 6554b0067a..b8cea182e9 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/io/binary/BinaryFileReader.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/io/binary/BinaryFileReader.scala @@ -1,17 +1,15 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark +package com.microsoft.azure.synapse.ml.io.binary -import com.microsoft.ml.spark.core.env.StreamUtilities -import com.microsoft.ml.spark.core.schema.BinaryFileSchema -import com.microsoft.ml.spark.core.utils.AsyncUtils +import com.microsoft.azure.synapse.ml.core.env.StreamUtilities +import com.microsoft.azure.synapse.ml.core.schema.BinaryFileSchema +import com.microsoft.azure.synapse.ml.core.utils.AsyncUtils import org.apache.commons.io.IOUtils import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} -import org.apache.spark.binary.BinaryFileFormat import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.{DataFrame, Row, SparkSession} -import org.apache.spark.binary.ConfUtils import org.apache.spark.sql.types.BinaryType import scala.concurrent.{ExecutionContext, Future} diff --git a/core/src/main/scala/com/microsoft/ml/spark/io/binary/KeyValueReaderIterator.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/io/binary/KeyValueReaderIterator.scala similarity index 97% rename from core/src/main/scala/com/microsoft/ml/spark/io/binary/KeyValueReaderIterator.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/io/binary/KeyValueReaderIterator.scala index 44f925ef47..b272fc18f6 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/io/binary/KeyValueReaderIterator.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/io/binary/KeyValueReaderIterator.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package org.apache.spark.binary +package com.microsoft.azure.synapse.ml.io.binary import java.io.Closeable import org.apache.hadoop.mapreduce.RecordReader diff --git a/core/src/main/scala/com/microsoft/ml/spark/io/http/Clients.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/io/http/Clients.scala similarity index 94% rename from core/src/main/scala/com/microsoft/ml/spark/io/http/Clients.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/io/http/Clients.scala index 701d76670e..c192a0fa81 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/io/http/Clients.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/io/http/Clients.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.io.http +package com.microsoft.azure.synapse.ml.io.http -import com.microsoft.ml.spark.core.utils.AsyncUtils +import com.microsoft.azure.synapse.ml.core.utils.AsyncUtils import org.apache.log4j.{LogManager, Logger} import scala.concurrent.duration.Duration diff --git a/core/src/main/scala/com/microsoft/ml/spark/io/http/HTTPClients.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/io/http/HTTPClients.scala similarity index 99% rename from core/src/main/scala/com/microsoft/ml/spark/io/http/HTTPClients.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/io/http/HTTPClients.scala index 53f9de4b9e..e730b8d916 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/io/http/HTTPClients.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/io/http/HTTPClients.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.io.http +package com.microsoft.azure.synapse.ml.io.http import org.apache.commons.io.IOUtils import org.apache.http.client.config.RequestConfig diff --git a/core/src/main/scala/com/microsoft/ml/spark/io/http/HTTPSchema.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/io/http/HTTPSchema.scala similarity index 97% rename from core/src/main/scala/com/microsoft/ml/spark/io/http/HTTPSchema.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/io/http/HTTPSchema.scala index 23051f6778..1c4cd1e43f 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/io/http/HTTPSchema.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/io/http/HTTPSchema.scala @@ -1,13 +1,13 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.io.http +package com.microsoft.azure.synapse.ml.io.http -import java.net.{SocketException, URI} +import com.microsoft.azure.synapse.ml.core.schema.SparkBindings -import com.microsoft.ml.spark.build.BuildInfo -import com.microsoft.ml.spark.core.env.StreamUtilities.using -import com.microsoft.ml.spark.core.schema.SparkBindings +import java.net.{SocketException, URI} +import com.microsoft.azure.synapse.ml.build.BuildInfo +import com.microsoft.azure.synapse.ml.core.env.StreamUtilities.using import com.sun.net.httpserver.HttpExchange import org.apache.commons.io.IOUtils import org.apache.http._ @@ -199,7 +199,7 @@ case class HTTPRequestData(requestLine: RequestLineData, request.setProtocolVersion(pv.toHTTPCore)) request.setHeaders(headers.map(_.toHTTPCore) ++ Array(new BasicHeader( - "User-Agent", s"mmlspark/${BuildInfo.version}${HeaderValues.PlatformInfo}"))) + "User-Agent", s"synapseml/${BuildInfo.version}${HeaderValues.PlatformInfo}"))) request } diff --git a/core/src/main/scala/com/microsoft/ml/spark/io/http/HTTPTransformer.scala 
b/core/src/main/scala/com/microsoft/azure/synapse/ml/io/http/HTTPTransformer.scala similarity index 93% rename from core/src/main/scala/com/microsoft/ml/spark/io/http/HTTPTransformer.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/io/http/HTTPTransformer.scala index 8b6a656361..c905468fab 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/io/http/HTTPTransformer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/io/http/HTTPTransformer.scala @@ -1,18 +1,17 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.io.http +package com.microsoft.azure.synapse.ml.io.http -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.contracts.{HasInputCol, HasOutputCol} -import com.microsoft.ml.spark.io.http.HandlingUtils.HandlerFunc -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts.{HasInputCol, HasOutputCol} +import com.microsoft.azure.synapse.ml.io.http.HandlingUtils.HandlerFunc +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.injections.UDFUtils -import org.apache.spark.ml.{ComplexParamsReadable, ComplexParamsWritable, Transformer} import org.apache.spark.ml.param._ import org.apache.spark.ml.util.Identifiable +import org.apache.spark.ml.{ComplexParamsReadable, ComplexParamsWritable, Transformer} import org.apache.spark.sql.catalyst.encoders.RowEncoder -import org.apache.spark.sql.functions.udf import org.apache.spark.sql.types._ import org.apache.spark.sql.{DataFrame, Dataset, Row} diff --git a/core/src/main/scala/com/microsoft/ml/spark/io/http/Parsers.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/io/http/Parsers.scala similarity index 95% rename from core/src/main/scala/com/microsoft/ml/spark/io/http/Parsers.scala rename to 
core/src/main/scala/com/microsoft/azure/synapse/ml/io/http/Parsers.scala index c568a9ae70..5d68daf6b0 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/io/http/Parsers.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/io/http/Parsers.scala @@ -1,15 +1,16 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.io.http - -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.contracts.{HasInputCol, HasOutputCol} -import com.microsoft.ml.spark.core.schema.DatasetExtensions.{findUnusedColumnName => newCol} -import com.microsoft.ml.spark.core.serialize.ComplexParam -import com.microsoft.ml.spark.logging.BasicLogging -import com.microsoft.ml.spark.stages.UDFTransformer +package com.microsoft.azure.synapse.ml.io.http + +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts.{HasInputCol, HasOutputCol} +import com.microsoft.azure.synapse.ml.core.schema.DatasetExtensions.{findUnusedColumnName => newCol} +import com.microsoft.azure.synapse.ml.core.serialize.ComplexParam +import com.microsoft.azure.synapse.ml.logging.BasicLogging +import com.microsoft.azure.synapse.ml.stages.UDFTransformer import org.apache.http.client.methods.HttpRequestBase +import org.apache.spark.injections.UDFUtils import org.apache.spark.ml.param._ import org.apache.spark.ml.util.Identifiable import org.apache.spark.ml.{ComplexParamsReadable, ComplexParamsWritable, Transformer} @@ -19,7 +20,6 @@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.{ArrayType, DataType, StringType, StructType} import org.apache.spark.sql.{DataFrame, Dataset, Row} import spray.json.DefaultJsonProtocol._ -import org.apache.spark.injections.UDFUtils import scala.reflect.runtime.universe.TypeTag diff --git a/core/src/main/scala/com/microsoft/ml/spark/io/http/PortForwarding.scala 
b/core/src/main/scala/com/microsoft/azure/synapse/ml/io/http/PortForwarding.scala similarity index 98% rename from core/src/main/scala/com/microsoft/ml/spark/io/http/PortForwarding.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/io/http/PortForwarding.scala index 5ff6b246ad..cbb7bdc9fd 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/io/http/PortForwarding.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/io/http/PortForwarding.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.io.http +package com.microsoft.azure.synapse.ml.io.http import java.io.File import java.net.URI diff --git a/core/src/main/scala/com/microsoft/ml/spark/io/http/SharedVariable.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/io/http/SharedVariable.scala similarity index 97% rename from core/src/main/scala/com/microsoft/ml/spark/io/http/SharedVariable.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/io/http/SharedVariable.scala index 7ee29a1176..e5c1c15469 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/io/http/SharedVariable.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/io/http/SharedVariable.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.io.http +package com.microsoft.azure.synapse.ml.io.http import java.util.UUID diff --git a/core/src/main/scala/com/microsoft/ml/spark/io/http/SimpleHTTPTransformer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/io/http/SimpleHTTPTransformer.scala similarity index 92% rename from core/src/main/scala/com/microsoft/ml/spark/io/http/SimpleHTTPTransformer.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/io/http/SimpleHTTPTransformer.scala index 6fcbf726b4..89a64bf2bc 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/io/http/SimpleHTTPTransformer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/io/http/SimpleHTTPTransformer.scala @@ -1,19 +1,19 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.io.http +package com.microsoft.azure.synapse.ml.io.http -import com.microsoft.ml.spark.core.contracts.{HasInputCol, HasOutputCol} -import com.microsoft.ml.spark.core.schema.DatasetExtensions.{findUnusedColumnName => newCol} -import com.microsoft.ml.spark.logging.BasicLogging -import com.microsoft.ml.spark.stages.{DropColumns, FlattenBatch, HasMiniBatcher, Lambda} +import com.microsoft.azure.synapse.ml.core.contracts.{HasInputCol, HasOutputCol} +import com.microsoft.azure.synapse.ml.core.schema.DatasetExtensions.{findUnusedColumnName => newCol} +import com.microsoft.azure.synapse.ml.logging.BasicLogging +import com.microsoft.azure.synapse.ml.stages.{DropColumns, FlattenBatch, HasMiniBatcher, Lambda} import org.apache.commons.io.IOUtils import org.apache.spark.injections.UDFUtils import org.apache.spark.ml._ import org.apache.spark.ml.param._ import org.apache.spark.ml.util.Identifiable import org.apache.spark.sql.expressions.UserDefinedFunction -import org.apache.spark.sql.functions.{col, udf} +import org.apache.spark.sql.functions.col import 
org.apache.spark.sql.types.{StringType, StructType} import org.apache.spark.sql.{DataFrame, Dataset, Row} diff --git a/core/src/main/scala/com/microsoft/ml/spark/io/image/ImageUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/io/image/ImageUtils.scala similarity index 96% rename from core/src/main/scala/com/microsoft/ml/spark/io/image/ImageUtils.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/io/image/ImageUtils.scala index 5bdb09f356..4b687625e1 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/io/image/ImageUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/io/image/ImageUtils.scala @@ -1,26 +1,24 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.io.image +package com.microsoft.azure.synapse.ml.io.image -import java.awt.color.ColorSpace -import java.awt.image.{BufferedImage, DataBufferByte, Raster} -import java.awt.{Color, Point} -import java.io.{ByteArrayInputStream, ByteArrayOutputStream} - -import com.microsoft.ml.spark.core.env.StreamUtilities -import javax.imageio.ImageIO +import com.microsoft.azure.synapse.ml.core.env.StreamUtilities +import com.microsoft.azure.synapse.ml.io.binary.ConfUtils import org.apache.commons.codec.binary.Base64 import org.apache.commons.io.IOUtils import org.apache.hadoop.fs.Path -import org.apache.spark.binary.ConfUtils import org.apache.spark.ml.ImageInjections import org.apache.spark.ml.image.ImageSchema import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.RowEncoder -import org.apache.spark.sql.types._ import org.apache.spark.sql.{DataFrame, Row} +import java.awt.color.ColorSpace +import java.awt.image.{BufferedImage, DataBufferByte, Raster} +import java.awt.{Color, Point} +import java.io.ByteArrayInputStream +import javax.imageio.ImageIO import scala.util.Try object ImageUtils { diff --git 
a/core/src/main/scala/com/microsoft/ml/spark/io/powerbi/PowerBIWriter.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/io/powerbi/PowerBIWriter.scala similarity index 92% rename from core/src/main/scala/com/microsoft/ml/spark/io/powerbi/PowerBIWriter.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/io/powerbi/PowerBIWriter.scala index 201f4a3739..10a97fba20 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/io/powerbi/PowerBIWriter.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/io/powerbi/PowerBIWriter.scala @@ -1,13 +1,13 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.io.powerbi +package com.microsoft.azure.synapse.ml.io.powerbi -import com.microsoft.ml.spark.io.http._ -import com.microsoft.ml.spark.stages._ +import com.microsoft.azure.synapse.ml.io.http.{CustomOutputParser, HTTPResponseData, SimpleHTTPTransformer} +import com.microsoft.azure.synapse.ml.stages.{ + DynamicMiniBatchTransformer, FixedMiniBatchTransformer, PartitionConsolidator, TimeIntervalMiniBatchTransformer} import org.apache.http.client.HttpResponseException import org.apache.log4j.{LogManager, Logger} -import org.apache.spark.ml.NamespaceInjections import org.apache.spark.sql.functions.{col, struct} import org.apache.spark.sql.streaming.DataStreamWriter import org.apache.spark.sql.{DataFrame, ForeachWriter, Row} diff --git a/core/src/main/scala/com/microsoft/ml/spark/isolationforest/IsolationForest.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/isolationforest/IsolationForest.scala similarity index 86% rename from core/src/main/scala/com/microsoft/ml/spark/isolationforest/IsolationForest.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/isolationforest/IsolationForest.scala index f3d5665737..e14c4ca538 100644 --- 
a/core/src/main/scala/com/microsoft/ml/spark/isolationforest/IsolationForest.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/isolationforest/IsolationForest.scala @@ -1,15 +1,15 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.isolationforest +package com.microsoft.azure.synapse.ml.isolationforest import org.apache.spark.ml.param.{ParamMap, TransformerParam} import org.apache.spark.ml.util._ import org.apache.spark.ml.{ComplexParamsReadable, ComplexParamsWritable, Estimator, Model} -import com.linkedin.relevance.isolationforest.{IsolationForestParams, IsolationForest => IsolationForestSource, - IsolationForestModel => IsolationForestModelSource} -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.logging.BasicLogging +import com.linkedin.relevance.isolationforest.{ + IsolationForestParams, IsolationForest => IsolationForestSource, IsolationForestModel => IsolationForestModelSource} +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.sql.{DataFrame, Dataset} import org.apache.spark.sql.types.StructType diff --git a/core/src/main/scala/com/microsoft/ml/spark/lime/BreezeUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/lime/BreezeUtils.scala similarity index 98% rename from core/src/main/scala/com/microsoft/ml/spark/lime/BreezeUtils.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/lime/BreezeUtils.scala index 4c0c735c35..062ecb2831 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/lime/BreezeUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/lime/BreezeUtils.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.lime +package com.microsoft.azure.synapse.ml.lime import breeze.generic.UFunc import breeze.linalg.{DenseMatrix, DenseVector} diff --git a/core/src/main/scala/com/microsoft/ml/spark/lime/LIME.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/lime/LIME.scala similarity index 92% rename from core/src/main/scala/com/microsoft/ml/spark/lime/LIME.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/lime/LIME.scala index 03557d15e1..b77187b080 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/lime/LIME.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/lime/LIME.scala @@ -1,24 +1,24 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.lime +package com.microsoft.azure.synapse.ml.lime import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV} import breeze.stats.distributions.Rand -import com.microsoft.ml.spark.FluentAPI._ -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.contracts.{HasInputCol, HasOutputCol} -import com.microsoft.ml.spark.core.schema.{DatasetExtensions, ImageSchemaUtils} -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts.{HasInputCol, HasOutputCol} +import com.microsoft.azure.synapse.ml.core.schema.{DatasetExtensions, ImageSchemaUtils} +import com.microsoft.azure.synapse.ml.core.spark.FluentAPI._ +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.injections.UDFUtils import org.apache.spark.internal.{Logging => SLogging} +import org.apache.spark.ml._ import org.apache.spark.ml.feature.StandardScaler import org.apache.spark.ml.linalg.SQLDataTypes.{MatrixType, VectorType} import org.apache.spark.ml.linalg.{DenseMatrix, DenseVector} import org.apache.spark.ml.param._ import 
org.apache.spark.ml.param.shared.HasPredictionCol import org.apache.spark.ml.util.Identifiable -import org.apache.spark.ml._ import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.expressions.UserDefinedFunction import org.apache.spark.sql.functions._ @@ -164,10 +164,10 @@ trait LIMEBase extends LIMEParams with ComplexParamsWritable { } -@deprecated("Please use 'com.microsoft.ml.spark.explainers.VectorLIME'.", since="1.0.0-rc3") +@deprecated("Please use 'com.microsoft.azure.synapse.ml.explainers.VectorLIME'.", since="1.0.0-rc3") object TabularLIME extends ComplexParamsReadable[TabularLIME] -@deprecated("Please use 'com.microsoft.ml.spark.explainers.VectorLIME'.", since="1.0.0-rc3") +@deprecated("Please use 'com.microsoft.azure.synapse.ml.explainers.VectorLIME'.", since="1.0.0-rc3") class TabularLIME(val uid: String) extends Estimator[TabularLIMEModel] with LIMEParams with Wrappable with ComplexParamsWritable with BasicLogging { logClass() @@ -200,10 +200,10 @@ class TabularLIME(val uid: String) extends Estimator[TabularLIMEModel] } } -@deprecated("Please use 'com.microsoft.ml.spark.explainers.VectorLIME'.", since="1.0.0-rc3") +@deprecated("Please use 'com.microsoft.azure.synapse.ml.explainers.VectorLIME'.", since="1.0.0-rc3") object TabularLIMEModel extends ComplexParamsReadable[TabularLIMEModel] -@deprecated("Please use 'com.microsoft.ml.spark.explainers.VectorLIME'.", since="1.0.0-rc3") +@deprecated("Please use 'com.microsoft.azure.synapse.ml.explainers.VectorLIME'.", since="1.0.0-rc3") class TabularLIMEModel(val uid: String) extends Model[TabularLIMEModel] with LIMEBase with Wrappable with BasicLogging { logClass() @@ -256,7 +256,7 @@ class TabularLIMEModel(val uid: String) extends Model[TabularLIMEModel] } -@deprecated("Please use 'com.microsoft.ml.spark.explainers.ImageLIME'.", since="1.0.0-rc3") +@deprecated("Please use 'com.microsoft.azure.synapse.ml.explainers.ImageLIME'.", since="1.0.0-rc3") object ImageLIME extends 
ComplexParamsReadable[ImageLIME] /** Distributed implementation of @@ -264,7 +264,7 @@ object ImageLIME extends ComplexParamsReadable[ImageLIME] * * https://arxiv.org/pdf/1602.04938v1.pdf */ -@deprecated("Please use 'com.microsoft.ml.spark.explainers.ImageLIME'.", since="1.0.0-rc3") +@deprecated("Please use 'com.microsoft.azure.synapse.ml.explainers.ImageLIME'.", since="1.0.0-rc3") class ImageLIME(val uid: String) extends Transformer with LIMEBase with Wrappable with HasModifier with HasCellSize with BasicLogging { logClass() diff --git a/core/src/main/scala/com/microsoft/ml/spark/lime/Superpixel.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/lime/Superpixel.scala similarity index 98% rename from core/src/main/scala/com/microsoft/ml/spark/lime/Superpixel.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/lime/Superpixel.scala index cddb9b3b6d..ed5dbc3390 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/lime/Superpixel.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/lime/Superpixel.scala @@ -1,15 +1,15 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.lime +package com.microsoft.azure.synapse.ml.lime + +import com.microsoft.azure.synapse.ml.core.schema.ImageSchemaUtils +import com.microsoft.azure.synapse.ml.io.image.ImageUtils import java.awt.FlowLayout import java.awt.image.BufferedImage import java.io.File import java.util - -import com.microsoft.ml.spark.core.schema.ImageSchemaUtils -import com.microsoft.ml.spark.io.image.ImageUtils import javax.imageio.ImageIO import javax.swing.{ImageIcon, JFrame, JLabel} import org.apache.spark.injections.UDFUtils diff --git a/core/src/main/scala/com/microsoft/ml/spark/lime/SuperpixelTransformer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/lime/SuperpixelTransformer.scala similarity index 87% rename from core/src/main/scala/com/microsoft/ml/spark/lime/SuperpixelTransformer.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/lime/SuperpixelTransformer.scala index 49a5fa68c1..8120c6f9e9 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/lime/SuperpixelTransformer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/lime/SuperpixelTransformer.scala @@ -1,12 +1,12 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.lime +package com.microsoft.azure.synapse.ml.lime -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.contracts.{HasInputCol, HasOutputCol} -import com.microsoft.ml.spark.core.schema.ImageSchemaUtils -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts.{HasInputCol, HasOutputCol} +import com.microsoft.azure.synapse.ml.core.schema.ImageSchemaUtils +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.ml.Transformer import org.apache.spark.ml.param.{DoubleParam, ParamMap, Params} import org.apache.spark.ml.util.{DefaultParamsReadable, DefaultParamsWritable, Identifiable} diff --git a/core/src/main/scala/com/microsoft/ml/spark/lime/TextLIME.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/lime/TextLIME.scala similarity index 89% rename from core/src/main/scala/com/microsoft/ml/spark/lime/TextLIME.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/lime/TextLIME.scala index 21b010632d..973a27c5da 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/lime/TextLIME.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/lime/TextLIME.scala @@ -1,12 +1,12 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.lime +package com.microsoft.azure.synapse.ml.lime -import com.microsoft.ml.spark.FluentAPI._ -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.schema.DatasetExtensions -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.schema.DatasetExtensions +import com.microsoft.azure.synapse.ml.logging.BasicLogging +import com.microsoft.azure.synapse.ml.core.spark.FluentAPI._ import org.apache.spark.injections.UDFUtils import org.apache.spark.ml.feature.Tokenizer import org.apache.spark.ml.linalg.DenseVector @@ -25,7 +25,7 @@ object TextLIME extends ComplexParamsReadable[TextLIME] * * https://arxiv.org/pdf/1602.04938v1.pdf */ -@deprecated("Please use 'com.microsoft.ml.spark.explainers.TextLIME'.", since="1.0.0-rc3") +@deprecated("Please use 'com.microsoft.azure.synapse.ml.explainers.TextLIME'.", since="1.0.0-rc3") class TextLIME(val uid: String) extends Model[TextLIME] with LIMEBase with Wrappable with BasicLogging { logClass() diff --git a/core/src/main/scala/com/microsoft/ml/spark/logging/BasicLogging.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/BasicLogging.scala similarity index 94% rename from core/src/main/scala/com/microsoft/ml/spark/logging/BasicLogging.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/logging/BasicLogging.scala index 8dff78f746..3626165d82 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/logging/BasicLogging.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/logging/BasicLogging.scala @@ -1,12 +1,12 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.logging +package com.microsoft.azure.synapse.ml.logging import org.apache.spark.internal.Logging import spray.json.{DefaultJsonProtocol, RootJsonFormat} -import com.microsoft.ml.spark.build.BuildInfo +import com.microsoft.azure.synapse.ml.build.BuildInfo case class BasicLogInfo( uid: String, diff --git a/core/src/main/scala/com/microsoft/ml/spark/nn/BallTree.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/nn/BallTree.scala similarity index 98% rename from core/src/main/scala/com/microsoft/ml/spark/nn/BallTree.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/nn/BallTree.scala index aa94f6a362..540913efdc 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/nn/BallTree.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/nn/BallTree.scala @@ -1,12 +1,12 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.nn +package com.microsoft.azure.synapse.ml.nn import java.io._ import breeze.linalg.functions.euclideanDistance import breeze.linalg.{DenseVector, norm, _} -import com.microsoft.ml.spark.core.env.StreamUtilities.using +import com.microsoft.azure.synapse.ml.core.env.StreamUtilities.using import scala.collection.JavaConverters._ private case class Query(point: DenseVector[Double], diff --git a/core/src/main/scala/com/microsoft/ml/spark/nn/BoundedPriorityQueue.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/nn/BoundedPriorityQueue.scala similarity index 97% rename from core/src/main/scala/com/microsoft/ml/spark/nn/BoundedPriorityQueue.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/nn/BoundedPriorityQueue.scala index 8833ba264c..2de384fdbf 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/nn/BoundedPriorityQueue.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/nn/BoundedPriorityQueue.scala @@ -1,7 +1,7 @@ // Copyright (C) 
Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.nn +package com.microsoft.azure.synapse.ml.nn /* This file is taken from the Apache Spark project and is licensed under Apache License version 2.0. diff --git a/core/src/main/scala/com/microsoft/ml/spark/nn/ConditionalKNN.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/nn/ConditionalKNN.scala similarity index 96% rename from core/src/main/scala/com/microsoft/ml/spark/nn/ConditionalKNN.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/nn/ConditionalKNN.scala index 7b25be7bd6..e55f8ece1a 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/nn/ConditionalKNN.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/nn/ConditionalKNN.scala @@ -1,11 +1,11 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.nn +package com.microsoft.azure.synapse.ml.nn import breeze.linalg.{DenseVector => BDV} -import com.microsoft.ml.spark.core.contracts.HasLabelCol -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.core.contracts.HasLabelCol +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.broadcast.Broadcast import org.apache.spark.injections.UDFUtils import org.apache.spark.ml.linalg.Vector diff --git a/core/src/main/scala/com/microsoft/ml/spark/nn/KNN.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/nn/KNN.scala similarity index 93% rename from core/src/main/scala/com/microsoft/ml/spark/nn/KNN.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/nn/KNN.scala index 2acde7942b..217e954a6f 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/nn/KNN.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/nn/KNN.scala @@ -1,12 +1,12 @@ // Copyright (C) Microsoft 
Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.nn +package com.microsoft.azure.synapse.ml.nn import breeze.linalg.{DenseVector => BDV} -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.contracts.{HasFeaturesCol, HasOutputCol} -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts.{HasFeaturesCol, HasOutputCol} +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.broadcast.Broadcast import org.apache.spark.injections.UDFUtils import org.apache.spark.ml._ diff --git a/core/src/main/scala/com/microsoft/ml/spark/nn/Schemas.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/nn/Schemas.scala similarity index 96% rename from core/src/main/scala/com/microsoft/ml/spark/nn/Schemas.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/nn/Schemas.scala index 59b2709a25..b3ce71df48 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/nn/Schemas.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/nn/Schemas.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.nn +package com.microsoft.azure.synapse.ml.nn import java.io.Serializable diff --git a/core/src/main/scala/com/microsoft/ml/spark/recommendation/RankingAdapter.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/recommendation/RankingAdapter.scala similarity index 95% rename from core/src/main/scala/com/microsoft/ml/spark/recommendation/RankingAdapter.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/recommendation/RankingAdapter.scala index 0325bdd340..697c40db79 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/recommendation/RankingAdapter.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/recommendation/RankingAdapter.scala @@ -1,11 +1,11 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.recommendation +package com.microsoft.azure.synapse.ml.recommendation -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.contracts.HasLabelCol -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts.HasLabelCol +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.ml._ import org.apache.spark.ml.param._ import org.apache.spark.ml.recommendation._ diff --git a/core/src/main/scala/com/microsoft/ml/spark/recommendation/RankingEvaluator.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/recommendation/RankingEvaluator.scala similarity index 97% rename from core/src/main/scala/com/microsoft/ml/spark/recommendation/RankingEvaluator.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/recommendation/RankingEvaluator.scala index 744b2e056b..b237736979 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/recommendation/RankingEvaluator.scala +++ 
b/core/src/main/scala/com/microsoft/azure/synapse/ml/recommendation/RankingEvaluator.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.recommendation +package com.microsoft.azure.synapse.ml.recommendation -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.internal.Logging import org.apache.spark.ml.evaluation.Evaluator import org.apache.spark.ml.param._ diff --git a/core/src/main/scala/com/microsoft/ml/spark/recommendation/RankingTrainValidationSplit.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/recommendation/RankingTrainValidationSplit.scala similarity index 98% rename from core/src/main/scala/com/microsoft/ml/spark/recommendation/RankingTrainValidationSplit.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/recommendation/RankingTrainValidationSplit.scala index 1081dd76ac..f7c8101fca 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/recommendation/RankingTrainValidationSplit.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/recommendation/RankingTrainValidationSplit.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.recommendation +package com.microsoft.azure.synapse.ml.recommendation -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.ml.evaluation.Evaluator import org.apache.spark.ml.param._ import org.apache.spark.ml.recommendation._ diff --git a/core/src/main/scala/com/microsoft/ml/spark/recommendation/RecommendationIndexer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/recommendation/RecommendationIndexer.scala similarity index 97% rename from core/src/main/scala/com/microsoft/ml/spark/recommendation/RecommendationIndexer.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/recommendation/RecommendationIndexer.scala index 68fb799c6b..98ef2fe96e 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/recommendation/RecommendationIndexer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/recommendation/RecommendationIndexer.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.recommendation +package com.microsoft.azure.synapse.ml.recommendation -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.ml.attribute.NominalAttribute import org.apache.spark.ml.feature.{StringIndexer, StringIndexerModel} import org.apache.spark.ml.param.{Param, ParamMap, Params, TransformerParam} diff --git a/core/src/main/scala/com/microsoft/ml/spark/recommendation/SAR.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/recommendation/SAR.scala similarity index 97% rename from core/src/main/scala/com/microsoft/ml/spark/recommendation/SAR.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/recommendation/SAR.scala index b0fe6c2a50..3f62399e96 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/recommendation/SAR.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/recommendation/SAR.scala @@ -1,13 +1,13 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.recommendation +package com.microsoft.azure.synapse.ml.recommendation import java.text.SimpleDateFormat import java.util.{Calendar, Date} import breeze.linalg.{CSCMatrix => BSM, DenseMatrix => BDM, Matrix => BM} -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.ml.Estimator import org.apache.spark.ml.param._ import org.apache.spark.ml.recommendation.{RecommendationParams, Constants => C} @@ -83,7 +83,7 @@ class SAR(override val uid: String) extends Estimator[SARModel] * @param dataset * @return */ - private[spark] def calculateUserItemAffinities(dataset: Dataset[_]): DataFrame = { + private[ml] def calculateUserItemAffinities(dataset: Dataset[_]): DataFrame = { val referenceTime: Date = new SimpleDateFormat(getStartTimeFormat) .parse(get(startTime).getOrElse(Calendar.getInstance().getTime.toString)) @@ -149,7 +149,7 @@ class SAR(override val uid: String) extends Estimator[SARModel] * @param dataset * @return */ - private[spark] def calculateItemItemSimilarity(dataset: Dataset[_]): DataFrame = { + private[ml] def calculateItemItemSimilarity(dataset: Dataset[_]): DataFrame = { val itemCounts = dataset//.cache .groupBy(col(getItemCol)).agg(countDistinct(col(getUserCol))) diff --git a/core/src/main/scala/com/microsoft/ml/spark/recommendation/SARModel.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/recommendation/SARModel.scala similarity index 97% rename from core/src/main/scala/com/microsoft/ml/spark/recommendation/SARModel.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/recommendation/SARModel.scala index 5f238de2d4..7228bb2817 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/recommendation/SARModel.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/recommendation/SARModel.scala @@ -1,10 +1,10 @@ // 
Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.recommendation +package com.microsoft.azure.synapse.ml.recommendation -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.ml.{ComplexParamsReadable, ComplexParamsWritable, Model} import org.apache.spark.ml.param.{DataFrameParam, ParamMap} import org.apache.spark.ml.recommendation.{BaseRecommendationModel, Constants} diff --git a/core/src/main/scala/com/microsoft/ml/spark/recommendation/recommendation.txt b/core/src/main/scala/com/microsoft/azure/synapse/ml/recommendation/recommendation.txt similarity index 100% rename from core/src/main/scala/com/microsoft/ml/spark/recommendation/recommendation.txt rename to core/src/main/scala/com/microsoft/azure/synapse/ml/recommendation/recommendation.txt diff --git a/core/src/main/scala/com/microsoft/ml/spark/stages/Batchers.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/Batchers.scala similarity index 98% rename from core/src/main/scala/com/microsoft/ml/spark/stages/Batchers.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/stages/Batchers.scala index ad4cd90fdf..e51bb06d1e 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/stages/Batchers.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/Batchers.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages import java.util.concurrent.{BlockingQueue, CountDownLatch, LinkedBlockingQueue} diff --git a/core/src/main/scala/com/microsoft/ml/spark/stages/Cacher.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/Cacher.scala similarity index 88% rename from core/src/main/scala/com/microsoft/ml/spark/stages/Cacher.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/stages/Cacher.scala index 9578521dbf..87d8593a58 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/stages/Cacher.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/Cacher.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.ml.Transformer import org.apache.spark.ml.param.{BooleanParam, ParamMap} import org.apache.spark.ml.util.{DefaultParamsReadable, DefaultParamsWritable, Identifiable} diff --git a/core/src/main/scala/com/microsoft/ml/spark/stages/ClassBalancer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/ClassBalancer.scala similarity index 93% rename from core/src/main/scala/com/microsoft/ml/spark/stages/ClassBalancer.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/stages/ClassBalancer.scala index 6c5147e981..47e0c64bef 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/stages/ClassBalancer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/ClassBalancer.scala @@ -1,11 +1,11 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
See LICENSE in project root for information. -package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.contracts.{HasInputCol, HasOutputCol} -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts.{HasInputCol, HasOutputCol} +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.ml.param.{BooleanParam, DataFrameParam, ParamMap} import org.apache.spark.ml.util.{DefaultParamsReadable, DefaultParamsWritable, Identifiable} import org.apache.spark.ml.{ComplexParamsReadable, ComplexParamsWritable, Estimator, Model} diff --git a/core/src/main/scala/com/microsoft/ml/spark/stages/DropColumns.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/DropColumns.scala similarity index 92% rename from core/src/main/scala/com/microsoft/ml/spark/stages/DropColumns.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/stages/DropColumns.scala index 5dd5310183..6a66ed96e0 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/stages/DropColumns.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/DropColumns.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.ml.Transformer import org.apache.spark.ml.param._ import org.apache.spark.ml.util._ diff --git a/core/src/main/scala/com/microsoft/ml/spark/stages/EnsembleByKey.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/EnsembleByKey.scala similarity index 96% rename from core/src/main/scala/com/microsoft/ml/spark/stages/EnsembleByKey.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/stages/EnsembleByKey.scala index fffb989356..6290d3644d 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/stages/EnsembleByKey.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/EnsembleByKey.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.ml.Transformer import org.apache.spark.ml.linalg.SQLDataTypes._ import org.apache.spark.ml.param._ diff --git a/core/src/main/scala/com/microsoft/ml/spark/stages/EnsembleByKey.txt b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/EnsembleByKey.txt similarity index 100% rename from core/src/main/scala/com/microsoft/ml/spark/stages/EnsembleByKey.txt rename to core/src/main/scala/com/microsoft/azure/synapse/ml/stages/EnsembleByKey.txt diff --git a/core/src/main/scala/com/microsoft/ml/spark/stages/Explode.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/Explode.scala similarity index 83% rename from core/src/main/scala/com/microsoft/ml/spark/stages/Explode.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/stages/Explode.scala index 9bc445d232..5a3ef20417 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/stages/Explode.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/Explode.scala @@ -1,17 +1,17 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.contracts.{HasInputCol, HasOutputCol} -import com.microsoft.ml.spark.logging.BasicLogging -import org.apache.spark.sql.{DataFrame, Dataset} +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts.{HasInputCol, HasOutputCol} +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.ml.Transformer import org.apache.spark.ml.param._ import org.apache.spark.ml.util._ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ +import org.apache.spark.sql.{DataFrame, Dataset} object Explode extends DefaultParamsReadable[Explode] diff --git a/core/src/main/scala/com/microsoft/ml/spark/stages/Lambda.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/Lambda.scala similarity index 92% rename from core/src/main/scala/com/microsoft/ml/spark/stages/Lambda.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/stages/Lambda.scala index bba952a481..6c54dff2b8 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/stages/Lambda.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/Lambda.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.SparkContext import org.apache.spark.injections.UDFUtils import org.apache.spark.ml.{ComplexParamsReadable, ComplexParamsWritable, Transformer} diff --git a/core/src/main/scala/com/microsoft/ml/spark/stages/MiniBatchTransformer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/MiniBatchTransformer.scala similarity index 97% rename from core/src/main/scala/com/microsoft/ml/spark/stages/MiniBatchTransformer.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/stages/MiniBatchTransformer.scala index 2089827aeb..b4eec0b6ba 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/stages/MiniBatchTransformer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/MiniBatchTransformer.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.ml.Transformer import org.apache.spark.ml.param._ import org.apache.spark.ml.util.{DefaultParamsReadable, DefaultParamsWritable, Identifiable} diff --git a/core/src/main/scala/com/microsoft/ml/spark/stages/MultiColumnAdapter.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/MultiColumnAdapter.scala similarity index 96% rename from core/src/main/scala/com/microsoft/ml/spark/stages/MultiColumnAdapter.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/stages/MultiColumnAdapter.scala index 273b755154..53627244a4 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/stages/MultiColumnAdapter.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/MultiColumnAdapter.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.sql.Dataset import org.apache.spark.ml._ import org.apache.spark.ml.param.{ParamMap, PipelineStageParam, StringArrayParam} @@ -84,7 +84,7 @@ class MultiColumnAdapter(override val uid: String) extends Estimator[PipelineMod setParamInternal(value, "outputCols", Array(this.uid + "_out")) } else { throw new IllegalArgumentException( - "Need to pass a pipeline stage with inputCol and outputCol com.microsoft.ml.spark.core.serialize.params") + "Need to pass a pipeline stage with inputCol and outputCol params") } set(baseStage, value) } diff --git a/core/src/main/scala/com/microsoft/ml/spark/stages/MultiColumnAdapter.txt b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/MultiColumnAdapter.txt similarity index 100% rename from core/src/main/scala/com/microsoft/ml/spark/stages/MultiColumnAdapter.txt rename to core/src/main/scala/com/microsoft/azure/synapse/ml/stages/MultiColumnAdapter.txt diff --git a/core/src/main/scala/com/microsoft/ml/spark/stages/PartitionConsolidator.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/PartitionConsolidator.scala similarity index 92% rename from core/src/main/scala/com/microsoft/ml/spark/stages/PartitionConsolidator.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/stages/PartitionConsolidator.scala index 6f6ce174ff..fa67c26a33 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/stages/PartitionConsolidator.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/PartitionConsolidator.scala @@ -1,13 +1,11 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages -import java.util.concurrent.LinkedBlockingQueue - -import com.microsoft.ml.spark.core.contracts.{HasInputCol, HasOutputCol} -import com.microsoft.ml.spark.io.http.{HTTPParams, SharedSingleton} -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.core.contracts.{HasInputCol, HasOutputCol} +import com.microsoft.azure.synapse.ml.io.http.{HTTPParams, SharedSingleton} +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.ml.param._ import org.apache.spark.ml.util.{DefaultParamsReadable, Identifiable} import org.apache.spark.ml.{ComplexParamsWritable, Transformer} @@ -15,6 +13,7 @@ import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.types._ import org.apache.spark.sql.{DataFrame, Dataset, Row} +import java.util.concurrent.LinkedBlockingQueue import scala.concurrent.blocking object PartitionConsolidator extends DefaultParamsReadable[PartitionConsolidator] diff --git a/core/src/main/scala/com/microsoft/ml/spark/stages/RenameColumn.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/RenameColumn.scala similarity index 87% rename from core/src/main/scala/com/microsoft/ml/spark/stages/RenameColumn.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/stages/RenameColumn.scala index aae42a5c9c..0da49ac0e6 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/stages/RenameColumn.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/RenameColumn.scala @@ -1,11 +1,11 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.contracts.{HasInputCol, HasOutputCol} -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts.{HasInputCol, HasOutputCol} +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.ml.Transformer import org.apache.spark.ml.param.ParamMap import org.apache.spark.ml.util.{DefaultParamsReadable, DefaultParamsWritable, Identifiable} diff --git a/core/src/main/scala/com/microsoft/ml/spark/stages/Repartition.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/Repartition.scala similarity index 92% rename from core/src/main/scala/com/microsoft/ml/spark/stages/Repartition.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/stages/Repartition.scala index e60dcb17aa..f0c7a49a8b 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/stages/Repartition.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/Repartition.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.sql.{DataFrame, Dataset, Row} import org.apache.spark.ml.Transformer import org.apache.spark.ml.param._ diff --git a/core/src/main/scala/com/microsoft/ml/spark/stages/Repartition.txt b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/Repartition.txt similarity index 100% rename from core/src/main/scala/com/microsoft/ml/spark/stages/Repartition.txt rename to core/src/main/scala/com/microsoft/azure/synapse/ml/stages/Repartition.txt diff --git a/core/src/main/scala/com/microsoft/ml/spark/stages/SelectColumns.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/SelectColumns.scala similarity index 93% rename from core/src/main/scala/com/microsoft/ml/spark/stages/SelectColumns.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/stages/SelectColumns.scala index 3cf73448d1..cd02f6c03f 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/stages/SelectColumns.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/SelectColumns.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.sql.{DataFrame, Dataset} import org.apache.spark.ml.Transformer import org.apache.spark.ml.param._ diff --git a/core/src/main/scala/com/microsoft/ml/spark/stages/SelectColumns.txt b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/SelectColumns.txt similarity index 93% rename from core/src/main/scala/com/microsoft/ml/spark/stages/SelectColumns.txt rename to core/src/main/scala/com/microsoft/azure/synapse/ml/stages/SelectColumns.txt index b8078890e4..f0e880c7f0 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/stages/SelectColumns.txt +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/SelectColumns.txt @@ -5,7 +5,7 @@ not in the selection list are dropped. :Example: >>> import pandas as pd ->>> from mmlspark import SelectColumns +>>> from synapse.ml import SelectColumns >>> from pyspark.sql import SQLContext >>> spark = pyspark.sql.SparkSession.builder.appName("Test SelectCol").getOrCreate() >>> tmp1 = {"col1": [1, 2, 3, 4, 5], diff --git a/core/src/main/scala/com/microsoft/ml/spark/stages/StratifiedRepartition.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/StratifiedRepartition.scala similarity index 94% rename from core/src/main/scala/com/microsoft/ml/spark/stages/StratifiedRepartition.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/stages/StratifiedRepartition.scala index d159e3968d..05199578bb 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/stages/StratifiedRepartition.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/StratifiedRepartition.scala @@ -1,11 +1,11 @@ // Copyright (C) Microsoft Corporation. All rights reserved. 
// Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.contracts.HasLabelCol -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts.HasLabelCol +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.RangePartitioner import org.apache.spark.ml.Transformer import org.apache.spark.ml.param._ diff --git a/core/src/main/scala/com/microsoft/ml/spark/stages/SummarizeData.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/SummarizeData.scala similarity index 98% rename from core/src/main/scala/com/microsoft/ml/spark/stages/SummarizeData.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/stages/SummarizeData.scala index 2e335eab70..755634c58b 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/stages/SummarizeData.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/SummarizeData.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.ml.Transformer import org.apache.spark.ml.param.{BooleanParam, DoubleParam, ParamMap} import org.apache.spark.ml.util.{DefaultParamsReadable, DefaultParamsWritable, Identifiable} diff --git a/core/src/main/scala/com/microsoft/ml/spark/stages/SummarizeData.txt b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/SummarizeData.txt similarity index 100% rename from core/src/main/scala/com/microsoft/ml/spark/stages/SummarizeData.txt rename to core/src/main/scala/com/microsoft/azure/synapse/ml/stages/SummarizeData.txt diff --git a/core/src/main/scala/com/microsoft/ml/spark/stages/TextPreprocessor.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/TextPreprocessor.scala similarity index 95% rename from core/src/main/scala/com/microsoft/ml/spark/stages/TextPreprocessor.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/stages/TextPreprocessor.scala index 5d7c01144b..909219ecf6 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/stages/TextPreprocessor.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/TextPreprocessor.scala @@ -1,17 +1,17 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.contracts.{HasInputCol, HasOutputCol} -import com.microsoft.ml.spark.logging.BasicLogging -import org.apache.spark.ml.{ComplexParamsReadable, ComplexParamsWritable, Transformer} +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts.{HasInputCol, HasOutputCol} +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.ml.param.{MapParam, Param, ParamMap} import org.apache.spark.ml.util.Identifiable +import org.apache.spark.ml.{ComplexParamsReadable, ComplexParamsWritable, Transformer} +import org.apache.spark.sql.functions.udf import org.apache.spark.sql.types.{StringType, StructField, StructType} import org.apache.spark.sql.{DataFrame, Dataset} -import org.apache.spark.sql.functions.udf import spray.json.DefaultJsonProtocol._ class Trie(map: Map[Char, Trie] = Map.empty, diff --git a/core/src/main/scala/com/microsoft/ml/spark/stages/Timer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/Timer.scala similarity index 96% rename from core/src/main/scala/com/microsoft/ml/spark/stages/Timer.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/stages/Timer.scala index b0f69557d1..35aa5c224c 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/stages/Timer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/Timer.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.ml._ import org.apache.spark.ml.param.{BooleanParam, ParamMap, PipelineStageParam, TransformerParam} import org.apache.spark.ml.util._ diff --git a/core/src/main/scala/com/microsoft/ml/spark/stages/UDFTransformer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/UDFTransformer.scala similarity index 92% rename from core/src/main/scala/com/microsoft/ml/spark/stages/UDFTransformer.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/stages/UDFTransformer.scala index f6a4a4ba8f..ebee840e3d 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/stages/UDFTransformer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/UDFTransformer.scala @@ -1,12 +1,12 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.contracts.{HasInputCol, HasInputCols, HasOutputCol} -import com.microsoft.ml.spark.core.serialize.ComplexParam -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts.{HasInputCol, HasInputCols, HasOutputCol} +import com.microsoft.azure.synapse.ml.core.serialize.ComplexParam +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.injections.UDFUtils import org.apache.spark.ml.{ComplexParamsReadable, ComplexParamsWritable, Transformer} import org.apache.spark.ml.param.{ParamMap, UDFParam, UDPyFParam} diff --git a/core/src/main/scala/com/microsoft/ml/spark/stages/UnicodeNormalize.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/UnicodeNormalize.scala similarity index 91% rename from core/src/main/scala/com/microsoft/ml/spark/stages/UnicodeNormalize.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/stages/UnicodeNormalize.scala index 889d1d8522..24cd425961 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/stages/UnicodeNormalize.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/UnicodeNormalize.scala @@ -1,19 +1,19 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages -import org.apache.spark.ml.{ComplexParamsReadable, ComplexParamsWritable, Transformer} +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts.{HasInputCol, HasOutputCol} +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.ml.param.{BooleanParam, Param, ParamMap} import org.apache.spark.ml.util.Identifiable -import org.apache.spark.sql.{DataFrame, Dataset} +import org.apache.spark.ml.{ComplexParamsReadable, ComplexParamsWritable, Transformer} import org.apache.spark.sql.functions.udf +import org.apache.spark.sql.types.{StringType, StructField, StructType} +import org.apache.spark.sql.{DataFrame, Dataset} import java.text.Normalizer -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.contracts.{HasInputCol, HasOutputCol} -import com.microsoft.ml.spark.logging.BasicLogging -import org.apache.spark.sql.types.{StringType, StructField, StructType} object UnicodeNormalize extends ComplexParamsReadable[UnicodeNormalize] diff --git a/core/src/main/scala/com/microsoft/ml/spark/stages/udfs.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/udfs.scala similarity index 95% rename from core/src/main/scala/com/microsoft/ml/spark/stages/udfs.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/stages/udfs.scala index afd8b4342e..24328f1511 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/stages/udfs.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/stages/udfs.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages import org.apache.spark.injections.UDFUtils import org.apache.spark.ml.linalg.SQLDataTypes.VectorType diff --git a/core/src/main/scala/com/microsoft/ml/spark/train/AutoTrainedModel.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/train/AutoTrainedModel.scala similarity index 90% rename from core/src/main/scala/com/microsoft/ml/spark/train/AutoTrainedModel.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/train/AutoTrainedModel.scala index 3d2c018293..5807e5d547 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/train/AutoTrainedModel.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/train/AutoTrainedModel.scala @@ -1,11 +1,11 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.train +package com.microsoft.azure.synapse.ml.train -import com.microsoft.ml.spark.core.contracts.{HasFeaturesCol, HasLabelCol} -import org.apache.spark.ml.{ComplexParamsWritable, Model, PipelineModel, Transformer} +import com.microsoft.azure.synapse.ml.core.contracts.{HasFeaturesCol, HasLabelCol} import org.apache.spark.ml.param.{ParamMap, TransformerParam} +import org.apache.spark.ml.{ComplexParamsWritable, Model, PipelineModel, Transformer} /** Defines common inheritance and functions across auto trained models. 
*/ diff --git a/core/src/main/scala/com/microsoft/ml/spark/train/AutoTrainer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/train/AutoTrainer.scala similarity index 87% rename from core/src/main/scala/com/microsoft/ml/spark/train/AutoTrainer.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/train/AutoTrainer.scala index 81e15ba43c..a79d5799b2 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/train/AutoTrainer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/train/AutoTrainer.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.train +package com.microsoft.azure.synapse.ml.train -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.contracts.{HasFeaturesCol, HasLabelCol} +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts.{HasFeaturesCol, HasLabelCol} import org.apache.spark.ml.{ComplexParamsWritable, Estimator, Model} import org.apache.spark.ml.param.{EstimatorParam, IntParam} diff --git a/core/src/main/scala/com/microsoft/ml/spark/train/ComputeModelStatistics.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/train/ComputeModelStatistics.scala similarity index 97% rename from core/src/main/scala/com/microsoft/ml/spark/train/ComputeModelStatistics.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/train/ComputeModelStatistics.scala index 26275c7c64..87c3b09d2f 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/train/ComputeModelStatistics.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/train/ComputeModelStatistics.scala @@ -1,13 +1,14 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.train - -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.contracts._ -import com.microsoft.ml.spark.core.metrics.{MetricConstants, MetricUtils} -import com.microsoft.ml.spark.core.schema.{CategoricalUtilities, SchemaConstants, SparkSchema} -import com.microsoft.ml.spark.logging.BasicLogging +package com.microsoft.azure.synapse.ml.train + +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts.{ + HasEvaluationMetric, HasLabelCol, HasScoredLabelsCol, HasScoresCol, MetricData} +import com.microsoft.azure.synapse.ml.core.metrics.{MetricConstants, MetricUtils} +import com.microsoft.azure.synapse.ml.core.schema.{CategoricalUtilities, SchemaConstants, SparkSchema} +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.ml.Transformer import org.apache.spark.ml.linalg.{SQLDataTypes, Vector} import org.apache.spark.mllib.evaluation.{BinaryClassificationMetrics, MulticlassMetrics, RegressionMetrics} diff --git a/core/src/main/scala/com/microsoft/ml/spark/train/ComputeModelStatistics.txt b/core/src/main/scala/com/microsoft/azure/synapse/ml/train/ComputeModelStatistics.txt similarity index 100% rename from core/src/main/scala/com/microsoft/ml/spark/train/ComputeModelStatistics.txt rename to core/src/main/scala/com/microsoft/azure/synapse/ml/train/ComputeModelStatistics.txt diff --git a/core/src/main/scala/com/microsoft/ml/spark/train/ComputePerInstanceStatistics.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/train/ComputePerInstanceStatistics.scala similarity index 91% rename from core/src/main/scala/com/microsoft/ml/spark/train/ComputePerInstanceStatistics.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/train/ComputePerInstanceStatistics.scala index decc8351cb..dbd840dc9b 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/train/ComputePerInstanceStatistics.scala +++ 
b/core/src/main/scala/com/microsoft/azure/synapse/ml/train/ComputePerInstanceStatistics.scala @@ -1,19 +1,19 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.train +package com.microsoft.azure.synapse.ml.train -import com.microsoft.ml.spark.core.contracts._ -import com.microsoft.ml.spark.core.metrics.{MetricConstants, MetricUtils} -import com.microsoft.ml.spark.core.schema.{CategoricalUtilities, SchemaConstants, SparkSchema} +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts._ +import com.microsoft.azure.synapse.ml.core.metrics.{MetricConstants, MetricUtils} +import com.microsoft.azure.synapse.ml.core.schema.{CategoricalUtilities, SchemaConstants, SparkSchema} +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.ml.Transformer import org.apache.spark.ml.param.ParamMap import org.apache.spark.ml.util.{DefaultParamsReadable, DefaultParamsWritable, Identifiable} import org.apache.spark.sql._ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.logging.BasicLogging object ComputePerInstanceStatistics extends DefaultParamsReadable[ComputePerInstanceStatistics] { diff --git a/core/src/main/scala/com/microsoft/ml/spark/train/ComputePerInstanceStatistics.txt b/core/src/main/scala/com/microsoft/azure/synapse/ml/train/ComputePerInstanceStatistics.txt similarity index 100% rename from core/src/main/scala/com/microsoft/ml/spark/train/ComputePerInstanceStatistics.txt rename to core/src/main/scala/com/microsoft/azure/synapse/ml/train/ComputePerInstanceStatistics.txt diff --git a/core/src/main/scala/com/microsoft/ml/spark/train/TrainClassifier.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/train/TrainClassifier.scala similarity index 97% rename from 
core/src/main/scala/com/microsoft/ml/spark/train/TrainClassifier.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/train/TrainClassifier.scala index 902bd975a2..ee3d2965e1 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/train/TrainClassifier.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/train/TrainClassifier.scala @@ -1,14 +1,13 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.train +package com.microsoft.azure.synapse.ml.train -import java.util.UUID -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.schema.{CategoricalUtilities, SchemaConstants, SparkSchema} -import com.microsoft.ml.spark.core.utils.CastUtilities._ -import com.microsoft.ml.spark.featurize.{Featurize, FeaturizeUtilities, ValueIndexer, ValueIndexerModel} -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.schema.{CategoricalUtilities, SchemaConstants, SparkSchema} +import com.microsoft.azure.synapse.ml.core.utils.CastUtilities._ +import com.microsoft.azure.synapse.ml.featurize.{Featurize, FeaturizeUtilities, ValueIndexer, ValueIndexerModel} +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.annotation.DeveloperApi import org.apache.spark.ml._ import org.apache.spark.ml.classification._ @@ -17,6 +16,8 @@ import org.apache.spark.ml.util._ import org.apache.spark.sql._ import org.apache.spark.sql.types.{DoubleType, StructField, StructType} +import java.util.UUID + /** Trains a classification model. Featurizes the given data into a vector of doubles. 
* * Note the behavior of the reindex and labels parameters, the parameters interact as: diff --git a/core/src/main/scala/com/microsoft/ml/spark/train/TrainClassifier.txt b/core/src/main/scala/com/microsoft/azure/synapse/ml/train/TrainClassifier.txt similarity index 100% rename from core/src/main/scala/com/microsoft/ml/spark/train/TrainClassifier.txt rename to core/src/main/scala/com/microsoft/azure/synapse/ml/train/TrainClassifier.txt diff --git a/core/src/main/scala/com/microsoft/ml/spark/train/TrainRegressor.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/train/TrainRegressor.scala similarity index 95% rename from core/src/main/scala/com/microsoft/ml/spark/train/TrainRegressor.scala rename to core/src/main/scala/com/microsoft/azure/synapse/ml/train/TrainRegressor.scala index cc60e92366..b956579c4c 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/train/TrainRegressor.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/train/TrainRegressor.scala @@ -1,21 +1,22 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.train +package com.microsoft.azure.synapse.ml.train -import java.util.UUID -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.schema.{SchemaConstants, SparkSchema} -import com.microsoft.ml.spark.featurize.{Featurize, FeaturizeUtilities} -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.schema.{SchemaConstants, SparkSchema} +import com.microsoft.azure.synapse.ml.featurize.{Featurize, FeaturizeUtilities} +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.ml._ import org.apache.spark.ml.param._ import org.apache.spark.ml.regression._ import org.apache.spark.ml.util._ -import org.apache.spark.ml._ import org.apache.spark.sql._ import org.apache.spark.sql.types._ +import java.util.UUID + /** Trains a regression model. */ class TrainRegressor(override val uid: String) extends AutoTrainer[TrainedRegressorModel] with BasicLogging { logClass() diff --git a/core/src/main/scala/com/microsoft/ml/spark/train/TrainRegressor.txt b/core/src/main/scala/com/microsoft/azure/synapse/ml/train/TrainRegressor.txt similarity index 91% rename from core/src/main/scala/com/microsoft/ml/spark/train/TrainRegressor.txt rename to core/src/main/scala/com/microsoft/azure/synapse/ml/train/TrainRegressor.txt index 86c10cbe43..7b17b86a8a 100644 --- a/core/src/main/scala/com/microsoft/ml/spark/train/TrainRegressor.txt +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/train/TrainRegressor.txt @@ -5,7 +5,7 @@ myDataFrame, with a label column, "MyLabel", split the DataFrame into train and test sets. 
Train a regressor on the dataset with a solver, such as l-bfgs: ->>> from mmlspark.TrainRegressor import TrainRegressor +>>> from synapse.ml.TrainRegressor import TrainRegressor >>> from pysppark.ml.regression import LinearRegression >>> lr = LinearRegression().setSolver("l-bfgs").setRegParam(0.1).setElasticNetParam(0.3) >>> model = TrainRegressor(model=lr, labelCol="MyLabel", numFeatures=1 << 18).fit(train) diff --git a/core/src/main/scala/org/apache/spark/ml/ComplexParamsSerializer.scala b/core/src/main/scala/org/apache/spark/ml/ComplexParamsSerializer.scala index af06597a7e..a0e7f41d0c 100644 --- a/core/src/main/scala/org/apache/spark/ml/ComplexParamsSerializer.scala +++ b/core/src/main/scala/org/apache/spark/ml/ComplexParamsSerializer.scala @@ -3,7 +3,7 @@ package org.apache.spark.ml -import com.microsoft.ml.spark.core.serialize.ComplexParam +import com.microsoft.azure.synapse.ml.core.serialize.ComplexParam import org.apache.hadoop.fs.Path import org.apache.spark.SparkContext import org.apache.spark.ml.param.{ParamPair, Params} diff --git a/core/src/main/scala/org/apache/spark/ml/LimeNamespaceInjections.scala b/core/src/main/scala/org/apache/spark/ml/LimeNamespaceInjections.scala index 6f0089bd24..fe13c636ac 100644 --- a/core/src/main/scala/org/apache/spark/ml/LimeNamespaceInjections.scala +++ b/core/src/main/scala/org/apache/spark/ml/LimeNamespaceInjections.scala @@ -3,7 +3,7 @@ package org.apache.spark.ml -import com.microsoft.ml.spark.lime.LassoUtils +import com.microsoft.azure.synapse.ml.lime.LassoUtils import org.apache.spark.ml.linalg.{DenseMatrix, DenseVector} object LimeNamespaceInjections { diff --git a/core/src/main/scala/org/apache/spark/ml/Ranker.scala b/core/src/main/scala/org/apache/spark/ml/Ranker.scala index 71cd35ce05..ad299c73a5 100644 --- a/core/src/main/scala/org/apache/spark/ml/Ranker.scala +++ b/core/src/main/scala/org/apache/spark/ml/Ranker.scala @@ -3,8 +3,9 @@ package org.apache.spark.ml -// Note: a bit strange to have the mmlspark 
import here, but it works -import com.microsoft.ml.spark.core.contracts.HasGroupCol +import com.microsoft.azure.synapse.ml.core.contracts.HasGroupCol + +// Note: a bit strange to have the synapseml import here, but it works /** * Ranker base class diff --git a/core/src/main/scala/org/apache/spark/ml/Serializer.scala b/core/src/main/scala/org/apache/spark/ml/Serializer.scala index 9ca2996bee..6d7507d559 100644 --- a/core/src/main/scala/org/apache/spark/ml/Serializer.scala +++ b/core/src/main/scala/org/apache/spark/ml/Serializer.scala @@ -3,10 +3,10 @@ package org.apache.spark.ml -import java.io.{InputStream, ObjectOutputStream, OutputStream} +import com.microsoft.azure.synapse.ml.core.utils.ContextObjectInputStream -import com.microsoft.ml.spark.core.env.StreamUtilities._ -import com.microsoft.ml.spark.core.utils.ContextObjectInputStream +import java.io.{InputStream, ObjectOutputStream, OutputStream} +import com.microsoft.azure.synapse.ml.core.env.StreamUtilities._ import org.apache.hadoop.fs.Path import org.apache.spark.SparkContext import org.apache.spark.ml.util.MLWritable diff --git a/core/src/main/scala/org/apache/spark/ml/param/ArrayParamMapParam.scala b/core/src/main/scala/org/apache/spark/ml/param/ArrayParamMapParam.scala index f84130c593..17b8c277f7 100644 --- a/core/src/main/scala/org/apache/spark/ml/param/ArrayParamMapParam.scala +++ b/core/src/main/scala/org/apache/spark/ml/param/ArrayParamMapParam.scala @@ -3,7 +3,7 @@ package org.apache.spark.ml.param -import com.microsoft.ml.spark.core.serialize.ComplexParam +import com.microsoft.azure.synapse.ml.core.serialize.ComplexParam /** Param for Array of ParamMaps. Needed as spark has explicit params for many different * types but not Array of ParamMaps. 
diff --git a/core/src/main/scala/org/apache/spark/ml/param/BallTreeParam.scala b/core/src/main/scala/org/apache/spark/ml/param/BallTreeParam.scala index 5cd5cc4e29..6e172ae449 100644 --- a/core/src/main/scala/org/apache/spark/ml/param/BallTreeParam.scala +++ b/core/src/main/scala/org/apache/spark/ml/param/BallTreeParam.scala @@ -3,8 +3,8 @@ package org.apache.spark.ml.param -import com.microsoft.ml.spark.core.serialize.ComplexParam -import com.microsoft.ml.spark.nn.{BallTree, ConditionalBallTree} +import com.microsoft.azure.synapse.ml.core.serialize.ComplexParam +import com.microsoft.azure.synapse.ml.nn.{BallTree, ConditionalBallTree} /** Param for a BallTree. */ diff --git a/core/src/main/scala/org/apache/spark/ml/param/ByteArrayParam.scala b/core/src/main/scala/org/apache/spark/ml/param/ByteArrayParam.scala index 0e2564479c..5d4e386b88 100644 --- a/core/src/main/scala/org/apache/spark/ml/param/ByteArrayParam.scala +++ b/core/src/main/scala/org/apache/spark/ml/param/ByteArrayParam.scala @@ -3,9 +3,9 @@ package org.apache.spark.ml.param -import com.microsoft.ml.spark.core.serialize.ComplexParam +import com.microsoft.azure.synapse.ml.core.serialize.ComplexParam -/** Param for ByteArray. Needed as spark has explicit com.microsoft.ml.spark.core.serialize.params for many different +/** Param for ByteArray. Needed as spark has explicit params for many different * types but not ByteArray. 
*/ class ByteArrayParam(parent: Params, name: String, doc: String, isValid: Array[Byte] => Boolean) diff --git a/core/src/main/scala/org/apache/spark/ml/param/DataFrameParam.scala b/core/src/main/scala/org/apache/spark/ml/param/DataFrameParam.scala index 6932df74cb..84c147fe8b 100644 --- a/core/src/main/scala/org/apache/spark/ml/param/DataFrameParam.scala +++ b/core/src/main/scala/org/apache/spark/ml/param/DataFrameParam.scala @@ -3,8 +3,8 @@ package org.apache.spark.ml.param -import com.microsoft.ml.spark.core.serialize.ComplexParam -import com.microsoft.ml.spark.core.utils.ParamEquality +import com.microsoft.azure.synapse.ml.core.serialize.ComplexParam +import com.microsoft.azure.synapse.ml.core.utils.ParamEquality import org.apache.spark.ml.linalg.DenseVector import org.apache.spark.sql.functions.col import org.apache.spark.sql.{DataFrame, Dataset, Row} @@ -111,7 +111,7 @@ trait DataFrameEquality extends Serializable { } -/** Param for DataFrame. Needed as spark has explicit com.microsoft.ml.spark.core.serialize.params for many different +/** Param for DataFrame. Needed as spark has explicit params for many different * types but not DataFrame. 
*/ class DataFrameParam(parent: Params, name: String, doc: String, isValid: DataFrame => Boolean) diff --git a/core/src/main/scala/org/apache/spark/ml/param/DataTypeParam.scala b/core/src/main/scala/org/apache/spark/ml/param/DataTypeParam.scala index c3fbf08615..22cfba851e 100644 --- a/core/src/main/scala/org/apache/spark/ml/param/DataTypeParam.scala +++ b/core/src/main/scala/org/apache/spark/ml/param/DataTypeParam.scala @@ -3,7 +3,7 @@ package org.apache.spark.ml.param -import com.microsoft.ml.spark.core.serialize.ComplexParam +import com.microsoft.azure.synapse.ml.core.serialize.ComplexParam import org.apache.spark.sql.types.DataType /** Param for DataType */ diff --git a/core/src/main/scala/org/apache/spark/ml/param/EstimatorArrayParam.scala b/core/src/main/scala/org/apache/spark/ml/param/EstimatorArrayParam.scala index e174bd3c71..c64a5fb7eb 100644 --- a/core/src/main/scala/org/apache/spark/ml/param/EstimatorArrayParam.scala +++ b/core/src/main/scala/org/apache/spark/ml/param/EstimatorArrayParam.scala @@ -3,7 +3,7 @@ package org.apache.spark.ml.param -import com.microsoft.ml.spark.core.serialize.ComplexParam +import com.microsoft.azure.synapse.ml.core.serialize.ComplexParam import org.apache.spark.ml.Estimator import scala.collection.JavaConverters._ diff --git a/core/src/main/scala/org/apache/spark/ml/param/EstimatorParam.scala b/core/src/main/scala/org/apache/spark/ml/param/EstimatorParam.scala index 258e0978e5..875c115899 100644 --- a/core/src/main/scala/org/apache/spark/ml/param/EstimatorParam.scala +++ b/core/src/main/scala/org/apache/spark/ml/param/EstimatorParam.scala @@ -3,8 +3,8 @@ package org.apache.spark.ml.param -import com.microsoft.ml.spark.core.serialize.ComplexParam -import com.microsoft.ml.spark.core.utils.{ModelEquality, ParamEquality} +import com.microsoft.azure.synapse.ml.core.serialize.ComplexParam +import com.microsoft.azure.synapse.ml.core.utils.{ModelEquality, ParamEquality} import org.apache.spark.ml.{Estimator, Model, PipelineStage} 
trait PipelineStageWrappable[T <: PipelineStage] extends ExternalPythonWrappableParam[T] with ParamEquality[T] { @@ -32,7 +32,7 @@ trait PipelineStageWrappable[T <: PipelineStage] extends ExternalPythonWrappable } -/** Param for Estimator. Needed as spark has explicit com.microsoft.ml.spark.core.serialize.params for many different +/** Param for Estimator. Needed as spark has explicit params for many different * types but not Estimator. */ class EstimatorParam(parent: Params, name: String, doc: String, isValid: Estimator[_ <: Model[_]] => Boolean) diff --git a/core/src/main/scala/org/apache/spark/ml/param/EvaluatorParam.scala b/core/src/main/scala/org/apache/spark/ml/param/EvaluatorParam.scala index fe8c0ca60d..0b30e63843 100644 --- a/core/src/main/scala/org/apache/spark/ml/param/EvaluatorParam.scala +++ b/core/src/main/scala/org/apache/spark/ml/param/EvaluatorParam.scala @@ -3,10 +3,10 @@ package org.apache.spark.ml.param -import com.microsoft.ml.spark.core.serialize.ComplexParam +import com.microsoft.azure.synapse.ml.core.serialize.ComplexParam import org.apache.spark.ml.evaluation.Evaluator -/** Param for Evaluator. Needed as spark has explicit com.microsoft.ml.spark.core.serialize.params for many different +/** Param for Evaluator. Needed as spark has explicit params for many different * types but not Evaluator. */ class EvaluatorParam(parent: Params, name: String, doc: String, isValid: Evaluator => Boolean) diff --git a/core/src/main/scala/org/apache/spark/ml/param/ParamSpaceParam.scala b/core/src/main/scala/org/apache/spark/ml/param/ParamSpaceParam.scala index 1cba43d026..20849d8c63 100644 --- a/core/src/main/scala/org/apache/spark/ml/param/ParamSpaceParam.scala +++ b/core/src/main/scala/org/apache/spark/ml/param/ParamSpaceParam.scala @@ -3,7 +3,7 @@ package org.apache.spark.ml.param -import com.microsoft.ml.spark.core.serialize.ComplexParam +import com.microsoft.azure.synapse.ml.core.serialize.ComplexParam /** Represents the parameter values. 
*/ @@ -11,7 +11,7 @@ abstract class ParamSpace { def paramMaps: Iterator[ParamMap] } -/** Param for ParamSpace. Needed as spark has explicit com.microsoft.ml.spark.core.serialize.params for many different +/** Param for ParamSpace. Needed as spark has explicit params for many different * types but not ParamSpace. */ class ParamSpaceParam(parent: Params, name: String, doc: String, isValid: ParamSpace => Boolean) diff --git a/core/src/main/scala/org/apache/spark/ml/param/PipelineStageParam.scala b/core/src/main/scala/org/apache/spark/ml/param/PipelineStageParam.scala index 95f34cd373..03ca523a1a 100644 --- a/core/src/main/scala/org/apache/spark/ml/param/PipelineStageParam.scala +++ b/core/src/main/scala/org/apache/spark/ml/param/PipelineStageParam.scala @@ -3,10 +3,10 @@ package org.apache.spark.ml.param -import com.microsoft.ml.spark.core.serialize.ComplexParam +import com.microsoft.azure.synapse.ml.core.serialize.ComplexParam import org.apache.spark.ml.PipelineStage -/** Param for Transformer. Needed as spark has explicit com.microsoft.ml.spark.core.serialize.params for many different +/** Param for Transformer. Needed as spark has explicit params for many different * types but not Transformer. 
*/ class PipelineStageParam(parent: Params, name: String, doc: String, isValid: PipelineStage => Boolean) diff --git a/core/src/main/scala/org/apache/spark/ml/param/TransformerArrayParam.scala b/core/src/main/scala/org/apache/spark/ml/param/TransformerArrayParam.scala index ae15d4f6c9..547475e929 100644 --- a/core/src/main/scala/org/apache/spark/ml/param/TransformerArrayParam.scala +++ b/core/src/main/scala/org/apache/spark/ml/param/TransformerArrayParam.scala @@ -3,7 +3,7 @@ package org.apache.spark.ml.param -import com.microsoft.ml.spark.core.serialize.ComplexParam +import com.microsoft.azure.synapse.ml.core.serialize.ComplexParam import org.apache.spark.ml.Transformer import scala.collection.JavaConverters._ diff --git a/core/src/main/scala/org/apache/spark/ml/param/TransformerParam.scala b/core/src/main/scala/org/apache/spark/ml/param/TransformerParam.scala index 00ce86155c..a10b050fd6 100644 --- a/core/src/main/scala/org/apache/spark/ml/param/TransformerParam.scala +++ b/core/src/main/scala/org/apache/spark/ml/param/TransformerParam.scala @@ -3,10 +3,10 @@ package org.apache.spark.ml.param -import com.microsoft.ml.spark.core.serialize.ComplexParam +import com.microsoft.azure.synapse.ml.core.serialize.ComplexParam import org.apache.spark.ml.Transformer -/** Param for Transformer. Needed as spark has explicit com.microsoft.ml.spark.core.serialize.params for many different +/** Param for Transformer. Needed as spark has explicit params for many different * types but not Transformer. 
*/ class TransformerParam(parent: Params, name: String, doc: String, isValid: Transformer => Boolean) diff --git a/core/src/main/scala/org/apache/spark/ml/param/UDFParam.scala b/core/src/main/scala/org/apache/spark/ml/param/UDFParam.scala index 1bb8ac56e6..9d26f5547f 100644 --- a/core/src/main/scala/org/apache/spark/ml/param/UDFParam.scala +++ b/core/src/main/scala/org/apache/spark/ml/param/UDFParam.scala @@ -3,8 +3,8 @@ package org.apache.spark.ml.param -import com.microsoft.ml.spark.core.serialize.ComplexParam -import com.microsoft.ml.spark.core.utils.ParamEquality +import com.microsoft.azure.synapse.ml.core.serialize.ComplexParam +import com.microsoft.azure.synapse.ml.core.utils.ParamEquality import org.apache.spark.injections.UDFUtils import org.apache.spark.sql.expressions.UserDefinedFunction import org.scalactic.TripleEquals._ diff --git a/core/src/main/scala/org/apache/spark/ml/param/UDPyFParam.scala b/core/src/main/scala/org/apache/spark/ml/param/UDPyFParam.scala index 738189ae1d..c4df37eb4b 100644 --- a/core/src/main/scala/org/apache/spark/ml/param/UDPyFParam.scala +++ b/core/src/main/scala/org/apache/spark/ml/param/UDPyFParam.scala @@ -3,7 +3,7 @@ package org.apache.spark.ml.param -import com.microsoft.ml.spark.core.serialize.ComplexParam +import com.microsoft.azure.synapse.ml.core.serialize.ComplexParam import org.apache.spark.sql.execution.python.UserDefinedPythonFunction /** Param for UserDefinedPythonFunction. 
Needed as spark has explicit params for many different diff --git a/core/src/main/scala/org/apache/spark/ml/recommendation/RecommendationHelper.scala b/core/src/main/scala/org/apache/spark/ml/recommendation/RecommendationHelper.scala index 61fcf83717..6da30d598b 100644 --- a/core/src/main/scala/org/apache/spark/ml/recommendation/RecommendationHelper.scala +++ b/core/src/main/scala/org/apache/spark/ml/recommendation/RecommendationHelper.scala @@ -3,7 +3,7 @@ package org.apache.spark.ml.recommendation -import com.microsoft.ml.spark.codegen.Wrappable +import com.microsoft.azure.synapse.ml.codegen.Wrappable import org.apache.spark.ml.evaluation.Evaluator import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared.{HasLabelCol, HasPredictionCol, HasSeed} diff --git a/core/src/main/scala/org/apache/spark/ml/source/image/PatchedImageFileFormat.scala b/core/src/main/scala/org/apache/spark/ml/source/image/PatchedImageFileFormat.scala index eeeb810bed..e84c5fc5e1 100644 --- a/core/src/main/scala/org/apache/spark/ml/source/image/PatchedImageFileFormat.scala +++ b/core/src/main/scala/org/apache/spark/ml/source/image/PatchedImageFileFormat.scala @@ -4,9 +4,8 @@ package org.apache.spark.ml.source.image import com.google.common.io.{ByteStreams, Closeables} -import com.microsoft.ml.spark.core.schema.ImageSchemaUtils -import com.microsoft.ml.spark.io.image.ImageUtils -import javax.imageio.ImageIO +import com.microsoft.azure.synapse.ml.core.schema.ImageSchemaUtils +import com.microsoft.azure.synapse.ml.io.image.ImageUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce._ @@ -21,6 +20,8 @@ import org.apache.spark.sql.sources._ import org.apache.spark.sql.types._ import org.apache.spark.util.SerializableConfiguration +import javax.imageio.ImageIO + class PatchedImageFileFormat extends ImageFileFormat with Serializable with Logging { override def shortName(): String = "patchedImage" diff --git 
a/core/src/main/scala/org/apache/spark/sql/execution/streaming/DistributedHTTPSource.scala b/core/src/main/scala/org/apache/spark/sql/execution/streaming/DistributedHTTPSource.scala index 3584df7eb2..b4148fbdb1 100644 --- a/core/src/main/scala/org/apache/spark/sql/execution/streaming/DistributedHTTPSource.scala +++ b/core/src/main/scala/org/apache/spark/sql/execution/streaming/DistributedHTTPSource.scala @@ -3,24 +3,21 @@ package org.apache.spark.sql.execution.streaming -import java.net.{InetAddress, InetSocketAddress} -import java.util.UUID -import java.util.concurrent.Executors - -import com.microsoft.ml.spark.core.env.StreamUtilities.usingMany -import com.microsoft.ml.spark.io.http.{HTTPRequestData, HTTPResponseData, SharedSingleton} +import com.microsoft.azure.synapse.ml.io.http.{HTTPRequestData, HTTPResponseData, SharedSingleton} import com.sun.net.httpserver.{HttpExchange, HttpHandler, HttpServer} -import javax.annotation.concurrent.GuardedBy -import org.apache.commons.io.IOUtils import org.apache.spark.internal.Logging import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.encoders.RowEncoder +import org.apache.spark.sql.connector.read.streaming.{Offset => OffsetV2} import org.apache.spark.sql.execution.streaming.continuous.HTTPSourceV2 import org.apache.spark.sql.sources.{DataSourceRegister, StreamSinkProvider, StreamSourceProvider} import org.apache.spark.sql.streaming.OutputMode import org.apache.spark.sql.types._ -import org.apache.spark.sql.connector.read.streaming.{Offset => OffsetV2} +import java.net.{InetAddress, InetSocketAddress} +import java.util.UUID +import java.util.concurrent.Executors +import javax.annotation.concurrent.GuardedBy import scala.collection.mutable.ListBuffer import scala.collection.{immutable, mutable} diff --git a/core/src/main/scala/org/apache/spark/sql/execution/streaming/HTTPSource.scala b/core/src/main/scala/org/apache/spark/sql/execution/streaming/HTTPSource.scala index 1e4a371551..12ac285bb8 100644 --- 
a/core/src/main/scala/org/apache/spark/sql/execution/streaming/HTTPSource.scala +++ b/core/src/main/scala/org/apache/spark/sql/execution/streaming/HTTPSource.scala @@ -3,10 +3,11 @@ package org.apache.spark.sql.execution.streaming -import java.net.{InetAddress, InetSocketAddress} +import com.microsoft.azure.synapse.ml.io.http.{HTTPRequestData, HTTPResponseData} -import com.microsoft.ml.spark.io.http.{HTTPRequestData, HTTPResponseData} +import java.net.{InetAddress, InetSocketAddress} import com.sun.net.httpserver.{HttpExchange, HttpHandler, HttpServer} + import javax.annotation.concurrent.GuardedBy import org.apache.spark.internal.Logging import org.apache.spark.sql._ diff --git a/core/src/main/scala/org/apache/spark/sql/execution/streaming/ServingUDFs.scala b/core/src/main/scala/org/apache/spark/sql/execution/streaming/ServingUDFs.scala index 6ae23fcaed..74a1de8160 100644 --- a/core/src/main/scala/org/apache/spark/sql/execution/streaming/ServingUDFs.scala +++ b/core/src/main/scala/org/apache/spark/sql/execution/streaming/ServingUDFs.scala @@ -3,8 +3,8 @@ package org.apache.spark.sql.execution.streaming -import com.microsoft.ml.spark.io.http.HTTPResponseData -import com.microsoft.ml.spark.io.http.HTTPSchema.{binary_to_response, empty_response, string_to_response} +import com.microsoft.azure.synapse.ml.io.http.HTTPResponseData +import com.microsoft.azure.synapse.ml.io.http.HTTPSchema.{binary_to_response, empty_response, string_to_response} import org.apache.spark.injections.UDFUtils import org.apache.spark.sql.execution.streaming.continuous.HTTPSourceStateHolder import org.apache.spark.sql.expressions.UserDefinedFunction diff --git a/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/HTTPSinkV2.scala b/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/HTTPSinkV2.scala index 2040099672..c14920970c 100644 --- a/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/HTTPSinkV2.scala +++ 
b/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/HTTPSinkV2.scala @@ -3,9 +3,9 @@ package org.apache.spark.sql.execution.streaming.continuous -import java.util +import com.microsoft.azure.synapse.ml.io.http.HTTPResponseData -import com.microsoft.ml.spark.io.http.HTTPResponseData +import java.util import org.apache.spark.TaskContext import org.apache.spark.internal.Logging import org.apache.spark.sql._ diff --git a/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/HTTPSourceV2.scala b/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/HTTPSourceV2.scala index 05b373a5f2..90879053d3 100644 --- a/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/HTTPSourceV2.scala +++ b/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/HTTPSourceV2.scala @@ -8,11 +8,12 @@ import java.net.{InetAddress, InetSocketAddress, ServerSocket, URL} import java.util import java.util.concurrent.{Executors, LinkedBlockingQueue, TimeUnit} import java.util.{Optional, UUID} - import com.jcraft.jsch.Session -import com.microsoft.ml.spark.core.env.StreamUtilities -import com.microsoft.ml.spark.io.http._ +import com.microsoft.azure.synapse.ml.core.env.StreamUtilities +import com.microsoft.azure.synapse.ml.io.http.{ + HTTPRequestData, HTTPResponseData, HTTPSchema, PortForwarding, StatusLineData} import com.sun.net.httpserver.{HttpExchange, HttpHandler, HttpServer} + import javax.annotation.concurrent.GuardedBy import org.apache.commons.io.IOUtils import org.apache.http.client.config.RequestConfig diff --git a/core/src/main/scala/org/apache/spark/sql/types/injections/OptimizedCKNNFitting.scala b/core/src/main/scala/org/apache/spark/sql/types/injections/OptimizedCKNNFitting.scala index 42d167750d..fbd2398eff 100644 --- a/core/src/main/scala/org/apache/spark/sql/types/injections/OptimizedCKNNFitting.scala +++ b/core/src/main/scala/org/apache/spark/sql/types/injections/OptimizedCKNNFitting.scala @@ 
-4,8 +4,8 @@ package org.apache.spark.sql.types.injections import breeze.linalg.{DenseVector => BDV} -import com.microsoft.ml.spark.logging.BasicLogging -import com.microsoft.ml.spark.nn._ +import com.microsoft.azure.synapse.ml.logging.BasicLogging +import com.microsoft.azure.synapse.ml.nn._ import org.apache.spark.ml.linalg.Vector import org.apache.spark.sql.Dataset import org.apache.spark.sql.types._ diff --git a/core/src/test/R/testthat.R b/core/src/test/R/testthat.R index 828f59e58e..a45e745b29 100644 --- a/core/src/test/R/testthat.R +++ b/core/src/test/R/testthat.R @@ -1,4 +1,4 @@ library(testthat) -library(mmlspark) +library(synapseml) -test_check("mmlspark") +test_check("synapseml") diff --git a/core/src/test/python/setup.py b/core/src/test/python/setup.py index 2feba30b02..4e37afa06d 100644 --- a/core/src/test/python/setup.py +++ b/core/src/test/python/setup.py @@ -5,7 +5,7 @@ from setuptools import setup, find_packages setup( - name="mmlsparktest", + name="synapsemltest", version=0.1, description="Microsoft ML for Spark Tests", long_description="Microsoft ML for Apache Spark contains Microsoft's open source " + @@ -14,7 +14,7 @@ packages=find_packages(), # Project's main homepage. 
- url="https://github.com/Azure/mmlspark", + url="https://github.com/Microsoft/SynapseML", # Author details author="Microsoft", author_email="mmlspark-support@microsoft.com", @@ -31,5 +31,5 @@ zip_safe=True, - package_data={"mmlspark": ["../LICENSE.txt", "../README.txt"]} + package_data={"synapseml": ["../LICENSE.txt", "../README.txt"]} ) diff --git a/core/src/test/python/mmlsparktest/__init__.py b/core/src/test/python/synapsemltest/__init__.py similarity index 100% rename from core/src/test/python/mmlsparktest/__init__.py rename to core/src/test/python/synapsemltest/__init__.py diff --git a/core/src/test/python/mmlsparktest/cyber/__init__.py b/core/src/test/python/synapsemltest/cyber/__init__.py similarity index 100% rename from core/src/test/python/mmlsparktest/cyber/__init__.py rename to core/src/test/python/synapsemltest/cyber/__init__.py diff --git a/core/src/test/python/mmlsparktest/cyber/anamoly/__init__.py b/core/src/test/python/synapsemltest/cyber/anamoly/__init__.py similarity index 100% rename from core/src/test/python/mmlsparktest/cyber/anamoly/__init__.py rename to core/src/test/python/synapsemltest/cyber/anamoly/__init__.py diff --git a/core/src/test/python/mmlsparktest/cyber/anamoly/test_collaborative_filtering.py b/core/src/test/python/synapsemltest/cyber/anamoly/test_collaborative_filtering.py similarity index 99% rename from core/src/test/python/mmlsparktest/cyber/anamoly/test_collaborative_filtering.py rename to core/src/test/python/synapsemltest/cyber/anamoly/test_collaborative_filtering.py index b3adbcdb45..13e536bb06 100644 --- a/core/src/test/python/mmlsparktest/cyber/anamoly/test_collaborative_filtering.py +++ b/core/src/test/python/synapsemltest/cyber/anamoly/test_collaborative_filtering.py @@ -7,14 +7,14 @@ from typing import Dict, Optional, Set, Type, Union from pandas.testing import assert_frame_equal from pyspark.sql import DataFrame, types as t, functions as f -from mmlspark.cyber.feature import indexers -from mmlspark.cyber import 
DataFactory -from mmlspark.cyber.anomaly.collaborative_filtering import \ +from synapse.ml.cyber.feature import indexers +from synapse.ml.cyber import DataFactory +from synapse.ml.cyber.anomaly.collaborative_filtering import \ AccessAnomaly, AccessAnomalyModel, AccessAnomalyConfig, ConnectedComponents, ModelNormalizeTransformer, \ _UserResourceFeatureVectorMapping as UserResourceFeatureVectorMapping -from mmlsparktest.cyber.explain_tester import ExplainTester -from mmlsparktest.spark import * +from synapsemltest.cyber.explain_tester import ExplainTester +from synapsemltest.spark import * epsilon = 10 ** -3 diff --git a/core/src/test/python/mmlsparktest/cyber/anamoly/test_complement_access.py b/core/src/test/python/synapsemltest/cyber/anamoly/test_complement_access.py similarity index 95% rename from core/src/test/python/mmlsparktest/cyber/anamoly/test_complement_access.py rename to core/src/test/python/synapsemltest/cyber/anamoly/test_complement_access.py index 02c27f04a7..29fc879f68 100644 --- a/core/src/test/python/mmlsparktest/cyber/anamoly/test_complement_access.py +++ b/core/src/test/python/synapsemltest/cyber/anamoly/test_complement_access.py @@ -4,9 +4,9 @@ import unittest from typing import Type from pyspark.sql import DataFrame, types as t, functions as f -from mmlspark.cyber.anomaly.complement_access import ComplementAccessTransformer -from mmlsparktest.cyber.explain_tester import ExplainTester -from mmlsparktest.spark import * +from synapse.ml.cyber.anomaly.complement_access import ComplementAccessTransformer +from synapsemltest.cyber.explain_tester import ExplainTester +from synapsemltest.spark import * class TestComplementAccessTransformer(unittest.TestCase): diff --git a/core/src/test/python/mmlsparktest/cyber/explain_tester.py b/core/src/test/python/synapsemltest/cyber/explain_tester.py similarity index 98% rename from core/src/test/python/mmlsparktest/cyber/explain_tester.py rename to core/src/test/python/synapsemltest/cyber/explain_tester.py 
index caa0f67377..d847fc45cd 100644 --- a/core/src/test/python/mmlsparktest/cyber/explain_tester.py +++ b/core/src/test/python/synapsemltest/cyber/explain_tester.py @@ -3,7 +3,7 @@ from typing import Any, Callable, List from pyspark.ml.param.shared import HasInputCol, HasOutputCol -from mmlsparktest.spark import * +from synapsemltest.spark import * class ExplainTester: diff --git a/core/src/test/python/mmlsparktest/cyber/feature/__init__.py b/core/src/test/python/synapsemltest/cyber/feature/__init__.py similarity index 100% rename from core/src/test/python/mmlsparktest/cyber/feature/__init__.py rename to core/src/test/python/synapsemltest/cyber/feature/__init__.py diff --git a/core/src/test/python/mmlsparktest/cyber/feature/test_indexers.py b/core/src/test/python/synapsemltest/cyber/feature/test_indexers.py similarity index 97% rename from core/src/test/python/mmlsparktest/cyber/feature/test_indexers.py rename to core/src/test/python/synapsemltest/cyber/feature/test_indexers.py index a8f3698b1a..e5ec565195 100644 --- a/core/src/test/python/mmlsparktest/cyber/feature/test_indexers.py +++ b/core/src/test/python/synapsemltest/cyber/feature/test_indexers.py @@ -4,9 +4,9 @@ import unittest from typing import Type from pyspark.sql import types as t, functions as f -from mmlspark.cyber.feature import indexers -from mmlsparktest.cyber.explain_tester import ExplainTester -from mmlsparktest.spark import * +from synapse.ml.cyber.feature import indexers +from synapsemltest.cyber.explain_tester import ExplainTester +from synapsemltest.spark import * class TestIndexers(unittest.TestCase): diff --git a/core/src/test/python/mmlsparktest/cyber/feature/test_scalers.py b/core/src/test/python/synapsemltest/cyber/feature/test_scalers.py similarity index 96% rename from core/src/test/python/mmlsparktest/cyber/feature/test_scalers.py rename to core/src/test/python/synapsemltest/cyber/feature/test_scalers.py index 93679a3412..e938cdf488 100644 --- 
a/core/src/test/python/mmlsparktest/cyber/feature/test_scalers.py +++ b/core/src/test/python/synapsemltest/cyber/feature/test_scalers.py @@ -4,9 +4,9 @@ import unittest from typing import Type from pyspark.sql import functions as f, types as t -from mmlspark.cyber.feature import LinearScalarScaler, StandardScalarScaler -from mmlsparktest.cyber.explain_tester import ExplainTester -from mmlsparktest.spark import * +from synapse.ml.cyber.feature import LinearScalarScaler, StandardScalarScaler +from synapsemltest.cyber.explain_tester import ExplainTester +from synapsemltest.spark import * class TestScalers(unittest.TestCase): diff --git a/core/src/test/python/mmlsparktest/cyber/utils/__init__.py b/core/src/test/python/synapsemltest/cyber/utils/__init__.py similarity index 100% rename from core/src/test/python/mmlsparktest/cyber/utils/__init__.py rename to core/src/test/python/synapsemltest/cyber/utils/__init__.py diff --git a/core/src/test/python/mmlsparktest/cyber/utils/test_spark_utils.py b/core/src/test/python/synapsemltest/cyber/utils/test_spark_utils.py similarity index 96% rename from core/src/test/python/mmlsparktest/cyber/utils/test_spark_utils.py rename to core/src/test/python/synapsemltest/cyber/utils/test_spark_utils.py index 5144aca30a..c22907c0a2 100644 --- a/core/src/test/python/mmlsparktest/cyber/utils/test_spark_utils.py +++ b/core/src/test/python/synapsemltest/cyber/utils/test_spark_utils.py @@ -8,8 +8,8 @@ from pyspark.ml import Transformer from pyspark.ml.param.shared import Param, Params -from mmlspark.cyber.utils.spark_utils import DataFrameUtils, ExplainBuilder, HasSetInputCol, HasSetOutputCol -from mmlsparktest.spark import * +from synapse.ml.cyber.utils.spark_utils import DataFrameUtils, ExplainBuilder, HasSetInputCol, HasSetOutputCol +from synapsemltest.spark import * class TestDataFrameUtils(unittest.TestCase): diff --git a/core/src/test/python/mmlsparktest/nn/__init__.py b/core/src/test/python/synapsemltest/nn/__init__.py similarity index 
100% rename from core/src/test/python/mmlsparktest/nn/__init__.py rename to core/src/test/python/synapsemltest/nn/__init__.py diff --git a/core/src/test/python/mmlsparktest/nn/test_ball_tree.py b/core/src/test/python/synapsemltest/nn/test_ball_tree.py similarity index 88% rename from core/src/test/python/mmlsparktest/nn/test_ball_tree.py rename to core/src/test/python/synapsemltest/nn/test_ball_tree.py index bb51b06c1b..7b1beb3249 100644 --- a/core/src/test/python/mmlsparktest/nn/test_ball_tree.py +++ b/core/src/test/python/synapsemltest/nn/test_ball_tree.py @@ -4,8 +4,8 @@ # Prepare training and test data. import unittest -from mmlspark.nn.ConditionalBallTree import ConditionalBallTree -from mmlsparktest.spark import * +from synapse.ml.nn.ConditionalBallTree import ConditionalBallTree +from synapsemltest.spark import * class NNSpec(unittest.TestCase): diff --git a/core/src/test/python/mmlsparktest/recommendation/__init__.py b/core/src/test/python/synapsemltest/recommendation/__init__.py similarity index 100% rename from core/src/test/python/mmlsparktest/recommendation/__init__.py rename to core/src/test/python/synapsemltest/recommendation/__init__.py diff --git a/core/src/test/python/mmlsparktest/recommendation/test_ranking.py b/core/src/test/python/synapsemltest/recommendation/test_ranking.py similarity index 92% rename from core/src/test/python/mmlsparktest/recommendation/test_ranking.py rename to core/src/test/python/synapsemltest/recommendation/test_ranking.py index 024ffbb885..7bd54e2edf 100644 --- a/core/src/test/python/mmlsparktest/recommendation/test_ranking.py +++ b/core/src/test/python/synapsemltest/recommendation/test_ranking.py @@ -4,12 +4,12 @@ # Prepare training and test data. 
import unittest -from mmlspark.recommendation import RankingAdapter -from mmlspark.recommendation import RankingEvaluator -from mmlspark.recommendation import RankingTrainValidationSplit -from mmlspark.recommendation import RecommendationIndexer -from mmlspark.recommendation import SAR -from mmlsparktest.spark import * +from synapse.ml.recommendation import RankingAdapter +from synapse.ml.recommendation import RankingEvaluator +from synapse.ml.recommendation import RankingTrainValidationSplit +from synapse.ml.recommendation import RecommendationIndexer +from synapse.ml.recommendation import SAR +from synapsemltest.spark import * from pyspark.ml import Pipeline from pyspark.ml.feature import StringIndexer from pyspark.ml.recommendation import ALS diff --git a/core/src/test/scala/com/microsoft/ml/spark/Secrets.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/Secrets.scala similarity index 98% rename from core/src/test/scala/com/microsoft/ml/spark/Secrets.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/Secrets.scala index 3e52fa3867..52be84202f 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/Secrets.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/Secrets.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark +package com.microsoft.azure.synapse.ml import java.io.IOException diff --git a/core/src/test/scala/com/microsoft/ml/spark/automl/VerifyFindBestModel.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/automl/VerifyFindBestModel.scala similarity index 93% rename from core/src/test/scala/com/microsoft/ml/spark/automl/VerifyFindBestModel.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/automl/VerifyFindBestModel.scala index 133e3542e9..0abe31cfdd 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/automl/VerifyFindBestModel.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/automl/VerifyFindBestModel.scala @@ -1,19 +1,19 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.automl +package com.microsoft.azure.synapse.ml.automl -import java.io.File - -import com.microsoft.ml.spark.core.metrics.MetricConstants -import com.microsoft.ml.spark.core.test.fuzzing.{EstimatorFuzzing, TestObject} +import com.microsoft.azure.synapse.ml.core.metrics.MetricConstants +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{EstimatorFuzzing, TestObject} import org.apache.spark.ml.Transformer import org.apache.spark.ml.util.MLReadable import org.apache.spark.sql.DataFrame import org.apache.spark.sql.types.{DoubleType, StringType, StructField, StructType} +import java.io.File + class VerifyFindBestModel extends EstimatorFuzzing[FindBestModel]{ - import com.microsoft.ml.spark.train.TrainClassifierTestUtilities._ + import com.microsoft.azure.synapse.ml.train.TrainClassifierTestUtilities._ val mockLabelColumn = "Label" diff --git a/core/src/test/scala/com/microsoft/ml/spark/automl/VerifyTuneHyperparameters.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/automl/VerifyTuneHyperparameters.scala similarity index 95% rename from 
core/src/test/scala/com/microsoft/ml/spark/automl/VerifyTuneHyperparameters.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/automl/VerifyTuneHyperparameters.scala index 063aa04947..54012e7bfe 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/automl/VerifyTuneHyperparameters.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/automl/VerifyTuneHyperparameters.scala @@ -1,12 +1,11 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.automl +package com.microsoft.azure.synapse.ml.automl -import com.microsoft.ml.spark.core.metrics.MetricConstants -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.test.benchmarks.{Benchmarks, DatasetUtils} -import com.microsoft.ml.spark.core.test.fuzzing.TestObject +import com.microsoft.azure.synapse.ml.core.metrics.MetricConstants +import com.microsoft.azure.synapse.ml.core.test.benchmarks.{Benchmarks, DatasetUtils} +import com.microsoft.azure.synapse.ml.core.test.fuzzing.TestObject import org.apache.spark.ml.Estimator import org.apache.spark.ml.classification._ import org.apache.spark.ml.param.Param @@ -17,7 +16,7 @@ import scala.collection.mutable.ListBuffer /** Tests to validate the functionality of Tune Hyperparameters module. 
*/ class VerifyTuneHyperparameters extends Benchmarks { - import com.microsoft.ml.spark.train.TrainClassifierTestUtilities._ + import com.microsoft.azure.synapse.ml.train.TrainClassifierTestUtilities._ lazy val moduleName = "tune-hyperparameters" diff --git a/core/src/test/scala/com/microsoft/ml/spark/codegen/TestGen.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/codegen/TestGen.scala similarity index 76% rename from core/src/test/scala/com/microsoft/ml/spark/codegen/TestGen.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/codegen/TestGen.scala index 4cfbf22b9c..6badac698c 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/codegen/TestGen.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/codegen/TestGen.scala @@ -1,15 +1,15 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.codegen +package com.microsoft.azure.synapse.ml.codegen -import java.io.File +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.fuzzing.PyTestFuzzing -import com.microsoft.ml.spark.codegen.CodegenConfigProtocol._ -import com.microsoft.ml.spark.core.env.FileUtilities._ -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.test.fuzzing.PyTestFuzzing -import com.microsoft.ml.spark.core.utils.JarLoadingUtils.instantiateServices +import java.io.File +import CodegenConfigProtocol._ +import com.microsoft.azure.synapse.ml.core.env.FileUtilities._ +import com.microsoft.azure.synapse.ml.core.utils.JarLoadingUtils.instantiateServices import org.apache.commons.io.FileUtils import spray.json._ @@ -30,7 +30,7 @@ object TestGen { } private def makeInitFiles(conf: CodegenConfig, packageFolder: String = ""): Unit = { - val dir = new File(new File(conf.pyTestDir, "mmlsparktest"), packageFolder) + val dir = new File(new File(conf.pyTestDir, 
"synapsemltest"), packageFolder) if (!dir.exists()){ dir.mkdirs() } @@ -46,20 +46,20 @@ object TestGen { if (!conf.pySrcDir.exists()) { conf.pySrcDir.mkdir() } - writeFile(join(conf.pyTestDir,"mmlsparktest", "spark.py"), + writeFile(join(conf.pyTestDir,"synapsemltest", "spark.py"), s""" |# Copyright (C) Microsoft Corporation. All rights reserved. |# Licensed under the MIT License. See LICENSE in project root for information. | |from pyspark.sql import SparkSession, SQLContext |import os - |import mmlspark - |from mmlspark.core import __spark_package_version__ + |import synapse.ml + |from synapse.ml.core import __spark_package_version__ | |spark = (SparkSession.builder | .master("local[*]") | .appName("PysparkTests") - | .config("spark.jars.packages", "com.microsoft.ml.spark:mmlspark:" + __spark_package_version__) + | .config("spark.jars.packages", "com.microsoft.azure:synapseml:" + __spark_package_version__) | .config("spark.jars.repositories", "https://mmlspark.azureedge.net/maven") | .config("spark.executor.heartbeatInterval", "60s") | .config("spark.sql.shuffle.partitions", 10) diff --git a/core/src/test/scala/com/microsoft/ml/spark/core/ml/HashingTFSpec.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/core/ml/HashingTFSpec.scala similarity index 94% rename from core/src/test/scala/com/microsoft/ml/spark/core/ml/HashingTFSpec.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/core/ml/HashingTFSpec.scala index 64775f3008..c822676840 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/core/ml/HashingTFSpec.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/core/ml/HashingTFSpec.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.core.ml +package com.microsoft.azure.synapse.ml.core.ml -import com.microsoft.ml.spark.core.schema.DatasetExtensions._ -import com.microsoft.ml.spark.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.schema.DatasetExtensions._ import org.apache.spark.ml.feature.{HashingTF, Tokenizer} import org.apache.spark.ml.linalg.SparseVector diff --git a/core/src/test/scala/com/microsoft/ml/spark/core/ml/IDFSpec.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/core/ml/IDFSpec.scala similarity index 96% rename from core/src/test/scala/com/microsoft/ml/spark/core/ml/IDFSpec.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/core/ml/IDFSpec.scala index cafb9abc2a..5b6a3c693b 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/core/ml/IDFSpec.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/core/ml/IDFSpec.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.core.ml +package com.microsoft.azure.synapse.ml.core.ml -import com.microsoft.ml.spark.core.schema.DatasetExtensions._ -import com.microsoft.ml.spark.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.schema.DatasetExtensions._ import org.apache.spark.ml.feature.{HashingTF, IDF, Tokenizer} import org.apache.spark.ml.linalg.{DenseVector, SparseVector} diff --git a/core/src/test/scala/com/microsoft/ml/spark/core/ml/NGramSpec.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/core/ml/NGramSpec.scala similarity index 96% rename from core/src/test/scala/com/microsoft/ml/spark/core/ml/NGramSpec.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/core/ml/NGramSpec.scala index c0f7abda13..98f9840f25 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/core/ml/NGramSpec.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/core/ml/NGramSpec.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.core.ml +package com.microsoft.azure.synapse.ml.core.ml -import com.microsoft.ml.spark.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.base.TestBase import org.apache.spark.ml.feature.{NGram, Tokenizer} import org.apache.spark.sql.DataFrame diff --git a/core/src/test/scala/com/microsoft/ml/spark/core/ml/OneHotEncoderSpec.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/core/ml/OneHotEncoderSpec.scala similarity index 95% rename from core/src/test/scala/com/microsoft/ml/spark/core/ml/OneHotEncoderSpec.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/core/ml/OneHotEncoderSpec.scala index a510557770..f9e3d7bcb3 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/core/ml/OneHotEncoderSpec.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/core/ml/OneHotEncoderSpec.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.core.ml +package com.microsoft.azure.synapse.ml.core.ml -import com.microsoft.ml.spark.core.schema.DatasetExtensions._ -import com.microsoft.ml.spark.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.schema.DatasetExtensions._ import org.apache.spark._ import org.apache.spark.ml.feature.OneHotEncoder import org.apache.spark.ml.linalg.SparseVector diff --git a/core/src/test/scala/com/microsoft/ml/spark/core/ml/Word2VecSpec.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/core/ml/Word2VecSpec.scala similarity index 95% rename from core/src/test/scala/com/microsoft/ml/spark/core/ml/Word2VecSpec.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/core/ml/Word2VecSpec.scala index 1888b74108..e3ac46ac8a 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/core/ml/Word2VecSpec.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/core/ml/Word2VecSpec.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.core.ml +package com.microsoft.azure.synapse.ml.core.ml -import com.microsoft.ml.spark.core.schema.DatasetExtensions._ -import com.microsoft.ml.spark.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.schema.DatasetExtensions._ import org.apache.spark.ml.feature.Word2Vec import org.apache.spark.ml.linalg.DenseVector import org.apache.spark.sql.DataFrame diff --git a/core/src/test/scala/com/microsoft/ml/spark/core/schema/SparkBindingsTest.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/core/schema/SparkBindingsTest.scala similarity index 89% rename from core/src/test/scala/com/microsoft/ml/spark/core/schema/SparkBindingsTest.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/core/schema/SparkBindingsTest.scala index 437cfa52e4..936b2ef333 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/core/schema/SparkBindingsTest.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/core/schema/SparkBindingsTest.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.core.schema +package com.microsoft.azure.synapse.ml.core.schema -import com.microsoft.ml.spark.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.base.TestBase import org.apache.spark.injections.UDFUtils import org.apache.spark.sql.Row import org.apache.spark.sql.functions.{col, udf} diff --git a/core/src/test/scala/com/microsoft/ml/spark/core/schema/TestCategoricals.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/core/schema/TestCategoricals.scala similarity index 95% rename from core/src/test/scala/com/microsoft/ml/spark/core/schema/TestCategoricals.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/core/schema/TestCategoricals.scala index fe207eeec8..1131b9d6a6 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/core/schema/TestCategoricals.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/core/schema/TestCategoricals.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.core.schema +package com.microsoft.azure.synapse.ml.core.schema -import com.microsoft.ml.spark.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.base.TestBase import org.apache.spark.sql.types._ import scala.reflect.{ClassTag, classTag} diff --git a/core/src/test/scala/com/microsoft/ml/spark/core/schema/VerifyFastVectorAssembler.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/core/schema/VerifyFastVectorAssembler.scala similarity index 97% rename from core/src/test/scala/com/microsoft/ml/spark/core/schema/VerifyFastVectorAssembler.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/core/schema/VerifyFastVectorAssembler.scala index bdea66ab28..2577ffd566 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/core/schema/VerifyFastVectorAssembler.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/core/schema/VerifyFastVectorAssembler.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.core.schema +package com.microsoft.azure.synapse.ml.core.schema -import com.microsoft.ml.spark.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.base.TestBase import org.apache.spark.SparkException import org.apache.spark.ml.feature.{FastVectorAssembler, StringIndexer} import org.apache.spark.sql.DataFrame diff --git a/core/src/test/scala/com/microsoft/ml/spark/core/schema/VerifySparkSchema.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/core/schema/VerifySparkSchema.scala similarity index 95% rename from core/src/test/scala/com/microsoft/ml/spark/core/schema/VerifySparkSchema.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/core/schema/VerifySparkSchema.scala index 10d57f3342..b36dfc1df8 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/core/schema/VerifySparkSchema.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/core/schema/VerifySparkSchema.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.core.schema +package com.microsoft.azure.synapse.ml.core.schema -import com.microsoft.ml.spark.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.base.TestBase /** Verifies the spark schema functions. 
*/ class VerifySparkSchema extends TestBase { diff --git a/core/src/test/scala/com/microsoft/ml/spark/core/serialize/ValidateComplexParamSerializer.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/core/serialize/ValidateComplexParamSerializer.scala similarity index 97% rename from core/src/test/scala/com/microsoft/ml/spark/core/serialize/ValidateComplexParamSerializer.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/core/serialize/ValidateComplexParamSerializer.scala index e02c5e75c2..cc7b4ac340 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/core/serialize/ValidateComplexParamSerializer.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/core/serialize/ValidateComplexParamSerializer.scala @@ -1,11 +1,11 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.core.serialize +package com.microsoft.azure.synapse.ml.core.serialize -import java.io.File +import com.microsoft.azure.synapse.ml.core.test.base.TestBase -import com.microsoft.ml.spark.core.test.base.TestBase +import java.io.File import org.apache.commons.io.FileUtils import org.apache.spark.ml.{ComplexParamsReadable, ComplexParamsWritable, Transformer} import org.apache.spark.ml.param.{ByteArrayParam, Param, ParamMap, Params} diff --git a/core/src/test/scala/com/microsoft/ml/spark/core/test/base/SparkSessionFactory.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/core/test/base/SparkSessionFactory.scala similarity index 97% rename from core/src/test/scala/com/microsoft/ml/spark/core/test/base/SparkSessionFactory.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/core/test/base/SparkSessionFactory.scala index 8658b3b600..e659e914e5 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/core/test/base/SparkSessionFactory.scala +++ 
b/core/src/test/scala/com/microsoft/azure/synapse/ml/core/test/base/SparkSessionFactory.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.core.test.base +package com.microsoft.azure.synapse.ml.core.test.base import java.io.File diff --git a/core/src/test/scala/com/microsoft/ml/spark/core/test/base/TestBase.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/core/test/base/TestBase.scala similarity index 98% rename from core/src/test/scala/com/microsoft/ml/spark/core/test/base/TestBase.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/core/test/base/TestBase.scala index 21efc75d3f..f97fa2fa15 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/core/test/base/TestBase.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/core/test/base/TestBase.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.core.test.base +package com.microsoft.azure.synapse.ml.core.test.base import java.nio.file.Files @@ -26,7 +26,7 @@ import scala.reflect.ClassTag object TestBase { // Run only on Linux - object LinuxOnly extends Tag("com.microsoft.ml.spark.test.tags.linuxonly") + object LinuxOnly extends Tag("com.microsoft.azure.synapse.ml.test.tags.linuxonly") def sc: SparkContext = spark.sparkContext def ssc: StreamingContext = new StreamingContext(sc, SparkSeconds(1)) diff --git a/core/src/test/scala/com/microsoft/ml/spark/core/test/benchmarks/Benchmarks.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/core/test/benchmarks/Benchmarks.scala similarity index 94% rename from core/src/test/scala/com/microsoft/ml/spark/core/test/benchmarks/Benchmarks.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/core/test/benchmarks/Benchmarks.scala index 3f51ffaa9b..e37da57536 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/core/test/benchmarks/Benchmarks.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/core/test/benchmarks/Benchmarks.scala @@ -1,16 +1,15 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.core.test.benchmarks +package com.microsoft.azure.synapse.ml.core.test.benchmarks -import java.io.{File, PrintWriter} - -import com.microsoft.ml.spark.build.BuildInfo -import com.microsoft.ml.spark.core.env.{FileUtilities, StreamUtilities} -import com.microsoft.ml.spark.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.env.{FileUtilities, StreamUtilities} +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.build.BuildInfo import org.apache.spark.sql.Row import org.scalatest.Assertion +import java.io.{File, PrintWriter} import scala.collection.mutable.ListBuffer case class Benchmark(name: String, diff --git a/core/src/test/scala/com/microsoft/ml/spark/core/test/fuzzing/Fuzzing.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/core/test/fuzzing/Fuzzing.scala similarity index 94% rename from core/src/test/scala/com/microsoft/ml/spark/core/test/fuzzing/Fuzzing.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/core/test/fuzzing/Fuzzing.scala index 7c6540c886..1771734466 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/core/test/fuzzing/Fuzzing.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/core/test/fuzzing/Fuzzing.scala @@ -1,21 +1,21 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.core.test.fuzzing +package com.microsoft.azure.synapse.ml.core.test.fuzzing + +import com.microsoft.azure.synapse.ml.codegen.CodegenConfig +import com.microsoft.azure.synapse.ml.core.env.FileUtilities +import com.microsoft.azure.synapse.ml.core.test.base.TestBase import java.io.File import java.nio.charset.StandardCharsets import java.nio.file.Files - -import com.microsoft.ml.spark.codegen.CodegenConfig -import com.microsoft.ml.spark.core.env.FileUtilities import org.apache.commons.io.FileUtils import org.apache.spark.ml._ import org.apache.spark.ml.param.{DataFrameEquality, ExternalPythonWrappableParam, ParamPair} import org.apache.spark.ml.util.{MLReadable, MLWritable} import org.apache.spark.sql.DataFrame -import com.microsoft.ml.spark.codegen.GenerationUtils._ -import com.microsoft.ml.spark.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.codegen.GenerationUtils._ /** * Class for holding test information, call by name to avoid uneccesary computations in test generations @@ -151,10 +151,10 @@ trait PyTestFuzzing[S <: PipelineStage] extends TestBase with DataFrameEquality val stage = pyTestObjects().head.stage val stageName = stage.getClass.getName.split(".".toCharArray).last val importPath = stage.getClass.getName.split(".".toCharArray).dropRight(1) - val importPathString = importPath.mkString(".").replaceAllLiterally("com.microsoft.ml.spark", "mmlspark") + val importPathString = importPath.mkString(".").replaceAllLiterally("com.microsoft.azure.synapse.ml", "synapse.ml") val testClass = s"""import unittest - |from mmlsparktest.spark import * + |from synapsemltest.spark import * |from $importPathString import $stageName |from os.path import join |import json @@ -165,7 +165,7 @@ trait PyTestFuzzing[S <: PipelineStage] extends TestBase with DataFrameEquality |class $testClassName(unittest.TestCase): | def assert_correspondence(self, model, name, num): | model.write().overwrite().save(join(test_data_dir, name)) - | 
sc._jvm.com.microsoft.ml.spark.core.utils.ModelEquality.assertEqual( + | sc._jvm.com.microsoft.azure.synapse.ml.core.utils.ModelEquality.assertEqual( | "${stage.getClass.getName}", | str(join(test_data_dir, name)), | str(join(test_data_dir, "model-{}.model".format(num))) @@ -179,7 +179,7 @@ trait PyTestFuzzing[S <: PipelineStage] extends TestBase with DataFrameEquality |""".stripMargin val testFolders = importPath.mkString(".") - .replaceAllLiterally("com.microsoft.ml.spark", "mmlsparktest").split(".".toCharArray) + .replaceAllLiterally("com.microsoft.azure.synapse.ml", "synapsemltest").split(".".toCharArray) val testDir = FileUtilities.join((Seq(conf.pyTestDir.toString) ++ testFolders.toSeq): _*) testDir.mkdirs() Files.write( diff --git a/core/src/test/scala/com/microsoft/ml/spark/core/utils/VerifyClusterUtil.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/core/utils/VerifyClusterUtil.scala similarity index 84% rename from core/src/test/scala/com/microsoft/ml/spark/core/utils/VerifyClusterUtil.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/core/utils/VerifyClusterUtil.scala index dd3c11ab62..303ad7d6e6 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/core/utils/VerifyClusterUtil.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/core/utils/VerifyClusterUtil.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.core.utils +package com.microsoft.azure.synapse.ml.core.utils -import com.microsoft.ml.spark.core.test.base.{SparkSessionFactory, TestBase} +import com.microsoft.azure.synapse.ml.core.test.base.{SparkSessionFactory, TestBase} import org.slf4j.LoggerFactory class VerifyClusterUtil extends TestBase { diff --git a/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/HasExplainTargetSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/HasExplainTargetSuite.scala similarity index 80% rename from core/src/test/scala/com/microsoft/ml/spark/explainers/split1/HasExplainTargetSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/HasExplainTargetSuite.scala index 6f23a386e1..1c9241c3d2 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/HasExplainTargetSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/HasExplainTargetSuite.scala @@ -1,10 +1,11 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.explainers.split1 +package com.microsoft.azure.synapse.ml.explainers.split1 -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.explainers.LocalExplainer +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.explainers.LocalExplainer +import com.microsoft.azure.synapse.ml.explainers.LocalExplainer.LIME import org.apache.spark.ml.linalg.{Vector, Vectors} class HasExplainTargetSuite extends TestBase { @@ -16,7 +17,7 @@ class HasExplainTargetSuite extends TestBase { ) toDF("label1", "label2", "label3", "targets") // array of Int - val target1 = LocalExplainer.LIME.vector + val target1 = LIME.vector .setTargetCol("label1") .extractTarget(df.schema, "targets") diff --git a/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/KernelSHAPSamplerSupportSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/KernelSHAPSamplerSupportSuite.scala similarity index 93% rename from core/src/test/scala/com/microsoft/ml/spark/explainers/split1/KernelSHAPSamplerSupportSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/KernelSHAPSamplerSupportSuite.scala index 56ea081754..0d8fabe2fb 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/KernelSHAPSamplerSupportSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/KernelSHAPSamplerSupportSuite.scala @@ -1,11 +1,11 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.explainers.split1 +package com.microsoft.azure.synapse.ml.explainers.split1 import breeze.linalg.sum -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.explainers.KernelSHAPSamplerSupport +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.explainers.KernelSHAPSamplerSupport import org.scalatest.Matchers._ class KernelSHAPSamplerSupportSuite extends TestBase { diff --git a/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/LassoRegressionSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/LassoRegressionSuite.scala similarity index 96% rename from core/src/test/scala/com/microsoft/ml/spark/explainers/split1/LassoRegressionSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/LassoRegressionSuite.scala index f1a2118685..eda15bf18e 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/LassoRegressionSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/LassoRegressionSuite.scala @@ -1,11 +1,11 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.explainers.split1 +package com.microsoft.azure.synapse.ml.explainers.split1 import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV} -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.explainers.LassoRegression +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.explainers.LassoRegression import org.scalactic.{Equality, TolerantNumerics} class LassoRegressionSuite extends TestBase { diff --git a/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/LeastSquaresRegressionSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/LeastSquaresRegressionSuite.scala similarity index 96% rename from core/src/test/scala/com/microsoft/ml/spark/explainers/split1/LeastSquaresRegressionSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/LeastSquaresRegressionSuite.scala index 5bf13ca533..7e5b48500b 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/LeastSquaresRegressionSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/LeastSquaresRegressionSuite.scala @@ -1,11 +1,11 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.explainers.split1 +package com.microsoft.azure.synapse.ml.explainers.split1 import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV} -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.explainers.LeastSquaresRegression +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.explainers.LeastSquaresRegression import org.scalactic.{Equality, TolerantNumerics} class LeastSquaresRegressionSuite extends TestBase { diff --git a/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/SamplerSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/SamplerSuite.scala similarity index 94% rename from core/src/test/scala/com/microsoft/ml/spark/explainers/split1/SamplerSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/SamplerSuite.scala index 1716380a2f..799f192517 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/SamplerSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/SamplerSuite.scala @@ -1,24 +1,24 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.explainers.split1 +package com.microsoft.azure.synapse.ml.explainers.split1 import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV} import breeze.stats.distributions.RandBasis import breeze.stats.{mean, stddev} -import com.microsoft.ml.spark.core.utils.BreezeUtils._ -import com.microsoft.ml.spark.explainers._ -import com.microsoft.ml.spark.io.image.ImageUtils -import com.microsoft.ml.spark.lime.{Superpixel, SuperpixelData} +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.utils.BreezeUtils._ +import com.microsoft.azure.synapse.ml.explainers._ +import com.microsoft.azure.synapse.ml.io.image.ImageUtils +import com.microsoft.azure.synapse.ml.lime.{Superpixel, SuperpixelData} import org.apache.spark.ml.linalg.Vectors import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema import org.apache.spark.sql.types._ import org.scalactic.{Equality, TolerantNumerics} import org.scalatest.Matchers._ -import java.nio.file.{Files, Paths} -import com.microsoft.ml.spark.core.test.base.TestBase +import java.nio.file.{Files, Paths} import javax.imageio.ImageIO class SamplerSuite extends TestBase { @@ -157,8 +157,8 @@ class SamplerSuite extends TestBase { // Uncomment the following lines lines to view the randomly masked image. // Change the RandBasis seed to see a different mask image. - // import com.microsoft.ml.spark.io.image.ImageUtils - // import com.microsoft.ml.spark.lime.Superpixel + // import com.microsoft.azure.synapse.ml.io.image.ImageUtils + // import com.microsoft.azure.synapse.ml.lime.Superpixel // val maskedImage = ImageUtils.toBufferedImage(data, width, height, nChannels) // Superpixel.displayImage(maskedImage) // Thread.sleep(100000) @@ -277,8 +277,8 @@ class SamplerSuite extends TestBase { // Uncomment the following lines lines to view the randomly masked image. // Change the RandBasis seed to see a different mask image. 
- // import com.microsoft.ml.spark.io.image.ImageUtils - // import com.microsoft.ml.spark.lime.Superpixel + // import com.microsoft.azure.synapse.ml.io.image.ImageUtils + // import com.microsoft.azure.synapse.ml.lime.Superpixel // val maskedImage = ImageUtils.toBufferedImage(data, width, height, nChannels) // Superpixel.displayImage(maskedImage) // Thread.sleep(100000) diff --git a/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/TabularLIMEExplainerSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/TabularLIMEExplainerSuite.scala similarity index 93% rename from core/src/test/scala/com/microsoft/ml/spark/explainers/split1/TabularLIMEExplainerSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/TabularLIMEExplainerSuite.scala index 76b2181399..fbf9e48d19 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/TabularLIMEExplainerSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/TabularLIMEExplainerSuite.scala @@ -1,13 +1,14 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.explainers.split1 +package com.microsoft.azure.synapse.ml.explainers.split1 import breeze.linalg.{DenseVector => BDV} -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} -import com.microsoft.ml.spark.core.utils.BreezeUtils._ -import com.microsoft.ml.spark.explainers.{LocalExplainer, TabularLIME} +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.core.utils.BreezeUtils._ +import com.microsoft.azure.synapse.ml.explainers.LocalExplainer.LIME +import com.microsoft.azure.synapse.ml.explainers.{LocalExplainer, TabularLIME} import org.apache.spark.ml.classification.{LogisticRegression, LogisticRegressionModel} import org.apache.spark.ml.feature.{OneHotEncoder, StringIndexer, VectorAssembler} import org.apache.spark.ml.linalg.Vector @@ -46,7 +47,7 @@ class TabularLIMEExplainerSuite extends TestBase Tuple1(0.0) ) toDF "col1" - val lime: TabularLIME = LocalExplainer.LIME.tabular + val lime: TabularLIME = LIME.tabular .setInputCols(Array("col1")) .setOutputCol("weights") .setBackgroundData(data) diff --git a/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/TabularSHAPExplainerSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/TabularSHAPExplainerSuite.scala similarity index 87% rename from core/src/test/scala/com/microsoft/ml/spark/explainers/split1/TabularSHAPExplainerSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/TabularSHAPExplainerSuite.scala index 00ae5d11ba..e1fc630f2e 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/TabularSHAPExplainerSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/TabularSHAPExplainerSuite.scala @@ -1,12 +1,13 @@ // Copyright (C) Microsoft 
Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.explainers.split1 +package com.microsoft.azure.synapse.ml.explainers.split1 -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} -import com.microsoft.ml.spark.core.utils.BreezeUtils._ -import com.microsoft.ml.spark.explainers.{LocalExplainer, TabularSHAP} +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.core.utils.BreezeUtils._ +import com.microsoft.azure.synapse.ml.explainers.LocalExplainer.KernelSHAP +import com.microsoft.azure.synapse.ml.explainers.TabularSHAP import org.apache.spark.ml.classification.{LogisticRegression, LogisticRegressionModel} import org.apache.spark.ml.feature.{OneHotEncoder, StringIndexer, VectorAssembler} import org.apache.spark.ml.linalg.Vector @@ -41,7 +42,7 @@ class TabularSHAPExplainerSuite extends TestBase val model: PipelineModel = pipeline.fit(data) - val kernelShap: TabularSHAP = LocalExplainer.KernelSHAP.tabular + val kernelShap: TabularSHAP = KernelSHAP.tabular .setInputCols(Array("col1", "col2", "col3")) .setOutputCol("shapValues") .setBackgroundData(data) diff --git a/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/TextExplainersSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/TextExplainersSuite.scala similarity index 88% rename from core/src/test/scala/com/microsoft/ml/spark/explainers/split1/TextExplainersSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/TextExplainersSuite.scala index 4906b7ec54..1f9bec49ee 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/TextExplainersSuite.scala +++ 
b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/TextExplainersSuite.scala @@ -1,12 +1,13 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.explainers.split1 +package com.microsoft.azure.synapse.ml.explainers.split1 -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} -import com.microsoft.ml.spark.core.utils.BreezeUtils._ -import com.microsoft.ml.spark.explainers.{LocalExplainer, TextLIME, TextSHAP} +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.core.utils.BreezeUtils._ +import com.microsoft.azure.synapse.ml.explainers.LocalExplainer.{KernelSHAP, LIME} +import com.microsoft.azure.synapse.ml.explainers.{TextLIME, TextSHAP} import org.apache.spark.ml.classification.LogisticRegression import org.apache.spark.ml.feature.{HashingTF, Tokenizer} import org.apache.spark.ml.linalg.Vector @@ -43,7 +44,7 @@ abstract class TextExplainersSuite extends TestBase { textClassifier.fit(df) } - val shap: TextSHAP = LocalExplainer.KernelSHAP.text + val shap: TextSHAP = KernelSHAP.text .setModel(model) .setInputCol("text") .setTargetCol("prob") @@ -52,7 +53,7 @@ abstract class TextExplainersSuite extends TestBase { .setTokensCol("tokens") .setNumSamples(1000) - val lime: TextLIME = LocalExplainer.LIME.text + val lime: TextLIME = LIME.text .setModel(model) .setInputCol("text") .setTargetCol("prob") diff --git a/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/VectorLIMEExplainerSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/VectorLIMEExplainerSuite.scala similarity index 82% rename from 
core/src/test/scala/com/microsoft/ml/spark/explainers/split1/VectorLIMEExplainerSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/VectorLIMEExplainerSuite.scala index a47655ec4c..d40dfc9c09 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/VectorLIMEExplainerSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/VectorLIMEExplainerSuite.scala @@ -1,14 +1,15 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.explainers.split1 +package com.microsoft.azure.synapse.ml.explainers.split1 import breeze.linalg.{*, DenseMatrix => BDM, DenseVector => BDV} import breeze.stats.distributions.Rand -import com.microsoft.ml.spark.core.test.base.{Flaky, TestBase} -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} -import com.microsoft.ml.spark.core.utils.BreezeUtils._ -import com.microsoft.ml.spark.explainers.{LocalExplainer, VectorLIME} +import com.microsoft.azure.synapse.ml.core.test.base.{Flaky, TestBase} +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.core.utils.BreezeUtils._ +import com.microsoft.azure.synapse.ml.explainers.LocalExplainer.LIME +import com.microsoft.azure.synapse.ml.explainers.VectorLIME import org.apache.spark.ml.linalg.{Vector, Vectors} import org.apache.spark.ml.regression.{LinearRegression, LinearRegressionModel} import org.apache.spark.ml.util.MLReadable @@ -43,7 +44,7 @@ class VectorLIMEExplainerSuite extends TestBase with Flaky val model: LinearRegressionModel = new LinearRegression().fit(df) - val lime: VectorLIME = LocalExplainer.LIME.vector + val lime: VectorLIME = LIME.vector .setModel(model) .setBackgroundData(df) .setInputCol("features") diff --git 
a/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/VectorSHAPExplainerSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/VectorSHAPExplainerSuite.scala similarity index 90% rename from core/src/test/scala/com/microsoft/ml/spark/explainers/split1/VectorSHAPExplainerSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/VectorSHAPExplainerSuite.scala index 9c1bc6a066..4662c3ded0 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/explainers/split1/VectorSHAPExplainerSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split1/VectorSHAPExplainerSuite.scala @@ -1,15 +1,16 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.explainers.split1 +package com.microsoft.azure.synapse.ml.explainers.split1 import breeze.linalg.{*, DenseMatrix => BDM, DenseVector => BDV} import breeze.stats.distributions.RandBasis -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} -import com.microsoft.ml.spark.core.utils.BreezeUtils._ -import com.microsoft.ml.spark.explainers.{LocalExplainer, VectorSHAP} -import com.microsoft.ml.spark.stages.UDFTransformer +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.core.utils.BreezeUtils._ +import com.microsoft.azure.synapse.ml.explainers.LocalExplainer.KernelSHAP +import com.microsoft.azure.synapse.ml.explainers.{LocalExplainer, VectorSHAP} +import com.microsoft.azure.synapse.ml.stages.UDFTransformer import org.apache.spark.injections.UDFUtils import org.apache.spark.ml.classification.{LogisticRegression, LogisticRegressionModel} import org.apache.spark.ml.feature.VectorAssembler @@ -51,7 +52,7 @@ 
class VectorSHAPExplainerSuite extends TestBase Tuple1(Vectors.dense(1d, 1d, 1d, 1d, 1d)) ) toDF "features" - val kernelShap: VectorSHAP = LocalExplainer.KernelSHAP.vector + val kernelShap: VectorSHAP = KernelSHAP.vector .setInputCol("features") .setOutputCol("shapValues") .setBackgroundData(data) diff --git a/core/src/test/scala/com/microsoft/ml/spark/featurize/VerifyCleanMissingData.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/featurize/VerifyCleanMissingData.scala similarity index 97% rename from core/src/test/scala/com/microsoft/ml/spark/featurize/VerifyCleanMissingData.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/featurize/VerifyCleanMissingData.scala index 1ba983945e..7063853462 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/featurize/VerifyCleanMissingData.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/featurize/VerifyCleanMissingData.scala @@ -1,15 +1,15 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.featurize +package com.microsoft.azure.synapse.ml.featurize -import java.lang.{Boolean => JBoolean, Double => JDouble, Integer => JInt} - -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.test.fuzzing.{EstimatorFuzzing, TestObject} +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{EstimatorFuzzing, TestObject} import org.apache.spark.ml.util.MLReadable import org.apache.spark.sql.DataFrame +import java.lang.{Boolean => JBoolean, Double => JDouble, Integer => JInt} + /** Tests to validate the functionality of Clean Missing Data estimator. 
*/ class VerifyCleanMissingData extends TestBase with EstimatorFuzzing[CleanMissingData] { diff --git a/core/src/test/scala/com/microsoft/ml/spark/featurize/VerifyCountSelector.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/featurize/VerifyCountSelector.scala similarity index 88% rename from core/src/test/scala/com/microsoft/ml/spark/featurize/VerifyCountSelector.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/featurize/VerifyCountSelector.scala index d8054d8166..7384c7a673 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/featurize/VerifyCountSelector.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/featurize/VerifyCountSelector.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.featurize +package com.microsoft.azure.synapse.ml.featurize -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.test.fuzzing.{EstimatorFuzzing, TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{EstimatorFuzzing, TestObject, TransformerFuzzing} import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vectors} import org.apache.spark.ml.util.MLReadable import org.apache.spark.sql._ diff --git a/core/src/test/scala/com/microsoft/ml/spark/featurize/VerifyDataConversion.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/featurize/VerifyDataConversion.scala similarity index 98% rename from core/src/test/scala/com/microsoft/ml/spark/featurize/VerifyDataConversion.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/featurize/VerifyDataConversion.scala index bf44973b20..b45d95a8f1 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/featurize/VerifyDataConversion.scala +++ 
b/core/src/test/scala/com/microsoft/azure/synapse/ml/featurize/VerifyDataConversion.scala @@ -1,12 +1,12 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.featurize +package com.microsoft.azure.synapse.ml.featurize -import java.sql.Timestamp +import com.microsoft.azure.synapse.ml.core.schema.SparkSchema +import com.microsoft.azure.synapse.ml.core.test.base.TestBase -import com.microsoft.ml.spark.core.schema.SparkSchema -import com.microsoft.ml.spark.core.test.base.TestBase +import java.sql.Timestamp import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ diff --git a/core/src/test/scala/com/microsoft/ml/spark/featurize/VerifyFeaturize.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/featurize/VerifyFeaturize.scala similarity index 98% rename from core/src/test/scala/com/microsoft/ml/spark/featurize/VerifyFeaturize.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/featurize/VerifyFeaturize.scala index 72168f2bad..f7f76cb949 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/featurize/VerifyFeaturize.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/featurize/VerifyFeaturize.scala @@ -1,15 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.featurize +package com.microsoft.azure.synapse.ml.featurize -import java.io.File -import java.nio.file.Files -import java.sql.{Date, Timestamp} -import java.util.GregorianCalendar - -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.test.fuzzing.{EstimatorFuzzing, TestObject} +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{EstimatorFuzzing, TestObject} import org.apache.commons.io.FileUtils import org.apache.spark.ml.PipelineModel import org.apache.spark.ml.feature.StringIndexer @@ -17,6 +12,11 @@ import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, Vectors} import org.apache.spark.ml.util.MLReadable import org.apache.spark.sql._ +import java.io.File +import java.nio.file.Files +import java.sql.{Date, Timestamp} +import java.util.GregorianCalendar + class VerifyFeaturize extends TestBase with EstimatorFuzzing[Featurize] { val mockLabelColumn = "Label" diff --git a/core/src/test/scala/com/microsoft/ml/spark/featurize/VerifyValueIndexer.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/featurize/VerifyValueIndexer.scala similarity index 94% rename from core/src/test/scala/com/microsoft/ml/spark/featurize/VerifyValueIndexer.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/featurize/VerifyValueIndexer.scala index 931e35960d..62a7368510 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/featurize/VerifyValueIndexer.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/featurize/VerifyValueIndexer.scala @@ -1,11 +1,11 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.featurize +package com.microsoft.azure.synapse.ml.featurize -import com.microsoft.ml.spark.core.schema.{CategoricalColumnInfo, CategoricalUtilities, SparkSchema} -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.test.fuzzing.{EstimatorFuzzing, TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.core.schema.{CategoricalColumnInfo, CategoricalUtilities, SparkSchema} +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{EstimatorFuzzing, TestObject, TransformerFuzzing} import org.apache.spark.ml.util.MLReadable import org.apache.spark.sql.Row diff --git a/core/src/test/scala/com/microsoft/ml/spark/featurize/text/MultiNGramSpec.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/featurize/text/MultiNGramSpec.scala similarity index 88% rename from core/src/test/scala/com/microsoft/ml/spark/featurize/text/MultiNGramSpec.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/featurize/text/MultiNGramSpec.scala index 0586d7c818..aeddd2e644 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/featurize/text/MultiNGramSpec.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/featurize/text/MultiNGramSpec.scala @@ -1,14 +1,12 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.featurize.text +package com.microsoft.azure.synapse.ml.featurize.text -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} import org.apache.spark.ml.feature.Tokenizer import org.apache.spark.ml.util.MLReadable -import scala.collection.mutable - class MultiNGramSpec extends TransformerFuzzing[MultiNGram] { lazy val dfRaw = spark diff --git a/core/src/test/scala/com/microsoft/ml/spark/featurize/text/PageSplitterSpec.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/featurize/text/PageSplitterSpec.scala similarity index 90% rename from core/src/test/scala/com/microsoft/ml/spark/featurize/text/PageSplitterSpec.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/featurize/text/PageSplitterSpec.scala index 48d271ce57..9d7cf8b800 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/featurize/text/PageSplitterSpec.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/featurize/text/PageSplitterSpec.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.featurize.text +package com.microsoft.azure.synapse.ml.featurize.text -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} import org.apache.spark.ml.util.MLReadable class PageSplitterSpec extends TransformerFuzzing[PageSplitter] { diff --git a/core/src/test/scala/com/microsoft/ml/spark/featurize/text/TextFeaturizerSpec.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/featurize/text/TextFeaturizerSpec.scala similarity index 93% rename from core/src/test/scala/com/microsoft/ml/spark/featurize/text/TextFeaturizerSpec.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/featurize/text/TextFeaturizerSpec.scala index fb61bdd05e..1494c5d813 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/featurize/text/TextFeaturizerSpec.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/featurize/text/TextFeaturizerSpec.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.featurize.text +package com.microsoft.azure.synapse.ml.featurize.text -import com.microsoft.ml.spark.core.schema.DatasetExtensions._ -import com.microsoft.ml.spark.core.test.fuzzing.{EstimatorFuzzing, TestObject} +import com.microsoft.azure.synapse.ml.core.schema.DatasetExtensions._ +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{EstimatorFuzzing, TestObject} import org.apache.spark.ml.PipelineModel import org.apache.spark.ml.feature.{NGram, Tokenizer} import org.apache.spark.ml.util.MLReadable diff --git a/core/src/test/scala/com/microsoft/ml/spark/flaky/PartitionConsolidatorSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/flaky/PartitionConsolidatorSuite.scala similarity index 89% rename from core/src/test/scala/com/microsoft/ml/spark/flaky/PartitionConsolidatorSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/flaky/PartitionConsolidatorSuite.scala index 9c014e715a..acbc5e3e12 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/flaky/PartitionConsolidatorSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/flaky/PartitionConsolidatorSuite.scala @@ -1,11 +1,11 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.flaky +package com.microsoft.azure.synapse.ml.flaky -import com.microsoft.ml.spark.core.test.base.{TestBase, TimeLimitedFlaky} -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} -import com.microsoft.ml.spark.stages.PartitionConsolidator +import com.microsoft.azure.synapse.ml.core.test.base.{TestBase, TimeLimitedFlaky} +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.stages.PartitionConsolidator import org.apache.spark.ml.util.MLReadable import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.types.{DoubleType, StructType} diff --git a/core/src/test/scala/com/microsoft/ml/spark/image/ImageTestUtils.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/image/ImageTestUtils.scala similarity index 93% rename from core/src/test/scala/com/microsoft/ml/spark/image/ImageTestUtils.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/image/ImageTestUtils.scala index 63dbea6257..c93619ab5e 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/image/ImageTestUtils.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/image/ImageTestUtils.scala @@ -1,17 +1,17 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.image +package com.microsoft.azure.synapse.ml.image + +import com.microsoft.azure.synapse.ml.core.env.FileUtilities +import com.microsoft.azure.synapse.ml.core.test.base.TestBase import java.io.File import java.net.URL - -import com.microsoft.ml.spark.build.BuildInfo -import com.microsoft.ml.spark.core.env.FileUtilities -import com.microsoft.ml.spark.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.build.BuildInfo import org.apache.spark.ml.linalg.DenseVector import org.apache.spark.sql.{DataFrame, SparkSession} -import com.microsoft.ml.spark.io.IOImplicits.dfrToDfre +import com.microsoft.azure.synapse.ml.io.IOImplicits.dfrToDfre import org.apache.commons.io.FileUtils import org.apache.spark.sql.functions.col diff --git a/core/src/test/scala/com/microsoft/ml/spark/io/split1/BinaryFileReaderSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/io/split1/BinaryFileReaderSuite.scala similarity index 90% rename from core/src/test/scala/com/microsoft/ml/spark/io/split1/BinaryFileReaderSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/io/split1/BinaryFileReaderSuite.scala index f64c08c10b..df4524cf82 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/io/split1/BinaryFileReaderSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/io/split1/BinaryFileReaderSuite.scala @@ -1,22 +1,21 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.io.split1 +package com.microsoft.azure.synapse.ml.io.split1 + +import com.microsoft.azure.synapse.ml.core.schema.BinaryFileSchema +import com.microsoft.azure.synapse.ml.core.test.base.TestBase import java.io.{File, FileOutputStream} import java.net.URI - -import com.microsoft.ml.spark.Binary.implicits._ -import com.microsoft.ml.spark.BinaryFileReader -import com.microsoft.ml.spark.build.BuildInfo -import com.microsoft.ml.spark.core.env.FileUtilities -import com.microsoft.ml.spark.core.env.FileUtilities.zipFolder -import com.microsoft.ml.spark.core.schema.BinaryFileSchema -import com.microsoft.ml.spark.core.schema.BinaryFileSchema.isBinaryFile -import com.microsoft.ml.spark.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.io.binary.Binary.implicits._ +import com.microsoft.azure.synapse.ml.build.BuildInfo +import com.microsoft.azure.synapse.ml.core.env.FileUtilities.zipFolder +import BinaryFileSchema.isBinaryFile +import com.microsoft.azure.synapse.ml.core.env.FileUtilities +import com.microsoft.azure.synapse.ml.io.binary.{BinaryFileFormat, BinaryFileReader} import org.apache.commons.io.{FileUtils, IOUtils} import org.apache.hadoop.fs.Path -import org.apache.spark.binary.BinaryFileFormat import org.apache.spark.injections.UDFUtils import org.apache.spark.ml.param.DataFrameEquality import org.apache.spark.sql.functions.{col, udf} diff --git a/core/src/test/scala/com/microsoft/ml/spark/io/split1/HTTPTransformerSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/io/split1/HTTPTransformerSuite.scala similarity index 90% rename from core/src/test/scala/com/microsoft/ml/spark/io/split1/HTTPTransformerSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/io/split1/HTTPTransformerSuite.scala index f98cd70ba6..6c30e3538c 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/io/split1/HTTPTransformerSuite.scala +++ 
b/core/src/test/scala/com/microsoft/azure/synapse/ml/io/split1/HTTPTransformerSuite.scala @@ -1,21 +1,21 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.io.split1 +package com.microsoft.azure.synapse.ml.io.split1 -import java.net.{InetSocketAddress, ServerSocket} -import java.util.concurrent.Executors - -import com.microsoft.ml.spark.core.env.StreamUtilities -import com.microsoft.ml.spark.core.env.StreamUtilities.using -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} -import com.microsoft.ml.spark.io.http.HTTPTransformer +import com.microsoft.azure.synapse.ml.core.env.StreamUtilities +import com.microsoft.azure.synapse.ml.core.env.StreamUtilities.using +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.io.http.HTTPTransformer import com.sun.net.httpserver.{HttpExchange, HttpHandler, HttpServer} import org.apache.spark.ml.util.MLReadable import org.apache.spark.sql.{DataFrame, Dataset} import org.scalactic.Equality +import java.net.{InetSocketAddress, ServerSocket} +import java.util.concurrent.Executors + object ServerUtils { private def respond(request: HttpExchange, code: Int, response: String): Unit = synchronized { val bytes = response.getBytes("UTF-8") diff --git a/core/src/test/scala/com/microsoft/ml/spark/io/split1/ImageReaderSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/io/split1/ImageReaderSuite.scala similarity index 95% rename from core/src/test/scala/com/microsoft/ml/spark/io/split1/ImageReaderSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/io/split1/ImageReaderSuite.scala index b611ef5158..490459ab13 100644 --- 
a/core/src/test/scala/com/microsoft/ml/spark/io/split1/ImageReaderSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/io/split1/ImageReaderSuite.scala @@ -1,15 +1,15 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.io.split1 +package com.microsoft.azure.synapse.ml.io.split1 -import java.io.{File, FileInputStream} +import com.microsoft.azure.synapse.ml.core.env.FileUtilities +import com.microsoft.azure.synapse.ml.core.schema.ImageSchemaUtils +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.utils.OsUtils +import com.microsoft.azure.synapse.ml.io.image.ImageUtils -import com.microsoft.ml.spark.core.utils.OsUtils -import com.microsoft.ml.spark.core.env.FileUtilities -import com.microsoft.ml.spark.core.schema.ImageSchemaUtils -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.io.image.ImageUtils +import java.io.{File, FileInputStream} import org.apache.commons.codec.binary.Base64 import org.apache.commons.io.IOUtils import org.apache.spark.injections.UDFUtils diff --git a/core/src/test/scala/com/microsoft/ml/spark/io/split1/ParserSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/io/split1/ParserSuite.scala similarity index 94% rename from core/src/test/scala/com/microsoft/ml/spark/io/split1/ParserSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/io/split1/ParserSuite.scala index 47da5c5c86..4445dace26 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/io/split1/ParserSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/io/split1/ParserSuite.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.io.split1 +package com.microsoft.azure.synapse.ml.io.split1 -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} -import com.microsoft.ml.spark.io.http._ +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.io.http._ import org.apache.http.client.methods.HttpPost import org.apache.spark.ml.Transformer import org.apache.spark.ml.util.MLReadable diff --git a/core/src/test/scala/com/microsoft/ml/spark/io/split1/PowerBiSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/io/split1/PowerBiSuite.scala similarity index 91% rename from core/src/test/scala/com/microsoft/ml/spark/io/split1/PowerBiSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/io/split1/PowerBiSuite.scala index ec60e547f1..0bef1b9f4d 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/io/split1/PowerBiSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/io/split1/PowerBiSuite.scala @@ -1,13 +1,13 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.io.split1 +package com.microsoft.azure.synapse.ml.io.split1 -import java.io.File +import com.microsoft.azure.synapse.ml.Secrets +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.io.powerbi.PowerBIWriter -import com.microsoft.ml.spark.Secrets -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.io.powerbi.PowerBIWriter +import java.io.File import org.apache.spark.SparkException import org.apache.spark.sql.{DataFrame, Dataset, Row} import org.apache.spark.sql.catalyst.encoders.RowEncoder diff --git a/core/src/test/scala/com/microsoft/ml/spark/io/split1/SimpleHTTPTransformerSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/io/split1/SimpleHTTPTransformerSuite.scala similarity index 90% rename from core/src/test/scala/com/microsoft/ml/spark/io/split1/SimpleHTTPTransformerSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/io/split1/SimpleHTTPTransformerSuite.scala index 0acdaae314..13a674547b 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/io/split1/SimpleHTTPTransformerSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/io/split1/SimpleHTTPTransformerSuite.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.io.split1 +package com.microsoft.azure.synapse.ml.io.split1 -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} -import com.microsoft.ml.spark.io.http.{HandlingUtils, JSONOutputParser, SimpleHTTPTransformer} +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.io.http.{HandlingUtils, JSONOutputParser, SimpleHTTPTransformer} import org.apache.spark.ml.util.MLReadable import org.apache.spark.sql.DataFrame import org.apache.spark.sql.types.{StringType, StructType} diff --git a/core/src/test/scala/com/microsoft/ml/spark/io/split2/ContinuousHTTPSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/io/split2/ContinuousHTTPSuite.scala similarity index 94% rename from core/src/test/scala/com/microsoft/ml/spark/io/split2/ContinuousHTTPSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/io/split2/ContinuousHTTPSuite.scala index 40cf393619..ab1c5531f4 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/io/split2/ContinuousHTTPSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/io/split2/ContinuousHTTPSuite.scala @@ -1,18 +1,17 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.io.split2 +package com.microsoft.azure.synapse.ml.io.split2 -import java.io.File -import java.util.UUID - -import com.microsoft.ml.spark.core.test.base.{Flaky, TestBase} -import com.microsoft.ml.spark.io.IOImplicits._ +import com.microsoft.azure.synapse.ml.core.test.base.{Flaky, TestBase} +import com.microsoft.azure.synapse.ml.io.IOImplicits._ import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions.{col, length} import org.apache.spark.sql.streaming.{DataStreamReader, StreamingQuery, Trigger} import org.apache.spark.sql.types.BinaryType +import java.io.File +import java.util.UUID import scala.concurrent.Await // scalastyle:off magic.number diff --git a/core/src/test/scala/com/microsoft/ml/spark/io/split2/DistributedHTTPSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/io/split2/DistributedHTTPSuite.scala similarity index 96% rename from core/src/test/scala/com/microsoft/ml/spark/io/split2/DistributedHTTPSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/io/split2/DistributedHTTPSuite.scala index d5d106315b..a8526841b8 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/io/split2/DistributedHTTPSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/io/split2/DistributedHTTPSuite.scala @@ -1,19 +1,15 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.io.split2 - -import java.io.File -import java.util.UUID -import java.util.concurrent.{Executors, TimeUnit, TimeoutException} - -import com.microsoft.ml.spark.build.BuildInfo -import com.microsoft.ml.spark.core.env.FileUtilities -import com.microsoft.ml.spark.core.test.base.{Flaky, TestBase} -import com.microsoft.ml.spark.io.IOImplicits._ -import com.microsoft.ml.spark.io.http.HTTPSchema.string_to_response -import com.microsoft.ml.spark.io.http.SharedSingleton -import com.microsoft.ml.spark.io.split1.WithFreeUrl +package com.microsoft.azure.synapse.ml.io.split2 + +import com.microsoft.azure.synapse.ml.core.env.FileUtilities +import com.microsoft.azure.synapse.ml.core.test.base.{Flaky, TestBase} +import com.microsoft.azure.synapse.ml.io.IOImplicits._ +import com.microsoft.azure.synapse.ml.io.http.HTTPSchema.string_to_response +import com.microsoft.azure.synapse.ml.io.http.SharedSingleton +import com.microsoft.azure.synapse.ml.io.split1.WithFreeUrl +import com.microsoft.azure.synapse.ml.build.BuildInfo import org.apache.commons.io.IOUtils import org.apache.http.client.config.RequestConfig import org.apache.http.client.methods.HttpPost @@ -26,6 +22,9 @@ import org.apache.spark.sql.streaming.{DataStreamReader, DataStreamWriter, Strea import org.apache.spark.sql.types._ import org.apache.spark.sql.{DataFrame, Row} +import java.io.File +import java.util.UUID +import java.util.concurrent.{Executors, TimeUnit, TimeoutException} import scala.concurrent.duration.Duration import scala.concurrent.{Await, ExecutionContext, Future} import scala.util.parsing.json.JSONObject diff --git a/core/src/test/scala/com/microsoft/ml/spark/io/split2/HTTPSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/io/split2/HTTPSuite.scala similarity index 89% rename from core/src/test/scala/com/microsoft/ml/spark/io/split2/HTTPSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/io/split2/HTTPSuite.scala index 6f9891cfe6..541a258572 
100644 --- a/core/src/test/scala/com/microsoft/ml/spark/io/split2/HTTPSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/io/split2/HTTPSuite.scala @@ -1,12 +1,12 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.io.split2 +package com.microsoft.azure.synapse.ml.io.split2 -import java.io.File +import com.microsoft.azure.synapse.ml.core.test.base.TestBase -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.io.http.HTTPSchema.string_to_response +import java.io.File +import com.microsoft.azure.synapse.ml.io.http.HTTPSchema.string_to_response import org.apache.http.impl.client.HttpClientBuilder import org.apache.spark.sql.execution.streaming.{HTTPSinkProvider, HTTPSourceProvider} import org.apache.spark.sql.functions.col diff --git a/core/src/test/scala/com/microsoft/ml/spark/io/split2/HTTPv2Suite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/io/split2/HTTPv2Suite.scala similarity index 98% rename from core/src/test/scala/com/microsoft/ml/spark/io/split2/HTTPv2Suite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/io/split2/HTTPv2Suite.scala index e1fc47bc98..d6b79e75ca 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/io/split2/HTTPv2Suite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/io/split2/HTTPv2Suite.scala @@ -1,13 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.io.split2 +package com.microsoft.azure.synapse.ml.io.split2 -import java.io.File -import java.util.UUID - -import com.microsoft.ml.spark.core.test.base.{Flaky, SparkSessionFactory, TestBase} -import com.microsoft.ml.spark.io.IOImplicits._ +import com.microsoft.azure.synapse.ml.core.test.base.{Flaky, SparkSessionFactory, TestBase} +import com.microsoft.azure.synapse.ml.io.IOImplicits._ import org.apache.http.client.config.RequestConfig import org.apache.http.impl.client.{CloseableHttpClient, HttpClientBuilder} import org.apache.spark.injections.UDFUtils @@ -18,6 +15,8 @@ import org.apache.spark.sql.streaming.{DataStreamWriter, Trigger} import org.apache.spark.sql.types._ import org.apache.spark.sql.{DataFrame, Row, SparkSession} +import java.io.File +import java.util.UUID import scala.concurrent.{Await, Future} import scala.util.Try diff --git a/core/src/test/scala/com/microsoft/ml/spark/isolationforest/VerifyIsolationForest.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/isolationforest/VerifyIsolationForest.scala similarity index 84% rename from core/src/test/scala/com/microsoft/ml/spark/isolationforest/VerifyIsolationForest.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/isolationforest/VerifyIsolationForest.scala index 2ee5fd153e..0717d62c45 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/isolationforest/VerifyIsolationForest.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/isolationforest/VerifyIsolationForest.scala @@ -1,21 +1,19 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.isolationforest - -import com.microsoft.ml.spark.build.BuildInfo -import com.microsoft.ml.spark.core.env.FileUtilities -import com.microsoft.ml.spark.core.metrics.MetricConstants -import com.microsoft.ml.spark.core.test.benchmarks.Benchmarks -import org.apache.spark.ml.util.MLReadable -import org.apache.spark.sql.{DataFrame, Dataset, Encoders, Row} -import com.microsoft.ml.spark.core.test.fuzzing.{EstimatorFuzzing, TestObject} -import com.microsoft.ml.spark.stages.UDFTransformer +package com.microsoft.azure.synapse.ml.isolationforest + +import com.microsoft.azure.synapse.ml.core.env.FileUtilities +import com.microsoft.azure.synapse.ml.core.metrics.MetricConstants +import com.microsoft.azure.synapse.ml.core.test.benchmarks.Benchmarks +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{EstimatorFuzzing, TestObject} +import com.microsoft.azure.synapse.ml.train.ComputeModelStatistics +import com.microsoft.azure.synapse.ml.build.BuildInfo import org.apache.spark.ml.feature.VectorAssembler import org.apache.spark.ml.linalg.Vector -import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics +import org.apache.spark.ml.util.MLReadable +import org.apache.spark.sql.{DataFrame, Encoders} import org.scalactic.Tolerance._ -import com.microsoft.ml.spark.train.ComputeModelStatistics case class MammographyRecord(feature0: Double, feature1: Double, feature2: Double, feature3: Double, feature4: Double, feature5: Double, label: Double) diff --git a/core/src/test/scala/com/microsoft/ml/spark/lime/LIMESuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/lime/LIMESuite.scala similarity index 80% rename from core/src/test/scala/com/microsoft/ml/spark/lime/LIMESuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/lime/LIMESuite.scala index deb87ee43c..1be5631157 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/lime/LIMESuite.scala +++ 
b/core/src/test/scala/com/microsoft/azure/synapse/ml/lime/LIMESuite.scala @@ -1,18 +1,18 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.lime +package com.microsoft.azure.synapse.ml.lime import breeze.linalg.{*, DenseMatrix} import breeze.stats.distributions.Rand -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.test.fuzzing.{EstimatorFuzzing, TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{EstimatorFuzzing, TestObject, TransformerFuzzing} import org.apache.spark.ml.linalg.DenseVector import org.apache.spark.ml.param.DataFrameEquality import org.apache.spark.ml.regression.LinearRegression import org.apache.spark.ml.util.MLReadable -@deprecated("Please use 'com.microsoft.ml.spark.explainers.VectorLIME'.", since="1.0.0-rc3") +@deprecated("Please use 'com.microsoft.azure.synapse.ml.explainers.VectorLIME'.", since="1.0.0-rc3") trait LimeTestBase extends TestBase { import spark.implicits._ @@ -42,7 +42,7 @@ trait LimeTestBase extends TestBase { lazy val limeModel = lime.fit(df) } -@deprecated("Please use 'com.microsoft.ml.spark.explainers.TabularLIME'.", since="1.0.0-rc3") +@deprecated("Please use 'com.microsoft.azure.synapse.ml.explainers.TabularLIME'.", since="1.0.0-rc3") class TabularLIMESuite extends EstimatorFuzzing[TabularLIME] with DataFrameEquality with LimeTestBase { @@ -59,7 +59,7 @@ class TabularLIMESuite extends EstimatorFuzzing[TabularLIME] with override def modelReader: MLReadable[_] = TabularLIMEModel } -@deprecated("Please use 'com.microsoft.ml.spark.explainers.TextLIME'.", since="1.0.0-rc3") +@deprecated("Please use 'com.microsoft.azure.synapse.ml.explainers.TextLIME'.", since="1.0.0-rc3") class TabularLIMEModelSuite extends TransformerFuzzing[TabularLIMEModel] with DataFrameEquality 
with LimeTestBase { diff --git a/core/src/test/scala/com/microsoft/ml/spark/lime/SuperpixelSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/lime/SuperpixelSuite.scala similarity index 94% rename from core/src/test/scala/com/microsoft/ml/spark/lime/SuperpixelSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/lime/SuperpixelSuite.scala index 289720f969..d53522a3ab 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/lime/SuperpixelSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/lime/SuperpixelSuite.scala @@ -1,16 +1,15 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.lime +package com.microsoft.azure.synapse.ml.lime + +import com.microsoft.azure.synapse.ml.image.ImageTestUtils +import com.microsoft.azure.synapse.ml.io.image.ImageUtils import java.awt.Color import java.awt.image.BufferedImage import java.io.File - -import com.microsoft.ml.spark.image.ImageTestUtils -import com.microsoft.ml.spark.io.image.ImageUtils import javax.imageio.ImageIO - import scala.util.Random class SuperpixelSuite extends ImageTestUtils { diff --git a/core/src/test/scala/com/microsoft/ml/spark/lime/SuperpixelTransformerSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/lime/SuperpixelTransformerSuite.scala similarity index 76% rename from core/src/test/scala/com/microsoft/ml/spark/lime/SuperpixelTransformerSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/lime/SuperpixelTransformerSuite.scala index 0c4a5b78d0..967f92b46b 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/lime/SuperpixelTransformerSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/lime/SuperpixelTransformerSuite.scala @@ -1,11 +1,11 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.lime +package com.microsoft.azure.synapse.ml.lime -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} -import com.microsoft.ml.spark.image.ImageTestUtils -import com.microsoft.ml.spark.io.split1.FileReaderUtils +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.image.ImageTestUtils +import com.microsoft.azure.synapse.ml.io.split1.FileReaderUtils import org.apache.spark.ml.util.MLReadable class SuperpixelTransformerSuite extends TransformerFuzzing[SuperpixelTransformer] diff --git a/core/src/test/scala/com/microsoft/ml/spark/lime/TextLIMESuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/lime/TextLIMESuite.scala similarity index 89% rename from core/src/test/scala/com/microsoft/ml/spark/lime/TextLIMESuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/lime/TextLIMESuite.scala index 1151637b13..565d32e556 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/lime/TextLIMESuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/lime/TextLIMESuite.scala @@ -1,11 +1,11 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.lime +package com.microsoft.azure.synapse.ml.lime -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} -import com.microsoft.ml.spark.stages.UDFTransformer -import com.microsoft.ml.spark.stages.udfs.get_value_udf +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.stages.UDFTransformer +import com.microsoft.azure.synapse.ml.stages.udfs.get_value_udf import org.apache.spark.injections.UDFUtils import org.apache.spark.ml.classification.LogisticRegression import org.apache.spark.ml.feature.{HashingTF, Tokenizer} @@ -15,9 +15,8 @@ import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions.col import org.apache.spark.sql.types.DoubleType import org.scalactic.Equality -import org.scalatest.Assertion -@deprecated("Please use 'com.microsoft.ml.spark.explainers.TextLIME'.", since="1.0.0-rc3") +@deprecated("Please use 'com.microsoft.azure.synapse.ml.explainers.TextLIME'.", since="1.0.0-rc3") class TextLIMESuite extends TransformerFuzzing[TextLIME] { import spark.implicits._ diff --git a/core/src/test/scala/com/microsoft/ml/spark/nbtest/DatabricksTests.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksTests.scala similarity index 94% rename from core/src/test/scala/com/microsoft/ml/spark/nbtest/DatabricksTests.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksTests.scala index 32945f93b8..63c88ac131 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/nbtest/DatabricksTests.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksTests.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.nbtest +package com.microsoft.azure.synapse.ml.nbtest -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.nbtest.DatabricksUtilities._ +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import DatabricksUtilities._ import java.util.concurrent.TimeUnit import scala.collection.mutable diff --git a/core/src/test/scala/com/microsoft/ml/spark/nbtest/DatabricksUtilities.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksUtilities.scala similarity index 95% rename from core/src/test/scala/com/microsoft/ml/spark/nbtest/DatabricksUtilities.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksUtilities.scala index 7f0d95f418..119b7a581f 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/nbtest/DatabricksUtilities.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/DatabricksUtilities.scala @@ -1,18 +1,18 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.nbtest +package com.microsoft.azure.synapse.ml.nbtest + +import com.microsoft.azure.synapse.ml.core.env.FileUtilities +import com.microsoft.azure.synapse.ml.io.split2.HasHttpClient import java.io.{File, FileInputStream} import java.time.LocalDateTime import java.util.concurrent.TimeoutException - -import com.microsoft.ml.spark.nbtest.SprayImplicits._ -import com.microsoft.ml.spark.Secrets -import com.microsoft.ml.spark.build.BuildInfo -import com.microsoft.ml.spark.core.env.FileUtilities -import com.microsoft.ml.spark.core.env.StreamUtilities._ -import com.microsoft.ml.spark.io.split2.HasHttpClient +import SprayImplicits._ +import com.microsoft.azure.synapse.ml.Secrets +import com.microsoft.azure.synapse.ml.build.BuildInfo +import com.microsoft.azure.synapse.ml.core.env.StreamUtilities._ import org.apache.commons.io.IOUtils import org.apache.http.client.methods.{HttpGet, HttpPost} import org.apache.http.entity.StringEntity @@ -39,10 +39,10 @@ object DatabricksUtilities extends HasHttpClient { lazy val PoolId: String = getPoolIdByName(PoolName) lazy val ClusterName = s"mmlspark-build-${LocalDateTime.now()}" - val Folder = s"/MMLSparkBuild/build_${BuildInfo.version}" + val Folder = s"/SynapseMLBuild/build_${BuildInfo.version}" - // MMLSpark info - val Version = s"com.microsoft.ml.spark:mmlspark:${BuildInfo.version}" + // SynapseML info + val Version = s"com.microsoft.azure:synapseml:${BuildInfo.version}" val Repository = "https://mmlspark.azureedge.net/maven" val Libraries: String = List( diff --git a/core/src/test/scala/com/microsoft/ml/spark/nbtest/SprayUtilities.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SprayUtilities.scala similarity index 97% rename from core/src/test/scala/com/microsoft/ml/spark/nbtest/SprayUtilities.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SprayUtilities.scala index 5014a0be51..5f20fb8815 100644 --- 
a/core/src/test/scala/com/microsoft/ml/spark/nbtest/SprayUtilities.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SprayUtilities.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.nbtest +package com.microsoft.azure.synapse.ml.nbtest import spray.json.{JsArray, JsObject, JsValue, JsonFormat} diff --git a/core/src/test/scala/com/microsoft/ml/spark/nbtest/SynapseTests.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SynapseTests.scala similarity index 87% rename from core/src/test/scala/com/microsoft/ml/spark/nbtest/SynapseTests.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SynapseTests.scala index d687862ac8..54009bfdf5 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/nbtest/SynapseTests.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SynapseTests.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.nbtest +package com.microsoft.azure.synapse.ml.nbtest -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.nbtest.SynapseUtilities.exec +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import SynapseUtilities.exec import java.io.File import java.util.concurrent.TimeUnit @@ -21,9 +21,9 @@ class SynapseTests extends TestBase { val os = sys.props("os.name").toLowerCase os match { case x if x contains "windows" => - exec("conda activate mmlspark && jupyter nbconvert --to script .\\notebooks\\*.ipynb") + exec("conda activate synapseml && jupyter nbconvert --to script .\\notebooks\\*.ipynb") case _ => - Process(s"conda init bash; conda activate mmlspark; jupyter nbconvert --to script ./notebooks/*.ipynb") + Process(s"conda init bash; conda activate synapseml; jupyter nbconvert --to script ./notebooks/*.ipynb") } SynapseUtilities.listPythonFiles().map(f => { diff --git a/core/src/test/scala/com/microsoft/ml/spark/nbtest/SynapseUtilities.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SynapseUtilities.scala similarity index 96% rename from core/src/test/scala/com/microsoft/ml/spark/nbtest/SynapseUtilities.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SynapseUtilities.scala index d61c5e3598..580435161f 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/nbtest/SynapseUtilities.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SynapseUtilities.scala @@ -1,11 +1,12 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.nbtest +package com.microsoft.azure.synapse.ml.nbtest -import com.microsoft.ml.spark.Secrets -import com.microsoft.ml.spark.build.BuildInfo -import com.microsoft.ml.spark.core.env.FileUtilities +import com.microsoft.azure.synapse.ml.Secrets +import com.microsoft.azure.synapse.ml.core.env.FileUtilities +import com.microsoft.azure.synapse.ml.io.split2.HasHttpClient +import com.microsoft.azure.synapse.ml.build.BuildInfo import org.apache.commons.io.IOUtils import org.apache.http.client.entity.UrlEncodedFormEntity import org.apache.http.client.methods.{HttpDelete, HttpGet, HttpPost} @@ -18,7 +19,6 @@ import org.json4s.jackson.Serialization.write import org.json4s.{Formats, NoTypeHints} import spray.json.DefaultJsonProtocol._ import spray.json._ -import com.microsoft.ml.spark.io.split2.HasHttpClient import java.io.{File, InputStream} import java.util @@ -257,7 +257,7 @@ object SynapseUtilities extends HasHttpClient { | "numExecutors" : 2, | "conf" : | { - | "spark.jars.packages" : "com.microsoft.ml.spark:mmlspark:${BuildInfo.version}", + | "spark.jars.packages" : "com.microsoft.azure:synapseml:${BuildInfo.version}", | "spark.jars.repositories" : "https://mmlspark.azureedge.net/maven", | "spark.jars.excludes": "$excludes", | "spark.driver.userClassPathFirst": "true", diff --git a/core/src/test/scala/com/microsoft/ml/spark/nn/BallTreeTest.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/nn/BallTreeTest.scala similarity index 97% rename from core/src/test/scala/com/microsoft/ml/spark/nn/BallTreeTest.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/nn/BallTreeTest.scala index e389d54aa2..3edaab68de 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/nn/BallTreeTest.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/nn/BallTreeTest.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.nn +package com.microsoft.azure.synapse.ml.nn import breeze.linalg.DenseVector -import com.microsoft.ml.spark.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.base.TestBase import org.apache.spark.ml.linalg.{DenseVector => SDV} import org.apache.spark.sql.functions.lit diff --git a/core/src/test/scala/com/microsoft/ml/spark/nn/ConditionalBallTreeTest.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/nn/ConditionalBallTreeTest.scala similarity index 97% rename from core/src/test/scala/com/microsoft/ml/spark/nn/ConditionalBallTreeTest.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/nn/ConditionalBallTreeTest.scala index 30afd235bd..444b6815eb 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/nn/ConditionalBallTreeTest.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/nn/ConditionalBallTreeTest.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.nn +package com.microsoft.azure.synapse.ml.nn import breeze.linalg.DenseVector -import com.microsoft.ml.spark.core.test.benchmarks.Benchmarks +import com.microsoft.azure.synapse.ml.core.test.benchmarks.Benchmarks import scala.collection.mutable import scala.util.Random diff --git a/core/src/test/scala/com/microsoft/ml/spark/nn/KNNTest.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/nn/KNNTest.scala similarity index 96% rename from core/src/test/scala/com/microsoft/ml/spark/nn/KNNTest.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/nn/KNNTest.scala index 735a37ee43..47ff367d1b 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/nn/KNNTest.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/nn/KNNTest.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
See LICENSE in project root for information. -package com.microsoft.ml.spark.nn +package com.microsoft.azure.synapse.ml.nn -import com.microsoft.ml.spark.core.test.fuzzing.{EstimatorFuzzing, TestObject} +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{EstimatorFuzzing, TestObject} import org.apache.spark.ml.util.MLReadable import org.apache.spark.sql.{DataFrame, Row} import org.scalactic.Equality diff --git a/core/src/test/scala/com/microsoft/ml/spark/recommendation/RankingAdapterSpec.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/recommendation/RankingAdapterSpec.scala similarity index 83% rename from core/src/test/scala/com/microsoft/ml/spark/recommendation/RankingAdapterSpec.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/recommendation/RankingAdapterSpec.scala index 731dc65a1d..6f7b45d4a3 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/recommendation/RankingAdapterSpec.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/recommendation/RankingAdapterSpec.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.recommendation +package com.microsoft.azure.synapse.ml.recommendation -import com.microsoft.ml.spark.core.test.fuzzing.{EstimatorFuzzing, TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{EstimatorFuzzing, TestObject, TransformerFuzzing} import org.apache.spark.ml.util.MLReadable class RankingAdapterSpec extends RankingTestBase with EstimatorFuzzing[RankingAdapter] { diff --git a/core/src/test/scala/com/microsoft/ml/spark/recommendation/RankingEvaluatorSpec.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/recommendation/RankingEvaluatorSpec.scala similarity index 94% rename from core/src/test/scala/com/microsoft/ml/spark/recommendation/RankingEvaluatorSpec.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/recommendation/RankingEvaluatorSpec.scala index 42082dede9..3350b00979 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/recommendation/RankingEvaluatorSpec.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/recommendation/RankingEvaluatorSpec.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.recommendation +package com.microsoft.azure.synapse.ml.recommendation -import com.microsoft.ml.spark.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.base.TestBase class RankingEvaluatorSpec extends TestBase { diff --git a/core/src/test/scala/com/microsoft/ml/spark/recommendation/RankingTestBase.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/recommendation/RankingTestBase.scala similarity index 96% rename from core/src/test/scala/com/microsoft/ml/spark/recommendation/RankingTestBase.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/recommendation/RankingTestBase.scala index e8f265518f..d82cf87969 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/recommendation/RankingTestBase.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/recommendation/RankingTestBase.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.recommendation +package com.microsoft.azure.synapse.ml.recommendation -import com.microsoft.ml.spark.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.base.TestBase import org.apache.spark.ml.param.ParamMap import org.apache.spark.ml.recommendation.ALS import org.apache.spark.ml.tuning._ diff --git a/core/src/test/scala/com/microsoft/ml/spark/recommendation/RankingTrainValidationSpec.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/recommendation/RankingTrainValidationSpec.scala similarity index 89% rename from core/src/test/scala/com/microsoft/ml/spark/recommendation/RankingTrainValidationSpec.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/recommendation/RankingTrainValidationSpec.scala index 8f191939dd..211b8a70cd 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/recommendation/RankingTrainValidationSpec.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/recommendation/RankingTrainValidationSpec.scala @@ -1,10 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.recommendation +package com.microsoft.azure.synapse.ml.recommendation -import com.microsoft.ml.spark.core.test.fuzzing.{EstimatorFuzzing, TestObject, TransformerFuzzing} -import com.microsoft.ml.spark.core.utils.ModelEquality +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{EstimatorFuzzing, TestObject, TransformerFuzzing} import org.apache.spark.ml.recommendation.ALSModel import org.apache.spark.ml.util.MLReadable diff --git a/core/src/test/scala/com/microsoft/ml/spark/recommendation/RecommendationIndexerSpec.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/recommendation/RecommendationIndexerSpec.scala similarity index 89% rename from core/src/test/scala/com/microsoft/ml/spark/recommendation/RecommendationIndexerSpec.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/recommendation/RecommendationIndexerSpec.scala index 15b08a199b..7ad01a2ebe 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/recommendation/RecommendationIndexerSpec.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/recommendation/RecommendationIndexerSpec.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.recommendation +package com.microsoft.azure.synapse.ml.recommendation -import com.microsoft.ml.spark.core.test.fuzzing.{EstimatorFuzzing, TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{EstimatorFuzzing, TestObject, TransformerFuzzing} import org.apache.spark.ml.Pipeline import org.apache.spark.ml.util.MLReadable diff --git a/core/src/test/scala/com/microsoft/ml/spark/recommendation/SARSpec.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/recommendation/SARSpec.scala similarity index 98% rename from core/src/test/scala/com/microsoft/ml/spark/recommendation/SARSpec.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/recommendation/SARSpec.scala index 3470fa5a1b..e7635b3822 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/recommendation/SARSpec.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/recommendation/SARSpec.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.recommendation +package com.microsoft.azure.synapse.ml.recommendation -import com.microsoft.ml.spark.core.test.fuzzing.{EstimatorFuzzing, TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{EstimatorFuzzing, TestObject, TransformerFuzzing} import org.apache.spark.ml.Pipeline import org.apache.spark.ml.util.MLReadable import org.apache.spark.sql.DataFrame diff --git a/core/src/test/scala/com/microsoft/ml/spark/stages/BatchIteratorSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/BatchIteratorSuite.scala similarity index 95% rename from core/src/test/scala/com/microsoft/ml/spark/stages/BatchIteratorSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/stages/BatchIteratorSuite.scala index eeaae64253..7a626d114c 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/stages/BatchIteratorSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/BatchIteratorSuite.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages -import com.microsoft.ml.spark.core.test.base.{Flaky, TestBase} +import com.microsoft.azure.synapse.ml.core.test.base.{Flaky, TestBase} import org.scalatest.Assertion class BatchIteratorSuite extends TestBase with Flaky { diff --git a/core/src/test/scala/com/microsoft/ml/spark/stages/CacherSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/CacherSuite.scala similarity index 88% rename from core/src/test/scala/com/microsoft/ml/spark/stages/CacherSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/stages/CacherSuite.scala index 52d03ddc69..f72072550d 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/stages/CacherSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/CacherSuite.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} import org.apache.spark.ml.util.MLReadable class CacherSuite extends TransformerFuzzing[Cacher] { diff --git a/core/src/test/scala/com/microsoft/ml/spark/stages/ClassBalancerSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/ClassBalancerSuite.scala similarity index 91% rename from core/src/test/scala/com/microsoft/ml/spark/stages/ClassBalancerSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/stages/ClassBalancerSuite.scala index ea12f3452e..2485547e63 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/stages/ClassBalancerSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/ClassBalancerSuite.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. 
All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages -import com.microsoft.ml.spark.core.test.fuzzing.{EstimatorFuzzing, TestObject} +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{EstimatorFuzzing, TestObject} import org.apache.spark.ml.util.MLReadable import org.apache.spark.sql.DataFrame diff --git a/core/src/test/scala/com/microsoft/ml/spark/stages/DropColumnsSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/DropColumnsSuite.scala similarity index 88% rename from core/src/test/scala/com/microsoft/ml/spark/stages/DropColumnsSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/stages/DropColumnsSuite.scala index c96764cfd2..1662f1ca96 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/stages/DropColumnsSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/DropColumnsSuite.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} import org.apache.spark.ml.util.MLReadable class DropColumnsSuite extends TestBase with TransformerFuzzing[DropColumns] { diff --git a/core/src/test/scala/com/microsoft/ml/spark/stages/EnsembleByKeySuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/EnsembleByKeySuite.scala similarity index 94% rename from core/src/test/scala/com/microsoft/ml/spark/stages/EnsembleByKeySuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/stages/EnsembleByKeySuite.scala index 02b0871f62..52dfbbee79 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/stages/EnsembleByKeySuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/EnsembleByKeySuite.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} import org.apache.spark.ml.feature.VectorAssembler import org.apache.spark.ml.linalg.DenseVector import org.apache.spark.sql.DataFrame diff --git a/core/src/test/scala/com/microsoft/ml/spark/stages/ExplodeSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/ExplodeSuite.scala similarity index 87% rename from core/src/test/scala/com/microsoft/ml/spark/stages/ExplodeSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/stages/ExplodeSuite.scala index d38285dfd8..915aad6b98 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/stages/ExplodeSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/ExplodeSuite.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} import org.apache.spark.ml.util.MLReadable import org.apache.spark.sql.DataFrame diff --git a/core/src/test/scala/com/microsoft/ml/spark/stages/LambdaSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/LambdaSuite.scala similarity index 83% rename from core/src/test/scala/com/microsoft/ml/spark/stages/LambdaSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/stages/LambdaSuite.scala index 8bb31777fc..5963d96f28 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/stages/LambdaSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/LambdaSuite.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} import org.apache.spark.ml.util.MLReadable import org.apache.spark.sql.types.StructType diff --git a/core/src/test/scala/com/microsoft/ml/spark/stages/MiniBatchTransformerSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/MiniBatchTransformerSuite.scala similarity index 92% rename from core/src/test/scala/com/microsoft/ml/spark/stages/MiniBatchTransformerSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/stages/MiniBatchTransformerSuite.scala index 0890b50f6e..f9f3d5b6b1 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/stages/MiniBatchTransformerSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/MiniBatchTransformerSuite.scala @@ -1,20 +1,19 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} -import com.microsoft.ml.spark.stages +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} import org.apache.spark.injections.UDFUtils import org.apache.spark.ml.param.DataFrameEquality import org.apache.spark.ml.util.MLReadable import org.apache.spark.sql.catalyst.encoders.RowEncoder +import org.apache.spark.sql.functions.col import org.apache.spark.sql.types.{ArrayType, IntegerType, StringType, StructType} import org.apache.spark.sql.{DataFrame, Dataset} import org.scalactic.Equality import org.scalatest.Assertion -import org.apache.spark.sql.functions.{col, udf} trait MiniBatchTestUtils extends TestBase with DataFrameEquality { import spark.implicits._ @@ -128,7 +127,7 @@ class FlattenBatchSuite extends TransformerFuzzing[FlattenBatch] { lazy val df: DataFrame = sc.parallelize((1 to n).zip(List.fill(n)("foo"))).toDF("in1", "in2") test("null support"){ - val batchedDf = new stages.FixedMiniBatchTransformer().setBatchSize(3).transform(df) + val batchedDf = new FixedMiniBatchTransformer().setBatchSize(3).transform(df) val nullifiedDf = batchedDf.withColumn( "nullCol", UDFUtils.oldUdf(FlattenBatchUtils.nullify _, ArrayType(IntegerType))(col("in1"))) assert(new FlattenBatch().transform(nullifiedDf).count() == 1000) diff --git a/core/src/test/scala/com/microsoft/ml/spark/stages/MultiColumnAdapterSpec.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/MultiColumnAdapterSpec.scala similarity index 88% rename from core/src/test/scala/com/microsoft/ml/spark/stages/MultiColumnAdapterSpec.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/stages/MultiColumnAdapterSpec.scala index 8384f56cf3..69e973a513 100644 --- 
a/core/src/test/scala/com/microsoft/ml/spark/stages/MultiColumnAdapterSpec.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/MultiColumnAdapterSpec.scala @@ -1,11 +1,11 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages -import com.microsoft.ml.spark.core.schema.DatasetExtensions._ -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.test.fuzzing.{EstimatorFuzzing, TestObject} +import com.microsoft.azure.synapse.ml.core.schema.DatasetExtensions._ +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{EstimatorFuzzing, TestObject} import org.apache.spark.ml.PipelineModel import org.apache.spark.ml.feature.{StringIndexer, Tokenizer} import org.apache.spark.ml.util.MLReadable diff --git a/core/src/test/scala/com/microsoft/ml/spark/stages/RenameColumnSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/RenameColumnSuite.scala similarity index 84% rename from core/src/test/scala/com/microsoft/ml/spark/stages/RenameColumnSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/stages/RenameColumnSuite.scala index 18a74cd001..31cecdf88f 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/stages/RenameColumnSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/RenameColumnSuite.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} import org.apache.spark.ml.util.MLReadable class RenameColumnSuite extends TestBase with TransformerFuzzing[RenameColumn] { diff --git a/core/src/test/scala/com/microsoft/ml/spark/stages/RepartitionSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/RepartitionSuite.scala similarity index 88% rename from core/src/test/scala/com/microsoft/ml/spark/stages/RepartitionSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/stages/RepartitionSuite.scala index c5f888d0e7..8dceef84fd 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/stages/RepartitionSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/RepartitionSuite.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} import org.apache.spark.ml.Pipeline import org.apache.spark.ml.param.ParamMap import org.apache.spark.ml.util.MLReadable diff --git a/core/src/test/scala/com/microsoft/ml/spark/stages/SelectColumnsSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/SelectColumnsSuite.scala similarity index 90% rename from core/src/test/scala/com/microsoft/ml/spark/stages/SelectColumnsSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/stages/SelectColumnsSuite.scala index 7398dee406..9cfe75ab25 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/stages/SelectColumnsSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/SelectColumnsSuite.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} import org.apache.spark.ml.util.MLReadable class SelectColumnsSuite extends TestBase with TransformerFuzzing[SelectColumns] { diff --git a/core/src/test/scala/com/microsoft/ml/spark/stages/StratifiedRepartitionSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/StratifiedRepartitionSuite.scala similarity index 94% rename from core/src/test/scala/com/microsoft/ml/spark/stages/StratifiedRepartitionSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/stages/StratifiedRepartitionSuite.scala index d72f39bd7d..16ca75c13e 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/stages/StratifiedRepartitionSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/StratifiedRepartitionSuite.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} import org.apache.spark.TaskContext import org.apache.spark.ml.util.MLReadable import org.apache.spark.sql.Row diff --git a/core/src/test/scala/com/microsoft/ml/spark/stages/SummarizeDataSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/SummarizeDataSuite.scala similarity index 92% rename from core/src/test/scala/com/microsoft/ml/spark/stages/SummarizeDataSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/stages/SummarizeDataSuite.scala index ee3e351921..319bdf3664 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/stages/SummarizeDataSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/SummarizeDataSuite.scala @@ -1,11 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.stages - -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} -import java.io.File +package com.microsoft.azure.synapse.ml.stages +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} import org.apache.spark.ml.util.MLReadable class SummarizeDataSuite extends TransformerFuzzing[SummarizeData] { diff --git a/core/src/test/scala/com/microsoft/ml/spark/stages/TextPreprocessorSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/TextPreprocessorSuite.scala similarity index 95% rename from core/src/test/scala/com/microsoft/ml/spark/stages/TextPreprocessorSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/stages/TextPreprocessorSuite.scala index d00e65e456..06cca3b2fa 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/stages/TextPreprocessorSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/TextPreprocessorSuite.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} import org.apache.spark.ml.util.MLReadable class TextPreprocessorSuite extends TestBase with TransformerFuzzing[TextPreprocessor] { diff --git a/core/src/test/scala/com/microsoft/ml/spark/stages/TimerSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/TimerSuite.scala similarity index 95% rename from core/src/test/scala/com/microsoft/ml/spark/stages/TimerSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/stages/TimerSuite.scala index c07c56e6d0..3b44d6e0c5 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/stages/TimerSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/TimerSuite.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages -import com.microsoft.ml.spark.core.test.fuzzing.{EstimatorFuzzing, TestObject} +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{EstimatorFuzzing, TestObject} import org.apache.spark.ml.Pipeline import org.apache.spark.ml.feature.{HashingTF, IDF, Tokenizer} import org.apache.spark.ml.util.MLReadable diff --git a/core/src/test/scala/com/microsoft/ml/spark/stages/UDFSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/UDFSuite.scala similarity index 84% rename from core/src/test/scala/com/microsoft/ml/spark/stages/UDFSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/stages/UDFSuite.scala index c409af05db..040b5deaad 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/stages/UDFSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/UDFSuite.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages -import com.microsoft.ml.spark.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.base.TestBase import org.apache.spark.sql.DataFrame class UDFSuite extends TestBase { diff --git a/core/src/test/scala/com/microsoft/ml/spark/stages/UDFTransformerSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/UDFTransformerSuite.scala similarity index 95% rename from core/src/test/scala/com/microsoft/ml/spark/stages/UDFTransformerSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/stages/UDFTransformerSuite.scala index 7840ebb53b..2e83d65138 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/stages/UDFTransformerSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/UDFTransformerSuite.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. 
All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} import org.apache.spark.ml.util.MLReadable import org.apache.spark.sql.DataFrame import org.apache.spark.sql.expressions.UserDefinedFunction diff --git a/core/src/test/scala/com/microsoft/ml/spark/stages/UnicodeNormalizeSuite.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/UnicodeNormalizeSuite.scala similarity index 89% rename from core/src/test/scala/com/microsoft/ml/spark/stages/UnicodeNormalizeSuite.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/stages/UnicodeNormalizeSuite.scala index c5efa77f46..e7d098ca6f 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/stages/UnicodeNormalizeSuite.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/stages/UnicodeNormalizeSuite.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.stages +package com.microsoft.azure.synapse.ml.stages -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} import org.apache.spark.ml.util.MLReadable import org.apache.spark.sql.DataFrame diff --git a/core/src/test/scala/com/microsoft/ml/spark/train/VerifyComputeModelStatistics.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/train/VerifyComputeModelStatistics.scala similarity index 95% rename from core/src/test/scala/com/microsoft/ml/spark/train/VerifyComputeModelStatistics.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/train/VerifyComputeModelStatistics.scala index 755fc861a2..9712ac124a 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/train/VerifyComputeModelStatistics.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/train/VerifyComputeModelStatistics.scala @@ -1,16 +1,16 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.train - -import java.io.File - -import com.microsoft.ml.spark.build.BuildInfo -import com.microsoft.ml.spark.core.env.FileUtilities -import com.microsoft.ml.spark.core.metrics.MetricConstants -import com.microsoft.ml.spark.core.schema.{CategoricalUtilities, SchemaConstants, SparkSchema} -import com.microsoft.ml.spark.core.test.benchmarks.DatasetUtils -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} +package com.microsoft.azure.synapse.ml.train + +import com.microsoft.azure.synapse.ml.core.env.FileUtilities +import com.microsoft.azure.synapse.ml.core.metrics.MetricConstants +import com.microsoft.azure.synapse.ml.core.schema.{CategoricalUtilities, SchemaConstants, SparkSchema} +import com.microsoft.azure.synapse.ml.core.test.benchmarks.DatasetUtils +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.train.TrainClassifierTestUtilities._ +import com.microsoft.azure.synapse.ml.train.TrainRegressorTestUtilities._ +import com.microsoft.azure.synapse.ml.build.BuildInfo import org.apache.spark.ml.classification.LogisticRegression import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator import org.apache.spark.ml.feature.FastVectorAssembler @@ -20,8 +20,6 @@ import org.apache.spark.ml.util.MLReadable import org.apache.spark.sql._ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.{DoubleType, StructField, StructType} -import com.microsoft.ml.spark.train.TrainRegressorTestUtilities._ -import com.microsoft.ml.spark.train.TrainClassifierTestUtilities._ import scala.util.Random diff --git a/core/src/test/scala/com/microsoft/ml/spark/train/VerifyComputePerInstanceStatistics.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/train/VerifyComputePerInstanceStatistics.scala similarity index 93% rename from core/src/test/scala/com/microsoft/ml/spark/train/VerifyComputePerInstanceStatistics.scala rename 
to core/src/test/scala/com/microsoft/azure/synapse/ml/train/VerifyComputePerInstanceStatistics.scala index d704cc807b..8e76be89fe 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/train/VerifyComputePerInstanceStatistics.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/train/VerifyComputePerInstanceStatistics.scala @@ -1,16 +1,16 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.train +package com.microsoft.azure.synapse.ml.train -import com.microsoft.ml.spark.core.metrics.MetricConstants -import com.microsoft.ml.spark.core.schema.{SchemaConstants, SparkSchema} -import com.microsoft.ml.spark.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.metrics.MetricConstants +import com.microsoft.azure.synapse.ml.core.schema.{SchemaConstants, SparkSchema} +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.train.TrainClassifierTestUtilities._ +import com.microsoft.azure.synapse.ml.train.TrainRegressorTestUtilities._ import org.apache.spark.ml.classification.LogisticRegression import org.apache.spark.ml.feature.FastVectorAssembler import org.apache.spark.sql._ -import com.microsoft.ml.spark.train.TrainRegressorTestUtilities._ -import com.microsoft.ml.spark.train.TrainClassifierTestUtilities._ /** Tests to validate the functionality of Compute Per Instance Statistics module. 
*/ class VerifyComputePerInstanceStatistics extends TestBase { diff --git a/core/src/test/scala/com/microsoft/ml/spark/train/VerifyTrainClassifier.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/train/VerifyTrainClassifier.scala similarity index 96% rename from core/src/test/scala/com/microsoft/ml/spark/train/VerifyTrainClassifier.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/train/VerifyTrainClassifier.scala index 387eb04e37..121cf562ce 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/train/VerifyTrainClassifier.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/train/VerifyTrainClassifier.scala @@ -1,14 +1,12 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.train +package com.microsoft.azure.synapse.ml.train -import java.io.File - -import com.microsoft.ml.spark.core.schema.SchemaConstants -import com.microsoft.ml.spark.core.test.benchmarks.Benchmarks -import com.microsoft.ml.spark.core.test.fuzzing.{EstimatorFuzzing, TestObject} -import com.microsoft.ml.spark.featurize.ValueIndexer +import com.microsoft.azure.synapse.ml.core.schema.SchemaConstants +import com.microsoft.azure.synapse.ml.core.test.benchmarks.Benchmarks +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{EstimatorFuzzing, TestObject} +import com.microsoft.azure.synapse.ml.featurize.ValueIndexer import org.apache.spark.ml.classification._ import org.apache.spark.ml.linalg.{Vector, Vectors} import org.apache.spark.ml.util.MLReadable @@ -16,8 +14,8 @@ import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.mllib.evaluation.{BinaryClassificationMetrics, MulticlassMetrics} import org.apache.spark.sql.functions._ import org.apache.spark.sql.{DataFrame, Row} -import com.microsoft.ml.spark.codegen.GenerationUtils -import com.microsoft.ml.spark.core.test.base.TestBase + +import java.io.File object 
ClassifierTestUtils { @@ -33,8 +31,8 @@ object ClassifierTestUtils { class VerifyTrainClassifier extends Benchmarks with EstimatorFuzzing[TrainClassifier] { import TrainClassifierTestUtilities._ - import com.microsoft.ml.spark.core.schema.CategoricalUtilities._ - import com.microsoft.ml.spark.core.test.benchmarks.DatasetUtils._ + import com.microsoft.azure.synapse.ml.core.schema.CategoricalUtilities._ + import com.microsoft.azure.synapse.ml.core.test.benchmarks.DatasetUtils._ val moduleName = "train-classifier" val lrName = "LogisticRegression" diff --git a/core/src/test/scala/com/microsoft/ml/spark/train/VerifyTrainRegressor.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/train/VerifyTrainRegressor.scala similarity index 96% rename from core/src/test/scala/com/microsoft/ml/spark/train/VerifyTrainRegressor.scala rename to core/src/test/scala/com/microsoft/azure/synapse/ml/train/VerifyTrainRegressor.scala index 371e186df2..c90d5c94e1 100644 --- a/core/src/test/scala/com/microsoft/ml/spark/train/VerifyTrainRegressor.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/train/VerifyTrainRegressor.scala @@ -1,19 +1,18 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.train +package com.microsoft.azure.synapse.ml.train -import java.io.File - -import com.microsoft.ml.spark.build.BuildInfo -import com.microsoft.ml.spark.core.env.FileUtilities -import com.microsoft.ml.spark.core.test.fuzzing.{EstimatorFuzzing, TestObject} +import com.microsoft.azure.synapse.ml.core.env.FileUtilities +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{EstimatorFuzzing, TestObject} +import com.microsoft.azure.synapse.ml.build.BuildInfo import org.apache.spark.ml.regression.{LinearRegression, RandomForestRegressor} import org.apache.spark.ml.util.{MLReadable, MLWritable} import org.apache.spark.ml.{Estimator, PipelineStage} import org.apache.spark.sql.DataFrame import org.apache.spark.sql.types._ +import java.io.File import scala.collection.immutable.Seq /** Tests to validate the functionality of Train Regressor module. */ diff --git a/deep-learning/src/main/R/model_downloader.R b/deep-learning/src/main/R/model_downloader.R index ae658a3d00..10d47d2748 100644 --- a/deep-learning/src/main/R/model_downloader.R +++ b/deep-learning/src/main/R/model_downloader.R @@ -19,7 +19,7 @@ DEFAULT_URL = "https://mmlspark.azureedge.net/datasets/CNTKModels/" smd_model_downloader <- function(sc, localPath, serverURL=DEFAULT_URL, ...) 
{ session <- spark_session(sc) env <- new.env(parent = emptyenv()) - env$model <- "com.microsoft.ml.spark.downloader.ModelDownloader" + env$model <- "com.microsoft.azure.synapse.ml.downloader.ModelDownloader" downloader <- invoke_new(sc, env$model, session, localPath, serverURL) } diff --git a/deep-learning/src/main/python/mmlspark/cntk/CNTKModel.py b/deep-learning/src/main/python/synapse/ml/cntk/CNTKModel.py similarity index 93% rename from deep-learning/src/main/python/mmlspark/cntk/CNTKModel.py rename to deep-learning/src/main/python/synapse/ml/cntk/CNTKModel.py index 1b79912535..3a15544ba6 100644 --- a/deep-learning/src/main/python/mmlspark/cntk/CNTKModel.py +++ b/deep-learning/src/main/python/synapse/ml/cntk/CNTKModel.py @@ -6,7 +6,7 @@ if sys.version >= '3': basestring = str -from mmlspark.cntk._CNTKModel import _CNTKModel +from synapse.ml.cntk._CNTKModel import _CNTKModel from pyspark.ml.common import inherit_doc, _java2py, _py2java from pyspark import SparkContext @@ -20,7 +20,7 @@ class CNTKModel(_CNTKModel): """ def _transfer_map_from_java(self, param): """ - Transforms the embedded com.microsoft.ml.spark.core.serialize.params from the companion Java object. + Transforms the embedded com.microsoft.azure.synapse.ml.core.serialize.params from the companion Java object. """ sc = SparkContext._active_spark_context if self._java_obj.hasParam(param.name): @@ -33,7 +33,7 @@ def _transfer_map_from_java(self, param): def _transfer_map_to_java(self, param): """ - Transforms the embedded com.microsoft.ml.spark.core.serialize.params to the companion Java object. + Transforms the embedded com.microsoft.azure.synapse.ml.core.serialize.params to the companion Java object. 
""" value = self.extractParamMap()[param] java_param = self._java_obj.getParam(param.name) diff --git a/deep-learning/src/main/python/mmlspark/cntk/ImageFeaturizer.py b/deep-learning/src/main/python/synapse/ml/cntk/ImageFeaturizer.py similarity index 93% rename from deep-learning/src/main/python/mmlspark/cntk/ImageFeaturizer.py rename to deep-learning/src/main/python/synapse/ml/cntk/ImageFeaturizer.py index a85cd56a09..35871efb4c 100644 --- a/deep-learning/src/main/python/mmlspark/cntk/ImageFeaturizer.py +++ b/deep-learning/src/main/python/synapse/ml/cntk/ImageFeaturizer.py @@ -6,7 +6,7 @@ if sys.version >= '3': basestring = str -from mmlspark.cntk._ImageFeaturizer import _ImageFeaturizer +from synapse.ml.cntk._ImageFeaturizer import _ImageFeaturizer from pyspark.ml.common import inherit_doc from pyspark.sql import SparkSession diff --git a/deep-learning/src/main/python/mmlspark/cntk/__init__.py b/deep-learning/src/main/python/synapse/ml/cntk/__init__.py similarity index 100% rename from deep-learning/src/main/python/mmlspark/cntk/__init__.py rename to deep-learning/src/main/python/synapse/ml/cntk/__init__.py diff --git a/deep-learning/src/main/python/mmlspark/onnx/ONNXModel.py b/deep-learning/src/main/python/synapse/ml/onnx/ONNXModel.py similarity index 98% rename from deep-learning/src/main/python/mmlspark/onnx/ONNXModel.py rename to deep-learning/src/main/python/synapse/ml/onnx/ONNXModel.py index 707399588e..a7dfb12fd4 100644 --- a/deep-learning/src/main/python/mmlspark/onnx/ONNXModel.py +++ b/deep-learning/src/main/python/synapse/ml/onnx/ONNXModel.py @@ -9,7 +9,7 @@ if sys.version >= "3": basestring = str -from mmlspark.onnx._ONNXModel import _ONNXModel +from synapse.ml.onnx._ONNXModel import _ONNXModel from pyspark.ml.common import inherit_doc from py4j.java_gateway import JavaObject diff --git a/deep-learning/src/main/python/mmlspark/onnx/__init__.py b/deep-learning/src/main/python/synapse/ml/onnx/__init__.py similarity index 100% rename from 
deep-learning/src/main/python/mmlspark/onnx/__init__.py rename to deep-learning/src/main/python/synapse/ml/onnx/__init__.py diff --git a/deep-learning/src/main/scala/com/microsoft/CNTK/SerializableFunction.scala b/deep-learning/src/main/scala/com/microsoft/CNTK/SerializableFunction.scala index 247f5eeeca..860f93892f 100644 --- a/deep-learning/src/main/scala/com/microsoft/CNTK/SerializableFunction.scala +++ b/deep-learning/src/main/scala/com/microsoft/CNTK/SerializableFunction.scala @@ -7,7 +7,7 @@ import java.io._ import java.util.UUID.randomUUID import com.microsoft.CNTK.CNTKUtils._ -import com.microsoft.ml.spark.core.env.StreamUtilities.using +import com.microsoft.azure.synapse.ml.core.env.StreamUtilities.using import org.apache.commons.io.FileUtils.{forceDelete, getTempDirectoryPath, writeByteArrayToFile} import org.apache.commons.io.IOUtils import org.apache.spark.sql.types.{ArrayType, DoubleType, FloatType, StructField, DataType => SDataType} diff --git a/deep-learning/src/main/scala/com/microsoft/ml/spark/SharedParams.scala b/deep-learning/src/main/scala/com/microsoft/azure/synapse/ml/SharedParams.scala similarity index 96% rename from deep-learning/src/main/scala/com/microsoft/ml/spark/SharedParams.scala rename to deep-learning/src/main/scala/com/microsoft/azure/synapse/ml/SharedParams.scala index 1b43671743..0a48e4a8f4 100644 --- a/deep-learning/src/main/scala/com/microsoft/ml/spark/SharedParams.scala +++ b/deep-learning/src/main/scala/com/microsoft/azure/synapse/ml/SharedParams.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark +package com.microsoft.azure.synapse.ml import org.apache.spark.ml.param.{MapParam, Param, Params} import spray.json.DefaultJsonProtocol._ diff --git a/deep-learning/src/main/scala/com/microsoft/ml/spark/cntk/CNTKFunctionParam.scala b/deep-learning/src/main/scala/com/microsoft/azure/synapse/ml/cntk/CNTKFunctionParam.scala similarity index 81% rename from deep-learning/src/main/scala/com/microsoft/ml/spark/cntk/CNTKFunctionParam.scala rename to deep-learning/src/main/scala/com/microsoft/azure/synapse/ml/cntk/CNTKFunctionParam.scala index 46881e2dcf..952aa8919c 100644 --- a/deep-learning/src/main/scala/com/microsoft/ml/spark/cntk/CNTKFunctionParam.scala +++ b/deep-learning/src/main/scala/com/microsoft/azure/synapse/ml/cntk/CNTKFunctionParam.scala @@ -1,18 +1,17 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.cntk +package com.microsoft.azure.synapse.ml.cntk import java.io.{ByteArrayOutputStream, ObjectOutputStream} - import com.microsoft.CNTK.SerializableFunction -import com.microsoft.ml.spark.core.env.StreamUtilities -import com.microsoft.ml.spark.core.serialize.ComplexParam -import com.microsoft.ml.spark.core.utils.ParamEquality +import com.microsoft.azure.synapse.ml.core.env.StreamUtilities +import com.microsoft.azure.synapse.ml.core.serialize.ComplexParam +import com.microsoft.azure.synapse.ml.core.utils.ParamEquality import org.apache.spark.ml.param.Params import org.scalactic.TripleEquals._ -/** Param for ByteArray. Needed as spark has explicit com.microsoft.ml.spark.core.serialize.params for many different +/** Param for ByteArray. Needed as spark has explicit params for many different * types but not ByteArray. 
*/ class CNTKFunctionParam(parent: Params, name: String, doc: String, diff --git a/deep-learning/src/main/scala/com/microsoft/ml/spark/cntk/CNTKModel.scala b/deep-learning/src/main/scala/com/microsoft/azure/synapse/ml/cntk/CNTKModel.scala similarity index 96% rename from deep-learning/src/main/scala/com/microsoft/ml/spark/cntk/CNTKModel.scala rename to deep-learning/src/main/scala/com/microsoft/azure/synapse/ml/cntk/CNTKModel.scala index c4fc7040ea..c588b66b10 100644 --- a/deep-learning/src/main/scala/com/microsoft/ml/spark/cntk/CNTKModel.scala +++ b/deep-learning/src/main/scala/com/microsoft/azure/synapse/ml/cntk/CNTKModel.scala @@ -1,32 +1,30 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.cntk +package com.microsoft.azure.synapse.ml.cntk import com.microsoft.CNTK.CNTKExtensions._ import com.microsoft.CNTK.CNTKUtils._ import com.microsoft.CNTK.{CNTKExtensions, DataType => CNTKDataType, SerializableFunction => CNTKFunction, _} -import com.microsoft.ml.spark.HasFeedFetchDicts -import com.microsoft.ml.spark.cntk.ConversionUtils.GVV -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.schema.DatasetExtensions.findUnusedColumnName -import com.microsoft.ml.spark.logging.BasicLogging -import com.microsoft.ml.spark.stages.{FixedMiniBatchTransformer, FlattenBatch, HasMiniBatcher} +import com.microsoft.azure.synapse.ml.HasFeedFetchDicts +import com.microsoft.azure.synapse.ml.cntk.ConversionUtils.GVV +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.schema.DatasetExtensions.findUnusedColumnName +import com.microsoft.azure.synapse.ml.logging.BasicLogging +import com.microsoft.azure.synapse.ml.stages.{FixedMiniBatchTransformer, FlattenBatch, HasMiniBatcher} import org.apache.spark.SparkContext import org.apache.spark.broadcast._ import 
org.apache.spark.injections.UDFUtils -import org.apache.spark.ml.{ComplexParamsReadable, ComplexParamsWritable, Model} -import org.apache.spark.ml.linalg.{SQLDataTypes, Vectors, Vector => SVector} import org.apache.spark.ml.linalg.SQLDataTypes.VectorType +import org.apache.spark.ml.linalg.{SQLDataTypes, Vectors, Vector => SVector} import org.apache.spark.ml.param._ import org.apache.spark.ml.util._ +import org.apache.spark.ml.{ComplexParamsReadable, ComplexParamsWritable, Model} import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.encoders.RowEncoder -import org.apache.spark.sql.catalyst.expressions.GenericRow import org.apache.spark.sql.expressions.UserDefinedFunction import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ -import spray.json.DefaultJsonProtocol._ import scala.collection.JavaConverters._ @@ -504,7 +502,7 @@ class CNTKModel(override val uid: String) extends Model[CNTKModel] with ComplexP val unbatchedDF = if (getBatchInput) { // TODO: The cache call is a workaround for issue 1075: - // https://github.com/Azure/mmlspark/issues/1075 + // https://github.com/Microsoft/SynapseML/issues/1075 val cacheAttempted = if (droppedDF.isStreaming) droppedDF else droppedDF.cache() new FlattenBatch().transform(cacheAttempted) } else { diff --git a/deep-learning/src/main/scala/com/microsoft/ml/spark/cntk/ConversionUtils.scala b/deep-learning/src/main/scala/com/microsoft/azure/synapse/ml/cntk/ConversionUtils.scala similarity index 98% rename from deep-learning/src/main/scala/com/microsoft/ml/spark/cntk/ConversionUtils.scala rename to deep-learning/src/main/scala/com/microsoft/azure/synapse/ml/cntk/ConversionUtils.scala index d0a57ec74c..c06ec09e83 100644 --- a/deep-learning/src/main/scala/com/microsoft/ml/spark/cntk/ConversionUtils.scala +++ b/deep-learning/src/main/scala/com/microsoft/azure/synapse/ml/cntk/ConversionUtils.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. 
// Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.cntk +package com.microsoft.azure.synapse.ml.cntk import com.microsoft.CNTK.{DoubleVector, DoubleVectorVector, FloatVector, FloatVectorVector} import org.apache.spark.ml.linalg.{Vector=>SVector, Vectors} diff --git a/deep-learning/src/main/scala/com/microsoft/ml/spark/cntk/ImageFeaturizer.scala b/deep-learning/src/main/scala/com/microsoft/azure/synapse/ml/cntk/ImageFeaturizer.scala similarity index 93% rename from deep-learning/src/main/scala/com/microsoft/ml/spark/cntk/ImageFeaturizer.scala rename to deep-learning/src/main/scala/com/microsoft/azure/synapse/ml/cntk/ImageFeaturizer.scala index 73dce56994..594d90ef0e 100644 --- a/deep-learning/src/main/scala/com/microsoft/ml/spark/cntk/ImageFeaturizer.scala +++ b/deep-learning/src/main/scala/com/microsoft/azure/synapse/ml/cntk/ImageFeaturizer.scala @@ -1,16 +1,16 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.cntk +package com.microsoft.azure.synapse.ml.cntk import com.microsoft.CNTK.CNTKExtensions._ import com.microsoft.CNTK.{SerializableFunction => CNTKFunction} -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.contracts.{HasInputCol, HasOutputCol} -import com.microsoft.ml.spark.core.schema.{DatasetExtensions, ImageSchemaUtils} -import com.microsoft.ml.spark.downloader.ModelSchema -import com.microsoft.ml.spark.image.{ResizeImageTransformer, UnrollBinaryImage, UnrollImage} -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts.{HasInputCol, HasOutputCol} +import com.microsoft.azure.synapse.ml.core.schema.{DatasetExtensions, ImageSchemaUtils} +import com.microsoft.azure.synapse.ml.downloader.ModelSchema +import com.microsoft.azure.synapse.ml.image.{ResizeImageTransformer, UnrollBinaryImage, UnrollImage} +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.ml.linalg.SQLDataTypes.VectorType import org.apache.spark.ml.param._ import org.apache.spark.ml.util.Identifiable diff --git a/deep-learning/src/main/scala/com/microsoft/ml/spark/cntk/ImageFeaturizer.txt b/deep-learning/src/main/scala/com/microsoft/azure/synapse/ml/cntk/ImageFeaturizer.txt similarity index 100% rename from deep-learning/src/main/scala/com/microsoft/ml/spark/cntk/ImageFeaturizer.txt rename to deep-learning/src/main/scala/com/microsoft/azure/synapse/ml/cntk/ImageFeaturizer.txt diff --git a/deep-learning/src/main/scala/com/microsoft/ml/spark/cntk/_CNTKModel.txt b/deep-learning/src/main/scala/com/microsoft/azure/synapse/ml/cntk/_CNTKModel.txt similarity index 100% rename from deep-learning/src/main/scala/com/microsoft/ml/spark/cntk/_CNTKModel.txt rename to deep-learning/src/main/scala/com/microsoft/azure/synapse/ml/cntk/_CNTKModel.txt diff --git 
a/deep-learning/src/main/scala/com/microsoft/ml/spark/downloader/ModelDownloader.scala b/deep-learning/src/main/scala/com/microsoft/azure/synapse/ml/downloader/ModelDownloader.scala similarity index 94% rename from deep-learning/src/main/scala/com/microsoft/ml/spark/downloader/ModelDownloader.scala rename to deep-learning/src/main/scala/com/microsoft/azure/synapse/ml/downloader/ModelDownloader.scala index 8c2a46c55e..16f3f5fa35 100644 --- a/deep-learning/src/main/scala/com/microsoft/ml/spark/downloader/ModelDownloader.scala +++ b/deep-learning/src/main/scala/com/microsoft/azure/synapse/ml/downloader/ModelDownloader.scala @@ -1,13 +1,13 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.downloader +package com.microsoft.azure.synapse.ml.downloader + +import com.microsoft.azure.synapse.ml.core.utils.FaultToleranceUtils import java.io._ import java.net.{URI, URL} import java.util - -import com.microsoft.ml.spark.core.utils.FaultToleranceUtils import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.{Configuration => HadoopConf} import org.apache.hadoop.fs.{FileSystem, LocatedFileStatus, Path} @@ -23,7 +23,7 @@ import scala.concurrent.duration.Duration * * @tparam S an instantiation of the */ -private[spark] abstract class Repository[S <: Schema] { +private[ml] abstract class Repository[S <: Schema] { def listSchemas(): Iterable[S] @@ -39,7 +39,7 @@ private[spark] abstract class Repository[S <: Schema] { */ class ModelNotFoundException(uri: URI) extends FileNotFoundException(s"model located at $uri could not be found") -private[spark] class HDFSRepo[S <: Schema](val uri: URI, val hconf: HadoopConf) +private[ml] class HDFSRepo[S <: Schema](val uri: URI, val hconf: HadoopConf) (implicit val jsonFormat: JsonFormat[S]) extends Repository[S] { @@ -109,7 +109,7 @@ private[spark] class HDFSRepo[S <: Schema](val uri: URI, val hconf: HadoopConf) 
/** Class to represent repository of models that will eventually be hosted outside * the repo. */ -private[spark] class DefaultModelRepo(val baseURL: URL) extends Repository[ModelSchema] { +private[ml] class DefaultModelRepo(val baseURL: URL) extends Repository[ModelSchema] { val connectTimeout = 15000 val readTimeout = 5000 @@ -157,7 +157,7 @@ private[spark] class DefaultModelRepo(val baseURL: URL) extends Repository[Model throw new IllegalAccessError("Do not have the credentials to write a file to the remote repository") } -private[spark] abstract class Client { +private[ml] abstract class Client { var quiet = false private def log(s: String): Unit = { @@ -184,8 +184,8 @@ private[spark] abstract class Client { } -private[spark] object ModelDownloader { - private[spark] val DefaultURL = new URL("https://mmlspark.azureedge.net/datasets/CNTKModels/") +private[ml] object ModelDownloader { + private[ml] val DefaultURL = new URL("https://mmlspark.azureedge.net/datasets/CNTKModels/") } /** Class for downloading models from a server to Local or HDFS diff --git a/deep-learning/src/main/scala/com/microsoft/ml/spark/downloader/Schema.scala b/deep-learning/src/main/scala/com/microsoft/azure/synapse/ml/downloader/Schema.scala similarity index 95% rename from deep-learning/src/main/scala/com/microsoft/ml/spark/downloader/Schema.scala rename to deep-learning/src/main/scala/com/microsoft/azure/synapse/ml/downloader/Schema.scala index d5c209618c..82d58525b9 100644 --- a/deep-learning/src/main/scala/com/microsoft/ml/spark/downloader/Schema.scala +++ b/deep-learning/src/main/scala/com/microsoft/azure/synapse/ml/downloader/Schema.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.downloader +package com.microsoft.azure.synapse.ml.downloader import java.io.InputStream import java.net.URI @@ -10,7 +10,7 @@ import spray.json._ import scala.collection.JavaConverters._ -private[spark] object NamingConventions { +private[ml] object NamingConventions { def canonicalModelFilename(name: String, dataset: String): String = s"${name}_$dataset.model" @@ -72,7 +72,7 @@ case class ModelSchema(name: String, } -private[spark] object SchemaJsonProtocol extends DefaultJsonProtocol { +private[ml] object SchemaJsonProtocol extends DefaultJsonProtocol { implicit object URIJsonFormat extends JsonFormat[URI] { def write(u: URI): JsValue = { diff --git a/deep-learning/src/main/scala/com/microsoft/ml/spark/onnx/ONNXModel.scala b/deep-learning/src/main/scala/com/microsoft/azure/synapse/ml/onnx/ONNXModel.scala similarity index 97% rename from deep-learning/src/main/scala/com/microsoft/ml/spark/onnx/ONNXModel.scala rename to deep-learning/src/main/scala/com/microsoft/azure/synapse/ml/onnx/ONNXModel.scala index 24d14cb7bd..79f8f1a411 100644 --- a/deep-learning/src/main/scala/com/microsoft/ml/spark/onnx/ONNXModel.scala +++ b/deep-learning/src/main/scala/com/microsoft/azure/synapse/ml/onnx/ONNXModel.scala @@ -1,20 +1,20 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.onnx +package com.microsoft.azure.synapse.ml.onnx import ai.onnxruntime.OrtException.OrtErrorCode import ai.onnxruntime.OrtSession.SessionOptions import ai.onnxruntime.OrtSession.SessionOptions.OptLevel import ai.onnxruntime._ import breeze.linalg.{argmax, softmax, DenseVector => BDV} -import com.microsoft.ml.spark.HasFeedFetchDicts -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.env.StreamUtilities.using -import com.microsoft.ml.spark.core.schema.DatasetExtensions -import com.microsoft.ml.spark.core.utils.BreezeUtils._ -import com.microsoft.ml.spark.logging.BasicLogging -import com.microsoft.ml.spark.stages._ +import com.microsoft.azure.synapse.ml.HasFeedFetchDicts +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.env.StreamUtilities.using +import com.microsoft.azure.synapse.ml.core.schema.DatasetExtensions +import com.microsoft.azure.synapse.ml.core.utils.BreezeUtils._ +import com.microsoft.azure.synapse.ml.logging.BasicLogging +import com.microsoft.azure.synapse.ml.stages.{FixedMiniBatchTransformer, FlattenBatch, HasMiniBatcher} import org.apache.spark.broadcast.Broadcast import org.apache.spark.injections.UDFUtils import org.apache.spark.internal.Logging @@ -490,7 +490,7 @@ class ONNXModel(override val uid: String) ) // The cache call is a workaround for GH issue 1075: - // https://github.com/Azure/mmlspark/issues/1075 + // https://github.com/Microsoft/SynapseML/issues/1075 val batchedDF = getMiniBatcher.transform(dataset) val batchedCache = if (batchedDF.isStreaming) batchedDF else batchedDF.cache().unpersist() val (coerced, feedDict) = coerceBatchedDf(batchedCache) @@ -508,7 +508,7 @@ class ONNXModel(override val uid: String) } // The cache call is a workaround for GH issue 1075: - // https://github.com/Azure/mmlspark/issues/1075 + // https://github.com/Microsoft/SynapseML/issues/1075 val outputCache = if (outputDf.isStreaming) outputDf else 
outputDf.cache().unpersist() val flattenedDF = new FlattenBatch().transform(outputCache) diff --git a/deep-learning/src/test/scala/com/microsoft/ml/spark/cntk/CNTKBindingSuite.scala b/deep-learning/src/test/scala/com/microsoft/azure/synapse/ml/cntk/CNTKBindingSuite.scala similarity index 94% rename from deep-learning/src/test/scala/com/microsoft/ml/spark/cntk/CNTKBindingSuite.scala rename to deep-learning/src/test/scala/com/microsoft/azure/synapse/ml/cntk/CNTKBindingSuite.scala index f848394536..9e2267e062 100644 --- a/deep-learning/src/test/scala/com/microsoft/ml/spark/cntk/CNTKBindingSuite.scala +++ b/deep-learning/src/test/scala/com/microsoft/azure/synapse/ml/cntk/CNTKBindingSuite.scala @@ -1,15 +1,14 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.cntk +package com.microsoft.azure.synapse.ml.cntk import java.io._ - import com.microsoft.CNTK.CNTKExtensions._ import com.microsoft.CNTK.{SerializableFunction => CNTKFunction, _} -import com.microsoft.ml.spark.core.env.StreamUtilities._ -import com.microsoft.ml.spark.core.test.base.LinuxOnly -import com.microsoft.ml.spark.image.ImageTestUtils +import com.microsoft.azure.synapse.ml.core.test.base.LinuxOnly +import com.microsoft.azure.synapse.ml.image.ImageTestUtils +import com.microsoft.azure.synapse.ml.core.env.StreamUtilities._ import org.apache.commons.io.IOUtils import scala.collection.JavaConverters._ diff --git a/deep-learning/src/test/scala/com/microsoft/ml/spark/cntk/CNTKModelSuite.scala b/deep-learning/src/test/scala/com/microsoft/azure/synapse/ml/cntk/CNTKModelSuite.scala similarity index 94% rename from deep-learning/src/test/scala/com/microsoft/ml/spark/cntk/CNTKModelSuite.scala rename to deep-learning/src/test/scala/com/microsoft/azure/synapse/ml/cntk/CNTKModelSuite.scala index f4398e691b..cea9fbf33d 100644 --- 
a/deep-learning/src/test/scala/com/microsoft/ml/spark/cntk/CNTKModelSuite.scala +++ b/deep-learning/src/test/scala/com/microsoft/azure/synapse/ml/cntk/CNTKModelSuite.scala @@ -1,16 +1,13 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.cntk +package com.microsoft.azure.synapse.ml.cntk -import java.io.File -import java.net.URL - -import com.microsoft.ml.spark.build.BuildInfo -import com.microsoft.ml.spark.core.env.FileUtilities -import com.microsoft.ml.spark.core.test.base.LinuxOnly -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} -import com.microsoft.ml.spark.image.ImageTestUtils +import com.microsoft.azure.synapse.ml.core.env.FileUtilities +import com.microsoft.azure.synapse.ml.core.test.base.LinuxOnly +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.image.ImageTestUtils +import com.microsoft.azure.synapse.ml.build.BuildInfo import org.apache.commons.io.FileUtils import org.apache.spark.ml.classification.LogisticRegression import org.apache.spark.ml.linalg.DenseVector @@ -20,6 +17,8 @@ import org.apache.spark.ml.{Pipeline, PipelineModel} import org.apache.spark.sql.Row import org.apache.spark.sql.types._ +import java.io.File +import java.net.URL import scala.util.Random class CNTKModelSuite extends LinuxOnly with ImageTestUtils with TransformerFuzzing[CNTKModel] { diff --git a/deep-learning/src/test/scala/com/microsoft/ml/spark/cntk/ImageFeaturizerSuite.scala b/deep-learning/src/test/scala/com/microsoft/azure/synapse/ml/cntk/ImageFeaturizerSuite.scala similarity index 89% rename from deep-learning/src/test/scala/com/microsoft/ml/spark/cntk/ImageFeaturizerSuite.scala rename to deep-learning/src/test/scala/com/microsoft/azure/synapse/ml/cntk/ImageFeaturizerSuite.scala index 1f9ca641c5..193a0a2bfb 100644 --- 
a/deep-learning/src/test/scala/com/microsoft/ml/spark/cntk/ImageFeaturizerSuite.scala +++ b/deep-learning/src/test/scala/com/microsoft/azure/synapse/ml/cntk/ImageFeaturizerSuite.scala @@ -1,20 +1,17 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.cntk - -import java.io.File -import java.net.URI - -import com.microsoft.ml.spark.Secrets -import com.microsoft.ml.spark.build.BuildInfo -import com.microsoft.ml.spark.core.env.FileUtilities -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} -import com.microsoft.ml.spark.downloader.{ModelDownloader, ModelSchema} -import com.microsoft.ml.spark.image.ImageTestUtils -import com.microsoft.ml.spark.io.IOImplicits._ -import com.microsoft.ml.spark.io.powerbi.PowerBIWriter -import com.microsoft.ml.spark.io.split1.FileReaderUtils +package com.microsoft.azure.synapse.ml.cntk + +import com.microsoft.azure.synapse.ml.Secrets +import com.microsoft.azure.synapse.ml.core.env.FileUtilities +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.downloader.{ModelDownloader, ModelSchema} +import com.microsoft.azure.synapse.ml.image.ImageTestUtils +import com.microsoft.azure.synapse.ml.io.IOImplicits._ +import com.microsoft.azure.synapse.ml.io.powerbi.PowerBIWriter +import com.microsoft.azure.synapse.ml.io.split1.FileReaderUtils +import com.microsoft.azure.synapse.ml.build.BuildInfo import org.apache.spark.injections.UDFUtils import org.apache.spark.ml.linalg.DenseVector import org.apache.spark.ml.util.MLReadable @@ -22,6 +19,9 @@ import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions.col import org.apache.spark.sql.types.StringType +import java.io.File +import java.net.URI + trait TrainedCNTKModelUtils extends ImageTestUtils with FileReaderUtils { lazy val modelDir = new File(filesRoot, 
"CNTKModel") diff --git a/deep-learning/src/test/scala/com/microsoft/ml/spark/downloader/DownloaderSuite.scala b/deep-learning/src/test/scala/com/microsoft/azure/synapse/ml/downloader/DownloaderSuite.scala similarity index 93% rename from deep-learning/src/test/scala/com/microsoft/ml/spark/downloader/DownloaderSuite.scala rename to deep-learning/src/test/scala/com/microsoft/azure/synapse/ml/downloader/DownloaderSuite.scala index f67e4b82d5..9bb74dd0ab 100644 --- a/deep-learning/src/test/scala/com/microsoft/ml/spark/downloader/DownloaderSuite.scala +++ b/deep-learning/src/test/scala/com/microsoft/azure/synapse/ml/downloader/DownloaderSuite.scala @@ -1,13 +1,13 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.downloader +package com.microsoft.azure.synapse.ml.downloader + +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.utils.FaultToleranceUtils import java.io.File import java.nio.file.Files - -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.utils.FaultToleranceUtils import org.apache.commons.io.FileUtils import scala.collection.JavaConverters._ diff --git a/deep-learning/src/test/scala/com/microsoft/ml/spark/explainers/ImageExplainersSuite.scala b/deep-learning/src/test/scala/com/microsoft/azure/synapse/ml/explainers/ImageExplainersSuite.scala similarity index 78% rename from deep-learning/src/test/scala/com/microsoft/ml/spark/explainers/ImageExplainersSuite.scala rename to deep-learning/src/test/scala/com/microsoft/azure/synapse/ml/explainers/ImageExplainersSuite.scala index 94e7d9aeb5..12ad1c1089 100644 --- a/deep-learning/src/test/scala/com/microsoft/ml/spark/explainers/ImageExplainersSuite.scala +++ b/deep-learning/src/test/scala/com/microsoft/azure/synapse/ml/explainers/ImageExplainersSuite.scala @@ -1,17 +1,17 @@ // Copyright (C) 
Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.explainers +package com.microsoft.azure.synapse.ml.explainers -import java.io.File -import java.net.URL - -import com.microsoft.ml.spark.cntk.{ImageFeaturizer, TrainedCNTKModelUtils} -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.io.IOImplicits._ +import com.microsoft.azure.synapse.ml.cntk.{ImageFeaturizer, TrainedCNTKModelUtils} +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.io.IOImplicits._ import org.apache.commons.io.FileUtils import org.apache.spark.sql.DataFrame +import java.io.File +import java.net.URL + abstract class ImageExplainersSuite extends TestBase with TrainedCNTKModelUtils { lazy val greyhoundImageLocation: String = { val loc = "/tmp/greyhound.jpg" diff --git a/deep-learning/src/test/scala/com/microsoft/ml/spark/explainers/split2/ImageLIMEExplainerSuite.scala b/deep-learning/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split2/ImageLIMEExplainerSuite.scala similarity index 75% rename from deep-learning/src/test/scala/com/microsoft/ml/spark/explainers/split2/ImageLIMEExplainerSuite.scala rename to deep-learning/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split2/ImageLIMEExplainerSuite.scala index 3869438f71..1ad99df538 100644 --- a/deep-learning/src/test/scala/com/microsoft/ml/spark/explainers/split2/ImageLIMEExplainerSuite.scala +++ b/deep-learning/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split2/ImageLIMEExplainerSuite.scala @@ -1,13 +1,14 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.explainers.split2 +package com.microsoft.azure.synapse.ml.explainers.split2 -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} -import com.microsoft.ml.spark.core.utils.BreezeUtils._ -import com.microsoft.ml.spark.explainers.{ImageExplainersSuite, ImageFormat, ImageLIME, LocalExplainer} -import com.microsoft.ml.spark.io.IOImplicits._ -import com.microsoft.ml.spark.lime.SuperpixelData +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.core.utils.BreezeUtils._ +import com.microsoft.azure.synapse.ml.explainers.LocalExplainer.LIME +import com.microsoft.azure.synapse.ml.explainers.{ImageExplainersSuite, ImageFormat, ImageLIME} +import com.microsoft.azure.synapse.ml.io.IOImplicits._ +import com.microsoft.azure.synapse.ml.lime.SuperpixelData import org.apache.spark.ml.linalg.Vector import org.apache.spark.ml.util.MLReadable import org.apache.spark.sql.functions.col @@ -17,7 +18,7 @@ class ImageLIMEExplainerSuite extends ImageExplainersSuite import spark.implicits._ - val lime: ImageLIME = LocalExplainer.LIME.image + val lime: ImageLIME = LIME.image .setModel(resNetTransformer) .setTargetCol(resNetTransformer.getOutputCol) .setSamplingFraction(0.7) @@ -40,8 +41,8 @@ class ImageLIMEExplainerSuite extends ImageExplainersSuite val spStates = weights.head.toBreeze.map(_ >= 0.2).toArray // Uncomment the following lines lines to view the censoredImage image. 
- // import com.microsoft.ml.spark.io.image.ImageUtils - // import com.microsoft.ml.spark.lime.{Superpixel, SuperpixelData} + // import com.microsoft.azure.synapse.ml.io.image.ImageUtils + // import com.microsoft.azure.synapse.ml.lime.{Superpixel, SuperpixelData} // import java.awt.image.BufferedImage // val originalImage = ImageUtils.toBufferedImage(image.data, image.width, image.height, image.nChannels) // val censoredImage: BufferedImage = Superpixel.maskImage(originalImage, superpixels, spStates) @@ -62,8 +63,8 @@ class ImageLIMEExplainerSuite extends ImageExplainersSuite val spStates = weights.head.toBreeze.map(_ >= 0.2).toArray // Uncomment the following lines lines to view the censoredImage image. - // import com.microsoft.ml.spark.io.image.ImageUtils - // import com.microsoft.ml.spark.lime.{Superpixel, SuperpixelData} + // import com.microsoft.azure.synapse.ml.io.image.ImageUtils + // import com.microsoft.azure.synapse.ml.lime.{Superpixel, SuperpixelData} // import java.awt.image.BufferedImage // val originalImage = ImageUtils.toBufferedImage(image.data, image.width, image.height, image.nChannels) // val censoredImage: BufferedImage = Superpixel.maskImage(originalImage, superpixels, spStates) diff --git a/deep-learning/src/test/scala/com/microsoft/ml/spark/explainers/split3/ImageSHAPExplainerSuite.scala b/deep-learning/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split3/ImageSHAPExplainerSuite.scala similarity index 72% rename from deep-learning/src/test/scala/com/microsoft/ml/spark/explainers/split3/ImageSHAPExplainerSuite.scala rename to deep-learning/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split3/ImageSHAPExplainerSuite.scala index 9c2bee7055..fa37af76f7 100644 --- a/deep-learning/src/test/scala/com/microsoft/ml/spark/explainers/split3/ImageSHAPExplainerSuite.scala +++ b/deep-learning/src/test/scala/com/microsoft/azure/synapse/ml/explainers/split3/ImageSHAPExplainerSuite.scala @@ -1,12 +1,13 @@ // Copyright (C) Microsoft 
Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.explainers.split3 +package com.microsoft.azure.synapse.ml.explainers.split3 -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} -import com.microsoft.ml.spark.core.utils.BreezeUtils._ -import com.microsoft.ml.spark.explainers.{ImageExplainersSuite, ImageFormat, ImageSHAP, LocalExplainer} -import com.microsoft.ml.spark.lime.SuperpixelData +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.core.utils.BreezeUtils._ +import com.microsoft.azure.synapse.ml.explainers.LocalExplainer.KernelSHAP +import com.microsoft.azure.synapse.ml.explainers.{ImageExplainersSuite, ImageFormat, ImageSHAP} +import com.microsoft.azure.synapse.ml.lime.SuperpixelData import org.apache.spark.ml.linalg.Vector import org.apache.spark.ml.util.MLReadable @@ -15,7 +16,7 @@ class ImageSHAPExplainerSuite extends ImageExplainersSuite import spark.implicits._ - val shap: ImageSHAP = LocalExplainer.KernelSHAP.image + val shap: ImageSHAP = KernelSHAP.image .setModel(resNetTransformer) .setTargetCol(resNetTransformer.getOutputCol) .setTargetClasses(Array(172)) @@ -39,8 +40,8 @@ class ImageSHAPExplainerSuite extends ImageExplainersSuite val spStates = shapValues.head.toBreeze(1 to -1).map(_ >= 0.05).toArray // Uncomment the following lines lines to view the censoredImage image. 
- // import com.microsoft.ml.spark.io.image.ImageUtils - // import com.microsoft.ml.spark.lime.Superpixel + // import com.microsoft.azure.synapse.ml.io.image.ImageUtils + // import com.microsoft.azure.synapse.ml.lime.Superpixel // import java.awt.image.BufferedImage // val originalImage = ImageUtils.toBufferedImage(image.data, image.width, image.height, image.nChannels) // val censoredImage: BufferedImage = Superpixel.maskImage(originalImage, superpixels, spStates) diff --git a/deep-learning/src/test/scala/com/microsoft/ml/spark/lime/ImageLIMESuite.scala b/deep-learning/src/test/scala/com/microsoft/azure/synapse/ml/lime/ImageLIMESuite.scala similarity index 86% rename from deep-learning/src/test/scala/com/microsoft/ml/spark/lime/ImageLIMESuite.scala rename to deep-learning/src/test/scala/com/microsoft/azure/synapse/ml/lime/ImageLIMESuite.scala index f46249137a..109cb84b3b 100644 --- a/deep-learning/src/test/scala/com/microsoft/ml/spark/lime/ImageLIMESuite.scala +++ b/deep-learning/src/test/scala/com/microsoft/azure/synapse/ml/lime/ImageLIMESuite.scala @@ -1,19 +1,15 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.lime - -import java.awt.image.BufferedImage -import java.io.File -import java.net.URL - -import com.microsoft.ml.spark.cntk.{ImageFeaturizer, TrainedCNTKModelUtils} -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} -import com.microsoft.ml.spark.io.IOImplicits._ -import com.microsoft.ml.spark.io.image.ImageUtils -import com.microsoft.ml.spark.io.split1.FileReaderUtils -import com.microsoft.ml.spark.stages.UDFTransformer -import com.microsoft.ml.spark.stages.udfs.get_value_udf +package com.microsoft.azure.synapse.ml.lime + +import com.microsoft.azure.synapse.ml.cntk.{ImageFeaturizer, TrainedCNTKModelUtils} +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.io.IOImplicits._ +import com.microsoft.azure.synapse.ml.io.image.ImageUtils +import com.microsoft.azure.synapse.ml.io.split1.FileReaderUtils +import com.microsoft.azure.synapse.ml.stages.UDFTransformer +import com.microsoft.azure.synapse.ml.stages.udfs.get_value_udf import org.apache.commons.io.FileUtils import org.apache.spark.ml.linalg.DenseVector import org.apache.spark.ml.param.DataFrameEquality @@ -22,7 +18,11 @@ import org.apache.spark.ml.{NamespaceInjections, PipelineModel} import org.apache.spark.sql.functions.col import org.apache.spark.sql.{DataFrame, Row} -@deprecated("Please use 'com.microsoft.ml.spark.explainers.ImageLIME'.", since="1.0.0-RC3") +import java.awt.image.BufferedImage +import java.io.File +import java.net.URL + +@deprecated("Please use 'com.microsoft.azure.synapse.ml.explainers.ImageLIME'.", since="1.0.0-RC3") class ImageLIMESuite extends TransformerFuzzing[ImageLIME] with DataFrameEquality with TrainedCNTKModelUtils with FileReaderUtils { diff --git a/deep-learning/src/test/scala/com/microsoft/ml/spark/onnx/ONNXModelSuite.scala b/deep-learning/src/test/scala/com/microsoft/azure/synapse/ml/onnx/ONNXModelSuite.scala similarity index 95% rename from 
deep-learning/src/test/scala/com/microsoft/ml/spark/onnx/ONNXModelSuite.scala rename to deep-learning/src/test/scala/com/microsoft/azure/synapse/ml/onnx/ONNXModelSuite.scala index dd4b5ece47..dd5b47011f 100644 --- a/deep-learning/src/test/scala/com/microsoft/ml/spark/onnx/ONNXModelSuite.scala +++ b/deep-learning/src/test/scala/com/microsoft/azure/synapse/ml/onnx/ONNXModelSuite.scala @@ -1,16 +1,16 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.onnx +package com.microsoft.azure.synapse.ml.onnx import breeze.linalg.{argmax, argtopk} -import com.microsoft.ml.spark.build.BuildInfo -import com.microsoft.ml.spark.core.env.FileUtilities -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} -import com.microsoft.ml.spark.core.utils.BreezeUtils._ -import com.microsoft.ml.spark.io.IOImplicits._ -import com.microsoft.ml.spark.opencv.ImageTransformer +import com.microsoft.azure.synapse.ml.core.env.FileUtilities +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.core.utils.BreezeUtils._ +import com.microsoft.azure.synapse.ml.io.IOImplicits._ +import com.microsoft.azure.synapse.ml.opencv.ImageTransformer +import com.microsoft.azure.synapse.ml.build.BuildInfo import org.apache.commons.io.FileUtils import org.apache.spark.injections.UDFUtils import org.apache.spark.ml.image.ImageSchema diff --git a/docs/R-setup.md b/docs/R-setup.md index 9f69ba3bfa..da328df537 100644 --- a/docs/R-setup.md +++ b/docs/R-setup.md @@ -1,4 +1,4 @@ -# R setup and example for MMLSpark +# R setup and example for SynapseML ## Installation @@ -6,11 +6,11 @@ [devtools](https://github.com/hadley/devtools) installed on your machine. 
-To install the current MMLSpark package for R use: +To install the current SynapseML package for R use: ```R ... -devtools::install_url("https://mmlspark.azureedge.net/rrr/mmlspark-1.0.0-rc4.zip") +devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-1.0.0-rc4.zip") ... ``` @@ -23,7 +23,7 @@ It will take some time to install all dependencies. Then, run: library(sparklyr) library(dplyr) config <- spark_config() -config$sparklyr.defaultPackages <- "com.microsoft.ml.spark:mmlspark:1.0.0-rc4" +config$sparklyr.defaultPackages <- "com.microsoft.azure:synapseml:1.0.0-rc4" sc <- spark_connect(master = "local", config = config) ... ``` @@ -34,7 +34,7 @@ We will then need to import the R wrappers: ```R ... -library(mmlspark) +library(synapseml) ... ``` @@ -83,7 +83,7 @@ and then use spark_connect with method = "databricks": ```R install.packages("devtools") -devtools::install_url("https://mmlspark.azureedge.net/rrr/mmlspark-1.0.0-rc4.zip") +devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-1.0.0-rc4.zip") library(sparklyr) library(dplyr) sc <- spark_connect(method = "databricks") @@ -96,17 +96,17 @@ ml_train_regressor(faithful_df, labelCol="eruptions", unfit_model) Our R bindings are built as part of the [normal build process](developer-readme.md). To get a quick build, start at the root -of the mmlspark directory, and: +of the synapseml directory, and: ```bash ./runme TESTS=NONE -unzip ./BuildArtifacts/packages/R/mmlspark-0.0.zip +unzip ./BuildArtifacts/packages/R/synapseml-0.0.zip ``` You can then run R in a terminal and install the above files directly: ```R ... -devtools::install_local("./BuildArtifacts/packages/R/mmlspark") +devtools::install_local("./BuildArtifacts/packages/R/synapseml") ... 
``` diff --git a/docs/cogsvc.md b/docs/cogsvc.md index c72828c462..b283ac609a 100644 --- a/docs/cogsvc.md +++ b/docs/cogsvc.md @@ -12,40 +12,40 @@ Azure Cognitive Services on Spark enable working with Azure’s Intelligent Serv To see an example of Cognitive Services on Spark in action, take a look at [this sample notebook](../notebooks/CognitiveServices%20-%20Celebrity%20Quote%20Analysis.ipynb). ## Cognitive Services on Apache Spark™ -Currently, the following Cognitive Services are available on Apache Spark™ through MMLSpark: +Currently, the following Cognitive Services are available on Apache Spark™ through SynapseML: ### Vision [**Computer Vision**](https://azure.microsoft.com/en-us/services/cognitive-services/computer-vision/) -- Describe: provides description of an image in human readable language ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/DescribeImage.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.DescribeImage)) -- Analyze (color, image type, face, adult/racy content): analyzes visual features of an image ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/AnalyzeImage.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.AnalyzeImage)) -- OCR: reads text from an image ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/OCR.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.OCR)) -- Recognize Text: reads text from an image ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/RecognizeText.html), 
[Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.RecognizeText)) -- Thumbnail: generates a thumbnail of user-specified size from the image ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/GenerateThumbnails.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.GenerateThumbnails)) -- Recognize domain-specific content: recognizes domain-specific content (celebrity, landmark) ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/RecognizeDomainSpecificContent.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.RecognizeDomainSpecificContent)) -- Tag: identifies list of words that are relevant to the in0put image ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/TagImage.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.TagImage)) +- Describe: provides description of an image in human readable language ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/DescribeImage.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.DescribeImage)) +- Analyze (color, image type, face, adult/racy content): analyzes visual features of an image ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/AnalyzeImage.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.AnalyzeImage)) +- OCR: reads text from an image 
([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/OCR.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.OCR)) +- Recognize Text: reads text from an image ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/RecognizeText.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.RecognizeText)) +- Thumbnail: generates a thumbnail of user-specified size from the image ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/GenerateThumbnails.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.GenerateThumbnails)) +- Recognize domain-specific content: recognizes domain-specific content (celebrity, landmark) ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/RecognizeDomainSpecificContent.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.RecognizeDomainSpecificContent)) +- Tag: identifies list of words that are relevant to the input image ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/TagImage.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.TagImage)) [**Face**](https://azure.microsoft.com/en-us/services/cognitive-services/face/) -- Detect: detects human faces in an image ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/DetectFace.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.DetectFace)) -- 
Verify: verifies whether two faces belong to a same person, or a face belongs to a person ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/VerifyFaces.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.VerifyFaces)) -- Identify: finds the closest matches of the specific query person face from a person group ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/IdentifyFaces.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.IdentifyFaces)) -- Find similar: finds similar faces to the query face in a face list ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/FindSimilarFace.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.FindSimilarFace)) -- Group: divides a group of faces into disjoint groups based on similarity ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/GroupFaces.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.GroupFaces)) +- Detect: detects human faces in an image ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/DetectFace.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.DetectFace)) +- Verify: verifies whether two faces belong to a same person, or a face belongs to a person ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/VerifyFaces.html), 
[Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.VerifyFaces)) +- Identify: finds the closest matches of the specific query person face from a person group ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/IdentifyFaces.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.IdentifyFaces)) +- Find similar: finds similar faces to the query face in a face list ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/FindSimilarFace.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.FindSimilarFace)) +- Group: divides a group of faces into disjoint groups based on similarity ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/GroupFaces.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.GroupFaces)) ### Speech [**Speech Services**](https://azure.microsoft.com/en-us/services/cognitive-services/speech-services/) -- Speech-to-text: transcribes audio streams ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/SpeechToText.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.SpeechToText)) +- Speech-to-text: transcribes audio streams ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/SpeechToText.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.SpeechToText)) ### Language [**Text 
Analytics**](https://azure.microsoft.com/en-us/services/cognitive-services/text-analytics/) -- Language detection: detects language of the input text ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/LanguageDetector.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.LanguageDetector)) -- Key phrase extraction: identifies the key talking points in the input text ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/KeyPhraseExtractor.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.KeyPhraseExtractor)) -- Named entity recognition: identifies known entities and general named entities in the input text ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/NER.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.NER)) -- Sentiment analysis: returns a score betwee 0 and 1 indicating the sentiment in the input text ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/TextSentiment.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.TextSentiment)) +- Language detection: detects language of the input text ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/LanguageDetector.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.LanguageDetector)) +- Key phrase extraction: identifies the key talking points in the input text ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/KeyPhraseExtractor.html), 
[Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.KeyPhraseExtractor)) +- Named entity recognition: identifies known entities and general named entities in the input text ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/NER.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.NER)) +- Sentiment analysis: returns a score between 0 and 1 indicating the sentiment in the input text ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/TextSentiment.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.TextSentiment)) ### Decision [**Anomaly Detector**](https://azure.microsoft.com/en-us/services/cognitive-services/anomaly-detector/) -- Anomaly status of latest point: generates a model using preceding points and determines whether the latest point is anomalous ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/DetectLastAnomaly.html),
[Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.DetectLastAnomaly)) +- Find anomalies: generates a model using an entire series and finds anomalies in the series ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/DetectAnomalies.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.DetectAnomalies)) ### Web Search -- [Bing Image search](https://azure.microsoft.com/en-us/services/cognitive-services/bing-image-search-api/) ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/BingImageSearch.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.BingImageSearch)) +- [Bing Image search](https://azure.microsoft.com/en-us/services/cognitive-services/bing-image-search-api/) ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/BingImageSearch.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.BingImageSearch)) diff --git a/docs/cyber.md b/docs/cyber.md index c6c86db8e5..8ea7c3c15c 100644 --- a/docs/cyber.md +++ b/docs/cyber.md @@ -1,59 +1,59 @@ -# access anomalies: [complement_access.py](../src/main/python/mmlspark/cyber/anomaly/complement_access.py) +# access anomalies: [complement_access.py](../src/main/python/synapse/ml/cyber/anomaly/complement_access.py) - [Talk at European Spark Conference 2019](https://databricks.com/session_eu19/cybermltoolkit-anomaly-detection-as-a-scalable-generic-service-over-apache-spark) - [(Internal Microsoft) Talk at MLADS November 2018](https://resnet.microsoft.com/video/42395) - [(Internal Microsoft) Talk at MLADS June 2019](https://resnet.microsoft.com/video/43618) -1. 
[ComplementAccessTransformer](../src/main/python/mmlspark/cyber/anomaly/complement_access.py) +1. [ComplementAccessTransformer](../src/main/python/synapse/ml/cyber/anomaly/complement_access.py) is a SparkML [Transformer](https://spark.apache.org/docs/2.2.0/api/java/index.html?org/apache/spark/ml/Transformer.html). Given a dataframe it returns a new dataframe containing new access patterns sampled from the set of possible access patterns which did not occur in the given dataframe (i.e., it returns a sample from the complement set). -# feature engineering: [indexers.py](../src/main/python/mmlspark/cyber/feature/indexers.py) -1. [IdIndexer](../src/main/python/mmlspark/cyber/feature/indexers.py) +# feature engineering: [indexers.py](../src/main/python/synapse/ml/cyber/feature/indexers.py) +1. [IdIndexer](../src/main/python/synapse/ml/cyber/feature/indexers.py) is a SparkML [Estimator](https://spark.apache.org/docs/2.2.0/api/java/index.html?org/apache/spark/ml/Estimator.html). Given a dataframe, it creates an IdIndexerModel (described next) for categorical features which contains the information to map each partition and column seen in the given dataframe to an id. for each partition or one consecutive range for all partition and column values. -2. [IdIndexerModel](../src/main/python/mmlspark/cyber/feature/indexers.py) +2. [IdIndexerModel](../src/main/python/synapse/ml/cyber/feature/indexers.py) is a SparkML [Transformer](https://spark.apache.org/docs/2.2.0/api/java/index.html?org/apache/spark/ml/Transformer.html). Given a dataframe maps each partition and column field to a consecutive integer id. Partitions or column values not encountered in the estimator are mapped to 0. The model can operate in two modes, either create consecutive integer id independently -3. [MultiIndexer](../src/main/python/mmlspark/cyber/feature/indexers.py) +3. 
[MultiIndexer](../src/main/python/synapse/ml/cyber/feature/indexers.py) is a SparkML [Estimator](https://spark.apache.org/docs/2.2.0/api/java/index.html?org/apache/spark/ml/Estimator.html). Uses multiple IdIndexer to generate a MultiIndexerModel (described next) for categorical features which contains multiple IdIndexers for multiple partitions and columns. -4. [MultiIndexerModel](../src/main/python/mmlspark/cyber/feature/indexers.py) +4. [MultiIndexerModel](../src/main/python/synapse/ml/cyber/feature/indexers.py) is a SparkML [Transformer](https://spark.apache.org/docs/2.2.0/api/java/index.html?org/apache/spark/ml/Transformer.html). Given a dataframe maps each partition and column field to a consecutive integer id. Partitions or column values not encountered in the estimator are mapped to 0. The model can operate in two modes, either create consecutive integer id independently -# feature engineering: [scalers.py](../src/main/python/mmlspark/cyber/feature/scalers.py) -1. [StandardScalarScaler](../src/main/python/mmlspark/cyber/feature/scalers.py) +# feature engineering: [scalers.py](../src/main/python/synapse/ml/cyber/feature/scalers.py) +1. [StandardScalarScaler](../src/main/python/synapse/ml/cyber/feature/scalers.py) is a SparkML [Estimator](https://spark.apache.org/docs/2.2.0/api/java/index.html?org/apache/spark/ml/Estimator.html). Given a dataframe it creates a StandardScalarScalerModel (described next) which normalizes any given dataframe according to the mean and standard deviation calculated on the dataframe given to the estimator. -2. [StandardScalarScalerModel](../src/main/python/mmlspark/cyber/feature/scalers.py) +2. [StandardScalarScalerModel](../src/main/python/synapse/ml/cyber/feature/scalers.py) is a SparkML [Transformer](https://spark.apache.org/docs/2.2.0/api/java/index.html?org/apache/spark/ml/Transformer.html). 
Given a dataframe with a value column x, the transformer changes its value as follows: x'=(x-mean)/stddev, i.e., if the transformer is given the same dataframe the estimator was given then the value column will have a mean of 0.0 and a standard deviation of 1.0. -3. [MinMaxScalarScaler](../src/main/python/mmlspark/cyber/feature/scalers.py) +3. [MinMaxScalarScaler](../src/main/python/synapse/ml/cyber/feature/scalers.py) is a SparkML [Estimator](https://spark.apache.org/docs/2.2.0/api/java/index.html?org/apache/spark/ml/Estimator.html). Given a dataframe it creates a MinMaxScalarScalerModel (described next) which normalizes any given dataframe according to the minimum and maximum values calculated on the dataframe given to the estimator. -4. [MinMaxScalarScalerModel](../src/main/python/mmlspark/cyber/feature/scalers.py) +4. [MinMaxScalarScalerModel](../src/main/python/synapse/ml/cyber/feature/scalers.py) is a SparkML [Transformer](https://spark.apache.org/docs/2.2.0/api/java/index.html?org/apache/spark/ml/Transformer.html). Given a dataframe with a value column x, the transformer changes its value such that if the transformer is given the same dataframe the estimator was given then the value column will be scaled linearly to the given ranges. -# access anomalies: [collaborative_filtering.py](../src/main/python/mmlspark/cyber/anomaly/collaborative_filtering.py) -1. [AccessAnomaly](../src/main/python/mmlspark/cyber/anomaly/collaborative_filtering.py) +# access anomalies: [collaborative_filtering.py](../src/main/python/synapse/ml/cyber/anomaly/collaborative_filtering.py) +1. [AccessAnomaly](../src/main/python/synapse/ml/cyber/anomaly/collaborative_filtering.py) is a SparkML [Estimator](https://spark.apache.org/docs/2.2.0/api/java/index.html?org/apache/spark/ml/Estimator.html). 
Given a dataframe the estimator generates an AccessAnomalyModel (next described) which can detect anomalous access of users to resources in such a way where the access @@ -61,14 +61,14 @@ a resource from Finance. This is based solely on access patterns rather than explicit features. Internally this is based on Collaborative Filtering as implemented in Spark using Matrix Factorization with Alternating Least Squares. -2. [AccessAnomalyModel](../src/main/python/mmlspark/cyber/anomaly/collaborative_filtering.py) +2. [AccessAnomalyModel](../src/main/python/synapse/ml/cyber/anomaly/collaborative_filtering.py) is a SparkML [Transformer](https://spark.apache.org/docs/2.2.0/api/java/index.html?org/apache/spark/ml/Transformer.html). Given a dataframe the transformer computes a value between (-inf, inf) where positive values indicate an anomaly score. Anomaly scores are computed to have a mean of 1.0 and a standard deviation of 1.0 over the original dataframe given to the estimator. -3. [ModelNormalizeTransformer](../src/main/python/mmlspark/cyber/anomaly/collaborative_filtering.py) +3. [ModelNormalizeTransformer](../src/main/python/synapse/ml/cyber/anomaly/collaborative_filtering.py) is a SparkML [Transformer](https://spark.apache.org/docs/2.2.0/api/java/index.html?org/apache/spark/ml/Transformer.html). This is a transformer used internally by AccessAnomaly to normalize a model to generate anomaly scores with mean 0.0 and standard deviation of 1.0. -4. [AccessAnomalyConfig](../src/main/python/mmlspark/cyber/anomaly/collaborative_filtering.py) +4. [AccessAnomalyConfig](../src/main/python/synapse/ml/cyber/anomaly/collaborative_filtering.py) contains the default values for AccessAnomaly. diff --git a/docs/datasets.md b/docs/datasets.md index 595ae3d409..a64e43943a 100644 --- a/docs/datasets.md +++ b/docs/datasets.md @@ -22,7 +22,7 @@ This dataset can be used to predict sentiment of book reviews. 
The dataset is a tab-separated file with 2 columns (`rating`, `text`) and 10000 rows. The `rating` column has integer values of 1, 2, 4 or 5, and the `text` column contains free-form text strings in English language. You can use -`mmlspark.TextFeaturizer` to convert the text into feature vectors for machine +`synapse.ml.TextFeaturizer` to convert the text into feature vectors for machine learning models ([see example](../notebooks/201%20-%20Amazon%20Book%20Reviews%20-%20TextFeaturizer.ipynb)). diff --git a/docs/developer-readme.md b/docs/developer-readme.md index 85dad08c62..4ba7738bbf 100644 --- a/docs/developer-readme.md +++ b/docs/developer-readme.md @@ -1,24 +1,24 @@ -# MMLSpark Development Setup +# SynapseML Development Setup 1) [Install SBT](https://www.scala-sbt.org/1.x/docs/Setup.html) - Make sure to download JDK 11 if you don't have it 3) Fork the repository on github - This is required if you would like to make PRs. If you choose the fork option, replace the clone link below with that of your fork. 
2) Git Clone your fork, or the repo directly - - `git clone https://github.com/Azure/mmlspark.git` - - NOTE: If you would like to contribute to mmlspark regularly, add your fork as a remote named ``origin`` and Azure/mmlspark as a remote named ``upstream`` + - `git clone https://github.com/Microsoft/SynapseML.git` + - NOTE: If you would like to contribute to synapseml regularly, add your fork as a remote named ``origin`` and Microsoft/SynapseML as a remote named ``upstream`` 3) Run sbt to compile and grab datasets - - `cd mmlspark` + - `cd synapseml` - `sbt setup` 4) [Install IntelliJ](https://www.jetbrains.com/idea/download) - Install Scala plugins during install 5) Configure IntelliJ - - **OPEN** the mmlspark directory + - **OPEN** the synapseml directory - If the project does not automatically import,click on `build.sbt` and import project # Publishing and Using Build Secrets -To use secrets in the build you must be part of the mmlspark keyvault +To use secrets in the build you must be part of the synapseml keyvault and azure subscription. If you are MSFT internal would like to be added please reach out `mmlspark-support@microsoft.com` @@ -32,7 +32,7 @@ Compiles the main, test, and integration test classes respectively ### `test` -Runs all mmlspark tests +Runs all synapseml tests ### `scalastyle` @@ -46,12 +46,12 @@ Generates documentation for scala sources ### `createCondaEnv` -Creates a conda environment `mmlspark` from `environment.yaml` if it does not already exist. +Creates a conda environment `synapseml` from `environment.yaml` if it does not already exist. This env is used for python testing. **Activate this env before using python build commands.** ### `cleanCondaEnv` -Removes `mmlspark` conda env +Removes `synapseml` conda env ### `packagePython` @@ -85,7 +85,7 @@ Packages the library into a jar ### `publishBlob` -Publishes Jar to mmlspark's azure blob based maven repo. (Requires Keys) +Publishes Jar to synapseml's azure blob based maven repo.
(Requires Keys) ### `publishLocal` @@ -93,7 +93,7 @@ Publishes library to local maven repo ### `publishDocs` -Publishes scala and python doc to mmlspark's build azure storage account. (Requires Keys) +Publishes scala and python doc to synapseml's build azure storage account. (Requires Keys) ### `publishSigned` diff --git a/docs/docker.md b/docs/docker.md index 653a0bd96f..57a8a9fb46 100644 --- a/docs/docker.md +++ b/docs/docker.md @@ -1,9 +1,9 @@ -# Using the MMLSpark Docker Image +# Using the SynapseML Docker Image ## Quickstart: install and run the Docker image Begin by installing [Docker for your OS][docker-products]. Then, to get the -MMLSpark image and run it, open a terminal (powershell/cmd on Windows) and run +SynapseML image and run it, open a terminal (powershell/cmd on Windows) and run ```bash docker run -it -p 8888:8888 mcr.microsoft.com/mmlspark/release @@ -19,7 +19,7 @@ docker run -it -p 8888:8888 -e ACCEPT_EULA=y mcr.microsoft.com/mmlspark/release You can now select one of the sample notebooks and run it, or create your own. -> Note: The EULA is needed only for running the MMLSpark Docker image; the +> Note: The EULA is needed only for running the SynapseML Docker image; the > source code is released under the MIT license (see the [LICENSE](../LICENSE) > file). @@ -32,7 +32,7 @@ version) that you want to use — specifying it explicitly looks like Leaving `mcr.microsoft.com/mmlspark/release` by itself has an implicit `latest` tag, so it is equivalent to `mcr.microsoft.com/mmlspark/release:latest`. The `latest` tag is identical to the -most recent stable MMLSpark version. You can see the current [mmlspark tags] on +most recent stable SynapseML version. You can see the current [synapseml tags] on our [Docker Hub repository][mmlspark-dockerhub].
## A more practical example @@ -91,7 +91,7 @@ Let's break this command and go over the meaning of each part: - **`-p 127.0.0.1:80:8888`** - The Jupyter server in the MMLSpark image listens to port 8888 — but that is + The Jupyter server in the SynapseML image listens to port 8888 — but that is normally isolated from the actual network. Previously, we have used `-p 8888:8888` to say that we want to map port 8888 (LHS) on our actual machine to port 8888 (RHS) in the container. One problem with this is that `8888` might @@ -120,7 +120,7 @@ Let's break this command and go over the meaning of each part: the drive you want to use in the [Docker settings]. The path on the right side is used inside the container and it is therefore a - Linux path. The MMLSpark image runs Jupyter in the `/notebooks` directory, so + Linux path. The SynapseML image runs Jupyter in the `/notebooks` directory, so it is a good place for making your files available conveniently. This flag can be used more than once, to make several directories available in @@ -150,16 +150,16 @@ additional flag that is useful for this is `--name` that gives a convenient label to the running image: ```bash -docker run -d --name my-mmlspark ...flags... mcr.microsoft.com/mmlspark/release +docker run -d --name my-synapseml ...flags... mcr.microsoft.com/mmlspark/release ``` When running in this mode, you can use -- `docker stop my-mmlspark`: to stop the image +- `docker stop my-synapseml`: to stop the image -- `docker start my-mmlspark`: to start it again +- `docker start my-synapseml`: to start it again -- `docker logs my-mmlspark`: to see the log output it produced +- `docker logs my-synapseml`: to see the log output it produced ## Running other commands in an active container @@ -169,7 +169,7 @@ and the command to run. 
For example, with a detached container started as above, you can use ```bash -docker exec -it my-mmlspark bash +docker exec -it my-synapseml bash ``` to start a shell in the context of the server, roughly equivalent to starting a @@ -178,8 +178,8 @@ terminal in the Jupyter interface. Other common Linux executables can be used, e.g., ```bash -docker exec -it my-mmlspark top -docker exec my-mmlspark ps auxw +docker exec -it my-synapseml top +docker exec my-synapseml ps auxw ``` (Note that `ps` does not need `-it` since it's not an interactive command.) @@ -192,7 +192,7 @@ also get the container IDs and use those instead of names. Remember that the command given to `docker exec` is running in the context of the running container: you can only run executables that exist in the container, and the run is subject to the same resource restrictions (FS/network access, -etc) as the container. The MMLSpark image is based on a rather basic Ubuntu +etc) as the container. The SynapseML image is based on a rather basic Ubuntu installation (the `ubuntu` image from Docker Hub). ## Running other Spark executables @@ -208,7 +208,7 @@ docker run -it ...flags... mcr.microsoft.com/mmlspark/release bash This starts the container with bash instead of Jupyter. This environment has all of the Spark executables available in its `$PATH`. You still need to -specify the command-line flags that load the MMLSpark package, but there are +specify the command-line flags that load the SynapseML package, but there are convenient environment variables that hold the required package and repositories to use: @@ -219,9 +219,9 @@ pyspark --repositories "$MML_M2REPOS" --packages "$MML_PACKAGE" --master "local[ Many of the above listed flags are useful in this case too, such as mapping work directories with `-v`. 
-## Updating the MMLSpark image +## Updating the SynapseML image -New releases of MMLSpark are published from time to time, and they include a new +New releases of SynapseML are published from time to time, and they include a new Docker image. As an image consumer, you will normlly not notice such new versions: `docker run` will download an image if a copy of it does not exist locally, but if it does, then `docker run` will blindly run it, _without_ @@ -265,7 +265,7 @@ their tags, and `docker rmi :` to remove the unwanted ones. ## A note about security Executing code in a Docker container can be unsafe if the running user is -`root`. For this reason, the MMLSpark image uses a proper username instead. If +`root`. For this reason, the SynapseML image uses a proper username instead. If you still want to run as root (e.g., if you want to `apt install` an additional ubuntu package), then you should use `--user root`. This can be useful when combined with `docker exec` too do such administrative work while the image @@ -274,7 +274,7 @@ continues to run as usual. ## Further reading This text covers very briefly some of the useful things that you can do with the -MMLSpark Docker image (and other images in general). You can find much more +SynapseML Docker image (and other images in general). You can find much more documentation [online](https://docs.docker.com/). 
[docker-products]: http://www.docker.com/products/overview/ diff --git a/docs/http.md b/docs/http.md index a28c2991bd..d38a64af60 100644 --- a/docs/http.md +++ b/docs/http.md @@ -24,8 +24,8 @@ ### Send a JSON POST request ```python -import mmlspark -from mmlspark.io.http import SimpleHTTPTransformer, JSONOutputParser +import synapse.ml +from synapse.ml.io.http import SimpleHTTPTransformer, JSONOutputParser from pyspark.sql.types import StructType, StringType df = sc.parallelize([(x, ) for x in range(100)]).toDF("data") @@ -78,7 +78,7 @@ SimpleHTTPTransformer() \ HTTP on Spark encapsulates the entire HTTP protocol within Spark's datatypes. Uses can create flexible web clients that communicate with a -wide variety of endpoints. MMLSpark provides methods to convert between +wide variety of endpoints. SynapseML provides methods to convert between Scala case classes, Spark types, and Apache HTTP Core types. A common representation makes it easy to work with HTTP on spark from Scala, Python, or any other spark compatible language. This common @@ -95,7 +95,7 @@ requests. A schematic representation can be seen below: ## Schema This library adds Spark types that faithfully represent the HTTP -protocol for requests and responses. MMLSpark provides several ways to +protocol for requests and responses. SynapseML provides several ways to create these objects from the apache HTTP core library, and from a set of case classes. diff --git a/docs/lightgbm.md b/docs/lightgbm.md index 87d5c366f2..0541d23cbf 100644 --- a/docs/lightgbm.md +++ b/docs/lightgbm.md @@ -30,7 +30,7 @@ many other machine learning tasks. 
LightGBM is part of Microsoft's In PySpark, you can run the `LightGBMClassifier` via: ```python -from mmlspark.lightgbm import LightGBMClassifier +from synapse.ml.lightgbm import LightGBMClassifier model = LightGBMClassifier(learningRate=0.3, numIterations=100, numLeaves=31).fit(train) @@ -40,7 +40,7 @@ Similarly, you can run the `LightGBMRegressor` by setting the `application` and `alpha` parameters: ```python -from mmlspark.lightgbm import LightGBMRegressor +from synapse.ml.lightgbm import LightGBMRegressor model = LightGBMRegressor(application='quantile', alpha=0.3, learningRate=0.3, diff --git a/docs/mmlspark-serving.md b/docs/mmlspark-serving.md index 9471644805..4c30bfd30f 100644 --- a/docs/mmlspark-serving.md +++ b/docs/mmlspark-serving.md @@ -31,7 +31,7 @@ ### Spark Serving Hello World ```python -import mmlspark +import synapse.ml import pyspark from pyspark.sql.functions import udf, col, length from pyspark.sql.types import * @@ -56,8 +56,8 @@ server = replies\ ### Deploying a Deep Network with the CNTKModel ```python -import mmlspark -from mmlspark.cntk import CNTKModel +import synapse.ml +from synapse.ml.cntk import CNTKModel import pyspark from pyspark.sql.functions import udf, col @@ -102,7 +102,7 @@ You can deploy head node load balancing with the `HTTPSource` and distributes work across partitions, then collects response data back to the head node. All HTTP requests are kept and replied to on the head node. In both python and Scala these classes can be access by using -`spark.readStream.server()` after importing MMLSpark. +`spark.readStream.server()` after importing SynapseML. This mode allows for more complex windowing, repartitioning, and SQL operations. This option is also idea for rapid setup and testing, as it doesn't require any additional load balancing or network @@ -118,7 +118,7 @@ You can configure Spark Serving for a custom load balancer using the `DistributedHTTPSource` and `DistributedHTTPSink` classes. 
This mode spins up servers on each executor JVM. In both python and Scala these classes can be access by using -`spark.readStream.distributedServer()` after importing MMLSpark. +`spark.readStream.distributedServer()` after importing SynapseML. Each server will feed its executor's partitions in parallel. This mode is key for high throughput and low latency as data does not need to be transferred to and from the @@ -126,7 +126,7 @@ head node. This deployment results in several web services that all route into the same spark computation. You can deploy an external load balancer to unify the executor's services under a single IP address. Support for automatic load balancer management and deployment is -targeted for the next release of MMLSpark. A diagram of this +targeted for the next release of SynapseML. A diagram of this configuration can be seen below:

diff --git a/docs/onnx.md b/docs/onnx.md index 6c06e3a8d8..57c1cd06ff 100644 --- a/docs/onnx.md +++ b/docs/onnx.md @@ -9,11 +9,11 @@ description: Learn how to use the ONNX model transformer to run inference for an [ONNX](https://onnx.ai/) is an open format to represent both deep learning and traditional machine learning models. With ONNX, AI developers can more easily move models between state-of-the-art tools and choose the combination that is best for them. -MMLSpark now includes a Spark transformer to bring an trained ONNX model to Apache Spark, so you can run inference on your data with Spark's large-scale data processing power. +SynapseML now includes a Spark transformer to bring a trained ONNX model to Apache Spark, so you can run inference on your data with Spark's large-scale data processing power. ## Usage -1. Create a `com.microsoft.ml.spark.onnx.ONNXModel` object and use `setModelLocation` or `setModelPayload` to load the ONNX model. +1. Create a `com.microsoft.azure.synapse.ml.onnx.ONNXModel` object and use `setModelLocation` or `setModelPayload` to load the ONNX model. For example: @@ -27,7 +27,7 @@ MMLSpark now includes a Spark transformer to bring an trained ONNX model to Apac 3. Set the parameters properly to the `ONNXModel` object. - The `com.microsoft.ml.spark.onnx.ONNXModel` class provides a set of parameters to control the behavior of the inference. + The `com.microsoft.azure.synapse.ml.onnx.ONNXModel` class provides a set of parameters to control the behavior of the inference.
| Parameter | Description | Default Value | |:------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------| diff --git a/docs/vagrant.md b/docs/vagrant.md index 52763a9040..4c8a1ac823 100644 --- a/docs/vagrant.md +++ b/docs/vagrant.md @@ -1,4 +1,4 @@ -# Using the MMLSpark Vagrant Image +# Using the SynapseML Vagrant Image ## Install Vagrant and Dependencies @@ -10,7 +10,7 @@ You will need to a few dependencies before we get started. These instructions ar ## Build the Vagrant Image -Start powershell as Administrator and go to the `mmlspark/tools/vagrant` directory and run +Start powershell as Administrator and go to the `synapseml/tools/vagrant` directory and run vagrant up diff --git a/docs/vw.md b/docs/vw.md index ddb0b7f692..2a3a36ace3 100644 --- a/docs/vw.md +++ b/docs/vw.md @@ -42,7 +42,7 @@ Furthermore it includes many advances in the area of reinforcement learning (e.g In PySpark, you can run the `VowpalWabbitClassifier` via: ```python -from mmlspark.vw import VowpalWabbitClassifier +from synapse.ml.vw import VowpalWabbitClassifier model = (VowpalWabbitClassifier(numPasses=5, args="--holdout_off --loss_function logistic") .fit(train)) ``` @@ -50,7 +50,7 @@ model = (VowpalWabbitClassifier(numPasses=5, args="--holdout_off --loss_function Similarly, you can run the `VowpalWabbitRegressor`: ```python -from mmlspark.vw import VowpalWabbitRegressor +from synapse.ml.vw import VowpalWabbitRegressor model = (VowpalWabbitRegressor(args="--holdout_off --loss_function quantile -q :: -l 0.1") 
.fit(train)) ``` @@ -62,7 +62,7 @@ example](../notebooks/Vowpal%20Wabbit%20-%20Quantile%20Regression%20for%20Drug%2 ### Hyper-parameter tuning -- Common parameters can also be set through methods enabling the use of SparkMLs ParamGridBuilder and CrossValidator ([example](https://github.com/Azure/mmlspark/blob/master/src/test/scala/com/microsoft/ml/spark/vw/VerifyVowpalWabbitClassifier.scala#L29)). Note if +- Common parameters can also be set through methods enabling the use of SparkMLs ParamGridBuilder and CrossValidator ([example](https://github.com/Microsoft/SynapseML/blob/master/src/test/scala/com/microsoft/ml/spark/vw/VerifyVowpalWabbitClassifier.scala#L29)). Note if the same parameters are passed through _args_ property (e.g. args="-l 0.2" and setLearningRate(0.5)) the _args_ value will take precedence. parameter @@ -87,7 +87,7 @@ To fluently embed VW into the Spark ML eco system the following adaptions were m - Pro: best composability with existing Spark ML components. - Cons: due to type restrictions (e.g. feature indicies are Java integers) the maximum model size is limited to 30-bits. One could overcome this restriction by adding additional type support to the classifier/regressor to directly operate on input features (e.g. strings, int, double, ...). -- VW hashing is separated out into the [VowpalWabbitFeaturizer](https://github.com/Azure/mmlspark/blob/master/src/test/scala/com/microsoft/ml/spark/vw/VerifyVowpalWabbitFeaturizer.scala#L34) transformer. It supports mapping Spark Dataframe schema into VWs namespaces and sparse +- VW hashing is separated out into the [VowpalWabbitFeaturizer](https://github.com/Microsoft/SynapseML/blob/master/src/test/scala/com/microsoft/ml/spark/vw/VerifyVowpalWabbitFeaturizer.scala#L34) transformer. It supports mapping Spark Dataframe schema into VWs namespaces and sparse features. - Pro: featurization can be scaled to many nodes, scale independent of distributed learning. 
- Pro: hashed features can be cached and efficiently re-used when performing hyper-parameter sweeps. diff --git a/docs/your-first-model.md b/docs/your-first-model.md index 6180791084..978ea95629 100644 --- a/docs/your-first-model.md +++ b/docs/your-first-model.md @@ -6,7 +6,7 @@ We also learn how to use Jupyter notebooks for developing and running the model. ### Prerequisites -- You have installed the MMLSpark package, either as a Docker image or on a +- You have installed the SynapseML package, either as a Docker image or on a Spark cluster, - You have basic knowledge of Python language, - You have basic understanding of machine learning concepts: training, testing, @@ -14,7 +14,7 @@ We also learn how to use Jupyter notebooks for developing and running the model. ### Working with Jupyter Notebooks -Once you have the MMLSpark package installed, open Jupyter notebooks folder in +Once you have the SynapseML package installed, open Jupyter notebooks folder in your web browser - Local Docker: `http://localhost:8888` @@ -69,12 +69,12 @@ train, test = data.randomSplit([0.75, 0.25], seed=123) ### Training a Model -To train the classifier model, we use the `mmlspark.TrainClassifier` class. It +To train the classifier model, we use the `synapse.ml.train.TrainClassifier` class. It takes in training data and a base SparkML classifier, maps the data into the format expected by the base classifier algorithm, and fits a model. ```python -from mmlspark.train import TrainClassifier +from synapse.ml.train import TrainClassifier from pyspark.ml.classification import LogisticRegression model = TrainClassifier(model=LogisticRegression(), labelCol=" income").fit(train) ``` @@ -85,22 +85,22 @@ binarizes the label column.
### Scoring and Evaluating the Model Finally, let's score the model against the test set, and use -`mmlspark.ComputeModelStatistics` class to compute metrics — accuracy, AUC, +`synapse.ml.train.ComputeModelStatistics` class to compute metrics — accuracy, AUC, precision, recall — from the scored data. ```python -from mmlspark.train import ComputeModelStatistics +from synapse.ml.train import ComputeModelStatistics prediction = model.transform(test) metrics = ComputeModelStatistics().transform(prediction) metrics.select('accuracy').show() ``` -And that's it: you've build your first machine learning model using the MMLSpark -package. For help on mmlspark classes and methods, you can use Python's help() +And that's it: you've built your first machine learning model using the SynapseML +package. For help on synapseml classes and methods, you can use Python's help() function, for example ```python -help(mmlspark.train.TrainClassifier) +help(synapse.ml.train.TrainClassifier) ``` Next, view our other tutorials to learn how to diff --git a/environment.yaml b/environment.yaml index 50602326d6..f71705581b 100644 --- a/environment.yaml +++ b/environment.yaml @@ -1,4 +1,4 @@ -name: mmlspark +name: synapseml channels: - conda-forge - default @@ -11,6 +11,7 @@ dependencies: - r-dplyr - r-sparklyr - r-devtools + - r-roxygen2 - pip: - wheel - sphinx diff --git a/lightgbm/src/main/python/mmlspark/lightgbm/LightGBMClassificationModel.py b/lightgbm/src/main/python/synapse/ml/lightgbm/LightGBMClassificationModel.py similarity index 74% rename from lightgbm/src/main/python/mmlspark/lightgbm/LightGBMClassificationModel.py rename to lightgbm/src/main/python/synapse/ml/lightgbm/LightGBMClassificationModel.py index f174c519e8..ae3f186678 100644 --- a/lightgbm/src/main/python/mmlspark/lightgbm/LightGBMClassificationModel.py +++ b/lightgbm/src/main/python/synapse/ml/lightgbm/LightGBMClassificationModel.py @@ -1,12 +1,12 @@ # Copyright (C) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE in project root for information. -from mmlspark.lightgbm._LightGBMClassificationModel import _LightGBMClassificationModel -from mmlspark.lightgbm.mixin import LightGBMModelMixin +from synapse.ml.lightgbm._LightGBMClassificationModel import _LightGBMClassificationModel +from synapse.ml.lightgbm.mixin import LightGBMModelMixin from pyspark import SparkContext from pyspark.ml.common import inherit_doc from pyspark.ml.wrapper import JavaParams -from mmlspark.core.serialize.java_params_patch import * +from synapse.ml.core.serialize.java_params_patch import * @inherit_doc @@ -17,7 +17,7 @@ def loadNativeModelFromFile(filename): Load the model from a native LightGBM text file. """ ctx = SparkContext._active_spark_context - loader = ctx._jvm.com.microsoft.ml.spark.lightgbm.LightGBMClassificationModel + loader = ctx._jvm.com.microsoft.azure.synapse.ml.lightgbm.LightGBMClassificationModel java_model = loader.loadNativeModelFromFile(filename) return JavaParams._from_java(java_model) @@ -27,7 +27,7 @@ def loadNativeModelFromString(model): Load the model from a native LightGBM model string. 
""" ctx = SparkContext._active_spark_context - loader = ctx._jvm.com.microsoft.ml.spark.lightgbm.LightGBMClassificationModel + loader = ctx._jvm.com.microsoft.azure.synapse.ml.lightgbm.LightGBMClassificationModel java_model = loader.loadNativeModelFromString(model) return JavaParams._from_java(java_model) diff --git a/lightgbm/src/main/python/mmlspark/lightgbm/LightGBMRankerModel.py b/lightgbm/src/main/python/synapse/ml/lightgbm/LightGBMRankerModel.py similarity index 76% rename from lightgbm/src/main/python/mmlspark/lightgbm/LightGBMRankerModel.py rename to lightgbm/src/main/python/synapse/ml/lightgbm/LightGBMRankerModel.py index cfa514d370..8a108a76db 100644 --- a/lightgbm/src/main/python/mmlspark/lightgbm/LightGBMRankerModel.py +++ b/lightgbm/src/main/python/synapse/ml/lightgbm/LightGBMRankerModel.py @@ -1,12 +1,12 @@ # Copyright (C) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. See LICENSE in project root for information. -from mmlspark.lightgbm._LightGBMRankerModel import _LightGBMRankerModel -from mmlspark.lightgbm.mixin import LightGBMModelMixin +from synapse.ml.lightgbm._LightGBMRankerModel import _LightGBMRankerModel +from synapse.ml.lightgbm.mixin import LightGBMModelMixin from pyspark import SparkContext from pyspark.ml.common import inherit_doc from pyspark.ml.wrapper import JavaParams -from mmlspark.core.serialize.java_params_patch import * +from synapse.ml.core.serialize.java_params_patch import * @inherit_doc @@ -17,7 +17,7 @@ def loadNativeModelFromFile(filename): Load the model from a native LightGBM text file. """ ctx = SparkContext._active_spark_context - loader = ctx._jvm.com.microsoft.ml.spark.lightgbm.LightGBMRankerModel + loader = ctx._jvm.com.microsoft.azure.synapse.ml.lightgbm.LightGBMRankerModel java_model = loader.loadNativeModelFromFile(filename) return JavaParams._from_java(java_model) @@ -27,7 +27,7 @@ def loadNativeModelFromString(model): Load the model from a native LightGBM model string. 
""" ctx = SparkContext._active_spark_context - loader = ctx._jvm.com.microsoft.ml.spark.lightgbm.LightGBMRankerModel + loader = ctx._jvm.com.microsoft.azure.synapse.ml.lightgbm.LightGBMRankerModel java_model = loader.loadNativeModelFromString(model) return JavaParams._from_java(java_model) diff --git a/lightgbm/src/main/python/mmlspark/lightgbm/LightGBMRegressionModel.py b/lightgbm/src/main/python/synapse/ml/lightgbm/LightGBMRegressionModel.py similarity index 73% rename from lightgbm/src/main/python/mmlspark/lightgbm/LightGBMRegressionModel.py rename to lightgbm/src/main/python/synapse/ml/lightgbm/LightGBMRegressionModel.py index dd9fbf43b7..fad265372f 100644 --- a/lightgbm/src/main/python/mmlspark/lightgbm/LightGBMRegressionModel.py +++ b/lightgbm/src/main/python/synapse/ml/lightgbm/LightGBMRegressionModel.py @@ -1,12 +1,12 @@ # Copyright (C) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. See LICENSE in project root for information. -from mmlspark.lightgbm._LightGBMRegressionModel import _LightGBMRegressionModel -from mmlspark.lightgbm.mixin import LightGBMModelMixin +from synapse.ml.lightgbm._LightGBMRegressionModel import _LightGBMRegressionModel +from synapse.ml.lightgbm.mixin import LightGBMModelMixin from pyspark import SparkContext from pyspark.ml.common import inherit_doc from pyspark.ml.wrapper import JavaParams -from mmlspark.core.serialize.java_params_patch import * +from synapse.ml.core.serialize.java_params_patch import * @inherit_doc class LightGBMRegressionModel(LightGBMModelMixin, _LightGBMRegressionModel): @@ -16,7 +16,7 @@ def loadNativeModelFromFile(filename): Load the model from a native LightGBM text file. 
""" ctx = SparkContext._active_spark_context - loader = ctx._jvm.com.microsoft.ml.spark.lightgbm.LightGBMRegressionModel + loader = ctx._jvm.com.microsoft.azure.synapse.ml.lightgbm.LightGBMRegressionModel java_model = loader.loadNativeModelFromFile(filename) return JavaParams._from_java(java_model) @@ -26,6 +26,6 @@ def loadNativeModelFromString(model): Load the model from a native LightGBM model string. """ ctx = SparkContext._active_spark_context - loader = ctx._jvm.com.microsoft.ml.spark.lightgbm.LightGBMRegressionModel + loader = ctx._jvm.com.microsoft.azure.synapse.ml.lightgbm.LightGBMRegressionModel java_model = loader.loadNativeModelFromString(model) return JavaParams._from_java(java_model) diff --git a/lightgbm/src/main/python/mmlspark/lightgbm/__init__.py b/lightgbm/src/main/python/synapse/ml/lightgbm/__init__.py similarity index 100% rename from lightgbm/src/main/python/mmlspark/lightgbm/__init__.py rename to lightgbm/src/main/python/synapse/ml/lightgbm/__init__.py diff --git a/lightgbm/src/main/python/mmlspark/lightgbm/mixin.py b/lightgbm/src/main/python/synapse/ml/lightgbm/mixin.py similarity index 97% rename from lightgbm/src/main/python/mmlspark/lightgbm/mixin.py rename to lightgbm/src/main/python/synapse/ml/lightgbm/mixin.py index 248d807646..c2b58e1a1f 100644 --- a/lightgbm/src/main/python/mmlspark/lightgbm/mixin.py +++ b/lightgbm/src/main/python/synapse/ml/lightgbm/mixin.py @@ -3,7 +3,7 @@ from pyspark.ml.linalg import SparseVector, DenseVector from pyspark.ml.common import inherit_doc -from mmlspark.core.serialize.java_params_patch import * +from synapse.ml.core.serialize.java_params_patch import * @inherit_doc class LightGBMModelMixin: diff --git a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMBase.scala b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMBase.scala similarity index 96% rename from lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMBase.scala rename to 
lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMBase.scala index c916952491..f815c3f7e0 100644 --- a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMBase.scala +++ b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMBase.scala @@ -1,19 +1,19 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.lightgbm - -import com.microsoft.ml.lightgbm.{SWIGTYPE_p_int, lightgbmlib} -import com.microsoft.ml.spark.core.utils.ClusterUtil -import com.microsoft.ml.spark.io.http.SharedSingleton -import com.microsoft.ml.spark.lightgbm.ConnectionState.Finished -import com.microsoft.ml.spark.lightgbm.LightGBMUtils.{closeConnections, handleConnection, sendDataToExecutors} -import com.microsoft.ml.spark.lightgbm.TaskTrainingMethods.{isWorkerEnabled, prepareDatasets} -import com.microsoft.ml.spark.lightgbm.TrainUtils._ -import com.microsoft.ml.spark.lightgbm.booster.LightGBMBooster -import com.microsoft.ml.spark.lightgbm.dataset._ -import com.microsoft.ml.spark.lightgbm.params._ -import com.microsoft.ml.spark.logging.BasicLogging +package com.microsoft.azure.synapse.ml.lightgbm + +import com.microsoft.azure.synapse.ml.core.utils.ClusterUtil +import com.microsoft.azure.synapse.ml.io.http.SharedSingleton +import com.microsoft.azure.synapse.ml.lightgbm.ConnectionState.Finished +import com.microsoft.azure.synapse.ml.lightgbm.LightGBMUtils.{closeConnections, handleConnection, sendDataToExecutors} +import com.microsoft.azure.synapse.ml.lightgbm.TaskTrainingMethods.{isWorkerEnabled, prepareDatasets} +import com.microsoft.azure.synapse.ml.lightgbm.TrainUtils._ +import com.microsoft.azure.synapse.ml.lightgbm.booster.LightGBMBooster +import com.microsoft.azure.synapse.ml.lightgbm.dataset.{BaseAggregatedColumns, DatasetUtils, LightGBMDataset} +import com.microsoft.azure.synapse.ml.lightgbm.params._ +import 
com.microsoft.azure.synapse.ml.logging.BasicLogging +import com.microsoft.ml.lightgbm.lightgbmlib import org.apache.spark.broadcast.Broadcast import org.apache.spark.ml.attribute._ import org.apache.spark.ml.linalg.SQLDataTypes.VectorType diff --git a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMClassifier.scala b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMClassifier.scala similarity index 96% rename from lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMClassifier.scala rename to lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMClassifier.scala index 312d624642..350c61dc7d 100644 --- a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMClassifier.scala +++ b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMClassifier.scala @@ -1,17 +1,17 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.lightgbm +package com.microsoft.azure.synapse.ml.lightgbm -import com.microsoft.ml.spark.lightgbm.booster.LightGBMBooster -import com.microsoft.ml.spark.lightgbm.params.{ClassifierTrainParams, LightGBMModelParams, - LightGBMPredictionParams, TrainParams} -import com.microsoft.ml.spark.logging.BasicLogging -import org.apache.spark.ml.{ComplexParamsReadable, ComplexParamsWritable} -import org.apache.spark.ml.param._ -import org.apache.spark.ml.util._ +import com.microsoft.azure.synapse.ml.lightgbm.booster.LightGBMBooster +import com.microsoft.azure.synapse.ml.lightgbm.params.{ + ClassifierTrainParams, LightGBMModelParams, LightGBMPredictionParams, TrainParams} +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.ml.classification.{ProbabilisticClassificationModel, ProbabilisticClassifier} import org.apache.spark.ml.linalg.{Vector, Vectors} +import org.apache.spark.ml.param._ +import org.apache.spark.ml.util._ +import org.apache.spark.ml.{ComplexParamsReadable, ComplexParamsWritable} import org.apache.spark.sql._ import org.apache.spark.sql.functions.{col, udf} diff --git a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMClassifier.txt b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMClassifier.txt similarity index 100% rename from lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMClassifier.txt rename to lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMClassifier.txt diff --git a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMConstants.scala b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMConstants.scala similarity index 97% rename from lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMConstants.scala rename to lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMConstants.scala index 97989dcccb..48fefced40 100644 --- 
a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMConstants.scala +++ b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMConstants.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.lightgbm +package com.microsoft.azure.synapse.ml.lightgbm object LightGBMConstants { /** The port for LightGBM Driver server, 0 (random) diff --git a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMDelegate.scala b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMDelegate.scala similarity index 93% rename from lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMDelegate.scala rename to lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMDelegate.scala index 956de2c348..ca954effc6 100644 --- a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMDelegate.scala +++ b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMDelegate.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.lightgbm +package com.microsoft.azure.synapse.ml.lightgbm -import com.microsoft.ml.spark.lightgbm.booster.LightGBMBooster -import com.microsoft.ml.spark.lightgbm.params.TrainParams +import com.microsoft.azure.synapse.ml.lightgbm.booster.LightGBMBooster +import com.microsoft.azure.synapse.ml.lightgbm.params.TrainParams import org.apache.spark.sql.Dataset import org.apache.spark.sql.types.StructType import org.slf4j.Logger diff --git a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMModelMethods.scala b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMModelMethods.scala similarity index 97% rename from lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMModelMethods.scala rename to lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMModelMethods.scala index d65cfb8b77..47e9979695 100644 --- a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMModelMethods.scala +++ b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMModelMethods.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.lightgbm +package com.microsoft.azure.synapse.ml.lightgbm -import com.microsoft.ml.spark.lightgbm.params.LightGBMModelParams +import com.microsoft.azure.synapse.ml.lightgbm.params.LightGBMModelParams import org.apache.spark.internal.Logging import org.apache.spark.ml.linalg.{Vector, Vectors} diff --git a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMRanker.scala b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMRanker.scala similarity index 95% rename from lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMRanker.scala rename to lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMRanker.scala index 037bec175f..a7fdb1986f 100644 --- a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMRanker.scala +++ b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMRanker.scala @@ -1,19 +1,18 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.lightgbm +package com.microsoft.azure.synapse.ml.lightgbm -import com.microsoft.ml.spark.lightgbm.booster.LightGBMBooster -import com.microsoft.ml.spark.lightgbm.params.{LightGBMModelParams, LightGBMPredictionParams, - RankerTrainParams, TrainParams} -import com.microsoft.ml.spark.logging.BasicLogging -import org.apache.spark.ml.{ComplexParamsReadable, ComplexParamsWritable, Ranker, RankerModel} +import com.microsoft.azure.synapse.ml.lightgbm.booster.LightGBMBooster +import com.microsoft.azure.synapse.ml.lightgbm.params.{ + LightGBMModelParams, LightGBMPredictionParams, RankerTrainParams, TrainParams} +import com.microsoft.azure.synapse.ml.logging.BasicLogging +import org.apache.spark.ml.linalg.Vector import org.apache.spark.ml.param._ import org.apache.spark.ml.util._ -import org.apache.spark.ml.linalg.Vector +import org.apache.spark.ml.{ComplexParamsReadable, ComplexParamsWritable, Ranker, RankerModel} import org.apache.spark.sql._ import org.apache.spark.sql.functions.{col, udf} -import org.apache.spark.sql.types.DataType object LightGBMRanker extends DefaultParamsReadable[LightGBMRanker] diff --git a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMRanker.txt b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMRanker.txt similarity index 100% rename from lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMRanker.txt rename to lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMRanker.txt diff --git a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMRegressor.scala b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMRegressor.scala similarity index 95% rename from lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMRegressor.scala rename to lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMRegressor.scala index c0333e3e29..b99f689ab0 100644 --- 
a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMRegressor.scala +++ b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMRegressor.scala @@ -1,17 +1,17 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.lightgbm +package com.microsoft.azure.synapse.ml.lightgbm -import com.microsoft.ml.spark.lightgbm.booster.LightGBMBooster -import com.microsoft.ml.spark.lightgbm.params.{LightGBMModelParams, LightGBMPredictionParams, - RegressorTrainParams, TrainParams} -import com.microsoft.ml.spark.logging.BasicLogging -import org.apache.spark.ml.{BaseRegressor, ComplexParamsReadable, ComplexParamsWritable} -import org.apache.spark.ml.param._ -import org.apache.spark.ml.util._ +import com.microsoft.azure.synapse.ml.lightgbm.booster.LightGBMBooster +import com.microsoft.azure.synapse.ml.lightgbm.params.{ + LightGBMModelParams, LightGBMPredictionParams, RegressorTrainParams, TrainParams} +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.ml.linalg.Vector +import org.apache.spark.ml.param._ import org.apache.spark.ml.regression.RegressionModel +import org.apache.spark.ml.util._ +import org.apache.spark.ml.{BaseRegressor, ComplexParamsReadable, ComplexParamsWritable} import org.apache.spark.sql._ import org.apache.spark.sql.functions.{col, udf} diff --git a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMRegressor.txt b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMRegressor.txt similarity index 100% rename from lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMRegressor.txt rename to lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMRegressor.txt diff --git a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMUtils.scala 
b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMUtils.scala similarity index 95% rename from lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMUtils.scala rename to lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMUtils.scala index 58d5e2eebb..f26c02533c 100644 --- a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMUtils.scala +++ b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMUtils.scala @@ -1,13 +1,12 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.lightgbm +package com.microsoft.azure.synapse.ml.lightgbm +import com.microsoft.azure.synapse.ml.core.env.NativeLoader +import com.microsoft.azure.synapse.ml.featurize.{Featurize, FeaturizeUtilities} +import com.microsoft.azure.synapse.ml.lightgbm.ConnectionState._ import com.microsoft.ml.lightgbm._ -import com.microsoft.ml.spark.core.env.NativeLoader -import com.microsoft.ml.spark.featurize.{Featurize, FeaturizeUtilities} -import com.microsoft.ml.spark.lightgbm.ConnectionState._ -import com.microsoft.ml.spark.lightgbm.params.TrainParams import org.apache.spark.ml.PipelineModel import org.apache.spark.sql.Dataset import org.apache.spark.{SparkEnv, TaskContext} diff --git a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/SharedState.scala b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/SharedState.scala similarity index 92% rename from lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/SharedState.scala rename to lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/SharedState.scala index d61337e19c..45d103730b 100644 --- a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/SharedState.scala +++ b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/SharedState.scala @@ -1,18 +1,17 @@ // Copyright (C) Microsoft Corporation. 
All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.lightgbm +package com.microsoft.azure.synapse.ml.lightgbm -import java.util.concurrent.CountDownLatch - -import com.microsoft.ml.spark.lightgbm.dataset.DatasetUtils._ -import com.microsoft.ml.spark.lightgbm.dataset._ -import com.microsoft.ml.spark.lightgbm.params.TrainParams -import org.apache.spark.ml.linalg.{DenseVector, SparseVector} +import com.microsoft.azure.synapse.ml.lightgbm.dataset.DatasetUtils._ +import com.microsoft.azure.synapse.ml.lightgbm.dataset._ +import com.microsoft.azure.synapse.ml.lightgbm.params.TrainParams import org.apache.spark.sql.Row import org.apache.spark.sql.types.StructType import org.slf4j.Logger +import java.util.concurrent.CountDownLatch + class SharedState(columnParams: ColumnParams, schema: StructType, trainParams: TrainParams) { diff --git a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/TaskTrainingMethods.scala b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/TaskTrainingMethods.scala similarity index 90% rename from lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/TaskTrainingMethods.scala rename to lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/TaskTrainingMethods.scala index 1c1944a399..bd09b4d784 100644 --- a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/TaskTrainingMethods.scala +++ b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/TaskTrainingMethods.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.lightgbm +package com.microsoft.azure.synapse.ml.lightgbm -import com.microsoft.ml.spark.lightgbm.dataset.BaseAggregatedColumns -import com.microsoft.ml.spark.lightgbm.params.TrainParams +import com.microsoft.azure.synapse.ml.lightgbm.dataset.BaseAggregatedColumns +import com.microsoft.azure.synapse.ml.lightgbm.params.TrainParams import org.apache.spark.broadcast.Broadcast import org.apache.spark.sql.Row import org.slf4j.Logger diff --git a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/TrainUtils.scala b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/TrainUtils.scala similarity index 97% rename from lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/TrainUtils.scala rename to lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/TrainUtils.scala index 16f554c916..a27beae5fd 100644 --- a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/TrainUtils.scala +++ b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/TrainUtils.scala @@ -1,21 +1,21 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.lightgbm - -import java.io._ -import java.net._ +package com.microsoft.azure.synapse.ml.lightgbm +import com.microsoft.azure.synapse.ml.core.env.StreamUtilities._ +import com.microsoft.azure.synapse.ml.core.utils.FaultToleranceUtils +import com.microsoft.azure.synapse.ml.lightgbm.booster.LightGBMBooster +import com.microsoft.azure.synapse.ml.lightgbm.dataset.LightGBMDataset +import com.microsoft.azure.synapse.ml.lightgbm.params.{ClassifierTrainParams, TrainParams} import com.microsoft.ml.lightgbm._ -import com.microsoft.ml.spark.core.env.StreamUtilities._ -import com.microsoft.ml.spark.core.utils.FaultToleranceUtils -import com.microsoft.ml.spark.lightgbm.booster.LightGBMBooster -import com.microsoft.ml.spark.lightgbm.dataset.LightGBMDataset -import com.microsoft.ml.spark.lightgbm.params.{ClassifierTrainParams, TrainParams} -import org.apache.spark.{BarrierTaskContext, TaskContext} import org.apache.spark.sql.types.StructType +import org.apache.spark.{BarrierTaskContext, TaskContext} import org.slf4j.Logger +import java.io._ +import java.net._ + case class NetworkParams(defaultListenPort: Int, addr: String, port: Int, barrierExecutionMode: Boolean) case class ColumnParams(labelColumn: String, featuresColumn: String, weightColumn: Option[String], initScoreColumn: Option[String], groupColumn: Option[String]) diff --git a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/booster/LightGBMBooster.scala b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/booster/LightGBMBooster.scala similarity index 98% rename from lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/booster/LightGBMBooster.scala rename to lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/booster/LightGBMBooster.scala index c90dc0fdcf..c19f7c9020 100644 --- a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/booster/LightGBMBooster.scala +++ 
b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/booster/LightGBMBooster.scala @@ -1,12 +1,12 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.lightgbm.booster +package com.microsoft.azure.synapse.ml.lightgbm.booster +import com.microsoft.azure.synapse.ml.lightgbm.dataset.LightGBMDataset +import com.microsoft.azure.synapse.ml.lightgbm.swig.SwigUtils +import com.microsoft.azure.synapse.ml.lightgbm.{LightGBMConstants, LightGBMUtils} import com.microsoft.ml.lightgbm._ -import com.microsoft.ml.spark.lightgbm.{LightGBMConstants, LightGBMUtils} -import com.microsoft.ml.spark.lightgbm.dataset.LightGBMDataset -import com.microsoft.ml.spark.lightgbm.swig.SwigUtils import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector} import org.apache.spark.sql.{SaveMode, SparkSession} diff --git a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/dataset/DatasetAggregator.scala b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/dataset/DatasetAggregator.scala similarity index 98% rename from lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/dataset/DatasetAggregator.scala rename to lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/dataset/DatasetAggregator.scala index 151ce98e36..33ba5cda66 100644 --- a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/dataset/DatasetAggregator.scala +++ b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/dataset/DatasetAggregator.scala @@ -1,19 +1,18 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.lightgbm.dataset +package com.microsoft.azure.synapse.ml.lightgbm.dataset +import com.microsoft.azure.synapse.ml.lightgbm.dataset.DatasetUtils.getRowAsDoubleArray +import com.microsoft.azure.synapse.ml.lightgbm.swig._ +import com.microsoft.azure.synapse.ml.lightgbm.{ColumnParams, LightGBMUtils} import com.microsoft.ml.lightgbm.{SWIGTYPE_p_int, lightgbmlib, lightgbmlibConstants} - -import java.util.concurrent.atomic.AtomicLong -import com.microsoft.ml.spark.lightgbm.{ColumnParams, LightGBMUtils} -import com.microsoft.ml.spark.lightgbm.dataset.DatasetUtils.getRowAsDoubleArray -import com.microsoft.ml.spark.lightgbm.swig._ import org.apache.spark.ml.linalg.SQLDataTypes.VectorType import org.apache.spark.ml.linalg.{DenseVector, SparseVector} import org.apache.spark.sql.Row import org.apache.spark.sql.types.StructType +import java.util.concurrent.atomic.AtomicLong import scala.collection.mutable.ListBuffer private[lightgbm] object ChunkedArrayUtils { diff --git a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/dataset/DatasetUtils.scala b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/dataset/DatasetUtils.scala similarity index 95% rename from lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/dataset/DatasetUtils.scala rename to lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/dataset/DatasetUtils.scala index 4fe55bb411..a6664fb88d 100644 --- a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/dataset/DatasetUtils.scala +++ b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/dataset/DatasetUtils.scala @@ -1,11 +1,11 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.lightgbm.dataset +package com.microsoft.azure.synapse.ml.lightgbm.dataset +import com.microsoft.azure.synapse.ml.lightgbm.ColumnParams +import com.microsoft.azure.synapse.ml.lightgbm.swig.DoubleChunkedArray import com.microsoft.ml.lightgbm.{doubleChunkedArray, floatChunkedArray} -import com.microsoft.ml.spark.lightgbm.ColumnParams -import com.microsoft.ml.spark.lightgbm.swig.DoubleChunkedArray import org.apache.spark.ml.linalg.SQLDataTypes.VectorType import org.apache.spark.ml.linalg.{DenseVector, SparseVector} import org.apache.spark.sql.Row diff --git a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/dataset/LightGBMDataset.scala b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/dataset/LightGBMDataset.scala similarity index 97% rename from lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/dataset/LightGBMDataset.scala rename to lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/dataset/LightGBMDataset.scala index 0c513cfd23..2d7ba5c99d 100644 --- a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/dataset/LightGBMDataset.scala +++ b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/dataset/LightGBMDataset.scala @@ -1,12 +1,12 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.lightgbm.dataset +package com.microsoft.azure.synapse.ml.lightgbm.dataset +import com.microsoft.azure.synapse.ml.lightgbm.LightGBMUtils import com.microsoft.lightgbm.SwigPtrWrapper import com.microsoft.ml.lightgbm._ -import com.microsoft.ml.spark.lightgbm.LightGBMUtils -import com.microsoft.ml.spark.lightgbm.dataset.DatasetUtils.countCardinality +import DatasetUtils.countCardinality import scala.reflect.ClassTag diff --git a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/params/FObjParam.scala b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/params/FObjParam.scala similarity index 82% rename from lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/params/FObjParam.scala rename to lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/params/FObjParam.scala index ff166a9d38..79a77bdfe5 100644 --- a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/params/FObjParam.scala +++ b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/params/FObjParam.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.lightgbm.params +package com.microsoft.azure.synapse.ml.lightgbm.params -import com.microsoft.ml.spark.core.serialize.ComplexParam +import com.microsoft.azure.synapse.ml.core.serialize.ComplexParam import org.apache.spark.ml.param.Params /** Param for FObjTrait. 
Needed as spark has explicit params for many different diff --git a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/params/FObjTrait.scala b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/params/FObjTrait.scala similarity index 81% rename from lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/params/FObjTrait.scala rename to lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/params/FObjTrait.scala index dbc1304e8f..006fdf551e 100644 --- a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/params/FObjTrait.scala +++ b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/params/FObjTrait.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.lightgbm.params +package com.microsoft.azure.synapse.ml.lightgbm.params -import com.microsoft.ml.spark.lightgbm.dataset.LightGBMDataset +import com.microsoft.azure.synapse.ml.lightgbm.dataset.LightGBMDataset trait FObjTrait extends Serializable { /** diff --git a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/params/LightGBMBoosterParam.scala b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/params/LightGBMBoosterParam.scala similarity index 75% rename from lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/params/LightGBMBoosterParam.scala rename to lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/params/LightGBMBoosterParam.scala index 50afdec45b..484d962c67 100644 --- a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/params/LightGBMBoosterParam.scala +++ b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/params/LightGBMBoosterParam.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.lightgbm.params +package com.microsoft.azure.synapse.ml.lightgbm.params -import com.microsoft.ml.spark.core.serialize.ComplexParam -import com.microsoft.ml.spark.lightgbm.booster.LightGBMBooster +import com.microsoft.azure.synapse.ml.core.serialize.ComplexParam +import com.microsoft.azure.synapse.ml.lightgbm.booster.LightGBMBooster import org.apache.spark.ml.param.Params /** Custom ComplexParam for LightGBMBooster, to make it settable on the LightGBM models. diff --git a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/params/LightGBMParams.scala b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/params/LightGBMParams.scala similarity index 97% rename from lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/params/LightGBMParams.scala rename to lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/params/LightGBMParams.scala index 6a938e6ad2..b7c27f8821 100644 --- a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/params/LightGBMParams.scala +++ b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/params/LightGBMParams.scala @@ -1,12 +1,12 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.lightgbm.params +package com.microsoft.azure.synapse.ml.lightgbm.params -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.contracts.{HasInitScoreCol, HasValidationIndicatorCol, HasWeightCol} -import com.microsoft.ml.spark.lightgbm.booster.LightGBMBooster -import com.microsoft.ml.spark.lightgbm.{LightGBMConstants, LightGBMDelegate} +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts.{HasInitScoreCol, HasValidationIndicatorCol, HasWeightCol} +import com.microsoft.azure.synapse.ml.lightgbm.booster.LightGBMBooster +import com.microsoft.azure.synapse.ml.lightgbm.{LightGBMConstants, LightGBMDelegate} import org.apache.spark.ml.param._ import org.apache.spark.ml.util.DefaultParamsWritable @@ -82,7 +82,7 @@ trait LightGBMExecutionParams extends Wrappable { val numTasks = new IntParam(this, "numTasks", "Advanced parameter to specify the number of tasks. " + - "MMLSpark tries to guess this based on cluster configuration, but this parameter can be used to override.") + "SynapseML tries to guess this based on cluster configuration, but this parameter can be used to override.") setDefault(numTasks -> 0) def getNumTasks: Int = $(numTasks) diff --git a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/params/TrainParams.scala b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/params/TrainParams.scala similarity index 98% rename from lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/params/TrainParams.scala rename to lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/params/TrainParams.scala index 6bb55ffc59..fc0bf270c1 100644 --- a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/params/TrainParams.scala +++ b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/params/TrainParams.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
See LICENSE in project root for information. -package com.microsoft.ml.spark.lightgbm.params +package com.microsoft.azure.synapse.ml.lightgbm.params -import com.microsoft.ml.spark.lightgbm.{LightGBMConstants, LightGBMDelegate} +import com.microsoft.azure.synapse.ml.lightgbm.{LightGBMConstants, LightGBMDelegate} /** Defines the common Booster parameters passed to the LightGBM learners. */ diff --git a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/swig/SwigUtils.scala b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/swig/SwigUtils.scala similarity index 98% rename from lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/swig/SwigUtils.scala rename to lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/swig/SwigUtils.scala index d84992c6eb..d28ca62c06 100644 --- a/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/swig/SwigUtils.scala +++ b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/swig/SwigUtils.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.lightgbm.swig +package com.microsoft.azure.synapse.ml.lightgbm.swig import com.microsoft.ml.lightgbm.{SWIGTYPE_p_double, SWIGTYPE_p_float, SWIGTYPE_p_int, doubleChunkedArray, floatChunkedArray, int32ChunkedArray, lightgbmlib} diff --git a/lightgbm/src/test/scala/com/microsoft/ml/spark/lightgbm/split1/VerifyLightGBMClassifier.scala b/lightgbm/src/test/scala/com/microsoft/azure/synapse/ml/lightgbm/split1/VerifyLightGBMClassifier.scala similarity index 98% rename from lightgbm/src/test/scala/com/microsoft/ml/spark/lightgbm/split1/VerifyLightGBMClassifier.scala rename to lightgbm/src/test/scala/com/microsoft/azure/synapse/ml/lightgbm/split1/VerifyLightGBMClassifier.scala index 88f015efd3..8c52c73951 100644 --- a/lightgbm/src/test/scala/com/microsoft/ml/spark/lightgbm/split1/VerifyLightGBMClassifier.scala +++ b/lightgbm/src/test/scala/com/microsoft/azure/synapse/ml/lightgbm/split1/VerifyLightGBMClassifier.scala @@ -1,19 +1,16 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.lightgbm.split1 - -import java.io.File -import java.nio.file.{Files, Path, Paths} - -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.test.benchmarks.{Benchmarks, DatasetUtils} -import com.microsoft.ml.spark.core.test.fuzzing.{EstimatorFuzzing, TestObject} -import com.microsoft.ml.spark.featurize.ValueIndexer -import com.microsoft.ml.spark.lightgbm._ -import com.microsoft.ml.spark.lightgbm.dataset.LightGBMDataset -import com.microsoft.ml.spark.lightgbm.params.{FObjTrait, TrainParams} -import com.microsoft.ml.spark.stages.MultiColumnAdapter +package com.microsoft.azure.synapse.ml.lightgbm.split1 + +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.benchmarks.{Benchmarks, DatasetUtils} +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{EstimatorFuzzing, TestObject} +import com.microsoft.azure.synapse.ml.featurize.ValueIndexer +import com.microsoft.azure.synapse.ml.lightgbm.dataset.LightGBMDataset +import com.microsoft.azure.synapse.ml.lightgbm.params.{FObjTrait, TrainParams} +import com.microsoft.azure.synapse.ml.lightgbm._ +import com.microsoft.azure.synapse.ml.stages.MultiColumnAdapter import org.apache.commons.io.FileUtils import org.apache.spark.TaskContext import org.apache.spark.ml.evaluation.{BinaryClassificationEvaluator, MulticlassClassificationEvaluator} @@ -22,11 +19,13 @@ import org.apache.spark.ml.linalg.{DenseVector, Vector} import org.apache.spark.ml.tuning.{ParamGridBuilder, TrainValidationSplit} import org.apache.spark.ml.util.MLReadable import org.apache.spark.ml.{Estimator, Model} -import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.functions._ +import org.apache.spark.sql.{DataFrame, Row} import org.slf4j.Logger +import java.io.File +import java.nio.file.{Files, Path, Paths} import scala.math.exp @SerialVersionUID(100L) diff --git 
a/lightgbm/src/test/scala/com/microsoft/ml/spark/lightgbm/split2/VerifyLightGBMRanker.scala b/lightgbm/src/test/scala/com/microsoft/azure/synapse/ml/lightgbm/split2/VerifyLightGBMRanker.scala similarity index 85% rename from lightgbm/src/test/scala/com/microsoft/ml/spark/lightgbm/split2/VerifyLightGBMRanker.scala rename to lightgbm/src/test/scala/com/microsoft/azure/synapse/ml/lightgbm/split2/VerifyLightGBMRanker.scala index a1dcd76db8..e01b39e587 100644 --- a/lightgbm/src/test/scala/com/microsoft/ml/spark/lightgbm/split2/VerifyLightGBMRanker.scala +++ b/lightgbm/src/test/scala/com/microsoft/azure/synapse/ml/lightgbm/split2/VerifyLightGBMRanker.scala @@ -1,21 +1,19 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.lightgbm.split2 - -import com.microsoft.ml.spark.core.test.benchmarks.{Benchmarks, DatasetUtils} -import com.microsoft.ml.spark.core.test.fuzzing.{EstimatorFuzzing, TestObject} -import com.microsoft.ml.spark.lightgbm.dataset.{DatasetUtils => CardinalityUtils} -import com.microsoft.ml.spark.lightgbm.split1.LightGBMTestUtils -import com.microsoft.ml.spark.lightgbm.{LightGBMRanker, LightGBMRankerModel, LightGBMUtils} -import org.apache.spark.SparkException +package com.microsoft.azure.synapse.ml.lightgbm.split2 + +import com.microsoft.azure.synapse.ml.core.test.benchmarks.{Benchmarks, DatasetUtils} +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{EstimatorFuzzing, TestObject} +import com.microsoft.azure.synapse.ml.lightgbm.dataset.DatasetUtils.countCardinality +import com.microsoft.azure.synapse.ml.lightgbm.split1.LightGBMTestUtils +import com.microsoft.azure.synapse.ml.lightgbm.{LightGBMRanker, LightGBMRankerModel, LightGBMUtils} import org.apache.spark.ml.feature.VectorAssembler import org.apache.spark.ml.linalg.Vectors import org.apache.spark.ml.util.MLReadable import org.apache.spark.sql.DataFrame import 
org.apache.spark.sql.functions.{col, monotonically_increasing_id, _} import org.apache.spark.sql.types.StructType -import org.scalatest.Matchers._ //scalastyle:off magic.number /** Tests to validate the functionality of LightGBM Ranker module. */ @@ -125,15 +123,13 @@ class VerifyLightGBMRanker extends Benchmarks with EstimatorFuzzing[LightGBMRank } test("verify cardinality counts: int") { - val counts = CardinalityUtils.countCardinality(Seq(1, 1, 2, 2, 2, 3)) - - counts shouldBe Seq(2, 3, 1) + val counts = countCardinality(Seq(1, 1, 2, 2, 2, 3)) + assert(counts === Seq(2, 3, 1)) } test("verify cardinality counts: string") { - val counts = CardinalityUtils.countCardinality(Seq("a", "a", "b", "b", "b", "c")) - - counts shouldBe Seq(2, 3, 1) + val counts = countCardinality(Seq("a", "a", "b", "b", "b", "c")) + assert(counts === Seq(2, 3, 1)) } override def testObjects(): Seq[TestObject[LightGBMRanker]] = { diff --git a/lightgbm/src/test/scala/com/microsoft/ml/spark/lightgbm/split2/VerifyLightGBMRegressor.scala b/lightgbm/src/test/scala/com/microsoft/azure/synapse/ml/lightgbm/split2/VerifyLightGBMRegressor.scala similarity index 94% rename from lightgbm/src/test/scala/com/microsoft/ml/spark/lightgbm/split2/VerifyLightGBMRegressor.scala rename to lightgbm/src/test/scala/com/microsoft/azure/synapse/ml/lightgbm/split2/VerifyLightGBMRegressor.scala index a3865e9fad..cbca56fc8a 100644 --- a/lightgbm/src/test/scala/com/microsoft/ml/spark/lightgbm/split2/VerifyLightGBMRegressor.scala +++ b/lightgbm/src/test/scala/com/microsoft/azure/synapse/ml/lightgbm/split2/VerifyLightGBMRegressor.scala @@ -1,21 +1,20 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.lightgbm.split2 - -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.test.benchmarks.{Benchmarks, DatasetUtils} -import com.microsoft.ml.spark.core.test.fuzzing.{EstimatorFuzzing, TestObject} -import com.microsoft.ml.spark.lightgbm.split1.LightGBMTestUtils -import com.microsoft.ml.spark.lightgbm.{LightGBMRegressionModel, LightGBMRegressor, LightGBMUtils} -import com.microsoft.ml.spark.stages.MultiColumnAdapter +package com.microsoft.azure.synapse.ml.lightgbm.split2 + +import com.microsoft.azure.synapse.ml.core.test.benchmarks.{Benchmarks, DatasetUtils} +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{EstimatorFuzzing, TestObject} +import com.microsoft.azure.synapse.ml.lightgbm.split1.LightGBMTestUtils +import com.microsoft.azure.synapse.ml.lightgbm.{LightGBMRegressionModel, LightGBMRegressor, LightGBMUtils} +import com.microsoft.azure.synapse.ml.stages.MultiColumnAdapter import org.apache.spark.ml.evaluation.RegressionEvaluator import org.apache.spark.ml.feature.StringIndexer import org.apache.spark.ml.linalg.Vector import org.apache.spark.ml.tuning.{CrossValidator, ParamGridBuilder, TrainValidationSplit} import org.apache.spark.ml.util.MLReadable -import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.functions.{avg, col, lit, when} +import org.apache.spark.sql.{DataFrame, Row} // scalastyle:off magic.number diff --git a/notebooks/AzureSearchIndex - Met Artworks.ipynb b/notebooks/AzureSearchIndex - Met Artworks.ipynb index 70304755ae..f43739bb87 100644 --- a/notebooks/AzureSearchIndex - Met Artworks.ipynb +++ b/notebooks/AzureSearchIndex - Met Artworks.ipynb @@ -10,7 +10,7 @@ { "cell_type": "markdown", "source": [ - "In this example, we show how you can enrich data using Cognitive Skills and write to an Azure Search Index using MMLSpark. 
We use a subset of The MET's open-access collection and enrich it by passing it through 'Describe Image' and a custom 'Image Similarity' skill. The results are then written to a searchable index." + "In this example, we show how you can enrich data using Cognitive Skills and write to an Azure Search Index using SynapseML. We use a subset of The MET's open-access collection and enrich it by passing it through 'Describe Image' and a custom 'Image Similarity' skill. The results are then written to a searchable index." ], "metadata": {} }, @@ -85,8 +85,8 @@ "cell_type": "code", "execution_count": 7, "source": [ - "from mmlspark.cognitive import AnalyzeImage\r\n", - "from mmlspark.stages import SelectColumns\r\n", + "from synapse.ml.cognitive import AnalyzeImage\r\n", + "from synapse.ml.stages import SelectColumns\r\n", "\r\n", "#define pipeline\r\n", "describeImage = (AnalyzeImage()\r\n", @@ -124,7 +124,7 @@ "cell_type": "code", "execution_count": 10, "source": [ - "from mmlspark.cognitive import *\r\n", + "from synapse.ml.cognitive import *\r\n", "df2.writeToAzureSearch(\r\n", " subscriptionKey=AZURE_SEARCH_KEY,\r\n", " actionCol=\"searchAction\",\r\n", diff --git a/notebooks/Classification - Adult Census with Vowpal Wabbit.ipynb b/notebooks/Classification - Adult Census with Vowpal Wabbit.ipynb index 7a8641a772..a2a76eb8cf 100644 --- a/notebooks/Classification - Adult Census with Vowpal Wabbit.ipynb +++ b/notebooks/Classification - Adult Census with Vowpal Wabbit.ipynb @@ -4,10 +4,10 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Classification - Adult Census using Vowpal Wabbit in MMLSpark\n", + "# Classification - Adult Census using Vowpal Wabbit in SynapseML\n", "\n", - "In this example, we predict incomes from the *Adult Census* dataset using Vowpal Wabbit (VW) classifier in MMLSpark.\n", - "First, we read the data and split it into train and test sets as in this 
[example](https://github.com/Azure/mmlspark/blob/master/notebooks/Classification%20-%20Adult%20Census.ipynb\n", + "In this example, we predict incomes from the *Adult Census* dataset using Vowpal Wabbit (VW) classifier in SynapseML.\n", + "First, we read the data and split it into train and test sets as in this [example](https://github.com/Microsoft/SynapseML/blob/master/notebooks/Classification%20-%20Adult%20Census.ipynb\n", ")." ] }, @@ -51,7 +51,7 @@ "source": [ "from pyspark.sql.functions import when, col\n", "from pyspark.ml import Pipeline\n", - "from mmlspark.vw import VowpalWabbitFeaturizer, VowpalWabbitClassifier\n", + "from synapse.ml.vw import VowpalWabbitFeaturizer, VowpalWabbitClassifier\n", "\n", "# Define classification label\n", "train = train.withColumn(\"label\", when(col(\"income\").contains(\"<\"), 0.0).otherwise(1.0)).repartition(1).cache()\n", @@ -121,7 +121,7 @@ "metadata": {}, "outputs": [], "source": [ - "from mmlspark.train import ComputeModelStatistics\n", + "from synapse.ml.train import ComputeModelStatistics\n", "metrics = ComputeModelStatistics(evaluationMetric=\"classification\", \n", " labelCol=\"label\", \n", " scoredLabelsCol=\"prediction\").transform(prediction)\n", diff --git a/notebooks/Classification - Adult Census.ipynb b/notebooks/Classification - Adult Census.ipynb index 6a271e1835..b1ce8b6149 100644 --- a/notebooks/Classification - Adult Census.ipynb +++ b/notebooks/Classification - Adult Census.ipynb @@ -7,7 +7,7 @@ "\n", "In this example, we try to predict incomes from the *Adult Census* dataset.\n", "\n", - "First, we import the packages (use `help(mmlspark)` to view contents)," + "First, we import the packages (use `help(synapse)` to view contents)," ], "metadata": {} }, @@ -56,7 +56,7 @@ "cell_type": "markdown", "source": [ "`TrainClassifier` can be used to initialize and fit a model, it wraps SparkML classifiers.\n", - "You can use `help(mmlspark.train.TrainClassifier)` to view the different parameters.\n", + "You can 
use `help(synapse.ml.train.TrainClassifier)` to view the different parameters.\n", "\n", "Note that it implicitly converts the data into the format expected by the algorithm: tokenize\n", "and hash strings, one-hot encodes categorical variables, assembles the features into a vector\n", @@ -68,7 +68,7 @@ "cell_type": "code", "execution_count": null, "source": [ - "from mmlspark.train import TrainClassifier\r\n", + "from synapse.ml.train import TrainClassifier\r\n", "from pyspark.ml.classification import LogisticRegression\r\n", "model = TrainClassifier(model=LogisticRegression(), labelCol=\"income\", numFeatures=256).fit(train)" ], diff --git a/notebooks/Classification - Before and After MMLSpark.ipynb b/notebooks/Classification - Before and After MMLSpark.ipynb index bf0430fc5a..9a0fa1bbda 100644 --- a/notebooks/Classification - Before and After MMLSpark.ipynb +++ b/notebooks/Classification - Before and After MMLSpark.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Classification - Before and After MMLSpark\n", + "## Classification - Before and After SynapseML\n", "\n", "### 1. Introduction\n", "\n", @@ -12,7 +12,7 @@ "\n", "In this tutorial, we perform the same classification task in two\n", "different ways: once using plain **`pyspark`** and once using the\n", - "**`mmlspark`** library. The two methods yield the same performance,\n", + "**`synapseml`** library. The two methods yield the same performance,\n", "but one of the two libraries is drastically simpler to use and iterate\n", "on (can you guess which one?).\n", "\n", @@ -90,7 +90,7 @@ "metadata": {}, "outputs": [], "source": [ - "from mmlspark.stages import UDFTransformer\n", + "from synapse.ml.stages import UDFTransformer\n", "wordLength = \"wordLength\"\n", "wordCount = \"wordCount\"\n", "wordLengthTransformer = UDFTransformer(inputCol=\"text\", outputCol=wordLength, udf=wordLengthUDF)\n", @@ -214,9 +214,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 4b. 
Classify using mmlspark\n", + "### 4b. Classify using synapseml\n", "\n", - "Life is a lot simpler when using `mmlspark`!\n", + "Life is a lot simpler when using `synapseml`!\n", "\n", "1. The **`TrainClassifier`** Estimator featurizes the data internally,\n", " as long as the columns selected in the `train`, `test`, `validation`\n", @@ -237,8 +237,8 @@ "metadata": {}, "outputs": [], "source": [ - "from mmlspark.train import TrainClassifier, ComputeModelStatistics\n", - "from mmlspark.automl import FindBestModel\n", + "from synapse.ml.train import TrainClassifier, ComputeModelStatistics\n", + "from synapse.ml.automl import FindBestModel\n", "\n", "# Prepare data for learning\n", "train, test, validation = data.randomSplit([0.60, 0.20, 0.20], seed=123)\n", diff --git a/notebooks/Classification - Twitter Sentiment with Vowpal Wabbit.ipynb b/notebooks/Classification - Twitter Sentiment with Vowpal Wabbit.ipynb index c7be7427b3..b3bd002a74 100644 --- a/notebooks/Classification - Twitter Sentiment with Vowpal Wabbit.ipynb +++ b/notebooks/Classification - Twitter Sentiment with Vowpal Wabbit.ipynb @@ -4,9 +4,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Twitter Sentiment Classification using Vowpal Wabbit in MMLSpark\n", + "# Twitter Sentiment Classification using Vowpal Wabbit in SynapseML\n", "\n", - "In this example, we show how to build a sentiment classification model using Vowpal Wabbit (VW) in MMLSpark. The data set we use to train and evaluate the model is [Sentiment140](http://help.sentiment140.com/for-students/?source=post_page---------------------------) twitter data. First, we import a few packages that we need." + "In this example, we show how to build a sentiment classification model using Vowpal Wabbit (VW) in SynapseML. The data set we use to train and evaluate the model is [Sentiment140](http://help.sentiment140.com/for-students/?source=post_page---------------------------) twitter data. First, we import a few packages that we need." 
] }, { @@ -26,8 +26,8 @@ "from pyspark.sql.types import StructType, StructField, DoubleType, StringType\n", "from pyspark.ml import Pipeline\n", "from pyspark.ml.feature import CountVectorizer, RegexTokenizer\n", - "from mmlspark.vw import VowpalWabbitClassifier\n", - "from mmlspark.train import ComputeModelStatistics\n", + "from synapse.ml.vw import VowpalWabbitClassifier\n", + "from synapse.ml.train import ComputeModelStatistics\n", "from pyspark.mllib.evaluation import BinaryClassificationMetrics\n", "import matplotlib.pyplot as plt" ] @@ -165,7 +165,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## VW MMLSpark Training\n", + "## VW SynapseML Training\n", "\n", "Now we are ready to define a pipeline which consists of feture engineering steps and the VW model." ] @@ -336,7 +336,7 @@ "pygments_lexer": "ipython3", "version": "3.6.8" }, - "name": "vw_mmlspark_sentiment_classification2", + "name": "vw_synapseml_sentiment_classification2", "notebookId": 2916790739696591 }, "nbformat": 4, diff --git a/notebooks/CognitiveServices - Celebrity Quote Analysis.ipynb b/notebooks/CognitiveServices - Celebrity Quote Analysis.ipynb index 81e0d75cd1..cdd55529e5 100644 --- a/notebooks/CognitiveServices - Celebrity Quote Analysis.ipynb +++ b/notebooks/CognitiveServices - Celebrity Quote Analysis.ipynb @@ -22,7 +22,7 @@ }, "outputs": [], "source": [ - "from mmlspark.cognitive import *\n", + "from synapse.ml.cognitive import *\n", "from pyspark.ml import PipelineModel\n", "from pyspark.sql.functions import col, udf\n", "from pyspark.ml.feature import SQLTransformer\n", @@ -123,7 +123,7 @@ }, "outputs": [], "source": [ - "from mmlspark.stages import UDFTransformer \n", + "from synapse.ml.stages import UDFTransformer \n", "\n", "recognizeText = RecognizeText()\\\n", " .setSubscriptionKey(VISION_API_KEY)\\\n", @@ -185,7 +185,7 @@ "metadata": {}, "outputs": [], "source": [ - "from mmlspark.stages import SelectColumns\n", + "from synapse.ml.stages import SelectColumns\n", 
"# Select the final coulmns\n", "cleanupColumns = SelectColumns().setCols([\"url\", \"firstCeleb\", \"text\", \"sentimentLabel\"])\n", "\n", diff --git a/notebooks/CognitiveServices - Overview.ipynb b/notebooks/CognitiveServices - Overview.ipynb index 92e2e08ba5..4da554df03 100644 --- a/notebooks/CognitiveServices - Overview.ipynb +++ b/notebooks/CognitiveServices - Overview.ipynb @@ -30,60 +30,60 @@ "\n", "### Vision\n", "[**Computer Vision**](https://azure.microsoft.com/en-us/services/cognitive-services/computer-vision/)\n", - "- Describe: provides description of an image in human readable language ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/DescribeImage.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.DescribeImage))\n", - "- Analyze (color, image type, face, adult/racy content): analyzes visual features of an image ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/AnalyzeImage.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.AnalyzeImage))\n", - "- OCR: reads text from an image ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/OCR.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.OCR))\n", - "- Recognize Text: reads text from an image ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/RecognizeText.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.RecognizeText))\n", - "- Thumbnail: generates a thumbnail of user-specified size from the image 
([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/GenerateThumbnails.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.GenerateThumbnails))\n", - "- Recognize domain-specific content: recognizes domain-specific content (celebrity, landmark) ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/RecognizeDomainSpecificContent.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.RecognizeDomainSpecificContent))\n", - "- Tag: identifies list of words that are relevant to the in0put image ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/TagImage.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.TagImage))\n", + "- Describe: provides description of an image in human readable language ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/DescribeImage.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.DescribeImage))\n", + "- Analyze (color, image type, face, adult/racy content): analyzes visual features of an image ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/AnalyzeImage.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.AnalyzeImage))\n", + "- OCR: reads text from an image ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/OCR.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.OCR))\n", + "- Recognize 
Text: reads text from an image ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/RecognizeText.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.RecognizeText))\n", + "- Thumbnail: generates a thumbnail of user-specified size from the image ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/GenerateThumbnails.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.GenerateThumbnails))\n", + "- Recognize domain-specific content: recognizes domain-specific content (celebrity, landmark) ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/RecognizeDomainSpecificContent.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.RecognizeDomainSpecificContent))\n", + "- Tag: identifies list of words that are relevant to the in0put image ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/TagImage.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.TagImage))\n", "\n", "[**Face**](https://azure.microsoft.com/en-us/services/cognitive-services/face/)\n", - "- Detect: detects human faces in an image ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/DetectFace.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.DetectFace))\n", - "- Verify: verifies whether two faces belong to a same person, or a face belongs to a person ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/VerifyFaces.html), 
[Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.VerifyFaces))\n", - "- Identify: finds the closest matches of the specific query person face from a person group ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/IdentifyFaces.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.IdentifyFaces))\n", - "- Find similar: finds similar faces to the query face in a face list ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/FindSimilarFace.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.FindSimilarFace))\n", - "- Group: divides a group of faces into disjoint groups based on similarity ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/GroupFaces.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.GroupFaces))\n", + "- Detect: detects human faces in an image ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/DetectFace.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.DetectFace))\n", + "- Verify: verifies whether two faces belong to a same person, or a face belongs to a person ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/VerifyFaces.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.VerifyFaces))\n", + "- Identify: finds the closest matches of the specific query person face from a person group 
([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/IdentifyFaces.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.IdentifyFaces))\n", + "- Find similar: finds similar faces to the query face in a face list ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/FindSimilarFace.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.FindSimilarFace))\n", + "- Group: divides a group of faces into disjoint groups based on similarity ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/GroupFaces.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.GroupFaces))\n", "\n", "### Speech\n", "[**Speech Services**](https://azure.microsoft.com/en-us/services/cognitive-services/speech-services/)\n", - "- Speech-to-text: transcribes audio streams ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/SpeechToText.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.SpeechToText))\n", + "- Speech-to-text: transcribes audio streams ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/SpeechToText.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.SpeechToText))\n", "\n", "### Language\n", "[**Text Analytics**](https://azure.microsoft.com/en-us/services/cognitive-services/text-analytics/)\n", - "- Language detection: detects language of the input text 
([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/LanguageDetector.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.LanguageDetector))\n", - "- Key phrase extraction: identifies the key talking points in the input text ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/KeyPhraseExtractor.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.KeyPhraseExtractor))\n", - "- Named entity recognition: identifies known entities and general named entities in the input text ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/NER.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.NER))\n", - "- Sentiment analysis: returns a score betwee 0 and 1 indicating the sentiment in the input text ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/TextSentiment.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.TextSentiment))\n", + "- Language detection: detects language of the input text ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/LanguageDetector.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.LanguageDetector))\n", + "- Key phrase extraction: identifies the key talking points in the input text ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/KeyPhraseExtractor.html), 
[Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.KeyPhraseExtractor))\n", + "- Named entity recognition: identifies known entities and general named entities in the input text ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/NER.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.NER))\n", + "- Sentiment analysis: returns a score between 0 and 1 indicating the sentiment in the input text ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/TextSentiment.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.TextSentiment))\n", "\n", "[**Translator**](https://azure.microsoft.com/en-us/services/cognitive-services/translator/)\n", - "- Translate: Translates text. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/Translate.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.Translate))\n", - "- Transliterate: Converts text in one language from one script to another script. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/Transliterate.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.Transliterate))\n", - "- Detect: Identifies the language of a piece of text. 
([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/Detect.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.Detect))\n", - "- BreakSentence: Identifies the positioning of sentence boundaries in a piece of text. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/BreakSentence.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.BreakSentence))\n", - "- Dictionary Lookup: Provides alternative translations for a word and a small number of idiomatic phrases. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/DictionaryLookup.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.DictionaryLookup))\n", - "- Dictionary Examples: Provides examples that show how terms in the dictionary are used in context. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/DictionaryExamples.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.DictionaryExamples))\n", - "- Document Translation: Translates documents across all supported languages and dialects while preserving document structure and data format. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/DocumentTranslator.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.DocumentTranslator))\n", + "- Translate: Translates text. 
([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/Translate.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.Translate))\n", + "- Transliterate: Converts text in one language from one script to another script. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/Transliterate.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.Transliterate))\n", + "- Detect: Identifies the language of a piece of text. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/Detect.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.Detect))\n", + "- BreakSentence: Identifies the positioning of sentence boundaries in a piece of text. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/BreakSentence.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.BreakSentence))\n", + "- Dictionary Lookup: Provides alternative translations for a word and a small number of idiomatic phrases. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/DictionaryLookup.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.DictionaryLookup))\n", + "- Dictionary Examples: Provides examples that show how terms in the dictionary are used in context. 
([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/DictionaryExamples.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.DictionaryExamples))\n", + "- Document Translation: Translates documents across all supported languages and dialects while preserving document structure and data format. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/DocumentTranslator.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.DocumentTranslator))\n", "\n", "### Azure Form Recognizer\n", "[**Form Recognizer**](https://azure.microsoft.com/en-us/services/form-recognizer/)\n", - "- Analyze Layout: Extract text and layout information from a given document. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/AnalyzeLayout.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.AnalyzeLayout))\n", - "- Analyze Receipts: Detects and extracts data from receipts using optical character recognition (OCR) and our receipt model, enabling you to easily extract structured data from receipts such as merchant name, merchant phone number, transaction date, transaction total, and more. 
([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/AnalyzeReceipts.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.AnalyzeReceipts))\n", - "- Analyze Business Cards: Detects and extracts data from business cards using optical character recognition (OCR) and our business card model, enabling you to easily extract structured data from business cards such as contact names, company names, phone numbers, emails, and more. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/AnalyzeBusinessCards.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.AnalyzeBusinessCards))\n", - "- Analyze Invoices: Detects and extracts data from invoices using optical character recognition (OCR) and our invoice understanding deep learning models, enabling you to easily extract structured data from invoices such as customer, vendor, invoice ID, invoice due date, total, invoice amount due, tax amount, ship to, bill to, line items and more. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/AnalyzeInvoices.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.AnalyzeInvoices))\n", - "- Analyze ID Documents: Detects and extracts data from identification documents using optical character recognition (OCR) and our ID document model, enabling you to easily extract structured data from ID documents such as first name, last name, date of birth, document number, and more. 
([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/AnalyzeIDDocuments.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.AnalyzeIDDocuments))\n", - "- Analyze Custom Form: Extracts information from forms (PDFs and images) into structured data based on a model created from a set of representative training forms. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/AnalyzeCustomModel.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.AnalyzeCustomModel))\n", + "- Analyze Layout: Extract text and layout information from a given document. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/AnalyzeLayout.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.AnalyzeLayout))\n", + "- Analyze Receipts: Detects and extracts data from receipts using optical character recognition (OCR) and our receipt model, enabling you to easily extract structured data from receipts such as merchant name, merchant phone number, transaction date, transaction total, and more. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/AnalyzeReceipts.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.AnalyzeReceipts))\n", + "- Analyze Business Cards: Detects and extracts data from business cards using optical character recognition (OCR) and our business card model, enabling you to easily extract structured data from business cards such as contact names, company names, phone numbers, emails, and more. 
([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/AnalyzeBusinessCards.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.AnalyzeBusinessCards))\n", + "- Analyze Invoices: Detects and extracts data from invoices using optical character recognition (OCR) and our invoice understanding deep learning models, enabling you to easily extract structured data from invoices such as customer, vendor, invoice ID, invoice due date, total, invoice amount due, tax amount, ship to, bill to, line items and more. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/AnalyzeInvoices.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.AnalyzeInvoices))\n", + "- Analyze ID Documents: Detects and extracts data from identification documents using optical character recognition (OCR) and our ID document model, enabling you to easily extract structured data from ID documents such as first name, last name, date of birth, document number, and more. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/AnalyzeIDDocuments.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.AnalyzeIDDocuments))\n", + "- Analyze Custom Form: Extracts information from forms (PDFs and images) into structured data based on a model created from a set of representative training forms. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/AnalyzeCustomModel.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.AnalyzeCustomModel))\n", "- Get Custom Model: Get detailed information about a custom model. 
([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/GetCustomModel.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/ListCustomModels.html))\n", - "- List Custom Models: Get information about all custom models. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/ListCustomModels.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.ListCustomModels))\n", + "- List Custom Models: Get information about all custom models. ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/ListCustomModels.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.ListCustomModels))\n", "\n", "### Decision\n", "[**Anomaly Detector**](https://azure.microsoft.com/en-us/services/cognitive-services/anomaly-detector/)\n", - "- Anomaly status of latest point: generates a model using preceding points and determines whether the latest point is anomalous ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/DetectLastAnomaly.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.DetectLastAnomaly))\n", - "- Find anomalies: generates a model using an entire series and finds anomalies in the series ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/DetectAnomalies.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.DetectAnomalies))\n", + "- Anomaly status of latest point: generates a model using preceding points and determines whether the latest point is anomalous 
([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/DetectLastAnomaly.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.DetectLastAnomaly))\n", + "- Find anomalies: generates a model using an entire series and finds anomalies in the series ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/DetectAnomalies.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.DetectAnomalies))\n", "\n", "### Search\n", - "- [Bing Image search](https://azure.microsoft.com/en-us/services/cognitive-services/bing-image-search-api/) ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/BingImageSearch.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/mmlspark.cognitive.html#module-mmlspark.cognitive.BingImageSearch))\n", - "- [Azure Cognitive search](https://docs.microsoft.com/en-us/azure/search/search-what-is-azure-search) ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/index.html#com.microsoft.ml.spark.cognitive.AzureSearchWriter$), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/index.html#com.microsoft.ml.spark.cognitive.AzureSearchWriter$))\n" + "- [Bing Image search](https://azure.microsoft.com/en-us/services/cognitive-services/bing-image-search-api/) ([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/com/microsoft/ml/spark/cognitive/BingImageSearch.html), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/pyspark/synapse.ml.cognitive.html#module-synapse.ml.cognitive.BingImageSearch))\n", + "- [Azure Cognitive search](https://docs.microsoft.com/en-us/azure/search/search-what-is-azure-search) 
([Scala](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/index.html#com.microsoft.azure.synapse.ml.cognitive.AzureSearchWriter$), [Python](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc4/scala/index.html#com.microsoft.azure.synapse.ml.cognitive.AzureSearchWriter$))\n" ], "metadata": {} }, @@ -92,7 +92,7 @@ "source": [ "## Prerequisites\n", "\n", - "1. Follow the steps in [Getting started](https://docs.microsoft.com/en-us/azure/cognitive-services/big-data/getting-started) to set up your Azure Databricks and Cognitive Services environment. This tutorial shows you how to install MMLSpark and how to create your Spark cluster in Databricks.\n", + "1. Follow the steps in [Getting started](https://docs.microsoft.com/en-us/azure/cognitive-services/big-data/getting-started) to set up your Azure Databricks and Cognitive Services environment. This tutorial shows you how to install SynapseML and how to create your Spark cluster in Databricks.\n", "1. After you create a new notebook in Azure Databricks, copy the **Shared code** below and paste into a new cell in your notebook.\n", "1. Choose a service sample, below, and copy paste it into a second new cell in your notebook.\n", "1. 
Replace any of the service subscription key placeholders with your own key.\n", @@ -115,7 +115,7 @@ "execution_count": null, "source": [ "from pyspark.sql.functions import udf, col\r\n", - "from mmlspark.io.http import HTTPTransformer, http_udf\r\n", + "from synapse.ml.io.http import HTTPTransformer, http_udf\r\n", "from requests import Request\r\n", "from pyspark.sql.functions import lit\r\n", "from pyspark.ml import PipelineModel\r\n", @@ -150,7 +150,7 @@ "cell_type": "code", "execution_count": null, "source": [ - "from mmlspark.cognitive import *\r\n", + "from synapse.ml.cognitive import *\r\n", "\r\n", "# A general Cognitive Services key for Text Analytics, Computer Vision and Form Recognizer (or use separate keys that belong to each service)\r\n", "service_key = os.environ[\"COGNITIVE_SERVICE_KEY\"]\r\n", @@ -480,7 +480,7 @@ "source": [ "## Azure Cognitive search sample\n", "\n", - "In this example, we show how you can enrich data using Cognitive Skills and write to an Azure Search Index using MMLSpark." + "In this example, we show how you can enrich data using Cognitive Skills and write to an Azure Search Index using SynapseML." 
], "metadata": {} }, diff --git a/notebooks/CognitiveServices - Predictive Maintenance.ipynb b/notebooks/CognitiveServices - Predictive Maintenance.ipynb index 1c7651c0fb..66790b90aa 100644 --- a/notebooks/CognitiveServices - Predictive Maintenance.ipynb +++ b/notebooks/CognitiveServices - Predictive Maintenance.ipynb @@ -152,7 +152,7 @@ { "cell_type": "code", "source": [ - "from pyspark.sql.functions import col, struct\nfrom mmlspark.cognitive import SimpleDetectAnomalies\nfrom mmlspark.core.spark import FluentAPI\n\ndetector = (SimpleDetectAnomalies()\n .setSubscriptionKey(service_key)\n .setLocation(location)\n .setOutputCol(\"anomalies\")\n .setGroupbyCol(\"grouping\")\n .setSensitivity(95)\n .setGranularity(\"secondly\"))\n\ndf_anomaly = (df_signals\n .where(col(\"unitSymbol\") == 'RPM')\n .withColumn(\"timestamp\", col(\"dateTime\").cast(\"string\"))\n .withColumn(\"value\", col(\"measureValue\").cast(\"double\"))\n .withColumn(\"grouping\", struct(\"deviceId\"))\n .mlTransform(detector)).cache()\n\ndf_anomaly.createOrReplaceTempView('df_anomaly')" + "from pyspark.sql.functions import col, struct\nfrom synapse.ml.cognitive import SimpleDetectAnomalies\nfrom synapse.ml.core.spark import FluentAPI\n\ndetector = (SimpleDetectAnomalies()\n .setSubscriptionKey(service_key)\n .setLocation(location)\n .setOutputCol(\"anomalies\")\n .setGroupbyCol(\"grouping\")\n .setSensitivity(95)\n .setGranularity(\"secondly\"))\n\ndf_anomaly = (df_signals\n .where(col(\"unitSymbol\") == 'RPM')\n .withColumn(\"timestamp\", col(\"dateTime\").cast(\"string\"))\n .withColumn(\"value\", col(\"measureValue\").cast(\"double\"))\n .withColumn(\"grouping\", struct(\"deviceId\"))\n .mlTransform(detector)).cache()\n\ndf_anomaly.createOrReplaceTempView('df_anomaly')" ], "metadata": { "application/vnd.databricks.v1+cell": { diff --git a/notebooks/ConditionalKNN - Exploring Art Across Cultures.ipynb b/notebooks/ConditionalKNN - Exploring Art Across Cultures.ipynb index 9f8480cb69..3386deba53 
100644 --- a/notebooks/ConditionalKNN - Exploring Art Across Cultures.ipynb +++ b/notebooks/ConditionalKNN - Exploring Art Across Cultures.ipynb @@ -34,7 +34,7 @@ "from pyspark.sql.types import *\n", "from pyspark.ml.feature import Normalizer\n", "from pyspark.sql.functions import lit, array, array_contains, udf, col, struct\n", - "from mmlspark.nn import ConditionalKNN, ConditionalKNNModel\n", + "from synapse.ml.nn import ConditionalKNN, ConditionalKNNModel\n", "from PIL import Image\n", "from io import BytesIO\n", "\n", diff --git a/notebooks/CyberML - Anomalous Access Detection.ipynb b/notebooks/CyberML - Anomalous Access Detection.ipynb index 2c3996f9ee..b7d97dcf3f 100644 --- a/notebooks/CyberML - Anomalous Access Detection.ipynb +++ b/notebooks/CyberML - Anomalous Access Detection.ipynb @@ -34,7 +34,7 @@ "# Create an Azure Databricks cluster and install the following libs\n", "\n", "1. In Cluster Libraries install from library source Maven:\n", - "Coordinates: com.microsoft.ml.spark:mmlspark:1.0.0-rc4\n", + "Coordinates: com.microsoft.azure:synapseml:1.0.0-rc4\n", "Repository: https://mmlspark.azureedge.net/maven\n", "\n", "2. 
In Cluster Libraries install from PyPI the library called plotly" @@ -54,10 +54,10 @@ "outputs": [], "source": [ "# this is used to produce the synthetic dataset for this test\n", - "from mmlspark.cyber.dataset import DataFactory\n", + "from synapse.ml.cyber.dataset import DataFactory\n", "\n", "# the access anomalies model generator\n", - "from mmlspark.cyber.anomaly.collaborative_filtering import AccessAnomaly\n", + "from synapse.ml.cyber.anomaly.collaborative_filtering import AccessAnomaly\n", "\n", "from pyspark.sql import functions as f, types as t" ] diff --git a/notebooks/DeepLearning - BiLSTM Medical Entity Extraction.ipynb b/notebooks/DeepLearning - BiLSTM Medical Entity Extraction.ipynb index e095c33421..bfaed7e954 100644 --- a/notebooks/DeepLearning - BiLSTM Medical Entity Extraction.ipynb +++ b/notebooks/DeepLearning - BiLSTM Medical Entity Extraction.ipynb @@ -6,7 +6,7 @@ "source": [ "## DeepLearning - BiLSTM Medical Entity Extraction\n", "\n", - "In this tutorial we use a Bidirectional LSTM entity extractor from the MMLSPark\n", + "In this tutorial we use a Bidirectional LSTM entity extractor from the SynapseML\n", "model downloader to extract entities from PubMed medical abstracts\n", "\n", "Our goal is to identify useful entities in a block of free-form text. 
This is a\n", @@ -28,8 +28,8 @@ "metadata": {}, "outputs": [], "source": [ - "from mmlspark.cntk import CNTKModel\n", - "from mmlspark.downloader import ModelDownloader\n", + "from synapse.ml.cntk import CNTKModel\n", + "from synapse.ml.downloader import ModelDownloader\n", "from pyspark.sql.functions import udf, col\n", "from pyspark.sql.types import IntegerType, ArrayType, FloatType, StringType\n", "from pyspark.sql import Row\n", diff --git a/notebooks/DeepLearning - CIFAR10 Convolutional Network.ipynb b/notebooks/DeepLearning - CIFAR10 Convolutional Network.ipynb index 01226b35b5..e50c52d5cb 100644 --- a/notebooks/DeepLearning - CIFAR10 Convolutional Network.ipynb +++ b/notebooks/DeepLearning - CIFAR10 Convolutional Network.ipynb @@ -13,8 +13,8 @@ "metadata": {}, "outputs": [], "source": [ - "from mmlspark.cntk import CNTKModel\n", - "from mmlspark.downloader import ModelDownloader\n", + "from synapse.ml.cntk import CNTKModel\n", + "from synapse.ml.downloader import ModelDownloader\n", "from pyspark.sql.functions import udf\n", "from pyspark.sql.types import IntegerType\n", "from os.path import abspath" diff --git a/notebooks/DeepLearning - Flower Image Classification.ipynb b/notebooks/DeepLearning - Flower Image Classification.ipynb index b76f914837..e050192f63 100644 --- a/notebooks/DeepLearning - Flower Image Classification.ipynb +++ b/notebooks/DeepLearning - Flower Image Classification.ipynb @@ -8,7 +8,7 @@ "source": [ "from pyspark.ml import Transformer, Estimator, Pipeline\n", "from pyspark.ml.classification import LogisticRegression\n", - "from mmlspark.downloader import ModelDownloader\n", + "from synapse.ml.downloader import ModelDownloader\n", "import os, sys, time" ] }, @@ -50,10 +50,10 @@ "metadata": {}, "outputs": [], "source": [ - "from mmlspark.opencv import ImageTransformer\n", - "from mmlspark.image import UnrollImage\n", - "from mmlspark.cntk import ImageFeaturizer\n", - "from mmlspark.stages import *\n", + "from synapse.ml.opencv import 
ImageTransformer\n", + "from synapse.ml.image import UnrollImage\n", + "from synapse.ml.cntk import ImageFeaturizer\n", + "from synapse.ml.stages import *\n", "\n", "# Make some featurizers\n", "it = ImageTransformer()\\\n", diff --git a/notebooks/DeepLearning - Transfer Learning.ipynb b/notebooks/DeepLearning - Transfer Learning.ipynb index d47dde8318..ef7b204f54 100644 --- a/notebooks/DeepLearning - Transfer Learning.ipynb +++ b/notebooks/DeepLearning - Transfer Learning.ipynb @@ -23,8 +23,8 @@ "metadata": {}, "outputs": [], "source": [ - "from mmlspark.cntk import CNTKModel\n", - "from mmlspark.downloader import ModelDownloader\n", + "from synapse.ml.cntk import CNTKModel\n", + "from synapse.ml.downloader import ModelDownloader\n", "import numpy as np, os, urllib, tarfile, pickle, array\n", "from os.path import abspath\n", "from pyspark.sql.functions import col, udf\n", @@ -100,7 +100,7 @@ "metadata": {}, "outputs": [], "source": [ - "from mmlspark.train import TrainClassifier\n", + "from synapse.ml.train import TrainClassifier\n", "from pyspark.ml.classification import RandomForestClassifier\n", "\n", "train,test = featurizedImages.randomSplit([0.75,0.25])\n", @@ -121,7 +121,7 @@ "metadata": {}, "outputs": [], "source": [ - "from mmlspark.train import ComputeModelStatistics\n", + "from synapse.ml.train import ComputeModelStatistics\n", "predictions = model.transform(test)\n", "metrics = ComputeModelStatistics(evaluationMetric=\"accuracy\").transform(predictions)\n", "metrics.show()" diff --git a/notebooks/HttpOnSpark - Working with Arbitrary Web APIs.ipynb b/notebooks/HttpOnSpark - Working with Arbitrary Web APIs.ipynb index 5a40d43b15..c3825ee9e2 100644 --- a/notebooks/HttpOnSpark - Working with Arbitrary Web APIs.ipynb +++ b/notebooks/HttpOnSpark - Working with Arbitrary Web APIs.ipynb @@ -36,7 +36,7 @@ "\n", "from pyspark.sql.functions import struct\n", "from pyspark.sql.types import *\n", - "from mmlspark.io.http import *\n", + "from synapse.ml.io.http 
import *\n", "\n", "df = spark.createDataFrame([(\"foo\",) for x in range(20)], [\"data\"]) \\\n", " .withColumn(\"inputs\", struct(\"data\"))\n", diff --git a/notebooks/HyperParameterTuning - Fighting Breast Cancer.ipynb b/notebooks/HyperParameterTuning - Fighting Breast Cancer.ipynb index 2832e54178..075b777b51 100644 --- a/notebooks/HyperParameterTuning - Fighting Breast Cancer.ipynb +++ b/notebooks/HyperParameterTuning - Fighting Breast Cancer.ipynb @@ -6,7 +6,7 @@ "source": [ "## HyperParameterTuning - Fighting Breast Cancer\n", "\n", - "We can do distributed randomized grid search hyperparameter tuning with MMLSpark.\n", + "We can do distributed randomized grid search hyperparameter tuning with SynapseML.\n", "\n", "First, we import the packages" ] @@ -51,8 +51,8 @@ "metadata": {}, "outputs": [], "source": [ - "from mmlspark.automl import TuneHyperparameters\n", - "from mmlspark.train import TrainClassifier\n", + "from synapse.ml.automl import TuneHyperparameters\n", + "from synapse.ml.train import TrainClassifier\n", "from pyspark.ml.classification import LogisticRegression, RandomForestClassifier, GBTClassifier\n", "logReg = LogisticRegression()\n", "randForest = RandomForestClassifier()\n", @@ -76,7 +76,7 @@ "metadata": {}, "outputs": [], "source": [ - "from mmlspark.automl import *\n", + "from synapse.ml.automl import *\n", "\n", "paramBuilder = \\\n", " HyperparamBuilder() \\\n", @@ -140,7 +140,7 @@ "metadata": {}, "outputs": [], "source": [ - "from mmlspark.train import ComputeModelStatistics\n", + "from synapse.ml.train import ComputeModelStatistics\n", "prediction = bestModel.transform(test)\n", "metrics = ComputeModelStatistics().transform(prediction)\n", "metrics.limit(10).toPandas()" diff --git a/notebooks/Interpretability - Image Explainers.ipynb b/notebooks/Interpretability - Image Explainers.ipynb index ced39cc113..aac9781e22 100644 --- a/notebooks/Interpretability - Image Explainers.ipynb +++ b/notebooks/Interpretability - Image 
Explainers.ipynb @@ -15,10 +15,10 @@ "cell_type": "code", "execution_count": null, "source": [ - "from mmlspark.explainers import *\r\n", - "from mmlspark.onnx import ONNXModel\r\n", - "from mmlspark.opencv import ImageTransformer\r\n", - "from mmlspark.io import *\r\n", + "from synapse.ml.explainers import *\r\n", + "from synapse.ml.onnx import ONNXModel\r\n", + "from synapse.ml.opencv import ImageTransformer\r\n", + "from synapse.ml.io import *\r\n", "from pyspark.ml import Pipeline\r\n", "from pyspark.ml.classification import LogisticRegression\r\n", "from pyspark.ml.feature import StringIndexer\r\n", @@ -74,7 +74,7 @@ "cell_type": "code", "execution_count": null, "source": [ - "from mmlspark.io import *\r\n", + "from synapse.ml.io import *\r\n", "\r\n", "image_df = spark.read.image().load(\"wasbs://publicwasb@mmlspark.blob.core.windows.net/explainers/images/david-lusvardi-dWcUncxocQY-unsplash.jpg\")\r\n", "display(image_df)\r\n", diff --git a/notebooks/Interpretability - Tabular SHAP explainer.ipynb b/notebooks/Interpretability - Tabular SHAP explainer.ipynb index 598684608c..8bde934744 100644 --- a/notebooks/Interpretability - Tabular SHAP explainer.ipynb +++ b/notebooks/Interpretability - Tabular SHAP explainer.ipynb @@ -32,7 +32,7 @@ "outputs": [], "source": [ "import pyspark\n", - "from mmlspark.explainers import *\n", + "from synapse.ml.explainers import *\n", "from pyspark.ml import Pipeline\n", "from pyspark.ml.classification import LogisticRegression\n", "from pyspark.ml.feature import StringIndexer, OneHotEncoder, VectorAssembler\n", diff --git a/notebooks/Interpretability - Text Explainers.ipynb b/notebooks/Interpretability - Text Explainers.ipynb index a3acc5f24e..281ace06f3 100644 --- a/notebooks/Interpretability - Text Explainers.ipynb +++ b/notebooks/Interpretability - Text Explainers.ipynb @@ -36,8 +36,8 @@ "from pyspark.ml.feature import StopWordsRemover, HashingTF, IDF, Tokenizer\n", "from pyspark.ml import Pipeline\n", "from 
pyspark.ml.classification import LogisticRegression\n", - "from mmlspark.explainers import *\n", - "from mmlspark.featurize.text import TextFeaturizer\n", + "from synapse.ml.explainers import *\n", + "from synapse.ml.featurize.text import TextFeaturizer\n", "\n", "vec2array = udf(lambda vec: vec.toArray().tolist(), ArrayType(FloatType()))\n", "vec_access = udf(lambda v, i: float(v[i]), FloatType())" @@ -143,7 +143,7 @@ "outputs": [], "source": [ "def plotConfusionMatrix(df, label, prediction, classLabels):\n", - " from mmlspark.plot import confusionMatrix\n", + " from synapse.ml.plot import confusionMatrix\n", " import matplotlib.pyplot as plt\n", "\n", " fig = plt.figure(figsize=(4.5, 4.5))\n", diff --git a/notebooks/LightGBM - Overview.ipynb b/notebooks/LightGBM - Overview.ipynb index 68bfc175bc..b10b1b7ea0 100644 --- a/notebooks/LightGBM - Overview.ipynb +++ b/notebooks/LightGBM - Overview.ipynb @@ -184,7 +184,7 @@ "cell_type": "code", "execution_count": null, "source": [ - "from mmlspark.lightgbm import LightGBMClassifier\r\n", + "from synapse.ml.lightgbm import LightGBMClassifier\r\n", "model = LightGBMClassifier(objective=\"binary\", featuresCol=\"features\", labelCol=\"Bankrupt?\", isUnbalance=True)" ], "outputs": [], @@ -210,7 +210,7 @@ "cell_type": "code", "execution_count": null, "source": [ - "from mmlspark.lightgbm import LightGBMClassificationModel\r\n", + "from synapse.ml.lightgbm import LightGBMClassificationModel\r\n", "\r\n", "if os.environ.get(\"AZURE_SERVICE\", None) == \"Microsoft.ProjectArcadia\":\r\n", " model.saveNativeModel(\"/models/lgbmclassifier.model\")\r\n", @@ -279,7 +279,7 @@ "cell_type": "code", "execution_count": null, "source": [ - "from mmlspark.train import ComputeModelStatistics\n", + "from synapse.ml.train import ComputeModelStatistics\n", "metrics = ComputeModelStatistics(evaluationMetric=\"classification\", labelCol='Bankrupt?', scoredLabelsCol='prediction').transform(predictions)\n", "display(metrics)" ], @@ -354,7 +354,7 @@ 
"cell_type": "code", "execution_count": null, "source": [ - "from mmlspark.lightgbm import LightGBMRegressor\n", + "from synapse.ml.lightgbm import LightGBMRegressor\n", "model = LightGBMRegressor(objective='quantile',\n", " alpha=0.2,\n", " learningRate=0.3,\n", @@ -393,7 +393,7 @@ "cell_type": "code", "execution_count": null, "source": [ - "from mmlspark.train import ComputeModelStatistics\n", + "from synapse.ml.train import ComputeModelStatistics\n", "metrics = ComputeModelStatistics(evaluationMetric='regression',\n", " labelCol='label',\n", " scoresCol='prediction') \\\n", @@ -442,7 +442,7 @@ "cell_type": "code", "execution_count": null, "source": [ - "from mmlspark.lightgbm import LightGBMRanker\n", + "from synapse.ml.lightgbm import LightGBMRanker\n", "\n", "features_col = 'features'\n", "query_col = 'query'\n", diff --git a/notebooks/ModelInterpretation - Snow Leopard Detection.ipynb b/notebooks/ModelInterpretation - Snow Leopard Detection.ipynb index 5c9fb57245..6d4052748c 100644 --- a/notebooks/ModelInterpretation - Snow Leopard Detection.ipynb +++ b/notebooks/ModelInterpretation - Snow Leopard Detection.ipynb @@ -36,8 +36,8 @@ "cell_type": "code", "execution_count": null, "source": [ - "from mmlspark.cognitive import *\n", - "from mmlspark.core.spark import FluentAPI\n", + "from synapse.ml.cognitive import *\n", + "from synapse.ml.core.spark import FluentAPI\n", "from pyspark.sql.functions import lit\n", "\n", "def bingPhotoSearch(name, queries, pages):\n", @@ -199,9 +199,9 @@ "from pyspark.ml.feature import StringIndexer\r\n", "from pyspark.ml.classification import LogisticRegression\r\n", "from pyspark.sql.functions import udf\r\n", - "from mmlspark.downloader import ModelDownloader\r\n", - "from mmlspark.cntk import ImageFeaturizer\r\n", - "from mmlspark.stages import UDFTransformer\r\n", + "from synapse.ml.downloader import ModelDownloader\r\n", + "from synapse.ml.cntk import ImageFeaturizer\r\n", + "from synapse.ml.stages import UDFTransformer\r\n", 
"from pyspark.sql.types import *\r\n", "\r\n", "def getIndex(row):\r\n", @@ -239,7 +239,7 @@ "execution_count": null, "source": [ "def plotConfusionMatrix(df, label, prediction, classLabels):\r\n", - " from mmlspark.plot import confusionMatrix\r\n", + " from synapse.ml.plot import confusionMatrix\r\n", " import matplotlib.pyplot as plt\r\n", " fig = plt.figure(figsize=(4.5, 4.5))\r\n", " confusionMatrix(df, label, prediction, classLabels)\r\n", @@ -258,7 +258,7 @@ "execution_count": null, "source": [ "import urllib.request\r\n", - "from mmlspark.lime import ImageLIME\r\n", + "from synapse.ml.lime import ImageLIME\r\n", "\r\n", "test_image_url = \"https://mmlspark.blob.core.windows.net/graphics/SnowLeopardAD/snow_leopard1.jpg\"\r\n", "with urllib.request.urlopen(test_image_url) as url:\r\n", diff --git a/notebooks/ONNX - Inference on Spark.ipynb b/notebooks/ONNX - Inference on Spark.ipynb index b15266c45c..20d08f4467 100644 --- a/notebooks/ONNX - Inference on Spark.ipynb +++ b/notebooks/ONNX - Inference on Spark.ipynb @@ -47,7 +47,7 @@ "execution_count": null, "source": [ "from pyspark.ml.feature import VectorAssembler\r\n", - "from mmlspark.lightgbm import LightGBMClassifier\r\n", + "from synapse.ml.lightgbm import LightGBMClassifier\r\n", "\r\n", "feature_cols = df.columns[1:]\r\n", "featurizer = VectorAssembler(\r\n", @@ -119,7 +119,7 @@ "cell_type": "code", "execution_count": null, "source": [ - "from mmlspark.onnx import ONNXModel\r\n", + "from synapse.ml.onnx import ONNXModel\r\n", "\r\n", "onnx_ml = ONNXModel().setModelPayload(model_payload_ml)\r\n", "\r\n", diff --git a/notebooks/OpenCV - Pipeline Image Transformations.ipynb b/notebooks/OpenCV - Pipeline Image Transformations.ipynb index e6b4cda376..34adfcbdc9 100644 --- a/notebooks/OpenCV - Pipeline Image Transformations.ipynb +++ b/notebooks/OpenCV - Pipeline Image Transformations.ipynb @@ -31,10 +31,10 @@ " from pyspark.sql import SparkSession\n", " spark = SparkSession.builder.getOrCreate()\n", "\n", - 
"import mmlspark\n", + "import synapse.ml\n", "import numpy as np\n", - "from mmlspark.opencv import toNDArray\n", - "from mmlspark.io import *\n", + "from synapse.ml.opencv import toNDArray\n", + "from synapse.ml.io import *\n", "\n", "imageDir = \"wasbs://publicwasb@mmlspark.blob.core.windows.net/sampleImages\"\n", "images = spark.read.image().load(imageDir).cache()\n", @@ -147,7 +147,7 @@ "metadata": {}, "outputs": [], "source": [ - "from mmlspark.opencv import ImageTransformer\n", + "from synapse.ml.opencv import ImageTransformer\n", "\n", "tr = (ImageTransformer() # images are resized and then cropped\n", " .setOutputCol(\"transformed\")\n", @@ -165,7 +165,7 @@ "metadata": {}, "source": [ "For the advanced image manipulations, use Spark UDFs.\n", - "The MMLSpark package provides conversion function between *Spark Row* and\n", + "The SynapseML package provides conversion function between *Spark Row* and\n", "*ndarray* image representations." ] }, @@ -176,7 +176,7 @@ "outputs": [], "source": [ "from pyspark.sql.functions import udf\n", - "from mmlspark.opencv import ImageSchema, toNDArray, toImage\n", + "from synapse.ml.opencv import ImageSchema, toNDArray, toImage\n", "\n", "def u(row):\n", " array = toNDArray(row) # convert Image to numpy ndarray[height, width, 3]\n", @@ -204,7 +204,7 @@ "metadata": {}, "outputs": [], "source": [ - "from mmlspark.image import UnrollImage\n", + "from synapse.ml.image import UnrollImage\n", "\n", "unroller = UnrollImage().setInputCol(\"noblue\").setOutputCol(\"unrolled\")\n", "\n", diff --git a/notebooks/Regression - Auto Imports.ipynb b/notebooks/Regression - Auto Imports.ipynb index 271a17c751..6ce43ff19e 100644 --- a/notebooks/Regression - Auto Imports.ipynb +++ b/notebooks/Regression - Auto Imports.ipynb @@ -13,7 +13,7 @@ "model to predict the automobile's price. 
The process includes training, testing,\n", "and evaluating the model on the Automobile Imports data set.\n", "\n", - "This sample demonstrates the use of several members of the mmlspark library:\n", + "This sample demonstrates the use of several members of the synapseml library:\n", "- [`TrainRegressor`\n", " ](http://mmlspark.azureedge.net/docs/pyspark/TrainRegressor.html)\n", "- [`SummarizeData`\n", @@ -93,7 +93,7 @@ "metadata": {}, "outputs": [], "source": [ - "from mmlspark.stages import SummarizeData\n", + "from synapse.ml.stages import SummarizeData\n", "summary = SummarizeData().transform(data)\n", "summary.toPandas()" ] @@ -138,7 +138,7 @@ "metadata": {}, "outputs": [], "source": [ - "from mmlspark.featurize import CleanMissingData\n", + "from synapse.ml.featurize import CleanMissingData\n", "cols = [\"normalized-losses\", \"stroke\", \"bore\", \"horsepower\",\n", " \"peak-rpm\", \"price\"]\n", "cleanModel = CleanMissingData().setCleaningMode(\"Median\") \\\n", @@ -191,7 +191,7 @@ "# train Poisson Regression Model\n", "from pyspark.ml.regression import GeneralizedLinearRegression\n", "from pyspark.ml import Pipeline\n", - "from mmlspark.train import TrainRegressor\n", + "from synapse.ml.train import TrainRegressor\n", "\n", "glr = GeneralizedLinearRegression(family=\"poisson\", link=\"log\")\n", "poissonModel = TrainRegressor().setModel(glr).setLabelCol(\"price\").setNumFeatures(256)\n", @@ -244,7 +244,7 @@ "metadata": {}, "outputs": [], "source": [ - "from mmlspark.train import ComputeModelStatistics\n", + "from synapse.ml.train import ComputeModelStatistics\n", "poissonMetrics = ComputeModelStatistics().transform(poissonPrediction)\n", "print(\"Poisson Metrics\")\n", "poissonMetrics.toPandas()" @@ -274,7 +274,7 @@ "metadata": {}, "outputs": [], "source": [ - "from mmlspark.train import ComputePerInstanceStatistics\n", + "from synapse.ml.train import ComputePerInstanceStatistics\n", "def demonstrateEvalPerInstance(pred):\n", " return 
ComputePerInstanceStatistics().transform(pred) \\\n", " .select(\"price\", \"Scores\", \"L1_loss\", \"L2_loss\") \\\n", diff --git a/notebooks/Regression - Flight Delays with DataCleaning.ipynb b/notebooks/Regression - Flight Delays with DataCleaning.ipynb index c4340228fc..5eb03604ac 100644 --- a/notebooks/Regression - Flight Delays with DataCleaning.ipynb +++ b/notebooks/Regression - Flight Delays with DataCleaning.ipynb @@ -104,7 +104,7 @@ "metadata": {}, "outputs": [], "source": [ - "from mmlspark.featurize import DataConversion\n", + "from synapse.ml.featurize import DataConversion\n", "flightDelay = DataConversion(cols=[\"Quarter\",\"Month\",\"DayofMonth\",\"DayOfWeek\",\n", " \"OriginAirportID\",\"DestAirportID\",\n", " \"CRSDepTime\",\"CRSArrTime\"],\n", @@ -156,7 +156,7 @@ "metadata": {}, "outputs": [], "source": [ - "from mmlspark.train import TrainRegressor, TrainedRegressorModel\n", + "from synapse.ml.train import TrainRegressor, TrainedRegressorModel\n", "from pyspark.ml.regression import LinearRegression\n", "\n", "trainCat = DataConversion(cols=[\"Carrier\",\"DepTimeBlk\",\"ArrTimeBlk\"],\n", @@ -200,7 +200,7 @@ "metadata": {}, "outputs": [], "source": [ - "from mmlspark.train import ComputeModelStatistics\n", + "from synapse.ml.train import ComputeModelStatistics\n", "metrics = ComputeModelStatistics().transform(scoredData)\n", "metrics.toPandas()" ] @@ -219,7 +219,7 @@ "metadata": {}, "outputs": [], "source": [ - "from mmlspark.train import ComputePerInstanceStatistics\n", + "from synapse.ml.train import ComputePerInstanceStatistics\n", "evalPerInstance = ComputePerInstanceStatistics().transform(scoredData)\n", "evalPerInstance.select(\"ArrDelay\", \"Scores\", \"L1_loss\", \"L2_loss\") \\\n", " .limit(10).toPandas()" diff --git a/notebooks/Regression - Flight Delays.ipynb b/notebooks/Regression - Flight Delays.ipynb index 590915e7cc..74e307c703 100644 --- a/notebooks/Regression - Flight Delays.ipynb +++ b/notebooks/Regression - Flight Delays.ipynb 
@@ -33,7 +33,7 @@ "source": [ "import numpy as np\n", "import pandas as pd\n", - "import mmlspark" + "import synapse.ml" ] }, { @@ -86,7 +86,7 @@ "metadata": {}, "outputs": [], "source": [ - "from mmlspark.train import TrainRegressor, TrainedRegressorModel\n", + "from synapse.ml.train import TrainRegressor, TrainedRegressorModel\n", "from pyspark.ml.regression import LinearRegression\n", "from pyspark.ml.feature import StringIndexer\n", "# Convert columns to categorical\n", @@ -139,7 +139,7 @@ "metadata": {}, "outputs": [], "source": [ - "from mmlspark.train import ComputeModelStatistics\n", + "from synapse.ml.train import ComputeModelStatistics\n", "metrics = ComputeModelStatistics().transform(scoredData)\n", "metrics.toPandas()" ] @@ -158,7 +158,7 @@ "metadata": {}, "outputs": [], "source": [ - "from mmlspark.train import ComputePerInstanceStatistics\n", + "from synapse.ml.train import ComputePerInstanceStatistics\n", "evalPerInstance = ComputePerInstanceStatistics().transform(scoredData)\n", "evalPerInstance.select(\"ArrDelay\", \"Scores\", \"L1_loss\", \"L2_loss\").limit(10).toPandas()" ] diff --git a/notebooks/Regression - Vowpal Wabbit vs. LightGBM vs. Linear Regressor.ipynb b/notebooks/Regression - Vowpal Wabbit vs. LightGBM vs. Linear Regressor.ipynb index 51a71519cf..3db12e7222 100644 --- a/notebooks/Regression - Vowpal Wabbit vs. LightGBM vs. Linear Regressor.ipynb +++ b/notebooks/Regression - Vowpal Wabbit vs. LightGBM vs. Linear Regressor.ipynb @@ -8,7 +8,7 @@ "\n", "This notebook shows how to build simple regression models by using \n", "[Vowpal Wabbit (VW)](https://github.com/VowpalWabbit/vowpal_wabbit) and \n", - "[LightGBM](https://github.com/microsoft/LightGBM) with MMLSpark.\n", + "[LightGBM](https://github.com/microsoft/LightGBM) with SynapseML.\n", " We also compare the results with \n", " [Spark MLlib Linear Regression](https://spark.apache.org/docs/latest/ml-classification-regression.html#linear-regression)." 
] @@ -32,9 +32,9 @@ "outputs": [], "source": [ "import math\n", - "from mmlspark.train import ComputeModelStatistics\n", - "from mmlspark.vw import VowpalWabbitRegressor, VowpalWabbitFeaturizer\n", - "from mmlspark.lightgbm import LightGBMRegressor\n", + "from synapse.ml.train import ComputeModelStatistics\n", + "from synapse.ml.vw import VowpalWabbitRegressor, VowpalWabbitFeaturizer\n", + "from synapse.ml.lightgbm import LightGBMRegressor\n", "import numpy as np\n", "import pandas as pd\n", "from pyspark.ml.feature import VectorAssembler\n", @@ -392,7 +392,7 @@ "pygments_lexer": "ipython3", "version": "3.6.8" }, - "name": "mmlspark example - regression", + "name": "synapseml example - regression", "notebookId": 1395284431467721, "pycharm": { "stem_cell": { diff --git a/notebooks/SparkServing - Deploying a Classifier.ipynb b/notebooks/SparkServing - Deploying a Classifier.ipynb index 854ae260a0..1fb23adf6e 100644 --- a/notebooks/SparkServing - Deploying a Classifier.ipynb +++ b/notebooks/SparkServing - Deploying a Classifier.ipynb @@ -73,7 +73,7 @@ }, "outputs": [], "source": [ - "from mmlspark.train import TrainClassifier\n", + "from synapse.ml.train import TrainClassifier\n", "from pyspark.ml.classification import LogisticRegression\n", "model = TrainClassifier(model=LogisticRegression(), labelCol=\"income\", numFeatures=256).fit(train)" ] @@ -91,7 +91,7 @@ "metadata": {}, "outputs": [], "source": [ - "from mmlspark.train import ComputeModelStatistics, TrainedClassifierModel\n", + "from synapse.ml.train import ComputeModelStatistics, TrainedClassifierModel\n", "prediction = model.transform(test)\n", "prediction.printSchema()" ] @@ -111,7 +111,7 @@ "metadata": {}, "source": [ "First, we will define the webservice input/output.\n", - "For more information, you can visit the [documentation for Spark Serving](https://github.com/Azure/mmlspark/blob/master/docs/mmlspark-serving.md)" + "For more information, you can visit the [documentation for Spark 
Serving](https://github.com/Microsoft/SynapseML/blob/master/docs/mmlspark-serving.md)" ] }, { @@ -121,7 +121,7 @@ "outputs": [], "source": [ "from pyspark.sql.types import *\n", - "from mmlspark.io import *\n", + "from synapse.ml.io import *\n", "import uuid\n", "\n", "serving_inputs = spark.readStream.server() \\\n", diff --git a/notebooks/TextAnalytics - Amazon Book Reviews with Word2Vec.ipynb b/notebooks/TextAnalytics - Amazon Book Reviews with Word2Vec.ipynb index 9cd06cdd91..d65eb4823d 100644 --- a/notebooks/TextAnalytics - Amazon Book Reviews with Word2Vec.ipynb +++ b/notebooks/TextAnalytics - Amazon Book Reviews with Word2Vec.ipynb @@ -129,7 +129,7 @@ "execution_count": null, "source": [ "from pyspark.ml.classification import LogisticRegression, RandomForestClassifier, GBTClassifier\r\n", - "from mmlspark.train import TrainClassifier\r\n", + "from synapse.ml.train import TrainClassifier\r\n", "import itertools\r\n", "\r\n", "lrHyperParams = [0.05, 0.2]\r\n", @@ -166,7 +166,7 @@ "cell_type": "code", "execution_count": null, "source": [ - "from mmlspark.automl import FindBestModel\r\n", + "from synapse.ml.automl import FindBestModel\r\n", "bestModel = FindBestModel(evaluationMetric=\"AUC\", models=trainedModels).fit(ptest)\r\n", "bestModel.getRocCurve().show()\r\n", "bestModel.getBestModelMetrics().show()\r\n", @@ -186,7 +186,7 @@ "cell_type": "code", "execution_count": null, "source": [ - "from mmlspark.train import ComputeModelStatistics\r\n", + "from synapse.ml.train import ComputeModelStatistics\r\n", "predictions = bestModel.transform(pvalidation)\r\n", "metrics = ComputeModelStatistics().transform(predictions)\r\n", "print(\"Best model's accuracy on validation set = \"\r\n", diff --git a/notebooks/TextAnalytics - Amazon Book Reviews.ipynb b/notebooks/TextAnalytics - Amazon Book Reviews.ipynb index e700ea2beb..033fed0bb3 100644 --- a/notebooks/TextAnalytics - Amazon Book Reviews.ipynb +++ b/notebooks/TextAnalytics - Amazon Book Reviews.ipynb @@ -54,7 
+54,7 @@ "cell_type": "code", "execution_count": null, "source": [ - "from mmlspark.featurize.text import TextFeaturizer\r\n", + "from synapse.ml.featurize.text import TextFeaturizer\r\n", "textFeaturizer = TextFeaturizer() \\\r\n", " .setInputCol(\"text\").setOutputCol(\"features\") \\\r\n", " .setUseStopWordsRemover(True).setUseIDF(True).setMinDocFreq(5).setNumFeatures(1 << 16).fit(data)" @@ -108,7 +108,7 @@ "lrHyperParams = [0.05, 0.1, 0.2, 0.4]\r\n", "logisticRegressions = [LogisticRegression(regParam = hyperParam) for hyperParam in lrHyperParams]\r\n", "\r\n", - "from mmlspark.train import TrainClassifier\r\n", + "from synapse.ml.train import TrainClassifier\r\n", "lrmodels = [TrainClassifier(model=lrm, labelCol=\"label\").fit(train) for lrm in logisticRegressions]" ], "outputs": [], @@ -125,7 +125,7 @@ "cell_type": "code", "execution_count": null, "source": [ - "from mmlspark.automl import FindBestModel, BestModel\r\n", + "from synapse.ml.automl import FindBestModel, BestModel\r\n", "bestModel = FindBestModel(evaluationMetric=\"AUC\", models=lrmodels).fit(test)\r\n", "bestModel.getRocCurve().show()\r\n", "bestModel.getBestModelMetrics().show()\r\n", @@ -145,7 +145,7 @@ "cell_type": "code", "execution_count": null, "source": [ - "from mmlspark.train import ComputeModelStatistics\r\n", + "from synapse.ml.train import ComputeModelStatistics\r\n", "predictions = bestModel.transform(validation)\r\n", "metrics = ComputeModelStatistics().transform(predictions)\r\n", "print(\"Best model's accuracy on validation set = \"\r\n", diff --git a/notebooks/Vowpal Wabbit - Overview.ipynb b/notebooks/Vowpal Wabbit - Overview.ipynb index 0934c36ebc..dab08ad9b9 100644 --- a/notebooks/Vowpal Wabbit - Overview.ipynb +++ b/notebooks/Vowpal Wabbit - Overview.ipynb @@ -149,7 +149,7 @@ "metadata": {}, "outputs": [], "source": [ - "from mmlspark.vw import VowpalWabbitFeaturizer\n", + "from synapse.ml.vw import VowpalWabbitFeaturizer\n", "featurizer = 
VowpalWabbitFeaturizer(inputCols=df.columns[:-1], outputCol=\"features\")\n", "train_data = featurizer.transform(train)[\"target\", \"features\"]\n", "test_data = featurizer.transform(test)[\"target\", \"features\"]" @@ -177,7 +177,7 @@ "metadata": {}, "outputs": [], "source": [ - "from mmlspark.vw import VowpalWabbitClassifier\n", + "from synapse.ml.vw import VowpalWabbitClassifier\n", "model = VowpalWabbitClassifier(numPasses=20, labelCol=\"target\", featuresCol=\"features\").fit(train_data)" ] }, @@ -204,7 +204,7 @@ "metadata": {}, "outputs": [], "source": [ - "from mmlspark.train import ComputeModelStatistics\n", + "from synapse.ml.train import ComputeModelStatistics\n", "metrics = ComputeModelStatistics(evaluationMetric='classification', labelCol='target', scoredLabelsCol='prediction').transform(predictions)\n", "display(metrics)" ] @@ -213,7 +213,7 @@ "source": [ "## Adult Census with VowpalWabbitClassifier\n", "\n", - "In this example, we predict incomes from the Adult Census dataset using Vowpal Wabbit (VW) Classifier in MMLSpark." + "In this example, we predict incomes from the Adult Census dataset using Vowpal Wabbit (VW) Classifier in SynapseML." 
], "cell_type": "markdown", "metadata": {} @@ -256,7 +256,7 @@ "source": [ "from pyspark.sql.functions import when, col\n", "from pyspark.ml import Pipeline\n", - "from mmlspark.vw import VowpalWabbitFeaturizer, VowpalWabbitClassifier\n", + "from synapse.ml.vw import VowpalWabbitFeaturizer, VowpalWabbitClassifier\n", "\n", "# Define classification label\n", "train = train.withColumn(\"label\", when(col(\"income\").contains(\"<\"), 0.0).otherwise(1.0)).repartition(1)\n", @@ -334,7 +334,7 @@ "metadata": {}, "outputs": [], "source": [ - "from mmlspark.train import ComputeModelStatistics\n", + "from synapse.ml.train import ComputeModelStatistics\n", "metrics = ComputeModelStatistics(evaluationMetric=\"classification\", \n", " labelCol=\"label\", \n", " scoredLabelsCol=\"prediction\").transform(prediction)\n", @@ -372,8 +372,8 @@ "from matplotlib.colors import ListedColormap, Normalize\n", "from matplotlib.cm import get_cmap\n", "import matplotlib.pyplot as plt\n", - "from mmlspark.train import ComputeModelStatistics\n", - "from mmlspark.vw import VowpalWabbitRegressor, VowpalWabbitFeaturizer\n", + "from synapse.ml.train import ComputeModelStatistics\n", + "from synapse.ml.vw import VowpalWabbitRegressor, VowpalWabbitFeaturizer\n", "import numpy as np\n", "import pandas as pd\n", "from sklearn.datasets import load_boston" @@ -628,7 +628,7 @@ "metadata": {}, "outputs": [], "source": [ - "from mmlspark.vw import VowpalWabbitRegressor\n", + "from synapse.ml.vw import VowpalWabbitRegressor\n", "model = (VowpalWabbitRegressor(numPasses=20, args=\"--holdout_off --loss_function quantile -q :: -l 0.1\")\n", " .fit(train))" ] @@ -656,7 +656,7 @@ "metadata": {}, "outputs": [], "source": [ - "from mmlspark.train import ComputeModelStatistics\n", + "from synapse.ml.train import ComputeModelStatistics\n", "metrics = ComputeModelStatistics(evaluationMetric='regression',\n", " labelCol='label',\n", " scoresCol='prediction') \\\n", @@ -733,7 +733,7 @@ "metadata": {}, "outputs": [], 
"source": [ - "from mmlspark.vw import VowpalWabbitFeaturizer, VowpalWabbitContextualBandit, VectorZipper\n", + "from synapse.ml.vw import VowpalWabbitFeaturizer, VowpalWabbitContextualBandit, VectorZipper\n", "from pyspark.ml import Pipeline\n", "pipeline = Pipeline(stages=[\n", " VowpalWabbitFeaturizer(inputCols=['GUser_id'], outputCol='GUser_id_feature'),\n", diff --git a/opencv/src/main/python/mmlspark/opencv/ImageTransformer.py b/opencv/src/main/python/synapse/ml/opencv/ImageTransformer.py similarity index 98% rename from opencv/src/main/python/mmlspark/opencv/ImageTransformer.py rename to opencv/src/main/python/synapse/ml/opencv/ImageTransformer.py index 1ef0a210b6..6f50bd825e 100644 --- a/opencv/src/main/python/mmlspark/opencv/ImageTransformer.py +++ b/opencv/src/main/python/synapse/ml/opencv/ImageTransformer.py @@ -13,7 +13,7 @@ from pyspark.sql.types import * from pyspark.sql.types import Row, _create_row import numpy as np -from mmlspark.opencv._ImageTransformer import _ImageTransformer +from synapse.ml.opencv._ImageTransformer import _ImageTransformer ImageFields = ["origin", "height", "width", "nChannels", "mode", "data"] diff --git a/opencv/src/main/python/mmlspark/opencv/__init__.py b/opencv/src/main/python/synapse/ml/opencv/__init__.py similarity index 100% rename from opencv/src/main/python/mmlspark/opencv/__init__.py rename to opencv/src/main/python/synapse/ml/opencv/__init__.py diff --git a/opencv/src/main/scala/com/microsoft/ml/spark/opencv/ImageSetAugmenter.scala b/opencv/src/main/scala/com/microsoft/azure/synapse/ml/opencv/ImageSetAugmenter.scala similarity index 91% rename from opencv/src/main/scala/com/microsoft/ml/spark/opencv/ImageSetAugmenter.scala rename to opencv/src/main/scala/com/microsoft/azure/synapse/ml/opencv/ImageSetAugmenter.scala index ae89e80dd9..1054ae684c 100644 --- a/opencv/src/main/scala/com/microsoft/ml/spark/opencv/ImageSetAugmenter.scala +++ 
b/opencv/src/main/scala/com/microsoft/azure/synapse/ml/opencv/ImageSetAugmenter.scala @@ -1,11 +1,11 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.opencv +package com.microsoft.azure.synapse.ml.opencv -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.contracts.{HasInputCol, HasOutputCol} -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts.{HasInputCol, HasOutputCol} +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.ml._ import org.apache.spark.ml.image.ImageSchema import org.apache.spark.ml.param._ diff --git a/opencv/src/main/scala/com/microsoft/ml/spark/opencv/ImageTransformer.scala b/opencv/src/main/scala/com/microsoft/azure/synapse/ml/opencv/ImageTransformer.scala similarity index 98% rename from opencv/src/main/scala/com/microsoft/ml/spark/opencv/ImageTransformer.scala rename to opencv/src/main/scala/com/microsoft/azure/synapse/ml/opencv/ImageTransformer.scala index 9caa45c54f..795c5c458c 100644 --- a/opencv/src/main/scala/com/microsoft/ml/spark/opencv/ImageTransformer.scala +++ b/opencv/src/main/scala/com/microsoft/azure/synapse/ml/opencv/ImageTransformer.scala @@ -1,12 +1,12 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.opencv +package com.microsoft.azure.synapse.ml.opencv -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.contracts.{HasInputCol, HasOutputCol} -import com.microsoft.ml.spark.core.schema.{BinaryFileSchema, ImageSchemaUtils} -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts.{HasInputCol, HasOutputCol} +import com.microsoft.azure.synapse.ml.core.schema.{BinaryFileSchema, ImageSchemaUtils} +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.injections.UDFUtils import org.apache.spark.ml.image.ImageSchema import org.apache.spark.ml.param._ @@ -200,7 +200,7 @@ object Flip { } /** Blurs the image using a box filter. - * The com.microsoft.ml.spark.core.serialize.params are a map of the dimensions of the blurring box. Please refer to + * The params are a map of the dimensions of the blurring box. Please refer to * [[http://docs.opencv.org/2.4/modules/imgproc/doc/filtering.html#blur OpenCV]] for more information. * * @param params Map of parameters and values diff --git a/opencv/src/main/scala/com/microsoft/ml/spark/opencv/OpenCVUtils.scala b/opencv/src/main/scala/com/microsoft/azure/synapse/ml/opencv/OpenCVUtils.scala similarity index 80% rename from opencv/src/main/scala/com/microsoft/ml/spark/opencv/OpenCVUtils.scala rename to opencv/src/main/scala/com/microsoft/azure/synapse/ml/opencv/OpenCVUtils.scala index 411d4234de..57fa63a0d2 100644 --- a/opencv/src/main/scala/com/microsoft/ml/spark/opencv/OpenCVUtils.scala +++ b/opencv/src/main/scala/com/microsoft/azure/synapse/ml/opencv/OpenCVUtils.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.opencv +package com.microsoft.azure.synapse.ml.opencv -import com.microsoft.ml.spark.core.env.NativeLoader +import com.microsoft.azure.synapse.ml.core.env.NativeLoader import org.apache.spark.sql.DataFrame import org.apache.spark.sql.catalyst.encoders.RowEncoder @@ -21,12 +21,12 @@ object OpenCVUtils { new NativeLoader("/nu/pattern/opencv").loadLibraryByName(Core.NATIVE_LIBRARY_NAME) } - private[spark] def loadOpenCVFunc[A](it: Iterator[A]) = { + private[ml] def loadOpenCVFunc[A](it: Iterator[A]) = { OpenCVLoader it } - private[spark] def loadOpenCV(df: DataFrame): DataFrame = { + private[ml] def loadOpenCV(df: DataFrame): DataFrame = { val encoder = RowEncoder(df.schema) df.mapPartitions(loadOpenCVFunc)(encoder) } diff --git a/opencv/src/test/scala/com/microsoft/ml/spark/image/ResizeImageTransformerSuite.scala b/opencv/src/test/scala/com/microsoft/azure/synapse/ml/image/ResizeImageTransformerSuite.scala similarity index 91% rename from opencv/src/test/scala/com/microsoft/ml/spark/image/ResizeImageTransformerSuite.scala rename to opencv/src/test/scala/com/microsoft/azure/synapse/ml/image/ResizeImageTransformerSuite.scala index b20b309bb0..eb6eba5750 100644 --- a/opencv/src/test/scala/com/microsoft/ml/spark/image/ResizeImageTransformerSuite.scala +++ b/opencv/src/test/scala/com/microsoft/azure/synapse/ml/image/ResizeImageTransformerSuite.scala @@ -1,20 +1,20 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.image +package com.microsoft.azure.synapse.ml.image -import java.io.File -import java.net.URL - -import com.microsoft.ml.spark.core.env.FileUtilities -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} -import com.microsoft.ml.spark.io.IOImplicits._ -import com.microsoft.ml.spark.opencv.{ImageTransformer, OpenCVTestUtils} +import com.microsoft.azure.synapse.ml.core.env.FileUtilities +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.io.IOImplicits._ +import com.microsoft.azure.synapse.ml.opencv.{ImageTransformer, OpenCVTestUtils} import org.apache.commons.io.FileUtils import org.apache.spark.ml.linalg.DenseVector import org.apache.spark.ml.util.MLReadable import org.apache.spark.sql.{DataFrame, Row} +import java.io.File +import java.net.URL + class ResizeImageTransformerSuite extends TransformerFuzzing[ResizeImageTransformer] with OpenCVTestUtils { diff --git a/opencv/src/test/scala/com/microsoft/ml/spark/opencv/ImageSetAugmenterSuite.scala b/opencv/src/test/scala/com/microsoft/azure/synapse/ml/opencv/ImageSetAugmenterSuite.scala similarity index 78% rename from opencv/src/test/scala/com/microsoft/ml/spark/opencv/ImageSetAugmenterSuite.scala rename to opencv/src/test/scala/com/microsoft/azure/synapse/ml/opencv/ImageSetAugmenterSuite.scala index 427f84d08f..eefbe172bb 100644 --- a/opencv/src/test/scala/com/microsoft/ml/spark/opencv/ImageSetAugmenterSuite.scala +++ b/opencv/src/test/scala/com/microsoft/azure/synapse/ml/opencv/ImageSetAugmenterSuite.scala @@ -1,12 +1,12 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.opencv +package com.microsoft.azure.synapse.ml.opencv -import com.microsoft.ml.spark.build.BuildInfo -import com.microsoft.ml.spark.core.test.base.LinuxOnly -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} -import com.microsoft.ml.spark.io.IOImplicits._ +import com.microsoft.azure.synapse.ml.core.test.base.LinuxOnly +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.io.IOImplicits._ +import com.microsoft.azure.synapse.ml.build.BuildInfo import org.apache.spark.ml.util.MLReadable import org.apache.spark.sql.DataFrame diff --git a/opencv/src/test/scala/com/microsoft/ml/spark/opencv/ImageTransformerSuite.scala b/opencv/src/test/scala/com/microsoft/azure/synapse/ml/opencv/ImageTransformerSuite.scala similarity index 97% rename from opencv/src/test/scala/com/microsoft/ml/spark/opencv/ImageTransformerSuite.scala rename to opencv/src/test/scala/com/microsoft/azure/synapse/ml/opencv/ImageTransformerSuite.scala index 554b2d0776..9b44b20b68 100644 --- a/opencv/src/test/scala/com/microsoft/ml/spark/opencv/ImageTransformerSuite.scala +++ b/opencv/src/test/scala/com/microsoft/azure/synapse/ml/opencv/ImageTransformerSuite.scala @@ -1,13 +1,13 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.opencv +package com.microsoft.azure.synapse.ml.opencv -import com.microsoft.ml.spark.build.BuildInfo -import com.microsoft.ml.spark.core.env.FileUtilities -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} -import com.microsoft.ml.spark.image.{UnrollBinaryImage, UnrollImage} -import com.microsoft.ml.spark.io.IOImplicits._ +import com.microsoft.azure.synapse.ml.core.env.FileUtilities +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.image.{UnrollBinaryImage, UnrollImage} +import com.microsoft.azure.synapse.ml.io.IOImplicits._ +import com.microsoft.azure.synapse.ml.build.BuildInfo import org.apache.hadoop.fs.Path import org.apache.spark.ml.linalg.DenseVector import org.apache.spark.ml.param.DataFrameEquality diff --git a/pipeline.yaml b/pipeline.yaml index ed4d28fc99..a7794b2b51 100644 --- a/pipeline.yaml +++ b/pipeline.yaml @@ -56,7 +56,7 @@ jobs: azureSubscription: 'MMLSpark Build' keyVaultName: mmlspark-keys - bash: | - source activate mmlspark + source activate synapse sbt packagePython sbt publishBlob publishDocs publishR publishPython sbt genBuildInfo @@ -97,7 +97,7 @@ jobs: azureSubscription: 'MMLSpark Build' keyVaultName: mmlspark-keys - bash: | - source activate mmlspark + source activate synapseml sbt packagePython sbt publishBlob displayName: Publish Blob Artifacts @@ -113,7 +113,7 @@ jobs: inputs: azureSubscription: 'MMLSpark Build' scriptLocation: inlineScript - inlineScript: 'sbt "testOnly com.microsoft.ml.spark.nbtest.DatabricksTests"' + inlineScript: 'sbt "testOnly com.microsoft.azure.synapse.ml.nbtest.DatabricksTests"' condition: and(succeeded(), eq(variables.runTests, 'True')) - task: PublishTestResults@2 displayName: 'Publish Test Results' @@ -138,7 +138,7 @@ jobs: azureSubscription: 'MMLSpark Build' keyVaultName: mmlspark-keys - bash: | - source activate mmlspark + source activate synapseml jupyter nbconvert 
--to script ./notebooks/*.ipynb* sbt packagePython sbt publishBlob @@ -155,7 +155,7 @@ jobs: inputs: azureSubscription: 'MMLSpark Build' scriptLocation: inlineScript - inlineScript: 'sbt "testOnly com.microsoft.ml.spark.nbtest.SynapseTests"' + inlineScript: 'sbt "testOnly com.microsoft.azure.synapse.ml.nbtest.SynapseTests"' condition: and(succeeded(), eq(variables.runTests, 'True')) - task: PublishTestResults@2 displayName: 'Publish Test Results' @@ -278,7 +278,7 @@ jobs: azureSubscription: 'MMLSpark Build' scriptLocation: inlineScript inlineScript: | - source activate mmlspark + source activate synapseml (timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) (sbt coverage testPython) || (sbt coverage testPython) || (sbt coverage testPython) - task: PublishTestResults@2 @@ -328,7 +328,7 @@ jobs: azureSubscription: 'MMLSpark Build' scriptLocation: inlineScript inlineScript: | - source activate mmlspark + source activate synapseml (timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) sbt coverage testR - task: PublishTestResults@2 @@ -447,9 +447,9 @@ jobs: sudo apt-get update && sudo apt-get install ffmpeg libgstreamer1.0-0 \ gstreamer1.0-plugins-base gstreamer1.0-plugins-good gstreamer1.0-plugins-bad gstreamer1.0-plugins-ugly -y) export SBT_OPTS="-Xmx2G -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=2G -Xss2M -Duser.timezone=GMT" - (timeout 20m sbt coverage "testOnly com.microsoft.ml.spark.$(PACKAGE).**") || - (${FLAKY:-false} && timeout 20m sbt coverage "testOnly com.microsoft.ml.spark.$(PACKAGE).**") || - (${FLAKY:-false} && timeout 20m sbt coverage "testOnly com.microsoft.ml.spark.$(PACKAGE).**") + (timeout 20m sbt coverage "testOnly com.microsoft.azure.synapse.ml.$(PACKAGE).**") || + (${FLAKY:-false} && timeout 20m sbt coverage "testOnly com.microsoft.azure.synapse.ml.$(PACKAGE).**") || + (${FLAKY:-false} && timeout 20m sbt 
coverage "testOnly com.microsoft.azure.synapse.ml.$(PACKAGE).**") - task: PublishTestResults@2 displayName: 'Publish Test Results' diff --git a/project/BlobMavenPlugin.scala b/project/BlobMavenPlugin.scala index de8114172e..7008c69964 100644 --- a/project/BlobMavenPlugin.scala +++ b/project/BlobMavenPlugin.scala @@ -11,7 +11,7 @@ object BlobMavenPlugin extends AutoPlugin { override def trigger = allRequirements object autoImport { - val publishBlob = TaskKey[Unit]("publishBlob", "publish the library to mmlspark blob") + val publishBlob = TaskKey[Unit]("publishBlob", "publish the library to synapseml blob") val blobArtifactInfo = SettingKey[String]("blobArtifactInfo") } @@ -34,7 +34,7 @@ object BlobMavenPlugin extends AutoPlugin { }, blobArtifactInfo := { s""" - |MMLSpark Build and Release Information + |SynapseML Build and Release Information |--------------- | |### Maven Coordinates diff --git a/project/CodegenPlugin.scala b/project/CodegenPlugin.scala index 91b42c1446..bcc0b21654 100644 --- a/project/CodegenPlugin.scala +++ b/project/CodegenPlugin.scala @@ -37,8 +37,11 @@ object CodegenPlugin extends AutoPlugin { object autoImport { val pythonizedVersion = settingKey[String]("Pythonized version") val rVersion = settingKey[String]("R version") - val genPackageNamespace = settingKey[String]("genPackageNamespace") + val genPyPackageNamespace = settingKey[String]("genPyPackageNamespace") + val genRPackageNamespace = settingKey[String]("genRPackageNamespace") + val genTestPackageNamespace = settingKey[String]("genTestPackageNamespace") + val codegenJarName = settingKey[Option[String]]("codegenJarName") val testgenJarName = settingKey[Option[String]]("testgenJarName") val codegenArgs = settingKey[String]("codegenArgs") @@ -75,9 +78,9 @@ object CodegenPlugin extends AutoPlugin { packageR.value publishLocal.value val libPath = join(condaEnvLocation.value, "Lib", "R", "library").toString - val rSrcDir = join(codegenDir.value, "src", "R", genPackageNamespace.value) + val 
rSrcDir = join(codegenDir.value, "src", "R", genRPackageNamespace.value) rCmd(activateCondaEnv.value, - Seq("R", "CMD", "INSTALL", "--no-multiarch", "--with-keep.source", genPackageNamespace.value), + Seq("R", "CMD", "INSTALL", "--no-multiarch", "--with-keep.source", genRPackageNamespace.value), rSrcDir.getParentFile, libPath) val testRunner = join("tools", "tests", "run_r_tests.R") if (join(rSrcDir,"tests").exists()){ @@ -91,7 +94,7 @@ object CodegenPlugin extends AutoPlugin { (Test / compile).value val arg = testgenArgs.value Def.task { - (Test / runMain).toTask(s" com.microsoft.ml.spark.codegen.TestGen $arg").value + (Test / runMain).toTask(s" com.microsoft.azure.synapse.ml.codegen.TestGen $arg").value } } tag(TestGenTag) @@ -107,7 +110,7 @@ object CodegenPlugin extends AutoPlugin { version.value, pythonizedVersion.value, rVersion.value, - genPackageNamespace.value + genPyPackageNamespace.value ).toJson.compactPrint }, testgenArgs := { @@ -119,7 +122,7 @@ object CodegenPlugin extends AutoPlugin { version.value, pythonizedVersion.value, rVersion.value, - genPackageNamespace.value + genPyPackageNamespace.value ).toJson.compactPrint }, codegenJarName := { @@ -141,7 +144,7 @@ object CodegenPlugin extends AutoPlugin { (Test / compile).value val arg = codegenArgs.value Def.task { - (Compile / runMain).toTask(s" com.microsoft.ml.spark.codegen.CodeGen $arg").value + (Compile / runMain).toTask(s" com.microsoft.azure.synapse.ml.codegen.CodeGen $arg").value } }.value), testgen := testGenImpl.value, @@ -162,7 +165,7 @@ object CodegenPlugin extends AutoPlugin { packageR := { createCondaEnvTask.value codegen.value - val rSrcDir = join(codegenDir.value, "src", "R", genPackageNamespace.value) + val rSrcDir = join(codegenDir.value, "src", "R", genRPackageNamespace.value) val rPackageDir = join(codegenDir.value, "package", "R") val libPath = join(condaEnvLocation.value, "Lib", "R", "library").toString rCmd(activateCondaEnv.value, Seq("R", "-q", "-e", "roxygen2::roxygenise()"), 
rSrcDir, libPath) @@ -180,11 +183,11 @@ object CodegenPlugin extends AutoPlugin { packagePython := { codegen.value createCondaEnvTask.value - val destPyDir = join(targetDir.value, "classes", genPackageNamespace.value) + val destPyDir = join(targetDir.value, "classes", genPyPackageNamespace.value) val packageDir = join(codegenDir.value, "package", "python").absolutePath val pythonSrcDir = join(codegenDir.value, "src", "python") if (destPyDir.exists()) FileUtils.forceDelete(destPyDir) - val sourcePyDir = join(pythonSrcDir.getAbsolutePath, genPackageNamespace.value) + val sourcePyDir = join(pythonSrcDir.getAbsolutePath, genPyPackageNamespace.value) FileUtils.copyDirectory(sourcePyDir, destPyDir) runCmd( activateCondaEnv.value ++ @@ -208,8 +211,8 @@ object CodegenPlugin extends AutoPlugin { version.value + "/" + fn, "pip") }, mergePyCode := { - val srcDir = join(codegenDir.value, "src", "python", genPackageNamespace.value) - val destDir = join(mergePyCodeDir.value, "src", "python", genPackageNamespace.value) + val srcDir = join(codegenDir.value, "src", "python", genPyPackageNamespace.value) + val destDir = join(mergePyCodeDir.value, "src", "python", genPyPackageNamespace.value) FileUtils.copyDirectory(srcDir, destDir) }, testPython := { @@ -220,7 +223,7 @@ object CodegenPlugin extends AutoPlugin { activateCondaEnv.value ++ Seq("python", "-m", "pytest", - s"--cov=${genPackageNamespace.value}", + s"--cov=${genPyPackageNamespace.value}", s"--junitxml=${join(mainTargetDir, s"python-test-results-${name.value}.xml")}", "--cov-report=xml", genTestPackageNamespace.value @@ -237,11 +240,14 @@ object CodegenPlugin extends AutoPlugin { codegenDir := { join(targetDir.value, "generated") }, - genPackageNamespace := { - "mmlspark" + genPyPackageNamespace := { + "synapse" + }, + genRPackageNamespace := { + "synapseml" }, genTestPackageNamespace := { - "mmlsparktest" + "synapsemltest" } ) diff --git a/project/CondaPlugin.scala b/project/CondaPlugin.scala index 4e3e3ce005..ca9c602f47 
100644 --- a/project/CondaPlugin.scala +++ b/project/CondaPlugin.scala @@ -18,7 +18,7 @@ object CondaPlugin extends AutoPlugin { import autoImport._ override lazy val globalSettings: Seq[Setting[_]] = Seq( - condaEnvName := "mmlspark", + condaEnvName := "synapseml", cleanCondaEnvTask := { runCmd(Seq("conda", "env", "remove", "--name", condaEnvName.value, "-y")) }, diff --git a/scalastyle-config.xml b/scalastyle-config.xml index 8a4b5a81b1..2f2e4e631b 100644 --- a/scalastyle-config.xml +++ b/scalastyle-config.xml @@ -12,7 +12,7 @@ ^// Copyright \(C\) Microsoft Corporation\. All rights reserved\. // Licensed under the MIT License\. See LICENSE in project root for information\. -package (?:com\.microsoft\.ml\.spark|org\.apache\.spark|com\.microsoft\.CNTK|com\.microsoft\.ml\.lightgbm|com\.microsoft\.lightgbm)[. +package (?:com\.microsoft\.azure\.synapse\.ml|org\.apache\.spark|com\.microsoft\.CNTK|com\.microsoft\.ml\.lightgbm|com\.microsoft\.lightgbm)[. ] true @@ -72,7 +72,7 @@ package (?:com\.microsoft\.ml\.spark|org\.apache\.spark|com\.microsoft\.CNTK|com our,scala,java,other - com.microsoft.ml.spark[.].+ + com.microsoft.azure.synapse.ml[.].+ scala[.].+ java[.].+ .+ diff --git a/scalastyle-test-config.xml b/scalastyle-test-config.xml index 05299b3b95..d8b4f17d8b 100644 --- a/scalastyle-test-config.xml +++ b/scalastyle-test-config.xml @@ -12,7 +12,7 @@ ^// Copyright \(C\) Microsoft Corporation\. All rights reserved\. // Licensed under the MIT License\. See LICENSE in project root for information\. -package (?:com\.microsoft\.ml\.spark|org\.apache\.spark|com\.microsoft\.CNTK|com\.microsoft\.ml\.lightgbm|com\.microsoft\.lightgbm)[. +package (?:com\.microsoft\.azure\.synapse\.ml|org\.apache\.spark|com\.microsoft\.CNTK|com\.microsoft\.ml\.lightgbm|com\.microsoft\.lightgbm)[. 
] true @@ -69,7 +69,7 @@ package (?:com\.microsoft\.ml\.spark|org\.apache\.spark|com\.microsoft\.CNTK|com our,scala,java,other - com.microsoft.ml.spark[.].+ + com.microsoft.azure.synapse.ml[.].+ scala[.].+ java[.].+ .+ diff --git a/src/test/scala/com/microsoft/azure/synapse/ml/core/test/fuzzing/FuzzingTest.scala b/src/test/scala/com/microsoft/azure/synapse/ml/core/test/fuzzing/FuzzingTest.scala new file mode 100644 index 0000000000..7933dc6a66 --- /dev/null +++ b/src/test/scala/com/microsoft/azure/synapse/ml/core/test/fuzzing/FuzzingTest.scala @@ -0,0 +1,280 @@ +// Copyright (C) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. See LICENSE in project root for information. + +package com.microsoft.azure.synapse.ml.core.test.fuzzing + +import com.microsoft.azure.synapse.ml.core.contracts.{HasFeaturesCol, HasInputCol, HasLabelCol, HasOutputCol} +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.utils.JarLoadingUtils +import org.apache.spark.ml._ +import org.apache.spark.ml.param._ +import org.apache.spark.ml.util.{MLReadable, MLWritable} + +import java.lang.reflect.ParameterizedType +import scala.language.existentials + +/** Tests to validate fuzzing of modules. 
*/ +class FuzzingTest extends TestBase { + + // Use this for more detailed output from the Jar Loader + val debug = false + + // use this to quickly see all the results for all failing modules + // Note that this could make the tests pass when they should be failing + val disableFailure = false + + test("Assert things have been loaded") { + // Needed because the session in TB is lazy + spark + assert(serializationFuzzers.nonEmpty) + assert(pipelineStages.nonEmpty) + assert(readers.nonEmpty) + } + + test("Verify stage fitting and transforming") { + val exemptions: Set[String] = Set( + "com.microsoft.azure.synapse.ml.cognitive.DocumentTranslator", + "org.apache.spark.ml.feature.FastVectorAssembler", + "com.microsoft.azure.synapse.ml.featurize.ValueIndexerModel", + "com.microsoft.azure.synapse.ml.cntk.train.CNTKLearner", + "com.microsoft.azure.synapse.ml.automl.TuneHyperparameters", + "com.microsoft.azure.synapse.ml.train.ComputePerInstanceStatistics", + "com.microsoft.azure.synapse.ml.featurize.DataConversion", + "com.microsoft.azure.synapse.ml.core.serialize.TestEstimatorBase", + "com.microsoft.azure.synapse.ml.cognitive.LocalNER", + "com.microsoft.azure.synapse.ml.nn.KNNModel", + "com.microsoft.azure.synapse.ml.nn.ConditionalKNNModel", + "com.microsoft.azure.synapse.ml.train.TrainedRegressorModel", + "com.microsoft.azure.synapse.ml.core.serialize.MixedParamTest", + "com.microsoft.azure.synapse.ml.automl.TuneHyperparametersModel", + "com.microsoft.azure.synapse.ml.lightgbm.LightGBMRegressionModel", + "com.microsoft.azure.synapse.ml.isolationforest.IsolationForestModel", + "com.microsoft.azure.synapse.ml.vw.VowpalWabbitClassificationModel", + "com.microsoft.azure.synapse.ml.core.serialize.ComplexParamTest", + "com.microsoft.azure.synapse.ml.vw.VowpalWabbitRegressionModel", + "com.microsoft.azure.synapse.ml.core.serialize.StandardParamTest", + "com.microsoft.azure.synapse.ml.vw.VowpalWabbitContextualBanditModel", + 
"com.microsoft.azure.synapse.ml.stages.ClassBalancerModel", + "com.microsoft.azure.synapse.ml.featurize.CleanMissingDataModel", + "com.microsoft.azure.synapse.ml.stages.TimerModel", + "com.microsoft.azure.synapse.ml.lightgbm.LightGBMClassificationModel", + "com.microsoft.azure.synapse.ml.train.TrainedClassifierModel", + "com.microsoft.azure.synapse.ml.lightgbm.LightGBMRankerModel", + "com.microsoft.azure.synapse.ml.automl.BestModel" //TODO add proper interfaces to all of these + ) + val applicableStages = pipelineStages.filter(t => !exemptions(t.getClass.getName)) + val applicableClasses = applicableStages.map(_.getClass.asInstanceOf[Class[_]]).toSet + val classToFuzzer: Map[Class[_], ExperimentFuzzing[_ <: PipelineStage]] = + experimentFuzzers.map(f => + (Class.forName(f.getClass.getMethod("experimentTestObjects") + .getGenericReturnType.asInstanceOf[ParameterizedType] + .getActualTypeArguments.head.asInstanceOf[ParameterizedType] + .getActualTypeArguments.head.getTypeName) + , f)).toMap + val classesWithFuzzers = classToFuzzer.keys + val classesWithoutFuzzers = applicableClasses.diff(classesWithFuzzers.toSet) + assertOrLog(classesWithoutFuzzers.isEmpty, + "These classes do not have Experiment fuzzers, \n" + + "(try extending Estimator/Transformer Fuzzing): \n" + + classesWithoutFuzzers.mkString("\n")) + } + + test("Verify all stages can be serialized") { + val exemptions: Set[String] = Set( + "com.microsoft.azure.synapse.ml.cognitive.DocumentTranslator", + "com.microsoft.azure.synapse.ml.automl.BestModel", + "com.microsoft.azure.synapse.ml.automl.TuneHyperparameters", + "com.microsoft.azure.synapse.ml.automl.TuneHyperparametersModel", + "com.microsoft.azure.synapse.ml.cntk.train.CNTKLearner", + "com.microsoft.azure.synapse.ml.cognitive.LocalNER", + "com.microsoft.azure.synapse.ml.core.serialize.ComplexParamTest", + "com.microsoft.azure.synapse.ml.core.serialize.MixedParamTest", + "com.microsoft.azure.synapse.ml.core.serialize.StandardParamTest", + 
"com.microsoft.azure.synapse.ml.core.serialize.TestEstimatorBase", + "com.microsoft.azure.synapse.ml.featurize.CleanMissingDataModel", + "com.microsoft.azure.synapse.ml.featurize.DataConversion", + "com.microsoft.azure.synapse.ml.featurize.ValueIndexerModel", + "com.microsoft.azure.synapse.ml.isolationforest.IsolationForestModel", + "com.microsoft.azure.synapse.ml.lightgbm.LightGBMClassificationModel", + "com.microsoft.azure.synapse.ml.lightgbm.LightGBMRankerModel", + "com.microsoft.azure.synapse.ml.lightgbm.LightGBMRegressionModel", + "com.microsoft.azure.synapse.ml.nn.ConditionalKNNModel", + "com.microsoft.azure.synapse.ml.nn.KNNModel", + "com.microsoft.azure.synapse.ml.stages.ClassBalancerModel", + "com.microsoft.azure.synapse.ml.stages.TimerModel", + "com.microsoft.azure.synapse.ml.train.ComputePerInstanceStatistics", + "com.microsoft.azure.synapse.ml.train.TrainedClassifierModel", + "com.microsoft.azure.synapse.ml.train.TrainedRegressorModel", + "com.microsoft.azure.synapse.ml.vw.VowpalWabbitClassificationModel", + "com.microsoft.azure.synapse.ml.vw.VowpalWabbitContextualBanditModel", + "com.microsoft.azure.synapse.ml.vw.VowpalWabbitRegressionModel" + ) + val applicableStages = pipelineStages.filter(t => !exemptions(t.getClass.getName)) + val applicableClasses = applicableStages.map(_.getClass.asInstanceOf[Class[_]]).toSet + val classToFuzzer: Map[Class[_], SerializationFuzzing[_ <: PipelineStage with MLWritable]] = + serializationFuzzers.map(f => + (Class.forName(f.getClass.getMethod("serializationTestObjects") + .getGenericReturnType.asInstanceOf[ParameterizedType] + .getActualTypeArguments.head.asInstanceOf[ParameterizedType] + .getActualTypeArguments.head.getTypeName), + f) + ).toMap + val classesWithFuzzers = classToFuzzer.keys + val classesWithoutFuzzers = applicableClasses.diff(classesWithFuzzers.toSet) + assertOrLog(classesWithoutFuzzers.isEmpty, + "These classes do not have Serialization fuzzers,\n" + + "(try extending Estimator/Transformer 
Fuzzing):\n " + + classesWithoutFuzzers.mkString("\n ")) + } + + test("Verify all stages can be tested in python") { + val exemptions: Set[String] = Set( + "com.microsoft.azure.synapse.ml.cognitive.DocumentTranslator", + "com.microsoft.azure.synapse.ml.automl.TuneHyperparameters", + "com.microsoft.azure.synapse.ml.train.TrainedRegressorModel", + "com.microsoft.azure.synapse.ml.vw.VowpalWabbitContextualBanditModel", + "com.microsoft.azure.synapse.ml.train.TrainedClassifierModel", + "com.microsoft.azure.synapse.ml.vw.VowpalWabbitClassificationModel", + "com.microsoft.azure.synapse.ml.isolationforest.IsolationForestModel", + "com.microsoft.azure.synapse.ml.nn.ConditionalKNNModel", + "com.microsoft.azure.synapse.ml.lightgbm.LightGBMClassificationModel", + "com.microsoft.azure.synapse.ml.core.serialize.TestEstimatorBase", + "com.microsoft.azure.synapse.ml.core.serialize.MixedParamTest", + "com.microsoft.azure.synapse.ml.featurize.CleanMissingDataModel", + "com.microsoft.azure.synapse.ml.stages.TimerModel", + "com.microsoft.azure.synapse.ml.featurize.DataConversion", + "com.microsoft.azure.synapse.ml.automl.TuneHyperparametersModel", + "com.microsoft.azure.synapse.ml.automl.BestModel", + "com.microsoft.azure.synapse.ml.nn.KNNModel", + "com.microsoft.azure.synapse.ml.vw.VowpalWabbitRegressionModel", + "com.microsoft.azure.synapse.ml.stages.ClassBalancerModel", + "com.microsoft.azure.synapse.ml.core.serialize.StandardParamTest", + "com.microsoft.azure.synapse.ml.core.serialize.ComplexParamTest", + "com.microsoft.azure.synapse.ml.featurize.ValueIndexerModel", + "com.microsoft.azure.synapse.ml.lightgbm.LightGBMRankerModel", + "com.microsoft.azure.synapse.ml.lightgbm.LightGBMRegressionModel", + "com.microsoft.azure.synapse.ml.train.ComputePerInstanceStatistics" + ) + val applicableStages = pipelineStages.filter(t => !exemptions(t.getClass.getName)) + val applicableClasses = applicableStages.map(_.getClass.asInstanceOf[Class[_]]).toSet + val classToFuzzer: Map[Class[_], 
PyTestFuzzing[_ <: PipelineStage]] = + pytestFuzzers.map(f => + (Class.forName(f.getClass.getMethod("pyTestObjects") + .getGenericReturnType.asInstanceOf[ParameterizedType] + .getActualTypeArguments.head.asInstanceOf[ParameterizedType] + .getActualTypeArguments.head.getTypeName), + f) + ).toMap + val classesWithFuzzers = classToFuzzer.keys + val classesWithoutFuzzers = applicableClasses.diff(classesWithFuzzers.toSet) + assertOrLog(classesWithoutFuzzers.isEmpty, classesWithoutFuzzers.mkString("\n")) + } + + // TODO verify that model UIDs match the class names, perhaps use a Trait + + test("Verify all pipeline stages don't have exotic characters") { + val badChars = List(",", "\"", "'", ".") + pipelineStages.foreach { pipelineStage => + pipelineStage.params.foreach { param => + assertOrLog(!param.name.contains(badChars), param.name) + } + } + } + + test("Verify all pipeline stage values match their param names") { + val exemptions: Set[String] = Set[String]( + "com.microsoft.azure.synapse.ml.stages.UDFTransformer") // needs to hide setters from model + pipelineStages.foreach { pipelineStage => + if (!exemptions(pipelineStage.getClass.getName)) { + val paramFields = + pipelineStage.getClass.getDeclaredFields + .filter(f => classOf[Param[Any]].isAssignableFrom(f.getType)) + val paramNames = paramFields.map { f => + f.setAccessible(true) + val p = f.get(pipelineStage) + p.asInstanceOf[Param[Any]].name + } + val paramFieldNames = paramFields.map(_.getName) + assertOrLog(paramNames === paramFieldNames, + paramNames.mkString(",") + "\n" + + paramFieldNames.mkString(",") + "\n" + + pipelineStage.getClass.getName) + } + } + } + + test("Verify correct use of mixins") { + val triggers = Map( + "inputCol" -> classOf[HasInputCol], + "inputColumn" -> classOf[HasInputCol], + "outputCol" -> classOf[HasOutputCol], + "outputColumn" -> classOf[HasOutputCol], + "labelCol" -> classOf[HasLabelCol], + "labelColumn" -> classOf[HasLabelCol], + "featuresCol" -> classOf[HasFeaturesCol], + 
"featuresColumn" -> classOf[HasFeaturesCol] + ) + + val exemptions = Set[String]( + "org.apache.spark.ml.feature.FastVectorAssembler", // In Spark namespace + "com.microsoft.azure.synapse.ml.vw.VowpalWabbitClassifier", // HasFeaturesCol is part of spark's base class + "com.microsoft.azure.synapse.ml.vw.VowpalWabbitContextualBandit", // HasFeaturesCol is part of spark's base class + "com.microsoft.azure.synapse.ml.vw.VowpalWabbitRegressor", // HasFeaturesCol is part of spark's base class + "com.microsoft.azure.synapse.ml.lightgbm.LightGBMClassifier", // HasFeaturesCol is part of spark's base class + "com.microsoft.azure.synapse.ml.lightgbm.LightGBMRegressor", // HasFeaturesCol is part of spark's base class + "com.microsoft.azure.synapse.ml.lightgbm.LightGBMRanker", // HasFeaturesCol is part of spark's base class + "com.microsoft.azure.synapse.ml.isolationforest.IsolationForest", // HasFeaturesCol from spark + "com.microsoft.azure.synapse.ml.lightgbm.LightGBMClassificationModel", + "com.microsoft.azure.synapse.ml.lightgbm.LightGBMRankerModel", + "com.microsoft.azure.synapse.ml.lightgbm.LightGBMRegressionModel", + "com.microsoft.azure.synapse.ml.vw.VowpalWabbitClassificationModel", + "com.microsoft.azure.synapse.ml.vw.VowpalWabbitRegressionModel", + "com.microsoft.azure.synapse.ml.vw.VowpalWabbitContextualBanditModel", + "com.microsoft.azure.synapse.ml.explainers.ImageLIME", + "com.microsoft.azure.synapse.ml.explainers.ImageSHAP", + "com.microsoft.azure.synapse.ml.explainers.TabularLIME", + "com.microsoft.azure.synapse.ml.explainers.TabularSHAP", + "com.microsoft.azure.synapse.ml.explainers.TextLIME", + "com.microsoft.azure.synapse.ml.explainers.TextSHAP", + "com.microsoft.azure.synapse.ml.explainers.VectorLIME", + "com.microsoft.azure.synapse.ml.explainers.VectorSHAP" + ) + + pipelineStages.foreach { stage => + if (!exemptions(stage.getClass.getName)) { + stage.params.foreach { param => + triggers.get(param.name) match { + case Some(clazz) => + 
assertOrLog(clazz.isAssignableFrom(stage.getClass), + stage.getClass.getName + " needs to extend " + clazz.getName) + case None => + } + } + } + } + } + + private def assertOrLog(condition: Boolean, hint: String = "", + disableFailure: Boolean = disableFailure): Unit = { + if (disableFailure && !condition) println(hint) + else assert(condition, hint) + () + } + + // set the context loader to pick up on the jars + //Thread.currentThread().setContextClassLoader(JarLoadingUtils.classLoader) + + private lazy val readers: List[MLReadable[_]] = JarLoadingUtils.instantiateObjects[MLReadable[_]]() + + private lazy val pipelineStages: List[PipelineStage] = JarLoadingUtils.instantiateServices[PipelineStage]() + + private lazy val experimentFuzzers: List[ExperimentFuzzing[_ <: PipelineStage]] = + JarLoadingUtils.instantiateServices[ExperimentFuzzing[_ <: PipelineStage]]() + + private lazy val serializationFuzzers: List[SerializationFuzzing[_ <: PipelineStage with MLWritable]] = + JarLoadingUtils.instantiateServices[SerializationFuzzing[_ <: PipelineStage with MLWritable]]() + + private lazy val pytestFuzzers: List[PyTestFuzzing[_ <: PipelineStage]] = + JarLoadingUtils.instantiateServices[PyTestFuzzing[_ <: PipelineStage]]() + +} diff --git a/src/test/scala/com/microsoft/ml/spark/core/test/fuzzing/FuzzingTest.scala b/src/test/scala/com/microsoft/ml/spark/core/test/fuzzing/FuzzingTest.scala deleted file mode 100644 index 7010d2bcc0..0000000000 --- a/src/test/scala/com/microsoft/ml/spark/core/test/fuzzing/FuzzingTest.scala +++ /dev/null @@ -1,281 +0,0 @@ -// Copyright (C) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. See LICENSE in project root for information. 
- -package com.microsoft.ml.spark.core.test.fuzzing - -import java.lang.reflect.ParameterizedType - -import com.microsoft.ml.spark.core.contracts.{HasFeaturesCol, HasInputCol, HasLabelCol, HasOutputCol} -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.utils.JarLoadingUtils -import org.apache.spark.ml._ -import org.apache.spark.ml.param._ -import org.apache.spark.ml.util.{MLReadable, MLWritable} - -import scala.language.existentials - -/** Tests to validate fuzzing of modules. */ -class FuzzingTest extends TestBase { - - // Use this for more detailed output from the Jar Loader - val debug = false - - // use this to quickly see all the results for all failing modules - // Note that this could make the tests pass when they should be failing - val disableFailure = false - - test("Assert things have been loaded") { - // Needed because the session in TB is lazy - spark - assert(serializationFuzzers.nonEmpty) - assert(pipelineStages.nonEmpty) - assert(readers.nonEmpty) - } - - test("Verify stage fitting and transforming") { - val exemptions: Set[String] = Set( - "com.microsoft.ml.spark.cognitive.DocumentTranslator", - "org.apache.spark.ml.feature.FastVectorAssembler", - "com.microsoft.ml.spark.featurize.ValueIndexerModel", - "com.microsoft.ml.spark.cntk.train.CNTKLearner", - "com.microsoft.ml.spark.automl.TuneHyperparameters", - "com.microsoft.ml.spark.train.ComputePerInstanceStatistics", - "com.microsoft.ml.spark.featurize.DataConversion", - "com.microsoft.ml.spark.core.serialize.TestEstimatorBase", - "com.microsoft.ml.spark.cognitive.LocalNER", - "com.microsoft.ml.spark.nn.KNNModel", - "com.microsoft.ml.spark.nn.ConditionalKNNModel", - "com.microsoft.ml.spark.train.TrainedRegressorModel", - "com.microsoft.ml.spark.core.serialize.MixedParamTest", - "com.microsoft.ml.spark.automl.TuneHyperparametersModel", - "com.microsoft.ml.spark.lightgbm.LightGBMRegressionModel", - "com.microsoft.ml.spark.isolationforest.IsolationForestModel", 
- "com.microsoft.ml.spark.vw.VowpalWabbitClassificationModel", - "com.microsoft.ml.spark.core.serialize.ComplexParamTest", - "com.microsoft.ml.spark.vw.VowpalWabbitRegressionModel", - "com.microsoft.ml.spark.core.serialize.StandardParamTest", - "com.microsoft.ml.spark.vw.VowpalWabbitContextualBanditModel", - "com.microsoft.ml.spark.stages.ClassBalancerModel", - "com.microsoft.ml.spark.featurize.CleanMissingDataModel", - "com.microsoft.ml.spark.stages.TimerModel", - "com.microsoft.ml.spark.lightgbm.LightGBMClassificationModel", - "com.microsoft.ml.spark.train.TrainedClassifierModel", - "com.microsoft.ml.spark.lightgbm.LightGBMRankerModel", - "com.microsoft.ml.spark.automl.BestModel" //TODO add proper interfaces to all of these - ) - val applicableStages = pipelineStages.filter(t => !exemptions(t.getClass.getName)) - val applicableClasses = applicableStages.map(_.getClass.asInstanceOf[Class[_]]).toSet - val classToFuzzer: Map[Class[_], ExperimentFuzzing[_ <: PipelineStage]] = - experimentFuzzers.map(f => - (Class.forName(f.getClass.getMethod("experimentTestObjects") - .getGenericReturnType.asInstanceOf[ParameterizedType] - .getActualTypeArguments.head.asInstanceOf[ParameterizedType] - .getActualTypeArguments.head.getTypeName) - , f)).toMap - val classesWithFuzzers = classToFuzzer.keys - val classesWithoutFuzzers = applicableClasses.diff(classesWithFuzzers.toSet) - assertOrLog(classesWithoutFuzzers.isEmpty, - "These classes do not have Experiment fuzzers, \n" + - "(try extending Estimator/Transformer Fuzzing): \n" + - classesWithoutFuzzers.mkString("\n")) - } - - test("Verify all stages can be serialized") { - val exemptions: Set[String] = Set( - "com.microsoft.ml.spark.cognitive.DocumentTranslator", - "com.microsoft.ml.spark.automl.BestModel", - "com.microsoft.ml.spark.automl.TuneHyperparameters", - "com.microsoft.ml.spark.automl.TuneHyperparametersModel", - "com.microsoft.ml.spark.cntk.train.CNTKLearner", - "com.microsoft.ml.spark.cognitive.LocalNER", - 
"com.microsoft.ml.spark.core.serialize.ComplexParamTest", - "com.microsoft.ml.spark.core.serialize.MixedParamTest", - "com.microsoft.ml.spark.core.serialize.StandardParamTest", - "com.microsoft.ml.spark.core.serialize.TestEstimatorBase", - "com.microsoft.ml.spark.featurize.CleanMissingDataModel", - "com.microsoft.ml.spark.featurize.DataConversion", - "com.microsoft.ml.spark.featurize.ValueIndexerModel", - "com.microsoft.ml.spark.isolationforest.IsolationForestModel", - "com.microsoft.ml.spark.lightgbm.LightGBMClassificationModel", - "com.microsoft.ml.spark.lightgbm.LightGBMRankerModel", - "com.microsoft.ml.spark.lightgbm.LightGBMRegressionModel", - "com.microsoft.ml.spark.nn.ConditionalKNNModel", - "com.microsoft.ml.spark.nn.KNNModel", - "com.microsoft.ml.spark.stages.ClassBalancerModel", - "com.microsoft.ml.spark.stages.TimerModel", - "com.microsoft.ml.spark.train.ComputePerInstanceStatistics", - "com.microsoft.ml.spark.train.TrainedClassifierModel", - "com.microsoft.ml.spark.train.TrainedRegressorModel", - "com.microsoft.ml.spark.vw.VowpalWabbitClassificationModel", - "com.microsoft.ml.spark.vw.VowpalWabbitContextualBanditModel", - "com.microsoft.ml.spark.vw.VowpalWabbitRegressionModel" - ) - val applicableStages = pipelineStages.filter(t => !exemptions(t.getClass.getName)) - val applicableClasses = applicableStages.map(_.getClass.asInstanceOf[Class[_]]).toSet - val classToFuzzer: Map[Class[_], SerializationFuzzing[_ <: PipelineStage with MLWritable]] = - serializationFuzzers.map(f => - (Class.forName(f.getClass.getMethod("serializationTestObjects") - .getGenericReturnType.asInstanceOf[ParameterizedType] - .getActualTypeArguments.head.asInstanceOf[ParameterizedType] - .getActualTypeArguments.head.getTypeName), - f) - ).toMap - val classesWithFuzzers = classToFuzzer.keys - val classesWithoutFuzzers = applicableClasses.diff(classesWithFuzzers.toSet) - assertOrLog(classesWithoutFuzzers.isEmpty, - "These classes do not have Serialization fuzzers,\n" + - "(try 
extending Estimator/Transformer Fuzzing):\n " + - classesWithoutFuzzers.mkString("\n ")) - } - - test("Verify all stages can be tested in python") { - val exemptions: Set[String] = Set( - "com.microsoft.ml.spark.cognitive.DocumentTranslator", - "com.microsoft.ml.spark.automl.TuneHyperparameters", - "com.microsoft.ml.spark.train.TrainedRegressorModel", - "com.microsoft.ml.spark.vw.VowpalWabbitContextualBanditModel", - "com.microsoft.ml.spark.train.TrainedClassifierModel", - "com.microsoft.ml.spark.vw.VowpalWabbitClassificationModel", - "com.microsoft.ml.spark.isolationforest.IsolationForestModel", - "com.microsoft.ml.spark.nn.ConditionalKNNModel", - "com.microsoft.ml.spark.lightgbm.LightGBMClassificationModel", - "com.microsoft.ml.spark.core.serialize.TestEstimatorBase", - "com.microsoft.ml.spark.core.serialize.MixedParamTest", - "com.microsoft.ml.spark.featurize.CleanMissingDataModel", - "com.microsoft.ml.spark.stages.TimerModel", - "com.microsoft.ml.spark.featurize.DataConversion", - "com.microsoft.ml.spark.automl.TuneHyperparametersModel", - "com.microsoft.ml.spark.automl.BestModel", - "com.microsoft.ml.spark.nn.KNNModel", - "com.microsoft.ml.spark.vw.VowpalWabbitRegressionModel", - "com.microsoft.ml.spark.stages.ClassBalancerModel", - "com.microsoft.ml.spark.core.serialize.StandardParamTest", - "com.microsoft.ml.spark.core.serialize.ComplexParamTest", - "com.microsoft.ml.spark.featurize.ValueIndexerModel", - "com.microsoft.ml.spark.lightgbm.LightGBMRankerModel", - "com.microsoft.ml.spark.lightgbm.LightGBMRegressionModel", - "com.microsoft.ml.spark.train.ComputePerInstanceStatistics" - ) - val applicableStages = pipelineStages.filter(t => !exemptions(t.getClass.getName)) - val applicableClasses = applicableStages.map(_.getClass.asInstanceOf[Class[_]]).toSet - val classToFuzzer: Map[Class[_], PyTestFuzzing[_ <: PipelineStage]] = - pytestFuzzers.map(f => - (Class.forName(f.getClass.getMethod("pyTestObjects") - .getGenericReturnType.asInstanceOf[ParameterizedType] - 
.getActualTypeArguments.head.asInstanceOf[ParameterizedType] - .getActualTypeArguments.head.getTypeName), - f) - ).toMap - val classesWithFuzzers = classToFuzzer.keys - val classesWithoutFuzzers = applicableClasses.diff(classesWithFuzzers.toSet) - assertOrLog(classesWithoutFuzzers.isEmpty, classesWithoutFuzzers.mkString("\n")) - } - - // TODO verify that model UIDs match the class names, perhaps use a Trait - - test("Verify all pipeline stages don't have exotic characters") { - val badChars = List(",", "\"", "'", ".") - pipelineStages.foreach { pipelineStage => - pipelineStage.params.foreach { param => - assertOrLog(!param.name.contains(badChars), param.name) - } - } - } - - test("Verify all pipeline stage values match their param names") { - val exemptions: Set[String] = Set[String]( - "com.microsoft.ml.spark.stages.UDFTransformer") // needs to hide setters from model - pipelineStages.foreach { pipelineStage => - if (!exemptions(pipelineStage.getClass.getName)) { - val paramFields = - pipelineStage.getClass.getDeclaredFields - .filter(f => classOf[Param[Any]].isAssignableFrom(f.getType)) - val paramNames = paramFields.map { f => - f.setAccessible(true) - val p = f.get(pipelineStage) - p.asInstanceOf[Param[Any]].name - } - val paramFieldNames = paramFields.map(_.getName) - assertOrLog(paramNames === paramFieldNames, - paramNames.mkString(",") + "\n" + - paramFieldNames.mkString(",") + "\n" + - pipelineStage.getClass.getName) - } - } - } - - test("Verify correct use of mixins") { - val triggers = Map( - "inputCol" -> classOf[HasInputCol], - "inputColumn" -> classOf[HasInputCol], - "outputCol" -> classOf[HasOutputCol], - "outputColumn" -> classOf[HasOutputCol], - "labelCol" -> classOf[HasLabelCol], - "labelColumn" -> classOf[HasLabelCol], - "featuresCol" -> classOf[HasFeaturesCol], - "featuresColumn" -> classOf[HasFeaturesCol] - ) - - val exemptions = Set[String]( - "org.apache.spark.ml.feature.FastVectorAssembler", // In Spark namespace - 
"com.microsoft.ml.spark.vw.VowpalWabbitClassifier", // HasFeaturesCol is part of spark's base class - "com.microsoft.ml.spark.vw.VowpalWabbitContextualBandit", // HasFeaturesCol is part of spark's base class - "com.microsoft.ml.spark.vw.VowpalWabbitRegressor", // HasFeaturesCol is part of spark's base class - "com.microsoft.ml.spark.lightgbm.LightGBMClassifier", // HasFeaturesCol is part of spark's base class - "com.microsoft.ml.spark.lightgbm.LightGBMRegressor", // HasFeaturesCol is part of spark's base class - "com.microsoft.ml.spark.lightgbm.LightGBMRanker", // HasFeaturesCol is part of spark's base class - "com.microsoft.ml.spark.isolationforest.IsolationForest", // HasFeaturesCol from spark - "com.microsoft.ml.spark.lightgbm.LightGBMClassificationModel", - "com.microsoft.ml.spark.lightgbm.LightGBMRankerModel", - "com.microsoft.ml.spark.lightgbm.LightGBMRegressionModel", - "com.microsoft.ml.spark.vw.VowpalWabbitClassificationModel", - "com.microsoft.ml.spark.vw.VowpalWabbitRegressionModel", - "com.microsoft.ml.spark.vw.VowpalWabbitContextualBanditModel", - "com.microsoft.ml.spark.explainers.ImageLIME", - "com.microsoft.ml.spark.explainers.ImageSHAP", - "com.microsoft.ml.spark.explainers.TabularLIME", - "com.microsoft.ml.spark.explainers.TabularSHAP", - "com.microsoft.ml.spark.explainers.TextLIME", - "com.microsoft.ml.spark.explainers.TextSHAP", - "com.microsoft.ml.spark.explainers.VectorLIME", - "com.microsoft.ml.spark.explainers.VectorSHAP" - ) - - pipelineStages.foreach { stage => - if (!exemptions(stage.getClass.getName)) { - stage.params.foreach { param => - triggers.get(param.name) match { - case Some(clazz) => - assertOrLog(clazz.isAssignableFrom(stage.getClass), - stage.getClass.getName + " needs to extend " + clazz.getName) - case None => - } - } - } - } - } - - private def assertOrLog(condition: Boolean, hint: String = "", - disableFailure: Boolean = disableFailure): Unit = { - if (disableFailure && !condition) println(hint) - else assert(condition, 
hint) - () - } - - // set the context loader to pick up on the jars - //Thread.currentThread().setContextClassLoader(JarLoadingUtils.classLoader) - - private lazy val readers: List[MLReadable[_]] = JarLoadingUtils.instantiateObjects[MLReadable[_]]() - - private lazy val pipelineStages: List[PipelineStage] = JarLoadingUtils.instantiateServices[PipelineStage]() - - private lazy val experimentFuzzers: List[ExperimentFuzzing[_ <: PipelineStage]] = - JarLoadingUtils.instantiateServices[ExperimentFuzzing[_ <: PipelineStage]]() - - private lazy val serializationFuzzers: List[SerializationFuzzing[_ <: PipelineStage with MLWritable]] = - JarLoadingUtils.instantiateServices[SerializationFuzzing[_ <: PipelineStage with MLWritable]]() - - private lazy val pytestFuzzers: List[PyTestFuzzing[_ <: PipelineStage]] = - JarLoadingUtils.instantiateServices[PyTestFuzzing[_ <: PipelineStage]]() - -} diff --git a/tools/docker/demo/init_notebook.py b/tools/docker/demo/init_notebook.py index 69bb9479c1..cf1baad861 100644 --- a/tools/docker/demo/init_notebook.py +++ b/tools/docker/demo/init_notebook.py @@ -2,8 +2,8 @@ import os spark = SparkSession.builder \ .master("local[*]") \ - .appName("MMLSpark Docker App") \ - .config("spark.jars.packages", "com.microsoft.ml.spark:mmlspark_2.12:" + os.environ["MMLSPARK_VERSION"]) \ + .appName("SynapseML Docker App") \ + .config("spark.jars.packages", "com.microsoft.azure:synapseml:" + os.environ["MMLSPARK_VERSION"]) \ .config("spark.jars.repositories", "https://mmlspark.azureedge.net/maven") \ .getOrCreate() sc = spark.sparkContext diff --git a/tools/docker/developer/Dockerfile b/tools/docker/developer/Dockerfile index fc9e4947df..d3c95ca675 100644 --- a/tools/docker/developer/Dockerfile +++ b/tools/docker/developer/Dockerfile @@ -26,7 +26,7 @@ ENV JAVA_HOME /usr/lib/jvm/java-1.8.0-openjdk-amd64 RUN mkdir -p /__w/1 WORKDIR /__w/1 -RUN git clone https://github.com/Azure/mmlspark.git s \ +RUN git clone https://github.com/Microsoft/SynapseML.git s \ && 
cd s \ && git checkout build-refactor WORKDIR /__w/1/s @@ -38,7 +38,7 @@ ADD . . ENV SBT_OPTS="${SBT_OPTS} -Dsbt.io.jdktimestamps=true" RUN sbt setup RUN conda init bash -RUN echo "source activate mmlspark" > ~/.bashrc +RUN echo "source activate synapseml" > ~/.bashrc # RUN /bin/bash -c "sbt testPython" diff --git a/tools/helm/zeppelin/Dockerfile b/tools/helm/zeppelin/Dockerfile index b84231656b..9eae3ca559 100644 --- a/tools/helm/zeppelin/Dockerfile +++ b/tools/helm/zeppelin/Dockerfile @@ -77,7 +77,7 @@ RUN echo "$LOG_TAG install nodejs" && \ echo "$LOG_TAG Download Zeppelin source" && \ git clone https://github.com/apache/zeppelin.git /zeppelin-${Z_VERSION}-bin-all && \ mv /zeppelin-${Z_VERSION}-bin-all ${Z_HOME}_src && \ - mkdir ${Z_HOME}/notebook/mmlspark -p && \ + mkdir ${Z_HOME}/notebook/synapseml -p && \ cd ${Z_HOME}_src && \ git checkout ${Z_COMMIT} && \ echo '{ "allow_root": true }' > /root/.bowerrc && \ @@ -121,7 +121,7 @@ RUN cd ${Z_HOME}_src && \ ADD jars /jars # add notebooks -ADD mmlsparkExamples/ ${Z_HOME}/notebook/mmlspark/ +ADD synapsemlExamples/ ${Z_HOME}/notebook/synapse/ml/ ADD spark-defaults.conf /opt/spark/conf/spark-defaults.conf ADD zeppelin-env.sh ${Z_HOME}/conf/ diff --git a/tools/helm/zeppelin/mini.Dockerfile b/tools/helm/zeppelin/mini.Dockerfile index 076a3e98c9..e88547ac20 100644 --- a/tools/helm/zeppelin/mini.Dockerfile +++ b/tools/helm/zeppelin/mini.Dockerfile @@ -1,4 +1,4 @@ -FROM mcr.microsoft.com/mmlspark/spark2.4:v4_mini +FROM mcr.microsoft.com/synapse/ml/spark2.4:v4_mini MAINTAINER Dalitso Banda ADD patch_beam.patch /tmp/patch_beam.patch @@ -35,7 +35,7 @@ RUN echo "$LOG_TAG setting python dependencies" && \ export LD_LIBRARY_PATH=/lib:/usr/lib/:$LD_LIBRARY_PATH && \ git clone https://github.com/apache/zeppelin.git /zeppelin-${Z_VERSION}-bin-all && \ mv /zeppelin-${Z_VERSION}-bin-all ${Z_HOME}_src && \ - mkdir ${Z_HOME}/notebook/mmlspark -p && \ + mkdir ${Z_HOME}/notebook/synapseml -p && \ cd ${Z_HOME}_src && \ git checkout 
${Z_COMMIT} && \ echo '{ "allow_root": true }' > /root/.bowerrc && \ @@ -112,7 +112,7 @@ RUN echo "$LOG_TAG setting python dependencies" && \ ADD jars /jars # add notebooks -ADD mmlsparkExamples/ ${Z_HOME}/notebook/mmlspark/ +ADD synapsemlExamples/ ${Z_HOME}/notebook/synapse/ml/ ADD spark-defaults.conf /opt/spark/conf/spark-defaults.conf ADD zeppelin-env.sh ${Z_HOME}/conf/ diff --git a/tools/helm/zeppelin/mmlsparkExamples/classification_mmlspark_2E3REACQR.zpln b/tools/helm/zeppelin/mmlsparkExamples/classification_mmlspark_2E3REACQR.zpln index 745db798d0..fb21dcaaaa 100644 --- a/tools/helm/zeppelin/mmlsparkExamples/classification_mmlspark_2E3REACQR.zpln +++ b/tools/helm/zeppelin/mmlsparkExamples/classification_mmlspark_2E3REACQR.zpln @@ -118,7 +118,7 @@ "code": "SUCCESS", "msg": [ { - "data": "Help on package mmlspark:\n\nNAME\n mmlspark\n\nFILE\n /zeppelin/local-repo/Azure/mmlspark/0.15/mmlspark-0.15.jar/mmlspark/__init__.py\n\nDESCRIPTION\n MicrosoftML is a library of Python classes to interface with the\n Microsoft scala APIs to utilize Apache Spark to create distibuted\n machine learning models.\n \n MicrosoftML simplifies training and scoring classifiers and\n regressors, as well as facilitating the creation of models using the\n CNTK library, images, and text.\n\nPACKAGE CONTENTS\n AnalyzeImage\n AssembleFeatures\n BinaryFileReader\n BingImageReader\n BingImageSearch\n CNTKLearner\n CNTKModel\n Cacher\n CheckpointData\n ClassBalancer\n CleanMissingData\n ComputeModelStatistics\n ComputePerInstanceStatistics\n CustomInputParser\n CustomOutputParser\n DataConversion\n DescribeImage\n DetectFace\n DropColumns\n DynamicMiniBatchTransformer\n EnsembleByKey\n EntityDetector\n Explode\n FastVectorAssembler\n Featurize\n FindBestModel\n FindSimilarFace\n FixedMiniBatchTransformer\n FlattenBatch\n FluentAPI\n GenerateThumbnails\n GroupFaces\n HTTPTransformer\n HyperparamBuilder\n IdentifyFaces\n ImageFeaturizer\n ImageLIME\n ImageReader\n ImageSetAugmenter\n 
ImageTransformer\n ImageWriter\n IndexToValue\n JSONInputParser\n JSONOutputParser\n KeyPhraseExtractor\n Lambda\n LanguageDetector\n LightGBMClassifier\n LightGBMRegressor\n ModelDownloader\n MultiColumnAdapter\n MultiNGram\n NER\n OCR\n PageSplitter\n PartitionConsolidator\n PartitionSample\n PowerBIWriter\n RankingAdapter\n RankingAdapterModel\n RankingEvaluator\n RecognizeDomainSpecificContent\n RecognizeText\n RenameColumn\n Repartition\n SelectColumns\n ServingFunctions\n ServingImplicits\n SimpleHTTPTransformer\n StringOutputParser\n SummarizeData\n SuperpixelTransformer\n TagImage\n TextFeaturizer\n TextPreprocessor\n TextSentiment\n TimeIntervalMiniBatchTransformer\n Timer\n TrainClassifier\n TrainRegressor\n TuneHyperparameters\n TypeConversionUtils\n UDFTransformer\n UnrollBinaryImage\n UnrollImage\n Utils\n ValueIndexer\n ValueIndexerModel\n VerifyFaces\n _BingImageSearch\n _CNTKLearner\n _CNTKModel\n _FindBestModel\n _ImageFeaturizer\n _ImageTransformer\n _JSONOutputParser\n _LightGBMClassifier\n _LightGBMRegressor\n _ResizeImageTransformer\n _SimpleHTTPTransformer\n _TrainClassifier\n _TrainRegressor\n _TuneHyperparameters\n _UDFTransformer\n java_params_patch\n plot\n\nDATA\n BinaryFileFields = ['path', 'bytes']\n BinaryFileSchema = StructType(List(StructField(path,StringType,true),S...\n DEFAULT_URL = 'https://mmlspark.azureedge.net/datasets/CNTKModels/'\n ImageFields = ['path', 'height', 'width', 'type', 'bytes']\n ImageSchema = StructType(List(StructField(path,StringType,true...erTyp...\n __loader__ =

\r\n\r\nIn this tutorial, we perform the same classification task in two different ways: once using plain **`pyspark`** and once using the **`mmlspark`** library. The two methods yield the same performance, but one of the two libraries is drastically simpler to use and iterate on (can you guess which one?).\r\n\r\nThe task is simple: Predict whether a user's review of a book sold on Amazon is good (rating > 3) or bad based on the text of the review. We accomplish this by training LogisticRegression learners with different hyperparameters and choosing the best model.","user":"anonymous","config":{"tableHide":false,"editorSetting":{"language":"markdown","editOnDblClick":true,"completionKey":"TAB","completionSupport":false},"colWidth":12,"editorMode":"ace/mode/markdown","fontSize":9,"editorHide":true,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549560907558_-1510106009","id":"paragraph_1549560907558_-1510106009","dateCreated":"2019-02-07T17:35:52+0000","status":"FINISHED","focus":true,"$$hashKey":"object:7270","results":{"code":"SUCCESS","msg":[{"type":"HTML","data":"
\n

103 - Simplifying Machine Learning Pipelines with mmlspark

\n

1. Introduction

\n


\n

In this tutorial, we perform the same classification task in two different ways: once using plain pyspark and once using the mmlspark library. The two methods yield the same performance, but one of the two libraries is drastically simpler to use and iterate on (can you guess which one?).

\n

The task is simple: Predict whether a user’s review of a book sold on Amazon is good (rating > 3) or bad based on the text of the review. We accomplish this by training LogisticRegression learners with different hyperparameters and choosing the best model.

\n
"}]},"runtimeInfos":{}},{"text":"%md\r\n### 2. Read the data\r\n\r\nWe download and read in the data. We show a sample below:","user":"anonymous","config":{"tableHide":false,"editorSetting":{"language":"markdown","editOnDblClick":true,"completionKey":"TAB","completionSupport":false},"colWidth":12,"editorMode":"ace/mode/markdown","fontSize":9,"editorHide":true,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549560950666_-2091601662","id":"paragraph_1549560950666_-2091601662","dateCreated":"2019-02-07T17:36:11+0000","status":"FINISHED","focus":true,"$$hashKey":"object:7361","results":{"code":"SUCCESS","msg":[{"type":"HTML","data":"
\n

2. Read the data

\n

We download and read in the data. We show a sample below:

\n
"}]},"runtimeInfos":{}},{"text":"%pyspark\n# Zeppelin needs the path to be update manually to find mmlspark library\nimport sys\nsys.path.extend(sc.getConf().get(\"spark.jars\").split(\",\"))\n\nimport pandas as pd\nimport mmlspark\nfrom pyspark.sql.types import IntegerType, StringType, StructType, StructField\n\ndataFilePath = \"BookReviewsFromAmazon10K.tsv\"\ntextSchema = StructType([StructField(\"rating\", IntegerType(), False),\n StructField(\"text\", StringType(), False)])\nimport os, urllib\nif not os.path.isfile(dataFilePath):\n urllib.urlretrieve(\"https://mmlspark.azureedge.net/datasets/\" + dataFilePath, dataFilePath)\nrawData = spark.createDataFrame(pd.read_csv(dataFilePath, sep=\"\\t\", header=None), textSchema)\nrawData.show(5)\n","user":"anonymous","config":{"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/python","fontSize":9,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549560971147_-312816441","id":"paragraph_1549560971147_-312816441","dateCreated":"2019-02-07T17:36:26+0000","status":"READY","focus":true,"$$hashKey":"object:7460","runtimeInfos":{}},{"text":"%md\n### 3. Extract more features and process data\n\nReal data however is more complex than the above dataset. It is common for a dataset to have features of multiple types: text, numeric, categorical. 
To illustrate how difficult it is to work with these datasets, we add two numerical features to the dataset: the **word count** of the review and the **mean word length**.","user":"anonymous","config":{"tableHide":false,"editorSetting":{"language":"markdown","editOnDblClick":true,"completionKey":"TAB","completionSupport":false},"colWidth":12,"editorMode":"ace/mode/markdown","fontSize":9,"editorHide":true,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549561045867_-2023351219","id":"paragraph_1549561045867_-2023351219","dateCreated":"2019-02-07T17:37:29+0000","status":"FINISHED","focus":true,"$$hashKey":"object:7616","results":{"code":"SUCCESS","msg":[{"type":"HTML","data":"
\n

3. Extract more features and process data

\n

Real data however is more complex than the above dataset. It is common for a dataset to have features of multiple types: text, numeric, categorical. To illustrate how difficult it is to work with these datasets, we add two numerical features to the dataset: the word count of the review and the mean word length.

\n
"}]},"runtimeInfos":{}},{"text":"%pyspark\nfrom pyspark.sql.functions import udf\nfrom pyspark.sql.types import LongType, FloatType, DoubleType\ndef wordCount(s):\n return len(s.split())\ndef wordLength(s):\n import numpy as np\n ss = [len(w) for w in s.split()]\n return round(float(np.mean(ss)), 2)\nwordLengthUDF = udf(wordLength, DoubleType())\nwordCountUDF = udf(wordCount, IntegerType())","user":"anonymous","config":{"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/python","fontSize":9,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549560986476_-857211016","id":"paragraph_1549560986476_-857211016","dateCreated":"2019-02-07T17:37:10+0000","status":"READY","focus":true,"$$hashKey":"object:7544","runtimeInfos":{}},{"text":"%pyspark\nfrom mmlspark import UDFTransformer\nwordLength = \"wordLength\"\nwordCount = \"wordCount\"\nwordLengthTransformer = UDFTransformer(inputCol=\"text\", outputCol=wordLength, udf=wordLengthUDF)\nwordCountTransformer = UDFTransformer(inputCol=\"text\", outputCol=wordCount, udf=wordCountUDF)","user":"anonymous","config":{"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/python","fontSize":9,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549561076902_257402397","id":"paragraph_1549561076902_257402397","dateCreated":"2019-02-07T17:38:04+0000","status":"READY","focus":true,"$$hashKey":"object:7706","runtimeInfos":{}},{"text":"%pyspark\nfrom pyspark.ml import Pipeline\ndata = Pipeline(stages=[wordLengthTransformer, wordCountTransformer]) \\\n .fit(rawData).transform(rawData) \\\n .withColumn(\"label\", rawData[\"rating\"] > 
3).drop(\"rating\")","user":"anonymous","config":{"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/python","fontSize":9,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549561091918_-2108761101","id":"paragraph_1549561091918_-2108761101","dateCreated":"2019-02-07T17:38:16+0000","status":"READY","focus":true,"$$hashKey":"object:7778","runtimeInfos":{}},{"text":"%md\n### 4a. Classify using pyspark\n\nTo choose the best LogisticRegression classifier using the `pyspark` library, need to *explictly* perform the following steps:\n\n1. Process the features:\n * Tokenize the text column\n * Hash the tokenized column into a vector using hashing\n * Merge the numeric features with the vector in the step above\n2. Process the label column: cast it into the proper type.\n3. Train multiple LogisticRegression algorithms on the `train` dataset with different hyperparameters\n4. Compute the area under the ROC curve for each of the trained models and select the model with the highest metric as computed on the `test` dataset\n5. 
Evaluate the best model on the `validation` set\n\nAs you can see below, there is a lot of work involved and a lot of steps where something can go wrong!","user":"anonymous","config":{"editorSetting":{"language":"markdown","editOnDblClick":true,"completionKey":"TAB","completionSupport":false},"colWidth":12,"editorMode":"ace/mode/markdown","fontSize":9,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549561105519_758143693","id":"paragraph_1549561105519_758143693","dateCreated":"2019-02-07T17:38:35+0000","status":"READY","focus":true,"$$hashKey":"object:7850","runtimeInfos":{}},{"text":"%pyspark\nfrom pyspark.ml.feature import Tokenizer, HashingTF\nfrom pyspark.ml.feature import VectorAssembler\n\n# Featurize text column\ntokenizer = Tokenizer(inputCol=\"text\", outputCol=\"tokenizedText\")\nnumFeatures = 10000\nhashingScheme = HashingTF(inputCol=\"tokenizedText\",\n outputCol=\"TextFeatures\",\n numFeatures=numFeatures)\ntokenizedData = tokenizer.transform(data)\nfeaturizedData = hashingScheme.transform(tokenizedData)\n\n# Merge text and numeric features in one feature column\nfeatureColumnsArray = [\"TextFeatures\", \"wordCount\", \"wordLength\"]\nassembler = VectorAssembler(\n inputCols = featureColumnsArray,\n outputCol=\"features\")\nassembledData = assembler.transform(featurizedData)\n\n# Select only columns of interest\n# Convert rating column from boolean to int\nprocessedData = assembledData \\\n .select(\"label\", \"features\") \\\n .withColumn(\"label\", 
assembledData.label.cast(IntegerType()))\n","user":"anonymous","config":{"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/python","fontSize":9,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549561123327_369147431","id":"paragraph_1549561123327_369147431","dateCreated":"2019-02-07T17:38:50+0000","status":"READY","focus":true,"$$hashKey":"object:7922","runtimeInfos":{}},{"text":"%pyspark\nfrom pyspark.ml.evaluation import BinaryClassificationEvaluator\nfrom pyspark.ml.classification import LogisticRegression\n\n# Prepare data for learning\ntrain, test, validation = processedData.randomSplit([0.60, 0.20, 0.20], seed=123)\n\n# Train the models on the 'train' data\nlrHyperParams = [0.05, 0.1, 0.2, 0.4]\nlogisticRegressions = [LogisticRegression(regParam = hyperParam)\n for hyperParam in lrHyperParams]\nevaluator = BinaryClassificationEvaluator(rawPredictionCol=\"rawPrediction\",\n metricName=\"areaUnderROC\")\nmetrics = []\nmodels = []\n\n# Select the best model\nfor learner in logisticRegressions:\n model = learner.fit(train)\n models.append(model)\n scoredData = model.transform(test)\n metrics.append(evaluator.evaluate(scoredData))\nbestMetric = max(metrics)\nbestModel = models[metrics.index(bestMetric)]\n\n# Save model\nbestModel.write().overwrite().save(\"SparkMLExperiment.mmls\")\n# Get AUC on the validation dataset\nscoredVal = 
bestModel.transform(validation)\nprint(evaluator.evaluate(scoredVal))","user":"anonymous","config":{"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/python","fontSize":9,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549561136914_-1460942872","id":"paragraph_1549561136914_-1460942872","dateCreated":"2019-02-07T17:39:01+0000","status":"READY","focus":true,"$$hashKey":"object:7994","runtimeInfos":{}},{"text":"%md\n### 4b. Classify using mmlspark\n\nLife is a lot simpler when using `mmlspark`!\n\n1. The **`TrainClassifier`** Estimator featurizes the data internally,\n as long as the columns selected in the `train`, `test`, `validation`\n dataset represent the features\n\n2. The **`FindBestModel`** Estimator find the best model from a pool of\n trained models by find the model which performs best on the `test`\n dataset given the specified metric\n\n3. The **`CompueModelStatistics`** Transformer computes the different\n metrics on a scored dataset (in our case, the `validation` dataset)\n at the same time","user":"anonymous","config":{"tableHide":false,"editorSetting":{"language":"markdown","editOnDblClick":true,"completionKey":"TAB","completionSupport":false},"colWidth":12,"editorMode":"ace/mode/markdown","fontSize":9,"editorHide":true,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549561151031_-669643989","id":"paragraph_1549561151031_-669643989","dateCreated":"2019-02-07T17:39:16+0000","status":"FINISHED","focus":true,"$$hashKey":"object:8066","results":{"code":"SUCCESS","msg":[{"type":"HTML","data":"
\n

4b. Classify using mmlspark

\n

Life is a lot simpler when using mmlspark!

\n
    \n
  1. \n

    The TrainClassifier Estimator featurizes the data internally,
    as long as the columns selected in the train, test, validation
    dataset represent the features

  2. \n
  3. \n

    The FindBestModel Estimator find the best model from a pool of
    trained models by find the model which performs best on the test
    dataset given the specified metric

  4. \n
  5. \n

    The CompueModelStatistics Transformer computes the different
    metrics on a scored dataset (in our case, the validation dataset)
    at the same time

  6. \n
\n
"}]},"runtimeInfos":{}},{"text":"%pyspark\nfrom mmlspark import TrainClassifier, FindBestModel, ComputeModelStatistics\n\n# Prepare data for learning\ntrain, test, validation = data.randomSplit([0.60, 0.20, 0.20], seed=123)\n\n# Train the models on the 'train' data\nlrHyperParams = [0.05, 0.1, 0.2, 0.4]\nlogisticRegressions = [LogisticRegression(regParam = hyperParam)\n for hyperParam in lrHyperParams]\nlrmodels = [TrainClassifier(model=lrm, labelCol=\"label\", numFeatures=10000).fit(train)\n for lrm in logisticRegressions]\n\n# Select the best model\nbestModel = FindBestModel(evaluationMetric=\"AUC\", models=lrmodels).fit(test)\n\n# Save model\nbestModel.write().overwrite().save(\"MMLSExperiment.mmls\")\n# Get AUC on the validation dataset\npredictions = bestModel.transform(validation)\nmetrics = ComputeModelStatistics().transform(predictions)\nprint(\"Best model's AUC on validation set = \"\n + \"{0:.2f}%\".format(metrics.first()[\"AUC\"] * 100))","user":"anonymous","config":{"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/python","fontSize":9,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549561156327_905971663","id":"paragraph_1549561156327_905971663","dateCreated":"2019-02-07T17:39:36+0000","status":"READY","focus":true,"$$hashKey":"object:8144","runtimeInfos":{}}],"name":"simplification_mmlspark","id":"2E3XBY5JN","defaultInterpreterGroup":"spark","noteParams":{},"noteForms":{},"angularObjects":{},"config":{"isZeppelinNotebookCronEnable":false,"looknfeel":"default","personalizedMode":"false"},"info":{}} \ No newline at end of file 
+{"paragraphs":[{"user":"anonymous","config":{"editorSetting":{"language":"scala","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/scala","fontSize":9,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549560881266_-1396707350","id":"paragraph_1549560881266_-1396707350","dateCreated":"2019-02-07T17:35:07+0000","status":"READY","focus":true,"$$hashKey":"object:7074","text":"%spark.dep\n// include the azure synapseml dependency\nz.reset()\nz.load(\"Azure:synapseml:0.15\")\nz.load(\"org.apache.hadoop:hadoop-azure:2.7.0\")\nz.load(\"com.microsoft.azure:azure-storage:8.0.0\")","runtimeInfos":{}},{"text":"%md\r\n## 103 - Simplifying Machine Learning Pipelines with `synapseml`\r\n\r\n### 1. Introduction\r\n\r\n


\r\n\r\nIn this tutorial, we perform the same classification task in two different ways: once using plain **`pyspark`** and once using the **`synapseml`** library. The two methods yield the same performance, but one of the two libraries is drastically simpler to use and iterate on (can you guess which one?).\r\n\r\nThe task is simple: Predict whether a user's review of a book sold on Amazon is good (rating > 3) or bad based on the text of the review. We accomplish this by training LogisticRegression learners with different hyperparameters and choosing the best model.","user":"anonymous","config":{"tableHide":false,"editorSetting":{"language":"markdown","editOnDblClick":true,"completionKey":"TAB","completionSupport":false},"colWidth":12,"editorMode":"ace/mode/markdown","fontSize":9,"editorHide":true,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549560907558_-1510106009","id":"paragraph_1549560907558_-1510106009","dateCreated":"2019-02-07T17:35:52+0000","status":"FINISHED","focus":true,"$$hashKey":"object:7270","results":{"code":"SUCCESS","msg":[{"type":"HTML","data":"
\n

103 - Simplifying Machine Learning Pipelines with synapseml

\n

1. Introduction

\n


\n

In this tutorial, we perform the same classification task in two different ways: once using plain pyspark and once using the synapseml library. The two methods yield the same performance, but one of the two libraries is drastically simpler to use and iterate on (can you guess which one?).

\n

The task is simple: Predict whether a user’s review of a book sold on Amazon is good (rating > 3) or bad based on the text of the review. We accomplish this by training LogisticRegression learners with different hyperparameters and choosing the best model.

\n
"}]},"runtimeInfos":{}},{"text":"%md\r\n### 2. Read the data\r\n\r\nWe download and read in the data. We show a sample below:","user":"anonymous","config":{"tableHide":false,"editorSetting":{"language":"markdown","editOnDblClick":true,"completionKey":"TAB","completionSupport":false},"colWidth":12,"editorMode":"ace/mode/markdown","fontSize":9,"editorHide":true,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549560950666_-2091601662","id":"paragraph_1549560950666_-2091601662","dateCreated":"2019-02-07T17:36:11+0000","status":"FINISHED","focus":true,"$$hashKey":"object:7361","results":{"code":"SUCCESS","msg":[{"type":"HTML","data":"
\n

2. Read the data

\n

We download and read in the data. We show a sample below:

\n
"}]},"runtimeInfos":{}},{"text":"%pyspark\n# Zeppelin needs the path to be update manually to find synapseml library\nimport sys\nsys.path.extend(sc.getConf().get(\"spark.jars\").split(\",\"))\n\nimport pandas as pd\nimport synapse.ml\nfrom pyspark.sql.types import IntegerType, StringType, StructType, StructField\n\ndataFilePath = \"BookReviewsFromAmazon10K.tsv\"\ntextSchema = StructType([StructField(\"rating\", IntegerType(), False),\n StructField(\"text\", StringType(), False)])\nimport os, urllib\nif not os.path.isfile(dataFilePath):\n urllib.urlretrieve(\"https://synapseml.azureedge.net/datasets/\" + dataFilePath, dataFilePath)\nrawData = spark.createDataFrame(pd.read_csv(dataFilePath, sep=\"\\t\", header=None), textSchema)\nrawData.show(5)\n","user":"anonymous","config":{"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/python","fontSize":9,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549560971147_-312816441","id":"paragraph_1549560971147_-312816441","dateCreated":"2019-02-07T17:36:26+0000","status":"READY","focus":true,"$$hashKey":"object:7460","runtimeInfos":{}},{"text":"%md\n### 3. Extract more features and process data\n\nReal data however is more complex than the above dataset. It is common for a dataset to have features of multiple types: text, numeric, categorical. 
To illustrate how difficult it is to work with these datasets, we add two numerical features to the dataset: the **word count** of the review and the **mean word length**.","user":"anonymous","config":{"tableHide":false,"editorSetting":{"language":"markdown","editOnDblClick":true,"completionKey":"TAB","completionSupport":false},"colWidth":12,"editorMode":"ace/mode/markdown","fontSize":9,"editorHide":true,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549561045867_-2023351219","id":"paragraph_1549561045867_-2023351219","dateCreated":"2019-02-07T17:37:29+0000","status":"FINISHED","focus":true,"$$hashKey":"object:7616","results":{"code":"SUCCESS","msg":[{"type":"HTML","data":"
\n

3. Extract more features and process data

\n

Real data however is more complex than the above dataset. It is common for a dataset to have features of multiple types: text, numeric, categorical. To illustrate how difficult it is to work with these datasets, we add two numerical features to the dataset: the word count of the review and the mean word length.

\n
"}]},"runtimeInfos":{}},{"text":"%pyspark\nfrom pyspark.sql.functions import udf\nfrom pyspark.sql.types import LongType, FloatType, DoubleType\ndef wordCount(s):\n return len(s.split())\ndef wordLength(s):\n import numpy as np\n ss = [len(w) for w in s.split()]\n return round(float(np.mean(ss)), 2)\nwordLengthUDF = udf(wordLength, DoubleType())\nwordCountUDF = udf(wordCount, IntegerType())","user":"anonymous","config":{"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/python","fontSize":9,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549560986476_-857211016","id":"paragraph_1549560986476_-857211016","dateCreated":"2019-02-07T17:37:10+0000","status":"READY","focus":true,"$$hashKey":"object:7544","runtimeInfos":{}},{"text":"%pyspark\nfrom synapse.ml import UDFTransformer\nwordLength = \"wordLength\"\nwordCount = \"wordCount\"\nwordLengthTransformer = UDFTransformer(inputCol=\"text\", outputCol=wordLength, udf=wordLengthUDF)\nwordCountTransformer = UDFTransformer(inputCol=\"text\", outputCol=wordCount, udf=wordCountUDF)","user":"anonymous","config":{"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/python","fontSize":9,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549561076902_257402397","id":"paragraph_1549561076902_257402397","dateCreated":"2019-02-07T17:38:04+0000","status":"READY","focus":true,"$$hashKey":"object:7706","runtimeInfos":{}},{"text":"%pyspark\nfrom pyspark.ml import Pipeline\ndata = Pipeline(stages=[wordLengthTransformer, wordCountTransformer]) \\\n .fit(rawData).transform(rawData) \\\n .withColumn(\"label\", rawData[\"rating\"] > 
3).drop(\"rating\")","user":"anonymous","config":{"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/python","fontSize":9,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549561091918_-2108761101","id":"paragraph_1549561091918_-2108761101","dateCreated":"2019-02-07T17:38:16+0000","status":"READY","focus":true,"$$hashKey":"object:7778","runtimeInfos":{}},{"text":"%md\n### 4a. Classify using pyspark\n\nTo choose the best LogisticRegression classifier using the `pyspark` library, need to *explictly* perform the following steps:\n\n1. Process the features:\n * Tokenize the text column\n * Hash the tokenized column into a vector using hashing\n * Merge the numeric features with the vector in the step above\n2. Process the label column: cast it into the proper type.\n3. Train multiple LogisticRegression algorithms on the `train` dataset with different hyperparameters\n4. Compute the area under the ROC curve for each of the trained models and select the model with the highest metric as computed on the `test` dataset\n5. 
Evaluate the best model on the `validation` set\n\nAs you can see below, there is a lot of work involved and a lot of steps where something can go wrong!","user":"anonymous","config":{"editorSetting":{"language":"markdown","editOnDblClick":true,"completionKey":"TAB","completionSupport":false},"colWidth":12,"editorMode":"ace/mode/markdown","fontSize":9,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549561105519_758143693","id":"paragraph_1549561105519_758143693","dateCreated":"2019-02-07T17:38:35+0000","status":"READY","focus":true,"$$hashKey":"object:7850","runtimeInfos":{}},{"text":"%pyspark\nfrom pyspark.ml.feature import Tokenizer, HashingTF\nfrom pyspark.ml.feature import VectorAssembler\n\n# Featurize text column\ntokenizer = Tokenizer(inputCol=\"text\", outputCol=\"tokenizedText\")\nnumFeatures = 10000\nhashingScheme = HashingTF(inputCol=\"tokenizedText\",\n outputCol=\"TextFeatures\",\n numFeatures=numFeatures)\ntokenizedData = tokenizer.transform(data)\nfeaturizedData = hashingScheme.transform(tokenizedData)\n\n# Merge text and numeric features in one feature column\nfeatureColumnsArray = [\"TextFeatures\", \"wordCount\", \"wordLength\"]\nassembler = VectorAssembler(\n inputCols = featureColumnsArray,\n outputCol=\"features\")\nassembledData = assembler.transform(featurizedData)\n\n# Select only columns of interest\n# Convert rating column from boolean to int\nprocessedData = assembledData \\\n .select(\"label\", \"features\") \\\n .withColumn(\"label\", 
assembledData.label.cast(IntegerType()))\n","user":"anonymous","config":{"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/python","fontSize":9,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549561123327_369147431","id":"paragraph_1549561123327_369147431","dateCreated":"2019-02-07T17:38:50+0000","status":"READY","focus":true,"$$hashKey":"object:7922","runtimeInfos":{}},{"text":"%pyspark\nfrom pyspark.ml.evaluation import BinaryClassificationEvaluator\nfrom pyspark.ml.classification import LogisticRegression\n\n# Prepare data for learning\ntrain, test, validation = processedData.randomSplit([0.60, 0.20, 0.20], seed=123)\n\n# Train the models on the 'train' data\nlrHyperParams = [0.05, 0.1, 0.2, 0.4]\nlogisticRegressions = [LogisticRegression(regParam = hyperParam)\n for hyperParam in lrHyperParams]\nevaluator = BinaryClassificationEvaluator(rawPredictionCol=\"rawPrediction\",\n metricName=\"areaUnderROC\")\nmetrics = []\nmodels = []\n\n# Select the best model\nfor learner in logisticRegressions:\n model = learner.fit(train)\n models.append(model)\n scoredData = model.transform(test)\n metrics.append(evaluator.evaluate(scoredData))\nbestMetric = max(metrics)\nbestModel = models[metrics.index(bestMetric)]\n\n# Save model\nbestModel.write().overwrite().save(\"SparkMLExperiment.mmls\")\n# Get AUC on the validation dataset\nscoredVal = 
bestModel.transform(validation)\nprint(evaluator.evaluate(scoredVal))","user":"anonymous","config":{"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/python","fontSize":9,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549561136914_-1460942872","id":"paragraph_1549561136914_-1460942872","dateCreated":"2019-02-07T17:39:01+0000","status":"READY","focus":true,"$$hashKey":"object:7994","runtimeInfos":{}},{"text":"%md\n### 4b. Classify using synapseml\n\nLife is a lot simpler when using `synapseml`!\n\n1. The **`TrainClassifier`** Estimator featurizes the data internally,\n as long as the columns selected in the `train`, `test`, `validation`\n dataset represent the features\n\n2. The **`FindBestModel`** Estimator find the best model from a pool of\n trained models by find the model which performs best on the `test`\n dataset given the specified metric\n\n3. The **`CompueModelStatistics`** Transformer computes the different\n metrics on a scored dataset (in our case, the `validation` dataset)\n at the same time","user":"anonymous","config":{"tableHide":false,"editorSetting":{"language":"markdown","editOnDblClick":true,"completionKey":"TAB","completionSupport":false},"colWidth":12,"editorMode":"ace/mode/markdown","fontSize":9,"editorHide":true,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549561151031_-669643989","id":"paragraph_1549561151031_-669643989","dateCreated":"2019-02-07T17:39:16+0000","status":"FINISHED","focus":true,"$$hashKey":"object:8066","results":{"code":"SUCCESS","msg":[{"type":"HTML","data":"
\n

4b. Classify using synapseml

\n

Life is a lot simpler when using synapseml!

\n
    \n
  1. \n

    The TrainClassifier Estimator featurizes the data internally,
    as long as the columns selected in the train, test, validation
    dataset represent the features

  2. \n
  3. \n

    The FindBestModel Estimator finds the best model from a pool of
    trained models by finding the model which performs best on the test
    dataset given the specified metric

  4. \n
  5. \n

    The ComputeModelStatistics Transformer computes the different
    metrics on a scored dataset (in our case, the validation dataset)
    at the same time

  6. \n
\n
"}]},"runtimeInfos":{}},{"text":"%pyspark\nfrom synapse.ml import TrainClassifier, FindBestModel, ComputeModelStatistics\n\n# Prepare data for learning\ntrain, test, validation = data.randomSplit([0.60, 0.20, 0.20], seed=123)\n\n# Train the models on the 'train' data\nlrHyperParams = [0.05, 0.1, 0.2, 0.4]\nlogisticRegressions = [LogisticRegression(regParam = hyperParam)\n for hyperParam in lrHyperParams]\nlrmodels = [TrainClassifier(model=lrm, labelCol=\"label\", numFeatures=10000).fit(train)\n for lrm in logisticRegressions]\n\n# Select the best model\nbestModel = FindBestModel(evaluationMetric=\"AUC\", models=lrmodels).fit(test)\n\n# Save model\nbestModel.write().overwrite().save(\"MMLSExperiment.mmls\")\n# Get AUC on the validation dataset\npredictions = bestModel.transform(validation)\nmetrics = ComputeModelStatistics().transform(predictions)\nprint(\"Best model's AUC on validation set = \"\n + \"{0:.2f}%\".format(metrics.first()[\"AUC\"] * 100))","user":"anonymous","config":{"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/python","fontSize":9,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549561156327_905971663","id":"paragraph_1549561156327_905971663","dateCreated":"2019-02-07T17:39:36+0000","status":"READY","focus":true,"$$hashKey":"object:8144","runtimeInfos":{}}],"name":"simplification_synapseml","id":"2E3XBY5JN","defaultInterpreterGroup":"spark","noteParams":{},"noteForms":{},"angularObjects":{},"config":{"isZeppelinNotebookCronEnable":false,"looknfeel":"default","personalizedMode":"false"},"info":{}} \ No newline at end of file diff --git a/tools/helm/zeppelin/mmlsparkExamples/sparkserving_2DZFNGU8A.zpln b/tools/helm/zeppelin/mmlsparkExamples/sparkserving_2DZFNGU8A.zpln index 2f72e82c84..2bddb61bef 100644 --- a/tools/helm/zeppelin/mmlsparkExamples/sparkserving_2DZFNGU8A.zpln +++ 
b/tools/helm/zeppelin/mmlsparkExamples/sparkserving_2DZFNGU8A.zpln @@ -28,7 +28,7 @@ "status": "FINISHED", "focus": true, "$$hashKey": "object:883", - "text": "%spark.dep\n// include the azure mmlspark dependency\nz.reset()\nz.load(\"Azure:mmlspark:0.15\")", + "text": "%spark.dep\n// include the azure synapseml dependency\nz.reset()\nz.load(\"Azure:synapseml:0.15\")", "results": { "code": "SUCCESS", "msg": [ @@ -41,7 +41,7 @@ "runtimeInfos": {} }, { - "text": "%spark.pyspark\r\n\r\n# Zeppelin needs the path to be update manually to find mmlspark library\r\nimport sys\r\nsys.path.extend(sc.getConf().get(\"spark.jars\").split(\",\"))\r\n\r\n\r\nimport mmlspark\r\nfrom pyspark.sql.types import *\r\nfrom pyspark.sql import SparkSession\r\n\r\nfrom pyspark.sql.functions import length, col\r\n\r\nspark = SparkSession.builder.appName(\"SimpleContServing\").getOrCreate()\r\nsc = spark.sparkContext\r\nsc.setLogLevel(\"WARN\")\r\n\r\nprint(\"creating df\")\r\ndf = spark.readStream.continuousServer() \\\r\n .address(\"0.0.0.0\", 8888, \"my_api\") \\\r\n .load() \\\r\n .parseRequest(StructType().add(\"foo\", StringType()).add(\"bar\", IntegerType()))\r\n\r\nreplies = df.withColumn(\"fooLength\", length(col(\"foo\")))\\\r\n .makeReply(\"fooLength\")\r\n\r\nprint(\"creating server\")\r\nserver = replies\\\r\n .writeStream \\\r\n .continuousServer() \\\r\n .trigger(continuous=\"1 second\") \\\r\n .replyTo(\"my_api\") \\\r\n .queryName(\"my_query\") \\\r\n .option(\"checkpointLocation\", \"file:///tmp/checkpoints\")\r\n\r\nprint(\"starting server\")\r\nquery = server.start()\r\nprint(\"server running\")\r\nquery.awaitTermination()\r\n\r\n# Test \r\n# curl -X POST -d '{\"foo\":\"foolen\", \"bar\":43}' -H \"ContentType: application/json\" http://[[ip address of load balancer]]:8888/", + "text": "%spark.pyspark\r\n\r\n# Zeppelin needs the path to be update manually to find synapseml library\r\nimport 
sys\r\nsys.path.extend(sc.getConf().get(\"spark.jars\").split(\",\"))\r\n\r\n\r\nimport synapse.ml\r\nfrom pyspark.sql.types import *\r\nfrom pyspark.sql import SparkSession\r\n\r\nfrom pyspark.sql.functions import length, col\r\n\r\nspark = SparkSession.builder.appName(\"SimpleContServing\").getOrCreate()\r\nsc = spark.sparkContext\r\nsc.setLogLevel(\"WARN\")\r\n\r\nprint(\"creating df\")\r\ndf = spark.readStream.continuousServer() \\\r\n .address(\"0.0.0.0\", 8888, \"my_api\") \\\r\n .load() \\\r\n .parseRequest(StructType().add(\"foo\", StringType()).add(\"bar\", IntegerType()))\r\n\r\nreplies = df.withColumn(\"fooLength\", length(col(\"foo\")))\\\r\n .makeReply(\"fooLength\")\r\n\r\nprint(\"creating server\")\r\nserver = replies\\\r\n .writeStream \\\r\n .continuousServer() \\\r\n .trigger(continuous=\"1 second\") \\\r\n .replyTo(\"my_api\") \\\r\n .queryName(\"my_query\") \\\r\n .option(\"checkpointLocation\", \"file:///tmp/checkpoints\")\r\n\r\nprint(\"starting server\")\r\nquery = server.start()\r\nprint(\"server running\")\r\nquery.awaitTermination()\r\n\r\n# Test \r\n# curl -X POST -d '{\"foo\":\"foolen\", \"bar\":43}' -H \"ContentType: application/json\" http://[[ip address of load balancer]]:8888/", "user": "anonymous", "config": { "tableHide": false, diff --git a/tools/helm/zeppelin/mmlsparkExamples/submitjob_2DZ7DHX6E.zpln b/tools/helm/zeppelin/mmlsparkExamples/submitjob_2DZ7DHX6E.zpln index 5a121655a5..20cc6987ab 100644 --- a/tools/helm/zeppelin/mmlsparkExamples/submitjob_2DZ7DHX6E.zpln +++ b/tools/helm/zeppelin/mmlsparkExamples/submitjob_2DZ7DHX6E.zpln @@ -1,7 +1,7 @@ { "paragraphs": [ { - "text": "%md\nContents of /zeppelin/notebook/mmlspark/serving.py\n```\nimport mmlspark\nfrom pyspark.sql.types import *\nfrom pyspark.sql import SparkSession\n\nfrom pyspark.sql.functions import length, col\n\nspark = SparkSession.builder.appName(\"SimpleContServing\").getOrCreate()\nsc = spark.sparkContext\nsc.setLogLevel(\"WARN\")\n\nprint(\"creating df\")\ndf = 
spark.readStream.continuousServer() \\\n .address(\"0.0.0.0\", 8888, \"my_api\") \\\n .load() \\\n .parseRequest(StructType().add(\"foo\", StringType()).add(\"bar\", IntegerType()))\n\nreplies = df.withColumn(\"fooLength\", length(col(\"foo\")))\\\n .makeReply(\"fooLength\")\n\nprint(\"creating server\")\nserver = replies\\\n .writeStream \\\n .continuousServer() \\\n .trigger(continuous=\"1 second\") \\\n .replyTo(\"my_api\") \\\n .queryName(\"my_query\") \\\n .option(\"checkpointLocation\", \"file:///tmp/checkpoints\")\n\nprint(\"starting server\")\nquery = server.start()\nquery.awaitTermination()\n\n# Submit the server\n# .\\bin\\spark-submit --packages com.microsoft.ml.spark:mmlspark_2.11:0.14.dev42 --repositories https://mmlspark.azureedge.net/maven serving2.py\n\n# Test \n# curl -X POST -d '{\"foo\":\"foolen\", \"bar\":43}' -H \"ContentType: application/json\" http://[[ip address of load balancer]]:8888/\n```", + "text": "%md\nContents of /zeppelin/notebook/mmlspark/serving.py\n```\nimport synapse.ml\nfrom pyspark.sql.types import *\nfrom pyspark.sql import SparkSession\n\nfrom pyspark.sql.functions import length, col\n\nspark = SparkSession.builder.appName(\"SimpleContServing\").getOrCreate()\nsc = spark.sparkContext\nsc.setLogLevel(\"WARN\")\n\nprint(\"creating df\")\ndf = spark.readStream.continuousServer() \\\n .address(\"0.0.0.0\", 8888, \"my_api\") \\\n .load() \\\n .parseRequest(StructType().add(\"foo\", StringType()).add(\"bar\", IntegerType()))\n\nreplies = df.withColumn(\"fooLength\", length(col(\"foo\")))\\\n .makeReply(\"fooLength\")\n\nprint(\"creating server\")\nserver = replies\\\n .writeStream \\\n .continuousServer() \\\n .trigger(continuous=\"1 second\") \\\n .replyTo(\"my_api\") \\\n .queryName(\"my_query\") \\\n .option(\"checkpointLocation\", \"file:///tmp/checkpoints\")\n\nprint(\"starting server\")\nquery = server.start()\nquery.awaitTermination()\n\n# Submit the server\n# .\\bin\\spark-submit --packages 
com.microsoft.azure:mmlspark_2.11:0.14.dev42 --repositories https://mmlspark.azureedge.net/maven serving2.py\n\n# Test \n# curl -X POST -d '{\"foo\":\"foolen\", \"bar\":43}' -H \"ContentType: application/json\" http://[[ip address of load balancer]]:8888/\n```", "user": "anonymous", "config": { "tableHide": false, @@ -26,7 +26,7 @@ "msg": [ { "type": "HTML", - "data": "
\n

Contents of /zeppelin/notebook/mmlspark/serving.py

\n
import mmlspark\nfrom pyspark.sql.types import *\nfrom pyspark.sql import SparkSession\n\nfrom pyspark.sql.functions import length, col\n\nspark = SparkSession.builder.appName("SimpleContServing").getOrCreate()\nsc = spark.sparkContext\nsc.setLogLevel("WARN")\n\nprint("creating df")\ndf = spark.readStream.continuousServer() \\\n    .address("0.0.0.0", 8888, "my_api") \\\n    .load() \\\n    .parseRequest(StructType().add("foo", StringType()).add("bar", IntegerType()))\n\nreplies = df.withColumn("fooLength", length(col("foo")))\\\n    .makeReply("fooLength")\n\nprint("creating server")\nserver = replies\\\n    .writeStream \\\n    .continuousServer() \\\n    .trigger(continuous="1 second") \\\n    .replyTo("my_api") \\\n    .queryName("my_query") \\\n    .option("checkpointLocation", "file:///tmp/checkpoints")\n\nprint("starting server")\nquery = server.start()\nquery.awaitTermination()\n\n# Submit the server\n# .\\bin\\spark-submit --packages com.microsoft.ml.spark:mmlspark_2.11:0.14.dev42 --repositories https://mmlspark.azureedge.net/maven  serving2.py\n\n# Test \n# curl -X POST -d '{"foo":"foolen", "bar":43}' -H "ContentType: application/json" http://[[ip address of load balancer]]:8888/\n
\n
" + "data": "
\n

Contents of /zeppelin/notebook/mmlspark/serving.py

\n
import synapse.ml\nfrom pyspark.sql.types import *\nfrom pyspark.sql import SparkSession\n\nfrom pyspark.sql.functions import length, col\n\nspark = SparkSession.builder.appName("SimpleContServing").getOrCreate()\nsc = spark.sparkContext\nsc.setLogLevel("WARN")\n\nprint("creating df")\ndf = spark.readStream.continuousServer() \\\n    .address("0.0.0.0", 8888, "my_api") \\\n    .load() \\\n    .parseRequest(StructType().add("foo", StringType()).add("bar", IntegerType()))\n\nreplies = df.withColumn("fooLength", length(col("foo")))\\\n    .makeReply("fooLength")\n\nprint("creating server")\nserver = replies\\\n    .writeStream \\\n    .continuousServer() \\\n    .trigger(continuous="1 second") \\\n    .replyTo("my_api") \\\n    .queryName("my_query") \\\n    .option("checkpointLocation", "file:///tmp/checkpoints")\n\nprint("starting server")\nquery = server.start()\nquery.awaitTermination()\n\n# Submit the server\n# .\\bin\\spark-submit --packages com.microsoft.azure:mmlspark_2.11:0.14.dev42 --repositories https://mmlspark.azureedge.net/maven  serving2.py\n\n# Test \n# curl -X POST -d '{"foo":"foolen", "bar":43}' -H "ContentType: application/json" http://[[ip address of load balancer]]:8888/\n
\n
" } ] }, diff --git a/tools/helm/zeppelin/zeppelin-env.sh b/tools/helm/zeppelin/zeppelin-env.sh index 63f4a928a5..50b94fb513 100644 --- a/tools/helm/zeppelin/zeppelin-env.sh +++ b/tools/helm/zeppelin/zeppelin-env.sh @@ -71,7 +71,7 @@ export MASTER="${SPARK_MASTER:=local[*]}" ## defining SPARK_HOME makes Zeppelin run spark interpreter process using spark-submit ## export SPARK_HOME=/opt/spark/ # (required) When it is defined, load it instead of Zeppelin embedded Spark libraries -# export SPARK_SUBMIT_OPTIONS="--packages com.microsoft.ml.spark:mmlspark_2.11:0.14.dev42 --repositories https://mmlspark.azureedge.net/maven" # (optional) extra options to pass to spark submit. eg) "--driver-memory 512M --executor-memory 1G". +# export SPARK_SUBMIT_OPTIONS="--packages com.microsoft.azure:synapseml_2.11:0.14.dev42 --repositories https://synapseml.azureedge.net/maven" # (optional) extra options to pass to spark submit. eg) "--driver-memory 512M --executor-memory 1G". # export SPARK_APP_NAME # (optional) The name of spark application. ## Use embedded spark binaries ## diff --git a/tools/misc/get-stats b/tools/misc/get-stats index ffd934cb54..e893104112 100755 --- a/tools/misc/get-stats +++ b/tools/misc/get-stats @@ -5,7 +5,7 @@ . 
"$(dirname "${BASH_SOURCE[0]}")/../../runme" DAPI="https://hub.docker.com/v2/repositories/microsoft/mmlspark" -GAPI="https://api.github.com/repos/Azure/mmlspark" +GAPI="https://api.github.com/repos/Microsoft/SynapseML" jget() { local url="$1"; shift; curl -s "$url" | jq -r "$@"; } jget_paged() { diff --git a/tools/pytest/run_all_tests.py b/tools/pytest/run_all_tests.py index fc4a76b84b..6d38d85d35 100644 --- a/tools/pytest/run_all_tests.py +++ b/tools/pytest/run_all_tests.py @@ -1,7 +1,7 @@ import unittest import xmlrunner -all_test_cases = unittest.defaultTestLoader.discover('target/scala-2.11/generated/test/python/mmlspark','*.py') +all_test_cases = unittest.defaultTestLoader.discover('target/scala-2.11/generated/test/python/synapseml','*.py') test_runner=xmlrunner.XMLTestRunner(output="target/scala-2.11/generated/test_results/python") # Loop the found test cases and add them into test suite. diff --git a/tools/vagrant/Vagrantfile b/tools/vagrant/Vagrantfile index 0588ff41d4..dc576a5535 100644 --- a/tools/vagrant/Vagrantfile +++ b/tools/vagrant/Vagrantfile @@ -106,10 +106,10 @@ config.vm.provision "shell", inline: <<-SHELL SHELL config.vm.provision "shell", privileged: false, inline: <<-SHELL - # Install mmlspark + # Install synapseml cd ${HOME_DIR} - git clone https://github.com/Azure/mmlspark.git - cd mmlspark + git clone https://github.com/Microsoft/SynapseML.git + cd synapseml sbt setup SHELL diff --git a/vw/src/main/python/mmlspark/vw/VowpalWabbitClassificationModel.py b/vw/src/main/python/synapse/ml/vw/VowpalWabbitClassificationModel.py similarity index 91% rename from vw/src/main/python/mmlspark/vw/VowpalWabbitClassificationModel.py rename to vw/src/main/python/synapse/ml/vw/VowpalWabbitClassificationModel.py index 0aef477fbd..709677e035 100644 --- a/vw/src/main/python/mmlspark/vw/VowpalWabbitClassificationModel.py +++ b/vw/src/main/python/synapse/ml/vw/VowpalWabbitClassificationModel.py @@ -1,7 +1,7 @@ # Copyright (C) Microsoft Corporation. 
All rights reserved. # Licensed under the MIT License. See LICENSE in project root for information. -from mmlspark.vw._VowpalWabbitClassificationModel import _VowpalWabbitClassificationModel +from synapse.ml.vw._VowpalWabbitClassificationModel import _VowpalWabbitClassificationModel from pyspark.ml.common import inherit_doc from pyspark import SparkContext, SQLContext from pyspark.sql import DataFrame diff --git a/vw/src/main/python/mmlspark/vw/VowpalWabbitClassifier.py b/vw/src/main/python/synapse/ml/vw/VowpalWabbitClassifier.py similarity index 85% rename from vw/src/main/python/mmlspark/vw/VowpalWabbitClassifier.py rename to vw/src/main/python/synapse/ml/vw/VowpalWabbitClassifier.py index ac33082148..9e5ac24cb2 100644 --- a/vw/src/main/python/mmlspark/vw/VowpalWabbitClassifier.py +++ b/vw/src/main/python/synapse/ml/vw/VowpalWabbitClassifier.py @@ -1,7 +1,7 @@ # Copyright (C) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. See LICENSE in project root for information. -from mmlspark.vw._VowpalWabbitClassifier import _VowpalWabbitClassifier +from synapse.ml.vw._VowpalWabbitClassifier import _VowpalWabbitClassifier from pyspark.ml.common import inherit_doc @inherit_doc diff --git a/vw/src/main/python/mmlspark/vw/VowpalWabbitContextualBandit.py b/vw/src/main/python/synapse/ml/vw/VowpalWabbitContextualBandit.py similarity index 93% rename from vw/src/main/python/mmlspark/vw/VowpalWabbitContextualBandit.py rename to vw/src/main/python/synapse/ml/vw/VowpalWabbitContextualBandit.py index 7fd6f83710..3684656691 100644 --- a/vw/src/main/python/mmlspark/vw/VowpalWabbitContextualBandit.py +++ b/vw/src/main/python/synapse/ml/vw/VowpalWabbitContextualBandit.py @@ -1,7 +1,7 @@ # Copyright (C) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. See LICENSE in project root for information. 
-from mmlspark.vw._VowpalWabbitContextualBandit import _VowpalWabbitContextualBandit +from synapse.ml.vw._VowpalWabbitContextualBandit import _VowpalWabbitContextualBandit from pyspark.ml.common import inherit_doc from pyspark import SparkContext from pyspark.ml.wrapper import JavaWrapper diff --git a/vw/src/main/python/mmlspark/vw/VowpalWabbitContextualBanditModel.py b/vw/src/main/python/synapse/ml/vw/VowpalWabbitContextualBanditModel.py similarity index 90% rename from vw/src/main/python/mmlspark/vw/VowpalWabbitContextualBanditModel.py rename to vw/src/main/python/synapse/ml/vw/VowpalWabbitContextualBanditModel.py index ae74383d8e..d46158e7d4 100644 --- a/vw/src/main/python/mmlspark/vw/VowpalWabbitContextualBanditModel.py +++ b/vw/src/main/python/synapse/ml/vw/VowpalWabbitContextualBanditModel.py @@ -1,7 +1,7 @@ # Copyright (C) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. See LICENSE in project root for information. -from mmlspark.vw._VowpalWabbitContextualBanditModel import _VowpalWabbitContextualBanditModel +from synapse.ml.vw._VowpalWabbitContextualBanditModel import _VowpalWabbitContextualBanditModel from pyspark.ml.common import inherit_doc from pyspark import SparkContext, SQLContext from pyspark.sql import DataFrame diff --git a/vw/src/main/python/mmlspark/vw/VowpalWabbitRegressionModel.py b/vw/src/main/python/synapse/ml/vw/VowpalWabbitRegressionModel.py similarity index 91% rename from vw/src/main/python/mmlspark/vw/VowpalWabbitRegressionModel.py rename to vw/src/main/python/synapse/ml/vw/VowpalWabbitRegressionModel.py index 4ed9fb6868..16a3b23628 100644 --- a/vw/src/main/python/mmlspark/vw/VowpalWabbitRegressionModel.py +++ b/vw/src/main/python/synapse/ml/vw/VowpalWabbitRegressionModel.py @@ -1,7 +1,7 @@ # Copyright (C) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. See LICENSE in project root for information. 
-from mmlspark.vw._VowpalWabbitRegressionModel import _VowpalWabbitRegressionModel +from synapse.ml.vw._VowpalWabbitRegressionModel import _VowpalWabbitRegressionModel from pyspark.ml.common import inherit_doc from pyspark import SparkContext, SQLContext from pyspark.sql import DataFrame diff --git a/vw/src/main/python/mmlspark/vw/VowpalWabbitRegressor.py b/vw/src/main/python/synapse/ml/vw/VowpalWabbitRegressor.py similarity index 86% rename from vw/src/main/python/mmlspark/vw/VowpalWabbitRegressor.py rename to vw/src/main/python/synapse/ml/vw/VowpalWabbitRegressor.py index f4584fad2c..862ca2366a 100644 --- a/vw/src/main/python/mmlspark/vw/VowpalWabbitRegressor.py +++ b/vw/src/main/python/synapse/ml/vw/VowpalWabbitRegressor.py @@ -1,7 +1,7 @@ # Copyright (C) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. See LICENSE in project root for information. -from mmlspark.vw._VowpalWabbitRegressor import _VowpalWabbitRegressor +from synapse.ml.vw._VowpalWabbitRegressor import _VowpalWabbitRegressor from pyspark.ml.common import inherit_doc diff --git a/vw/src/main/python/mmlspark/vw/__init__.py b/vw/src/main/python/synapse/ml/vw/__init__.py similarity index 100% rename from vw/src/main/python/mmlspark/vw/__init__.py rename to vw/src/main/python/synapse/ml/vw/__init__.py diff --git a/vw/src/main/scala/com/microsoft/ml/spark/vw/HasNumBits.scala b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/HasNumBits.scala similarity index 95% rename from vw/src/main/scala/com/microsoft/ml/spark/vw/HasNumBits.scala rename to vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/HasNumBits.scala index a06db395b7..72d0fe2976 100644 --- a/vw/src/main/scala/com/microsoft/ml/spark/vw/HasNumBits.scala +++ b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/HasNumBits.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.vw +package com.microsoft.azure.synapse.ml.vw import org.apache.spark.ml.param.{IntParam, Params} diff --git a/vw/src/main/scala/com/microsoft/ml/spark/vw/HasSumcollisions.scala b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/HasSumcollisions.scala similarity index 93% rename from vw/src/main/scala/com/microsoft/ml/spark/vw/HasSumcollisions.scala rename to vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/HasSumcollisions.scala index ce5a488f4e..e3b558c34f 100644 --- a/vw/src/main/scala/com/microsoft/ml/spark/vw/HasSumcollisions.scala +++ b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/HasSumcollisions.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.vw +package com.microsoft.azure.synapse.ml.vw import org.apache.spark.ml.param.{BooleanParam, Params} diff --git a/vw/src/main/scala/com/microsoft/ml/spark/vw/VectorUtils.scala b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/VectorUtils.scala similarity index 97% rename from vw/src/main/scala/com/microsoft/ml/spark/vw/VectorUtils.scala rename to vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/VectorUtils.scala index c2540bd670..84356e7e5a 100644 --- a/vw/src/main/scala/com/microsoft/ml/spark/vw/VectorUtils.scala +++ b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/VectorUtils.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.vw +package com.microsoft.azure.synapse.ml.vw object VectorUtils { /** diff --git a/vw/src/main/scala/com/microsoft/ml/spark/vw/VectorZipper.scala b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/VectorZipper.scala similarity index 85% rename from vw/src/main/scala/com/microsoft/ml/spark/vw/VectorZipper.scala rename to vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/VectorZipper.scala index d0e04c5fde..dddc82df6d 100644 --- a/vw/src/main/scala/com/microsoft/ml/spark/vw/VectorZipper.scala +++ b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/VectorZipper.scala @@ -1,11 +1,11 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.vw +package com.microsoft.azure.synapse.ml.vw -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.contracts.{HasInputCols, HasOutputCol} -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts.{HasInputCols, HasOutputCol} +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.ml.param._ import org.apache.spark.ml.util._ import org.apache.spark.ml.{ComplexParamsReadable, ComplexParamsWritable, Transformer} diff --git a/vw/src/main/scala/com/microsoft/ml/spark/vw/VowpalWabbitBase.scala b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/VowpalWabbitBase.scala similarity index 98% rename from vw/src/main/scala/com/microsoft/ml/spark/vw/VowpalWabbitBase.scala rename to vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/VowpalWabbitBase.scala index faad604349..e6b53b4bab 100644 --- a/vw/src/main/scala/com/microsoft/ml/spark/vw/VowpalWabbitBase.scala +++ b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/VowpalWabbitBase.scala @@ -1,15 +1,12 @@ // Copyright (C) Microsoft Corporation. All rights reserved. 
// Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.vw +package com.microsoft.azure.synapse.ml.vw -import java.util.UUID - -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.contracts.HasWeightCol -import com.microsoft.ml.spark.core.env.StreamUtilities -import com.microsoft.ml.spark.core.utils.{ClusterUtil, StopWatch} -import com.microsoft.ml.spark.core.utils.FaultToleranceUtils +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts.HasWeightCol +import com.microsoft.azure.synapse.ml.core.env.StreamUtilities +import com.microsoft.azure.synapse.ml.core.utils.{ClusterUtil, FaultToleranceUtils, StopWatch} import org.apache.spark.TaskContext import org.apache.spark.internal._ import org.apache.spark.ml.param._ @@ -18,6 +15,7 @@ import org.apache.spark.sql.types._ import org.apache.spark.sql.{DataFrame, Dataset, Encoders, Row} import org.vowpalwabbit.spark._ +import java.util.UUID import scala.math.min import scala.util.{Failure, Success} diff --git a/vw/src/main/scala/com/microsoft/ml/spark/vw/VowpalWabbitBaseModel.scala b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/VowpalWabbitBaseModel.scala similarity index 94% rename from vw/src/main/scala/com/microsoft/ml/spark/vw/VowpalWabbitBaseModel.scala rename to vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/VowpalWabbitBaseModel.scala index d020882991..4aaa12ef48 100644 --- a/vw/src/main/scala/com/microsoft/ml/spark/vw/VowpalWabbitBaseModel.scala +++ b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/VowpalWabbitBaseModel.scala @@ -1,11 +1,11 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.vw +package com.microsoft.azure.synapse.ml.vw -import com.microsoft.ml.spark.core.env.StreamUtilities -import com.microsoft.ml.spark.core.utils.FaultToleranceUtils -import org.apache.spark.binary.BinaryFileFormat +import com.microsoft.azure.synapse.ml.core.env.StreamUtilities +import com.microsoft.azure.synapse.ml.core.utils.FaultToleranceUtils +import com.microsoft.azure.synapse.ml.io.binary.BinaryFileFormat import org.apache.spark.ml.param.{ByteArrayParam, DataFrameParam, Param} import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession} import org.apache.spark.sql.functions.{col, struct, udf} diff --git a/vw/src/main/scala/com/microsoft/ml/spark/vw/VowpalWabbitClassifier.scala b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/VowpalWabbitClassifier.scala similarity index 95% rename from vw/src/main/scala/com/microsoft/ml/spark/vw/VowpalWabbitClassifier.scala rename to vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/VowpalWabbitClassifier.scala index fde711bc3a..25da5d6edd 100644 --- a/vw/src/main/scala/com/microsoft/ml/spark/vw/VowpalWabbitClassifier.scala +++ b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/VowpalWabbitClassifier.scala @@ -1,11 +1,11 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.vw +package com.microsoft.azure.synapse.ml.vw -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.schema.DatasetExtensions._ -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.schema.DatasetExtensions._ +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.ml.classification.{ProbabilisticClassificationModel, ProbabilisticClassifier} import org.apache.spark.ml.linalg.{Vector, Vectors} import org.apache.spark.ml.param._ diff --git a/vw/src/main/scala/com/microsoft/ml/spark/vw/VowpalWabbitContextualBandit.scala b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/VowpalWabbitContextualBandit.scala similarity index 98% rename from vw/src/main/scala/com/microsoft/ml/spark/vw/VowpalWabbitContextualBandit.scala rename to vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/VowpalWabbitContextualBandit.scala index e74214aacd..7672199ab2 100644 --- a/vw/src/main/scala/com/microsoft/ml/spark/vw/VowpalWabbitContextualBandit.scala +++ b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/VowpalWabbitContextualBandit.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.vw +package com.microsoft.azure.synapse.ml.vw -import com.microsoft.ml.spark.io.http.SharedVariable -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.io.http.SharedVariable +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.ml.ParamInjections.HasParallelismInjected import org.apache.spark.ml.linalg.SQLDataTypes.VectorType import org.apache.spark.ml.param._ diff --git a/vw/src/main/scala/com/microsoft/ml/spark/vw/VowpalWabbitFeaturizer.scala b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/VowpalWabbitFeaturizer.scala similarity index 97% rename from vw/src/main/scala/com/microsoft/ml/spark/vw/VowpalWabbitFeaturizer.scala rename to vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/VowpalWabbitFeaturizer.scala index 94a9debd62..c295beba7e 100644 --- a/vw/src/main/scala/com/microsoft/ml/spark/vw/VowpalWabbitFeaturizer.scala +++ b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/VowpalWabbitFeaturizer.scala @@ -1,21 +1,21 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.vw +package com.microsoft.azure.synapse.ml.vw -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.contracts.{HasInputCols, HasOutputCol} -import com.microsoft.ml.spark.logging.BasicLogging -import com.microsoft.ml.spark.vw.featurizer._ -import org.apache.spark.ml.{ComplexParamsReadable, ComplexParamsWritable, Transformer} +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts.{HasInputCols, HasOutputCol} +import com.microsoft.azure.synapse.ml.logging.BasicLogging +import com.microsoft.azure.synapse.ml.vw.featurizer._ +import org.apache.spark.ml.linalg.SQLDataTypes.VectorType +import org.apache.spark.ml.linalg.Vectors import org.apache.spark.ml.param.{BooleanParam, IntParam, ParamMap, StringArrayParam} +import org.apache.spark.ml.util.Identifiable +import org.apache.spark.ml.{ComplexParamsReadable, ComplexParamsWritable, Transformer} +import org.apache.spark.sql.functions.{col, struct, udf} import org.apache.spark.sql.types.{StringType, _} import org.apache.spark.sql.{DataFrame, Dataset, Row} -import org.apache.spark.sql.functions.{col, struct, udf} import org.vowpalwabbit.spark.VowpalWabbitMurmur -import org.apache.spark.ml.linalg.Vectors -import org.apache.spark.ml.util.Identifiable -import org.apache.spark.ml.linalg.SQLDataTypes.VectorType import scala.collection.mutable diff --git a/vw/src/main/scala/com/microsoft/ml/spark/vw/VowpalWabbitInteractions.scala b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/VowpalWabbitInteractions.scala similarity index 93% rename from vw/src/main/scala/com/microsoft/ml/spark/vw/VowpalWabbitInteractions.scala rename to vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/VowpalWabbitInteractions.scala index 14c748c87a..1c1495e071 100644 --- a/vw/src/main/scala/com/microsoft/ml/spark/vw/VowpalWabbitInteractions.scala +++ b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/VowpalWabbitInteractions.scala @@ 
-1,19 +1,19 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.vw +package com.microsoft.azure.synapse.ml.vw -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.core.contracts.{HasInputCols, HasOutputCol} -import com.microsoft.ml.spark.logging.BasicLogging -import org.apache.spark.ml.{ComplexParamsReadable, ComplexParamsWritable, Transformer} -import org.apache.spark.ml.param.ParamMap -import org.apache.spark.sql.{DataFrame, Dataset, Row} -import org.apache.spark.sql.functions.{col, struct, udf} +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.core.contracts.{HasInputCols, HasOutputCol} +import com.microsoft.azure.synapse.ml.logging.BasicLogging +import org.apache.spark.ml.linalg.SQLDataTypes.VectorType import org.apache.spark.ml.linalg.{Vector, Vectors} +import org.apache.spark.ml.param.ParamMap import org.apache.spark.ml.util.Identifiable +import org.apache.spark.ml.{ComplexParamsReadable, ComplexParamsWritable, Transformer} +import org.apache.spark.sql.functions.{col, struct, udf} import org.apache.spark.sql.types.{StructField, StructType} -import org.apache.spark.ml.linalg.SQLDataTypes.VectorType +import org.apache.spark.sql.{DataFrame, Dataset, Row} object VowpalWabbitInteractions extends ComplexParamsReadable[VowpalWabbitInteractions] diff --git a/vw/src/main/scala/com/microsoft/ml/spark/vw/VowpalWabbitMurmurWithPrefix.scala b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/VowpalWabbitMurmurWithPrefix.scala similarity index 98% rename from vw/src/main/scala/com/microsoft/ml/spark/vw/VowpalWabbitMurmurWithPrefix.scala rename to vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/VowpalWabbitMurmurWithPrefix.scala index 427dbce8a9..3a7a9d1a16 100644 --- a/vw/src/main/scala/com/microsoft/ml/spark/vw/VowpalWabbitMurmurWithPrefix.scala +++ 
b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/VowpalWabbitMurmurWithPrefix.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.vw +package com.microsoft.azure.synapse.ml.vw import org.vowpalwabbit.spark.VowpalWabbitMurmur import java.nio.charset.StandardCharsets diff --git a/vw/src/main/scala/com/microsoft/ml/spark/vw/VowpalWabbitRegressor.scala b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/VowpalWabbitRegressor.scala similarity index 93% rename from vw/src/main/scala/com/microsoft/ml/spark/vw/VowpalWabbitRegressor.scala rename to vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/VowpalWabbitRegressor.scala index b4e4617b97..35c2241b34 100644 --- a/vw/src/main/scala/com/microsoft/ml/spark/vw/VowpalWabbitRegressor.scala +++ b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/VowpalWabbitRegressor.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.vw +package com.microsoft.azure.synapse.ml.vw -import com.microsoft.ml.spark.codegen.Wrappable -import com.microsoft.ml.spark.logging.BasicLogging +import com.microsoft.azure.synapse.ml.codegen.Wrappable +import com.microsoft.azure.synapse.ml.logging.BasicLogging import org.apache.spark.ml.{BaseRegressor, ComplexParamsReadable, ComplexParamsWritable} import org.apache.spark.ml.param._ import org.apache.spark.ml.util._ diff --git a/vw/src/main/scala/com/microsoft/ml/spark/vw/VowpalWabbitUtil.scala b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/VowpalWabbitUtil.scala similarity index 98% rename from vw/src/main/scala/com/microsoft/ml/spark/vw/VowpalWabbitUtil.scala rename to vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/VowpalWabbitUtil.scala index b3a3217606..467e037c96 100644 --- a/vw/src/main/scala/com/microsoft/ml/spark/vw/VowpalWabbitUtil.scala +++ b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/VowpalWabbitUtil.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.vw +package com.microsoft.azure.synapse.ml.vw import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector} import org.apache.spark.sql.Row diff --git a/vw/src/main/scala/com/microsoft/ml/spark/vw/featurizer/BooleanFeaturizer.scala b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/featurizer/BooleanFeaturizer.scala similarity index 97% rename from vw/src/main/scala/com/microsoft/ml/spark/vw/featurizer/BooleanFeaturizer.scala rename to vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/featurizer/BooleanFeaturizer.scala index 7ae43e536d..f31e21812c 100644 --- a/vw/src/main/scala/com/microsoft/ml/spark/vw/featurizer/BooleanFeaturizer.scala +++ b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/featurizer/BooleanFeaturizer.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. 
All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.vw.featurizer +package com.microsoft.azure.synapse.ml.vw.featurizer import org.apache.spark.sql.Row import org.vowpalwabbit.spark.VowpalWabbitMurmur diff --git a/vw/src/main/scala/com/microsoft/ml/spark/vw/featurizer/ElementFeaturizer.scala b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/featurizer/ElementFeaturizer.scala similarity index 87% rename from vw/src/main/scala/com/microsoft/ml/spark/vw/featurizer/ElementFeaturizer.scala rename to vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/featurizer/ElementFeaturizer.scala index 9734c67642..9162750bbc 100644 --- a/vw/src/main/scala/com/microsoft/ml/spark/vw/featurizer/ElementFeaturizer.scala +++ b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/featurizer/ElementFeaturizer.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.vw.featurizer +package com.microsoft.azure.synapse.ml.vw.featurizer import scala.collection.mutable diff --git a/vw/src/main/scala/com/microsoft/ml/spark/vw/featurizer/Featurizer.scala b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/featurizer/Featurizer.scala similarity index 88% rename from vw/src/main/scala/com/microsoft/ml/spark/vw/featurizer/Featurizer.scala rename to vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/featurizer/Featurizer.scala index deceb8ddd7..6d147b5c66 100644 --- a/vw/src/main/scala/com/microsoft/ml/spark/vw/featurizer/Featurizer.scala +++ b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/featurizer/Featurizer.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.vw.featurizer +package com.microsoft.azure.synapse.ml.vw.featurizer -import com.microsoft.ml.spark.vw.VowpalWabbitMurmurWithPrefix +import com.microsoft.azure.synapse.ml.vw.VowpalWabbitMurmurWithPrefix import org.apache.spark.sql.Row import scala.collection.mutable diff --git a/vw/src/main/scala/com/microsoft/ml/spark/vw/featurizer/MapFeaturizer.scala b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/featurizer/MapFeaturizer.scala similarity index 96% rename from vw/src/main/scala/com/microsoft/ml/spark/vw/featurizer/MapFeaturizer.scala rename to vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/featurizer/MapFeaturizer.scala index e1a2457988..b233dce0b8 100644 --- a/vw/src/main/scala/com/microsoft/ml/spark/vw/featurizer/MapFeaturizer.scala +++ b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/featurizer/MapFeaturizer.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.vw.featurizer +package com.microsoft.azure.synapse.ml.vw.featurizer import org.apache.spark.sql.Row import org.vowpalwabbit.spark.VowpalWabbitMurmur diff --git a/vw/src/main/scala/com/microsoft/ml/spark/vw/featurizer/MapStringFeaturizer.scala b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/featurizer/MapStringFeaturizer.scala similarity index 96% rename from vw/src/main/scala/com/microsoft/ml/spark/vw/featurizer/MapStringFeaturizer.scala rename to vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/featurizer/MapStringFeaturizer.scala index de4a978a2e..3538edf85d 100644 --- a/vw/src/main/scala/com/microsoft/ml/spark/vw/featurizer/MapStringFeaturizer.scala +++ b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/featurizer/MapStringFeaturizer.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.vw.featurizer +package com.microsoft.azure.synapse.ml.vw.featurizer import org.apache.spark.sql.Row diff --git a/vw/src/main/scala/com/microsoft/ml/spark/vw/featurizer/NumericFeaturizer.scala b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/featurizer/NumericFeaturizer.scala similarity index 97% rename from vw/src/main/scala/com/microsoft/ml/spark/vw/featurizer/NumericFeaturizer.scala rename to vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/featurizer/NumericFeaturizer.scala index cc56a1081b..e5f4342d9a 100644 --- a/vw/src/main/scala/com/microsoft/ml/spark/vw/featurizer/NumericFeaturizer.scala +++ b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/featurizer/NumericFeaturizer.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.vw.featurizer +package com.microsoft.azure.synapse.ml.vw.featurizer import org.apache.spark.sql.Row import org.vowpalwabbit.spark.VowpalWabbitMurmur diff --git a/vw/src/main/scala/com/microsoft/ml/spark/vw/featurizer/SeqFeaturizer.scala b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/featurizer/SeqFeaturizer.scala similarity index 96% rename from vw/src/main/scala/com/microsoft/ml/spark/vw/featurizer/SeqFeaturizer.scala rename to vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/featurizer/SeqFeaturizer.scala index 58ef585a66..db6dc5820e 100644 --- a/vw/src/main/scala/com/microsoft/ml/spark/vw/featurizer/SeqFeaturizer.scala +++ b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/featurizer/SeqFeaturizer.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.vw.featurizer +package com.microsoft.azure.synapse.ml.vw.featurizer import org.apache.spark.sql.Row diff --git a/vw/src/main/scala/com/microsoft/ml/spark/vw/featurizer/StringFeaturizer.scala b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/featurizer/StringFeaturizer.scala similarity index 96% rename from vw/src/main/scala/com/microsoft/ml/spark/vw/featurizer/StringFeaturizer.scala rename to vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/featurizer/StringFeaturizer.scala index d582141522..3b5cd2ad7a 100644 --- a/vw/src/main/scala/com/microsoft/ml/spark/vw/featurizer/StringFeaturizer.scala +++ b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/featurizer/StringFeaturizer.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.vw.featurizer +package com.microsoft.azure.synapse.ml.vw.featurizer import org.apache.spark.sql.Row diff --git a/vw/src/main/scala/com/microsoft/ml/spark/vw/featurizer/StringSplitFeaturizer.scala b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/featurizer/StringSplitFeaturizer.scala similarity index 97% rename from vw/src/main/scala/com/microsoft/ml/spark/vw/featurizer/StringSplitFeaturizer.scala rename to vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/featurizer/StringSplitFeaturizer.scala index db556e5804..58addd1290 100644 --- a/vw/src/main/scala/com/microsoft/ml/spark/vw/featurizer/StringSplitFeaturizer.scala +++ b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/featurizer/StringSplitFeaturizer.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.vw.featurizer +package com.microsoft.azure.synapse.ml.vw.featurizer import org.apache.spark.sql.Row diff --git a/vw/src/main/scala/com/microsoft/ml/spark/vw/featurizer/StructFeaturizer.scala b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/featurizer/StructFeaturizer.scala similarity index 97% rename from vw/src/main/scala/com/microsoft/ml/spark/vw/featurizer/StructFeaturizer.scala rename to vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/featurizer/StructFeaturizer.scala index 858cb723c0..a55da1e010 100644 --- a/vw/src/main/scala/com/microsoft/ml/spark/vw/featurizer/StructFeaturizer.scala +++ b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/featurizer/StructFeaturizer.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.vw.featurizer +package com.microsoft.azure.synapse.ml.vw.featurizer import org.apache.spark.sql.Row import org.vowpalwabbit.spark.VowpalWabbitMurmur diff --git a/vw/src/main/scala/com/microsoft/ml/spark/vw/featurizer/VectorFeaturizer.scala b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/featurizer/VectorFeaturizer.scala similarity index 96% rename from vw/src/main/scala/com/microsoft/ml/spark/vw/featurizer/VectorFeaturizer.scala rename to vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/featurizer/VectorFeaturizer.scala index 47b3581288..b818b4c30c 100644 --- a/vw/src/main/scala/com/microsoft/ml/spark/vw/featurizer/VectorFeaturizer.scala +++ b/vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/featurizer/VectorFeaturizer.scala @@ -1,7 +1,7 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.vw.featurizer +package com.microsoft.azure.synapse.ml.vw.featurizer import org.apache.spark.sql.Row import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector} diff --git a/vw/src/test/python/mmlsparktest/vw/__init__.py b/vw/src/test/python/synapsemltest/vw/__init__.py similarity index 100% rename from vw/src/test/python/mmlsparktest/vw/__init__.py rename to vw/src/test/python/synapsemltest/vw/__init__.py diff --git a/vw/src/test/python/mmlsparktest/vw/test_vw.py b/vw/src/test/python/synapsemltest/vw/test_vw.py similarity index 86% rename from vw/src/test/python/mmlsparktest/vw/test_vw.py rename to vw/src/test/python/synapsemltest/vw/test_vw.py index 680c070361..486c7d179e 100644 --- a/vw/src/test/python/mmlsparktest/vw/test_vw.py +++ b/vw/src/test/python/synapsemltest/vw/test_vw.py @@ -4,12 +4,12 @@ import tempfile import pyspark -from mmlspark.vw.VowpalWabbitClassifier import VowpalWabbitClassifier -from mmlspark.vw.VowpalWabbitRegressor import VowpalWabbitRegressor -from mmlspark.vw.VowpalWabbitFeaturizer import VowpalWabbitFeaturizer +from synapse.ml.vw.VowpalWabbitClassifier import VowpalWabbitClassifier +from synapse.ml.vw.VowpalWabbitRegressor import VowpalWabbitRegressor +from synapse.ml.vw.VowpalWabbitFeaturizer import VowpalWabbitFeaturizer from pyspark.sql.types import * -from mmlsparktest.spark import * +from synapsemltest.spark import * class VowpalWabbitSpec(unittest.TestCase): diff --git a/vw/src/test/python/mmlsparktest/vw/test_vw_cb.py b/vw/src/test/python/synapsemltest/vw/test_vw_cb.py similarity index 97% rename from vw/src/test/python/mmlsparktest/vw/test_vw_cb.py rename to vw/src/test/python/synapsemltest/vw/test_vw_cb.py index 007e59506f..487771ff97 100644 --- a/vw/src/test/python/mmlsparktest/vw/test_vw_cb.py +++ b/vw/src/test/python/synapsemltest/vw/test_vw_cb.py @@ -4,11 +4,11 @@ import tempfile import pyspark -from mmlsparktest.spark import * +from synapsemltest.spark import * -from mmlspark.vw 
import VowpalWabbitContextualBandit -from mmlspark.vw import VowpalWabbitFeaturizer -from mmlspark.vw import VectorZipper +from synapse.ml.vw import VowpalWabbitContextualBandit +from synapse.ml.vw import VowpalWabbitFeaturizer +from synapse.ml.vw import VectorZipper from pyspark.ml.tuning import ParamGridBuilder from pyspark.sql.types import * diff --git a/vw/src/test/scala/com/microsoft/ml/spark/vw/VWContextualBandidSpec.scala b/vw/src/test/scala/com/microsoft/azure/synapse/ml/vw/VWContextualBandidSpec.scala similarity index 98% rename from vw/src/test/scala/com/microsoft/ml/spark/vw/VWContextualBandidSpec.scala rename to vw/src/test/scala/com/microsoft/azure/synapse/ml/vw/VWContextualBandidSpec.scala index e8f372888c..1e1d14a513 100644 --- a/vw/src/test/scala/com/microsoft/ml/spark/vw/VWContextualBandidSpec.scala +++ b/vw/src/test/scala/com/microsoft/azure/synapse/ml/vw/VWContextualBandidSpec.scala @@ -1,18 +1,18 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.vw +package com.microsoft.azure.synapse.ml.vw -import com.microsoft.ml.spark.build.BuildInfo -import com.microsoft.ml.spark.core.env.FileUtilities -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.test.fuzzing.{EstimatorFuzzing, TestObject} +import com.microsoft.azure.synapse.ml.core.env.FileUtilities +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{EstimatorFuzzing, TestObject} +import com.microsoft.azure.synapse.ml.build.BuildInfo import org.apache.spark.ml.Pipeline import org.apache.spark.ml.param.ParamMap import org.apache.spark.ml.tuning.ParamGridBuilder import org.apache.spark.ml.util.MLReadable -import org.apache.spark.sql.{DataFrame, SparkSession} import org.apache.spark.sql.functions.col +import org.apache.spark.sql.{DataFrame, SparkSession} object CBDatasetHelper { def readCSV(session: SparkSession, fileName: String, fileLocation: String): DataFrame = { diff --git a/vw/src/test/scala/com/microsoft/ml/spark/vw/VerifyVectorZipper.scala b/vw/src/test/scala/com/microsoft/azure/synapse/ml/vw/VerifyVectorZipper.scala similarity index 89% rename from vw/src/test/scala/com/microsoft/ml/spark/vw/VerifyVectorZipper.scala rename to vw/src/test/scala/com/microsoft/azure/synapse/ml/vw/VerifyVectorZipper.scala index affbba9622..45365b1793 100644 --- a/vw/src/test/scala/com/microsoft/ml/spark/vw/VerifyVectorZipper.scala +++ b/vw/src/test/scala/com/microsoft/azure/synapse/ml/vw/VerifyVectorZipper.scala @@ -1,9 +1,9 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.vw +package com.microsoft.azure.synapse.ml.vw -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} import org.apache.spark.ml.util.MLReadable import org.apache.spark.sql.DataFrame diff --git a/vw/src/test/scala/com/microsoft/ml/spark/vw/VerifyVowpalWabbitClassifier.scala b/vw/src/test/scala/com/microsoft/azure/synapse/ml/vw/VerifyVowpalWabbitClassifier.scala similarity index 97% rename from vw/src/test/scala/com/microsoft/ml/spark/vw/VerifyVowpalWabbitClassifier.scala rename to vw/src/test/scala/com/microsoft/azure/synapse/ml/vw/VerifyVowpalWabbitClassifier.scala index 028cf8ab61..e33a5d5813 100644 --- a/vw/src/test/scala/com/microsoft/ml/spark/vw/VerifyVowpalWabbitClassifier.scala +++ b/vw/src/test/scala/com/microsoft/azure/synapse/ml/vw/VerifyVowpalWabbitClassifier.scala @@ -1,20 +1,19 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.vw +package com.microsoft.azure.synapse.ml.vw -import java.io.File -import java.nio.file.Files - -import com.microsoft.ml.spark.core.test.benchmarks.{Benchmarks, DatasetUtils} -import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator -import org.apache.spark.ml.util.MLReadable -import org.apache.spark.sql.{DataFrame, Dataset, Row} -import com.microsoft.ml.spark.core.test.fuzzing.{EstimatorFuzzing, TestObject} +import com.microsoft.azure.synapse.ml.core.test.benchmarks.{Benchmarks, DatasetUtils} +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{EstimatorFuzzing, TestObject} import org.apache.spark.TaskContext +import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator import org.apache.spark.ml.tuning.{CrossValidator, ParamGridBuilder} +import org.apache.spark.ml.util.MLReadable import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.functions._ +import org.apache.spark.sql.{DataFrame, Dataset, Row} + +import java.io.File class VerifyVowpalWabbitClassifier extends Benchmarks with EstimatorFuzzing[VowpalWabbitClassifier] { lazy val moduleName = "vw" diff --git a/vw/src/test/scala/com/microsoft/ml/spark/vw/VerifyVowpalWabbitFeaturizer.scala b/vw/src/test/scala/com/microsoft/azure/synapse/ml/vw/VerifyVowpalWabbitFeaturizer.scala similarity index 98% rename from vw/src/test/scala/com/microsoft/ml/spark/vw/VerifyVowpalWabbitFeaturizer.scala rename to vw/src/test/scala/com/microsoft/azure/synapse/ml/vw/VerifyVowpalWabbitFeaturizer.scala index d50ec40f0e..2b44b63a07 100644 --- a/vw/src/test/scala/com/microsoft/ml/spark/vw/VerifyVowpalWabbitFeaturizer.scala +++ b/vw/src/test/scala/com/microsoft/azure/synapse/ml/vw/VerifyVowpalWabbitFeaturizer.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.vw +package com.microsoft.azure.synapse.ml.vw -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} import org.apache.spark.ml.linalg.{SparseVector, Vector, Vectors} import org.apache.spark.ml.util.MLReadable import org.apache.spark.sql.functions._ diff --git a/vw/src/test/scala/com/microsoft/ml/spark/vw/VerifyVowpalWabbitInteractions.scala b/vw/src/test/scala/com/microsoft/azure/synapse/ml/vw/VerifyVowpalWabbitInteractions.scala similarity index 91% rename from vw/src/test/scala/com/microsoft/ml/spark/vw/VerifyVowpalWabbitInteractions.scala rename to vw/src/test/scala/com/microsoft/azure/synapse/ml/vw/VerifyVowpalWabbitInteractions.scala index 37da122f49..f15633ebfd 100644 --- a/vw/src/test/scala/com/microsoft/ml/spark/vw/VerifyVowpalWabbitInteractions.scala +++ b/vw/src/test/scala/com/microsoft/azure/synapse/ml/vw/VerifyVowpalWabbitInteractions.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.vw +package com.microsoft.azure.synapse.ml.vw -import com.microsoft.ml.spark.core.test.base.TestBase -import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} +import com.microsoft.azure.synapse.ml.core.test.base.TestBase +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} import org.apache.spark.ml.linalg.{SparseVector, Vector, Vectors} import org.apache.spark.ml.util.MLReadable diff --git a/vw/src/test/scala/com/microsoft/ml/spark/vw/VerifyVowpalWabbitMurmurWithPrefix.scala b/vw/src/test/scala/com/microsoft/azure/synapse/ml/vw/VerifyVowpalWabbitMurmurWithPrefix.scala similarity index 95% rename from vw/src/test/scala/com/microsoft/ml/spark/vw/VerifyVowpalWabbitMurmurWithPrefix.scala rename to vw/src/test/scala/com/microsoft/azure/synapse/ml/vw/VerifyVowpalWabbitMurmurWithPrefix.scala index c74308e1cd..c8088f686e 100644 --- a/vw/src/test/scala/com/microsoft/ml/spark/vw/VerifyVowpalWabbitMurmurWithPrefix.scala +++ b/vw/src/test/scala/com/microsoft/azure/synapse/ml/vw/VerifyVowpalWabbitMurmurWithPrefix.scala @@ -1,12 +1,12 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. 
-package com.microsoft.ml.spark.vw +package com.microsoft.azure.synapse.ml.vw +import com.microsoft.azure.synapse.ml.core.test.base.TestBase import org.vowpalwabbit.spark.VowpalWabbitMurmur -import java.nio.charset.StandardCharsets -import com.microsoft.ml.spark.core.test.base.TestBase +import java.nio.charset.StandardCharsets class VerifyVowpalWabbitMurmurWithPrefix extends TestBase { diff --git a/vw/src/test/scala/com/microsoft/ml/spark/vw/VerifyVowpalWabbitRegressor.scala b/vw/src/test/scala/com/microsoft/azure/synapse/ml/vw/VerifyVowpalWabbitRegressor.scala similarity index 96% rename from vw/src/test/scala/com/microsoft/ml/spark/vw/VerifyVowpalWabbitRegressor.scala rename to vw/src/test/scala/com/microsoft/azure/synapse/ml/vw/VerifyVowpalWabbitRegressor.scala index ac29147120..c9b535d33e 100644 --- a/vw/src/test/scala/com/microsoft/ml/spark/vw/VerifyVowpalWabbitRegressor.scala +++ b/vw/src/test/scala/com/microsoft/azure/synapse/ml/vw/VerifyVowpalWabbitRegressor.scala @@ -1,10 +1,10 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.vw +package com.microsoft.azure.synapse.ml.vw -import com.microsoft.ml.spark.core.test.benchmarks.{Benchmarks, DatasetUtils} -import com.microsoft.ml.spark.core.test.fuzzing.{EstimatorFuzzing, TestObject} +import com.microsoft.azure.synapse.ml.core.test.benchmarks.{Benchmarks, DatasetUtils} +import com.microsoft.azure.synapse.ml.core.test.fuzzing.{EstimatorFuzzing, TestObject} import org.apache.spark.ml.evaluation.RegressionEvaluator import org.apache.spark.ml.util.MLReadable import org.apache.spark.sql.{Column, DataFrame}