Skip to content

Commit

Permalink
added spark_jars_string function
Browse files Browse the repository at this point in the history
  • Loading branch information
mamonu committed Nov 2, 2021
1 parent f05b451 commit f4f7f0c
Showing 1 changed file with 17 additions and 1 deletion.
18 changes: 17 additions & 1 deletion splink/case_statements.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,28 @@ def _check_jaro_registered(spark):
warnings.warn(
"Custom string comparison functions such as jaro_winkler_sim are available in"
" Spark Or you did not pass 'spark' (the SparkSession) into 'Model' You can"
" import these functions using the scala-udf-similarity-0.0.7.jar provided with"
" import these functions using the scala-udf-similarity-0.0.9.jar provided with"
" Splink"
)
return False


def _get_spark_jars_string():
    """
    Return a message fragment telling the user how to set `spark.jars`.

    The fragment names the `.config('spark.jars', ...)` call to add to the
    SparkSession configuration, filled in with the path to
    scala-udf-similarity-0.0.9.jar inside the `jars` directory that sits
    next to the splink package's `__file__`.

    Returns:
        str: the message fragment (it ends with a trailing space so further
        text can be appended by the caller).
    """
    import os

    import splink

    # Take the directory of the package file instead of slicing a fixed
    # number of characters off `__file__`: a fixed slice is only correct
    # when the file name is exactly "__init__.py".
    package_dir = os.path.dirname(splink.__file__)
    jar_path = os.path.join(
        package_dir, "jars", "scala-udf-similarity-0.0.9.jar"
    )

    return (
        "you will need to set it by adding .config('spark.jars','"
        + jar_path
        + "') to your sparkSession configuration. "
    )


def _find_last_end_position(case_statement):
# Since we're only interested in the position, case shouldn't matter. stmt = case_statement.lower()
case_statement = case_statement.lower()
Expand Down

0 comments on commit f4f7f0c

Please sign in to comment.