superannotateai · Jun 21, 2023 · Jun 19, 2023 · Jun 21, 2023
diff --git a/.DS_Store b/.DS_Store
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,160 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.pyc
+*.py[cod]
+*$py.class
+*mypy*
+*.egg*
+*DS_Store*
+envs/
+dist/
+*.egg-info
+tasks/
+*.dump
+*rdb
+MnesiaCore.rabbit*
+kubernetes/config
+#static/
+# C extensions
+nohup.out
+*.so
+minikube-darwin-amd64
+
+# Distribution / packaging
+.Python
+src/staticfiles/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+ml_models/
+
+# Translations
+*.mo
+*.pot
+*.env
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+sample_large_files
+sample_tif_files
+
+# Vscode settings folder
+.vscode/
+
+# python virtual env for SDK testing
+venv_*
+pylint.html
+_mask.c
+tags
+debug_*
+*.idea
+*.DS_Store
+htmlcov
diff --git a/...ate-databricks-connector/Dockerfile.spark → Dockerfile.spark b/...ate-databricks-connector/Dockerfile.spark → Dockerfile.spark
diff --git a/...tate-databricks-connector/Dockerfile.test → Dockerfile.test b/...tate-databricks-connector/Dockerfile.test → Dockerfile.test
@@ -1,18 +1,17 @@
 FROM spark_docker_v2
 
-# Add neccessary data
-ADD superannotate_databricks_connector superannotate_databricks_connector
-ADD setup.py setup.py
-ADD tests tests
-
-# Build the package
-RUN python setup.py sdist bdist_wheel
-
-# Add the distribution
-ADD dist dist
+# Add the sources plus the packaging metadata that pyproject.toml references
+COPY pyproject.toml README.md LICENSE.txt ./
+COPY src src
+
+# Build the package from the project root so the artifacts land in ./dist
+RUN python -m build
 
 # Install the package
 RUN pip install dist/*.whl
 
+# Add necessary data
+ADD tests tests
+
 # Run unit tests
 RUN python -m unittest discover tests
diff --git a/LICENSE → LICENSE.txt b/LICENSE → LICENSE.txt
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,50 @@
+[project]
+
+name = "superannotate_databricks_connector"  # Required
+
+version = "0.0.1dev1"
+
+description = "Custom functions to work with SuperAnnotate in Databricks"
+
+readme = "README.md"
+
+requires-python = ">=3.8"
+
+license = { file = "LICENSE.txt" }
+
+keywords = ["superannotate_databricks_connector", "superannotate"]
+
+authors = [
+    { name = "Leo Lindén", email = "leo@superannotate.com" }
+]
+
+maintainers = [
+    { name = "Leo Lindén", email = "leo@superannotate.com" }
+]
+
+classifiers = [# Optional
+    "Intended Audience :: Developers",
+    "Topic :: Software Development :: Build Tools",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3 :: Only",
+]
+
+dependencies = [
+    "pyspark~=3.4.0"
+]
+
+[project.optional-dependencies]
+test = ["pytest"]
+
+[project.urls]  # Optional
+"Homepage" = "https://github.com/superannotateai/superannotate-databricks-connector"
+"Bug Reports" = "https://github.com/superannotateai/superannotate-databricks-connector/issues"
+"Source" = "https://github.com/superannotateai/superannotate-databricks-connector/"
+
+[build-system]
+requires = ["setuptools>=61.0.0", "wheel"]  # PEP 621 [project] metadata is only read by setuptools >= 61
+build-backend = "setuptools.build_meta"
diff --git a/src/superannotate_databricks_connector/__init__.py b/src/superannotate_databricks_connector/__init__.py
@@ -0,0 +1,10 @@
+from superannotate_databricks_connector import schemas
+from superannotate_databricks_connector import text
+from superannotate_databricks_connector import vector
+
+
+__all__ = [
+    'schemas',
+    'text',
+    'vector'
+]
diff --git a/...annotate_databricks_connector/__init__.py → ..._databricks_connector/schemas/__init__.py b/...annotate_databricks_connector/__init__.py → ..._databricks_connector/schemas/__init__.py
diff --git a/...e_databricks_connector/schemas/comment.py → ...e_databricks_connector/schemas/comment.py b/...e_databricks_connector/schemas/comment.py → ...e_databricks_connector/schemas/comment.py
@@ -13,22 +13,22 @@ def get_comment_schema():
     comment_schema = StructType([
         StructField("correspondence",
                     ArrayType(MapType(
-                            StringType(),
-                            StringType())),
+                        StringType(),
+                        StringType())),
                     True),
         StructField("x", FloatType(), True),
         StructField("y", FloatType(), True),
         StructField("resolved", BooleanType(), True),
         StructField("createdAt", StringType(), True),
         StructField("createdBy", MapType(
-                                StringType(),
-                                StringType()),
+            StringType(),
+            StringType()),
                     True),
         StructField("creationType", StringType(), True),
         StructField("updatedAt", StringType(), True),
         StructField("updatedBy", MapType(
-                                    StringType(),
-                                    StringType()),
+            StringType(),
+            StringType()),
                     True)
     ])
-    return comment_schema
+    return comment_schema
diff --git a/...tabricks_connector/schemas/text_schema.py → ...tabricks_connector/schemas/text_schema.py b/...tabricks_connector/schemas/text_schema.py → ...tabricks_connector/schemas/text_schema.py
diff --git a/...bricks_connector/schemas/vector_schema.py → ...bricks_connector/schemas/vector_schema.py b/...bricks_connector/schemas/vector_schema.py → ...bricks_connector/schemas/vector_schema.py
diff --git a/...uperannotate_databricks_connector/text.py → ...uperannotate_databricks_connector/text.py b/...uperannotate_databricks_connector/text.py → ...uperannotate_databricks_connector/text.py
@@ -1,5 +1,5 @@
 from datetime import datetime
-from .schemas.text_schema import get_text_schema
+from superannotate_databricks_connector.schemas.text_schema import get_text_schema
 
 
 def convert_dates(instance):
@@ -45,7 +45,7 @@ def get_text_dataframe(annotations, spark):
             "status": item["metadata"]["status"],
             "annotatorEmail": item["metadata"]["annotatorEmail"],
             "qaEmail": item["metadata"]["qaEmail"],
-            "entities": [convert_dates(instance) for instance 
+            "entities": [convert_dates(instance) for instance
                          in item["instances"] if instance["type"] == "entity"],
             "tags": [convert_dates(instance) for instance in item["instances"]
                      if instance["type"] == "tag"]

diff --git a/...erannotate_databricks_connector/vector.py → ...erannotate_databricks_connector/vector.py b/...erannotate_databricks_connector/vector.py → ...erannotate_databricks_connector/vector.py
@@ -1,4 +1,4 @@
-from .schemas.vector_schema import get_vector_schema
+from superannotate_databricks_connector.schemas.vector_schema import get_vector_schema
 
 
 def process_comment(comment):

diff --git a/superannotate-databricks-connector/.DS_Store b/superannotate-databricks-connector/.DS_Store
diff --git a/superannotate-databricks-connector/build/lib/superannotate-databricks-connector/__init__.py b/superannotate-databricks-connector/build/lib/superannotate-databricks-connector/__init__.py
diff --git a/superannotate-databricks-connector/build/lib/superannotate-databricks-connector/io.py b/superannotate-databricks-connector/build/lib/superannotate-databricks-connector/io.py