Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file removed .DS_Store
Binary file not shown.
160 changes: 160 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.pyc
*.py[cod]
*$py.class
*mypy*
*.egg*
*DS_Store*
envs/
dist/
*.egg-info
tasks/
*.dump
*rdb
MnesiaCore.rabbit*
kubernetes/config
#static/
# C extensions
nohup.out
*.so
minikube-darwin-amd64

# Distribution / packaging
.Python
src/staticfiles/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
ml_models/

# Translations
*.mo
*.pot
*.env
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

sample_large_files
sample_tif_files

# Vscode settings folder
.vscode/

# python virtual env for SDK testing
venv_*
pylint.html
_mask.c
tags
debug_*
*.idea
*.DS_Store
htmlcov
File renamed without changes.
12 changes: 6 additions & 6 deletions ...tate-databricks-connector/Dockerfile.test → Dockerfile.test
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
FROM spark_docker_v2

# Add necessary data
ADD superannotate_databricks_connector superannotate_databricks_connector
ADD setup.py setup.py
ADD tests tests

# Build the package
RUN python setup.py sdist bdist_wheel

# Add the distribution
ADD dist dist
COPY src src

RUN python -m build src

# Install the package
RUN pip install dist/*.whl

# Add necessary data
ADD tests tests

# Run unit tests
RUN python -m unittest discover tests
File renamed without changes.
50 changes: 50 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
[project]

name = "superannotate_databricks_connector" # Required

version = "0.0.1dev1"

description = "Custom functions to work with SuperAnnotate in Databricks"

readme = "README.md"

requires-python = ">=3.8"

license = { file = "LICENSE.txt" }

keywords = ["superannotate_databricks_connector", "superannotate"]

authors = [
{ name = "Leo Lindén", email = "leo@superannotate.com" }
]

maintainers = [
{ name = "Leo Lindén", email = "leo@superannotate.com" }
]

classifiers = [# Optional
"Intended Audience :: Developers",
"Topic :: Software Development :: Build Tools",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3 :: Only",
]

dependencies = [
"pyspark~=3.4.0"
]

[project.optional-dependencies]
test = ["pytest"]

[project.urls] # Optional
"Homepage" = "https://github.com/superannotateai/superannotate-databricks-connector"
"Bug Reports" = "https://github.com/superannotateai/superannotate-databricks-connector/issues"
"Source" = "https://github.com/superannotateai/superannotate-databricks-connector/"

[build-system]
# The [project] table in this file (PEP 621 metadata) is only understood by
# setuptools >= 61.0.0; older releases ignore it and would build a package
# without the declared name, version and dependencies.
requires = ["setuptools>=61.0.0", "wheel"]
build-backend = "setuptools.build_meta"
10 changes: 10 additions & 0 deletions src/superannotate_databricks_connector/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from superannotate_databricks_connector import schemas
from superannotate_databricks_connector import text
from superannotate_databricks_connector import vector


__all__ = [
'schemas',
'text',
'vector'
]
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,22 @@ def get_comment_schema():
comment_schema = StructType([
StructField("correspondence",
ArrayType(MapType(
StringType(),
StringType())),
StringType(),
StringType())),
True),
StructField("x", FloatType(), True),
StructField("y", FloatType(), True),
StructField("resolved", BooleanType(), True),
StructField("createdAt", StringType(), True),
StructField("createdBy", MapType(
StringType(),
StringType()),
StringType(),
StringType()),
True),
StructField("creationType", StringType(), True),
StructField("updatedAt", StringType(), True),
StructField("updatedBy", MapType(
StringType(),
StringType()),
StringType(),
StringType()),
True)
])
return comment_schema
return comment_schema
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from datetime import datetime
from .schemas.text_schema import get_text_schema
from superannotate_databricks_connector.schemas.text_schema import get_text_schema


def convert_dates(instance):
Expand Down Expand Up @@ -45,7 +45,7 @@ def get_text_dataframe(annotations, spark):
"status": item["metadata"]["status"],
"annotatorEmail": item["metadata"]["annotatorEmail"],
"qaEmail": item["metadata"]["qaEmail"],
"entities": [convert_dates(instance) for instance
"entities": [convert_dates(instance) for instance
in item["instances"] if instance["type"] == "entity"],
"tags": [convert_dates(instance) for instance in item["instances"]
if instance["type"] == "tag"]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .schemas.vector_schema import get_vector_schema
from superannotate_databricks_connector.schemas.vector_schema import get_vector_schema


def process_comment(comment):
Expand Down
Binary file removed superannotate-databricks-connector/.DS_Store
Binary file not shown.

This file was deleted.

This file was deleted.

Loading