Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix #2946: Presto Ingestion issues fix #2986

Merged
merged 5 commits into from
Feb 26, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
53 changes: 53 additions & 0 deletions ingestion/src/metadata/ingestion/source/presto.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,68 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import re
from urllib.parse import quote_plus

from pyhive.sqlalchemy_presto import PrestoDialect, _type_map
from sqlalchemy import types, util
from sqlalchemy.engine import reflection

from metadata.generated.schema.entity.services.databaseService import (
DatabaseServiceType,
)
from metadata.ingestion.ometa.openmetadata_rest import MetadataServerConfig
from metadata.ingestion.source.sql_source import SQLSource
from metadata.ingestion.source.sql_source_common import SQLConnectionConfig

# Register extra Presto type names in PyHive's name -> SQLAlchemy type lookup.
# Entries given here replace any existing entry stored under the same key.
_type_map.update(
    char=types.CHAR,
    decimal=types.Float,
    time=types.TIME,
    varchar=types.VARCHAR,
)


# Leading identifier of a Presto type string, e.g. "decimal" in "decimal(10,1)".
_BASE_TYPE_PATTERN = re.compile(r"^\w+")
# A single integer argument in parentheses, e.g. "10" in "varchar(10)".
# Multi-argument forms such as "(10,1)" intentionally do not match.
_SINGLE_ARG_PATTERN = re.compile(r"\(([\d]+)\)")


@reflection.cache
def get_columns(self, connection, table_name, schema=None, **kw):
    """Describe the columns of a table as a list of column dictionaries.

    Each row returned by ``self._get_table_columns`` is converted to a dict
    with the keys ``name``, ``type``, ``nullable`` and ``default``.

    Args:
        connection: Connection handed to ``self._get_table_columns``.
        table_name: Name of the table whose columns are described.
        schema: Optional schema name, passed through unchanged.
        **kw: Extra keyword arguments; not used by this function's body.

    Returns:
        A list with one dict per column. ``type`` is a class or instance
        looked up in ``_type_map``, or ``types.NullType`` (after a warning)
        when the type name is not in the map.
    """
    result = []
    for row in self._get_table_columns(connection, table_name, schema):
        # Take out the more detailed type information
        # e.g. 'map<int,int>' -> 'map'
        #      'decimal(10,1)' -> decimal
        base_match = _BASE_TYPE_PATTERN.search(row.Type)
        # A type string without a leading identifier is treated as unknown
        # instead of raising AttributeError on a missing match.
        col_type = base_match.group(0) if base_match else row.Type

        coltype = _type_map.get(col_type)
        if coltype is None:
            util.warn(
                "Did not recognize type '%s' of column '%s'" % (col_type, row.Column)
            )
            coltype = types.NullType
        else:
            arg_match = _SINGLE_ARG_PATTERN.search(row.Type)
            if arg_match:
                if issubclass(coltype, (types.DateTime, types.Time)):
                    # For temporal types the number in parentheses is a
                    # precision, while the first positional parameter of the
                    # SQLAlchemy type is `timezone`. Derive that flag from the
                    # type text instead of passing the precision into it.
                    coltype = coltype(timezone="with time zone" in row.Type.lower())
                else:
                    coltype = coltype(int(arg_match.group(1)))

        result.append(
            {
                "name": row.Column,
                "type": coltype,
                # newer Presto no longer includes this column
                "nullable": getattr(row, "Null", True),
                "default": None,
            }
        )
    return result


# Replace PrestoDialect's get_columns with the get_columns function defined
# in this module, so the dialect uses the extended type handling above.
PrestoDialect.get_columns = get_columns


class PrestoConfig(SQLConnectionConfig):
host_port = "localhost:8080"
Expand Down