In [8]:
from sqlalchemy import create_engine, inspect, text ,select, MetaData, Table, and_
from sqlalchemy.types import *
import re
from urllib.parse import quote_plus
import ast
import cx_Oracle
import pandas as pd

In [2]:
type_mappings = {
    "postgres": {
        "smallint": "SMALLINT",
        "integer": "INT",
        "int": "INT",
        "bigint": "BIGINT",
        "decimal": "DECIMAL",  
        "numeric": "DECIMAL", 
        "real": "FLOAT",
        "double precision": "DOUBLE",
        "money": "STRING",
        "char": "STRING",
        "varchar": "STRING",
        "text": "STRING",
        "bytea": "BINARY",
        "boolean": "BOOLEAN",
        "date": "DATE",
        "timestamp": "TIMESTAMP",
        "timestamp with time zone": "TIMESTAMP",
        "timestamp without time zone": "TIMESTAMP",
        "time": "STRING",
        "time with time zone": "STRING",
        "interval": "STRING",
        "json": "STRING",
        "jsonb": "STRING",
        "uuid": "STRING",
        "inet": "STRING",
        "cidr": "STRING",
        "macaddr": "STRING",
        "xml": "STRING",
        "array": "STRING",
        "point": "STRING",
        "line": "STRING",
        "lseg": "STRING",
        "box": "STRING",
        "path": "STRING",
        "polygon": "STRING",
        "circle": "STRING",
        "tsvector": "STRING",
        "tsquery": "STRING",
        "bit": "STRING",
        "bit varying": "STRING"
    },
    "oracle": {
        "number": "DECIMAL",  
        "float": "DOUBLE",
        "binary_float": "FLOAT",
        "binary_double": "DOUBLE",
        "char": "STRING",
        "varchar2": "STRING",
        "nchar": "STRING",
        "nvarchar2": "STRING",
        "clob": "STRING",
        "nclob": "STRING",
        "blob": "BINARY",
        "raw": "BINARY",
        "long": "STRING",
        "long raw": "BINARY",
        "date": "DATE",
        "timestamp": "TIMESTAMP",
        "timestamp with time zone": "TIMESTAMP",
        "timestamp with local time zone": "TIMESTAMP",
        "interval year to month": "STRING",
        "interval day to second": "STRING",
        "rowid": "STRING",
        "urowid": "STRING",
        "xmltype": "STRING",
        "anydata": "STRING",
        "bfile": "STRING",
        "boolean": "BOOLEAN"
    },
    "mysql": {
        "tinyint": "TINYINT",
        "smallint": "SMALLINT",
        "mediumint": "INT",
        "int": "INT",
        "bigint": "BIGINT",
        "decimal": "DECIMAL", 
        "numeric": "DECIMAL",  
        "float": "FLOAT",
        "double": "DOUBLE",
        "bit": "BOOLEAN",
        "char": "STRING",
        "varchar": "STRING",
        "binary": "BINARY",
        "varbinary": "BINARY",
        "blob": "BINARY",
        "tinyblob": "BINARY",
        "mediumblob": "BINARY",
        "longblob": "BINARY",
        "text": "STRING",
        "tinytext": "STRING",
        "mediumtext": "STRING",
        "longtext": "STRING",
        "json": "STRING",
        "enum": "STRING",
        "set": "STRING",
        "date": "DATE",
        "datetime": "TIMESTAMP",
        "timestamp": "TIMESTAMP",
        "time": "STRING",
        "year": "INT",
        "geometry": "STRING",
        "point": "STRING",
        "linestring": "STRING",
        "polygon": "STRING",
        "multipoint": "STRING",
        "multilinestring": "STRING",
        "multipolygon": "STRING",
        "geometrycollection": "STRING"
    },
    "sqlserver": {
        "int": "INT",
        "bigint": "BIGINT",
        "smallint": "SMALLINT",
        "tinyint": "TINYINT",
        "bit": "BOOLEAN",
        "decimal": "DECIMAL", 
        "numeric": "DECIMAL",  
        "float": "DOUBLE",
        "real": "FLOAT",
        "money": "DECIMAL(19, 4)",
        "smallmoney": "DECIMAL(10, 4)",
        "char": "STRING",
        "varchar": "STRING",
        "nchar": "STRING",
        "nvarchar": "STRING",
        "text": "STRING",
        "ntext": "STRING",
        "binary": "BINARY",
        "varbinary": "BINARY",
        "image": "BINARY",
        "datetime": "TIMESTAMP",
        "smalldatetime": "TIMESTAMP",
        "date": "DATE",
        "time": "STRING",
        "timestamp": "BINARY",
        "uniqueidentifier": "STRING",
        "xml": "STRING"
    }
}

In [87]:
with open('/Users/nachanon/projects/hive_datatype/type_mappings.txt') as f:
    data = f.read()
type_mappings = ast.literal_eval(data)

In [3]:

def get_hive_type(db_type , column_type):
   
    match = re.match(r"\w+\((\d+),\s*(\d+)\)",column_type)
    if match:
        type_name = match.group().split('(')[0]
        precision = int(match.groups()[0])
        scale = int(match.groups()[1])
        if precision > 38:
            precision = 38
        if (precision >0 and precision <= 38) and (scale >=0 and scale <= precision):
            return f"DECIMAL({precision},{scale})"
    
    if db_type in type_mappings and column_type in type_mappings[db_type]:
        return type_mappings[db_type][column_type]
    
    
    return 'STRING'  

def convert_schema_to_hive(engine, inspector, db_schema, db_type):
    
    schema = {}
    
    for table_name in inspector.get_table_names(db_schema):
        columns = []
        for column in inspector.get_columns(table_name = table_name, schema = db_schema):
            hive_type = get_hive_type(db_type,str(column['type']).lower())
            columns.append({
                'name':column['name'],
                'hive_type':hive_type,
                'source_type':str(column['type']),
                'comment':column['comment']}
              )
        schema[table_name] = columns

    return schema

def generate_sql_ddl(hive_schema, schema_name, table_name, table_comment, location = '/staging/ois' , stored_as = 'PARQUET'):
    ddl = f"CREATE EXTERNAL TABLE IF NOT EXISTS {schema_name}.{table_name.lower()} (\n"
    cols= []
    for col in hive_schema[table_name]:
        comment = f"COMMENT '{col['comment']}'" if col['comment'] else ''
        cols.append(f"{col['name']} {col['hive_type']} {comment}")
        
    ddl += "    "
    ddl += ",\n    ".join(cols)
    ddl += "\n)\n"

    ddl += f"COMMENT '{table_comment['text']}'\n" if table_comment['text'] else ''
    ddl += f"STORED AS {stored_as}\n"
    ddl += f"LOCATION '{location}'"
    
    
    return ddl


In [4]:
conn_string = "postgresql://postgres:P%40ssw0rdsit@192.168.170.13:5432/postgres"
engine = create_engine(conn_string)
inspector = inspect(engine)
 
hive_schema = convert_schema_to_hive(engine, inspector, inspector.get_schema_names()[0],'postgres')
ddl = generate_sql_ddl(hive_schema, 'ois','WaterPump',inspector.get_table_comment(table_name= 'WaterPump',schema = 'OIS'),'/staging')

In [11]:
df = pd.DataFrame(hive_schema['WaterPump'])

In [38]:
date_row = [{'source_name':'','source_type':'','target_name':'INGYER','target_type':'DECIMAL(4,0)'},
           {'source_name':'','source_type':'','target_name':'INGMTH','target_type':'DECIMAL(2,0)'},
           {'source_name':'','source_type':'','target_name':'INGDAY','target_type':'DECIMAL(2,0)'},
           {'source_name':'','source_type':'','target_name':'INGDTE','target_type':'TIMESTAMP'}]
date_df = pd.DataFrame(date_row)
excel_table = df.rename(columns = {'name':'source_name','hive_type':'target_type'})
excel_table['target_name'] = excel_table['source_name']
excel_table = excel_table[['source_name','source_type','target_name','target_type']]
excel_table = pd.concat([excel_table,date_df],ignore_index=True)

In [40]:
excel_table.to_excel('test.xlsx',sheet_name='test')

ModuleNotFoundError: No module named 'openpyxl'

In [39]:
excel_table

Unnamed: 0,source_name,source_type,target_name,target_type
0,yyyymmdd,BIGINT,yyyymmdd,BIGINT
1,office_wwcode,BIGINT,office_wwcode,BIGINT
2,office_mcode,BIGINT,office_mcode,BIGINT
3,water_type_code,TEXT,water_type_code,STRING
4,water_type_name,TEXT,water_type_name,STRING
5,water_id,BIGINT,water_id,BIGINT
6,water_name,TEXT,water_name,STRING
7,plant_id,BIGINT,plant_id,BIGINT
8,plant_name,TEXT,plant_name,STRING
9,untreated_water,DOUBLE PRECISION,untreated_water,DOUBLE


In [10]:
hive_schema['WaterPump']

[{'name': 'yyyymmdd',
  'hive_type': 'BIGINT',
  'source_type': 'BIGINT',
  'comment': None},
 {'name': 'office_wwcode',
  'hive_type': 'BIGINT',
  'source_type': 'BIGINT',
  'comment': None},
 {'name': 'office_mcode',
  'hive_type': 'BIGINT',
  'source_type': 'BIGINT',
  'comment': None},
 {'name': 'water_type_code',
  'hive_type': 'STRING',
  'source_type': 'TEXT',
  'comment': None},
 {'name': 'water_type_name',
  'hive_type': 'STRING',
  'source_type': 'TEXT',
  'comment': None},
 {'name': 'water_id',
  'hive_type': 'BIGINT',
  'source_type': 'BIGINT',
  'comment': None},
 {'name': 'water_name',
  'hive_type': 'STRING',
  'source_type': 'TEXT',
  'comment': None},
 {'name': 'plant_id',
  'hive_type': 'BIGINT',
  'source_type': 'BIGINT',
  'comment': None},
 {'name': 'plant_name',
  'hive_type': 'STRING',
  'source_type': 'TEXT',
  'comment': None},
 {'name': 'untreated_water',
  'hive_type': 'DOUBLE',
  'source_type': 'DOUBLE PRECISION',
  'comment': None},
 {'name': 'menu_water',


In [12]:
for table_name in inspector.get_table_names('OIS'):
    print(f"TABLE: {table_name}")
    for count, column in enumerate(inspector.get_columns(table_name=table_name, schema ='OIS')):
        
        print(f"NAME : {column['name']}   OLD DATA TYPE : {column['type']}   ----    HIVE DATA TYPE : {hive_schema[table_name][count]['hive_type']}")

TABLE: WaterPump
NAME : yyyymmdd   OLD DATA TYPE : BIGINT   ----    HIVE DATA TYPE : BIGINT
NAME : office_wwcode   OLD DATA TYPE : BIGINT   ----    HIVE DATA TYPE : BIGINT
NAME : office_mcode   OLD DATA TYPE : BIGINT   ----    HIVE DATA TYPE : BIGINT
NAME : water_type_code   OLD DATA TYPE : TEXT   ----    HIVE DATA TYPE : STRING
NAME : water_type_name   OLD DATA TYPE : TEXT   ----    HIVE DATA TYPE : STRING
NAME : water_id   OLD DATA TYPE : BIGINT   ----    HIVE DATA TYPE : BIGINT
NAME : water_name   OLD DATA TYPE : TEXT   ----    HIVE DATA TYPE : STRING
NAME : plant_id   OLD DATA TYPE : BIGINT   ----    HIVE DATA TYPE : BIGINT
NAME : plant_name   OLD DATA TYPE : TEXT   ----    HIVE DATA TYPE : STRING
NAME : untreated_water   OLD DATA TYPE : DOUBLE PRECISION   ----    HIVE DATA TYPE : DOUBLE
NAME : menu_water   OLD DATA TYPE : DOUBLE PRECISION   ----    HIVE DATA TYPE : DOUBLE
TABLE: WaterLevel
NAME : office_wwcode   OLD DATA TYPE : BIGINT   ----    HIVE DATA TYPE : BIGINT
NAME : offic

In [112]:
print(generate_sql_ddl(hive_schema, 'default','WaterPump',inspector.get_table_comment(table_name= 'WaterPump',schema = 'OIS'),'/user/bdaadmin'))


CREATE EXTERNAL TABLE IF NOT EXISTS default.WaterPump (
    yyyymmdd STRING ,
    office_wwcode STRING ,
    office_mcode STRING ,
    water_type_code STRING ,
    water_type_name STRING ,
    water_id STRING ,
    water_name STRING ,
    plant_id STRING ,
    plant_name STRING ,
    untreated_water STRING ,
    menu_water STRING 
)
STORED AS PARQUET
LOCATION '/user/bdaadmin';




In [128]:
schema_hive_all = {}
ddl = ''
for schema in inspector.get_schema_names():
    schema_hive_all[schema] =convert_schema_to_hive(engine, inspector, schema,'postgres')
    
for schema in schema_hive_all:
    for table in schema_hive_all[schema]:
        print(generate_sql_ddl(schema_hive_all[schema], schema, table,inspector.get_table_comment(table_name= table, schema=schema)))

CREATE EXTERNAL TABLE IF NOT EXISTS OIS.WaterPump (
    yyyymmdd BIGINT ,
    office_wwcode BIGINT ,
    office_mcode BIGINT ,
    water_type_code STRING ,
    water_type_name STRING ,
    water_id BIGINT ,
    water_name STRING ,
    plant_id BIGINT ,
    plant_name STRING ,
    untreated_water DOUBLE ,
    menu_water DOUBLE 
)
COMMENT 'k'
STORED AS PARQUET
LOCATION '/staging/ois';


CREATE EXTERNAL TABLE IF NOT EXISTS OIS.WaterLevel (
    office_wwcode BIGINT ,
    office_id BIGINT ,
    office_showname STRING ,
    plant_id BIGINT ,
    plant_name STRING ,
    water_id BIGINT ,
    water_name STRING ,
    machine_id BIGINT ,
    machine_code STRING ,
    category_code STRING ,
    category_name STRING ,
    category_style STRING ,
    group_code DOUBLE ,
    group_neme STRING ,
    untreated_date BIGINT ,
    volume DOUBLE ,
    level DOUBLE 
)
STORED AS PARQUET
LOCATION '/staging/ois';


CREATE EXTERNAL TABLE IF NOT EXISTS OIS.CustomerM09 (
    yyyymmdd BIGINT ,
    office_wwcode B

In [147]:
from urllib.parse import quote_plus
quote_plus(conn_string)

'postgresql%3A%2F%2Fpostgres%3AP%2540ssw0rdsit%40192.168.170.13%3A5432%2Ftest1'

In [237]:
# Test for DECIMAL(X,X)

In [8]:
host='localhost'
port=1521
sid='sid'
user='SYS'
password='1923'
sid = cx_Oracle.makedsn(host, port, sid=sid)

cstr = 'oracle://{user}:{password}@{sid}'.format(
    user=user,
    password=password,
    sid=sid
)

engine =  create_engine(
    cstr
)

In [9]:
conn = engine.connect()

DatabaseError: (cx_Oracle.DatabaseError) DPI-1047: Cannot locate a 64-bit Oracle Client library: "dlopen(libclntsh.dylib, 0x0001): tried: 'libclntsh.dylib' (no such file), '/System/Volumes/Preboot/Cryptexes/OSlibclntsh.dylib' (no such file), '/Users/nachanon/anaconda3/envs/hive_data/lib/libclntsh.dylib' (no such file), '/System/Volumes/Preboot/Cryptexes/OS/Users/nachanon/anaconda3/envs/hive_data/lib/libclntsh.dylib' (no such file), '/Users/nachanon/anaconda3/bin/../lib/libclntsh.dylib' (no such file), '/usr/lib/libclntsh.dylib' (no such file, not in dyld cache), 'libclntsh.dylib' (no such file)". See https://cx-oracle.readthedocs.io/en/latest/user_guide/installation.html for help
(Background on this error at: https://sqlalche.me/e/20/4xp6)

In [52]:
#from urllib.parse import quote_plus
conn_string = "postgresql://postgres:P%40ssw0rdsit@192.168.170.13:5432/test1"
engine = create_engine(conn_string)
inspector = inspect(engine)

hive_schema = convert_schema_to_hive(engine, inspector, 'public','postgres')

In [16]:
inspector.get_table_names('OIS')

['WaterPump', 'WaterLevel', 'CustomerM09']

In [135]:
inspector.get_columns(schema = 'public',table_name = 'employee2')


[{'name': 'id',
  'type': INTEGER(),
  'nullable': False,
  'default': 'nextval(\'"public".employee2_id_seq\'::regclass)',
  'autoincrement': True,
  'comment': None},
 {'name': 'name',
  'type': VARCHAR(length=50),
  'nullable': True,
  'default': None,
  'autoincrement': False,
  'comment': None},
 {'name': 'salary',
  'type': NUMERIC(precision=100, scale=2),
  'nullable': True,
  'default': None,
  'autoincrement': False,
  'comment': None}]

In [6]:
inspector.get_columns(table_name = 'employee',schema='public')

[{'name': 'id',
  'type': INTEGER(),
  'nullable': False,
  'default': 'nextval(\'"public".employee_id_seq\'::regclass)',
  'autoincrement': True,
  'comment': None},
 {'name': 'name',
  'type': VARCHAR(length=50),
  'nullable': True,
  'default': None,
  'autoincrement': False,
  'comment': None},
 {'name': 'salary',
  'type': NUMERIC(precision=100, scale=2),
  'nullable': True,
  'default': None,
  'autoincrement': False,
  'comment': None}]

In [80]:
ddl = generate_sql_ddl(hive_schema, 'default', 'employee',inspector.get_table_comment('employee'), location = '/test_ddl' , stored_as = 'PARQUET')

In [136]:
inspector.get_check_constraints(table_name = 'employee2',schema = 'public')

[]

In [146]:
inspector.get_unique_constraints('employee2')

[{'column_names': ['salary'], 'name': 'employee2_unique', 'comment': None},
 {'column_names': ['name'], 'name': 'employee_unique2', 'comment': None}]

In [19]:
inspector.get_columns(schema='OIS',table_name = 'WaterPump')

[{'name': 'yyyymmdd',
  'type': BIGINT(),
  'nullable': True,
  'default': None,
  'autoincrement': False,
  'comment': None},
 {'name': 'office_wwcode',
  'type': BIGINT(),
  'nullable': True,
  'default': None,
  'autoincrement': False,
  'comment': None},
 {'name': 'office_mcode',
  'type': BIGINT(),
  'nullable': True,
  'default': None,
  'autoincrement': False,
  'comment': None},
 {'name': 'water_type_code',
  'type': TEXT(),
  'nullable': True,
  'default': None,
  'autoincrement': False,
  'comment': None},
 {'name': 'water_type_name',
  'type': TEXT(),
  'nullable': True,
  'default': None,
  'autoincrement': False,
  'comment': None},
 {'name': 'water_id',
  'type': BIGINT(),
  'nullable': True,
  'default': None,
  'autoincrement': False,
  'comment': None},
 {'name': 'water_name',
  'type': TEXT(),
  'nullable': True,
  'default': None,
  'autoincrement': False,
  'comment': None},
 {'name': 'plant_id',
  'type': BIGINT(),
  'nullable': True,
  'default': None,
  'autoincr

In [22]:
hive_schema['WaterPump']

[{'name': 'yyyymmdd', 'hive_type': 'BIGINT', 'comment': None},
 {'name': 'office_wwcode', 'hive_type': 'BIGINT', 'comment': None},
 {'name': 'office_mcode', 'hive_type': 'BIGINT', 'comment': None},
 {'name': 'water_type_code', 'hive_type': 'STRING', 'comment': None},
 {'name': 'water_type_name', 'hive_type': 'STRING', 'comment': None},
 {'name': 'water_id', 'hive_type': 'BIGINT', 'comment': None},
 {'name': 'water_name', 'hive_type': 'STRING', 'comment': None},
 {'name': 'plant_id', 'hive_type': 'BIGINT', 'comment': None},
 {'name': 'plant_name', 'hive_type': 'STRING', 'comment': None},
 {'name': 'untreated_water', 'hive_type': 'DOUBLE', 'comment': None},
 {'name': 'menu_water', 'hive_type': 'DOUBLE', 'comment': None}]

In [31]:
!python --version

Python 3.12.4


In [33]:
!pip install thrift_sasl


Collecting thrift_sasl
  Using cached thrift_sasl-0.4.3-py2.py3-none-any.whl.metadata (1.2 kB)
Collecting pure-sasl>=0.6.2 (from thrift_sasl)
  Using cached pure_sasl-0.6.2-py3-none-any.whl
Using cached thrift_sasl-0.4.3-py2.py3-none-any.whl (8.3 kB)
Installing collected packages: pure-sasl, thrift_sasl
Successfully installed pure-sasl-0.6.2 thrift_sasl-0.4.3


In [38]:
from pyhive import hive
conn = hive.Connection(
    host='192.168.170.224', 
    port=10000, 
    username='bdaadmin',
    password='P@ssw0rdsit', 
    database='default',
    auth='LDAP'
)

In [46]:
ddl2 = re.sub(r'\s+', ' ', ddl.strip())
    
# Remove space before commas
ddl2 = re.sub(r'\s+,', ',', ddl2)

# Remove trailing semicolon and everything after it
ddl2 = re.sub(r';.*$', '', ddl2)

# Remove STORED AS and LOCATION clauses
ddl2 = re.sub(r'STORED AS.*', '', ddl2).strip()

In [57]:
def clean_ddl(ddl):
    ddl2 = re.sub(r'\s+', ' ', ddl.strip())
    
    ddl2 = re.sub(r'\s+,', ',', ddl2)
    
    ddl2 = re.sub(r';.*$', '', ddl2)
    
    ddl2 = re.sub(r'STORED AS.*', '', ddl2).strip()
    return ddl2

In [61]:
ddl2 = clean_ddl(ddl)
ddl2 += ';'

In [102]:
cursor = conn.cursor()

cursor.execute(ddl)

In [84]:
ddl.replace(';','').replace('default','ois')

"CREATE EXTERNAL TABLE IF NOT EXISTS ois.employee (\n    id INT COMMENT 'นี่คือ ID',\n    name STRING ,\n    salary DECIMAL(38,2) \n)\nCOMMENT 'table comment'\nSTORED AS PARQUET\nLOCATION '/test_ddl'"

In [11]:
match = re.match(r"\w+\((\d+),\s*(\d+)\)",'number(5,2)')

In [13]:
match.group().split('(')

['number', '5,2)']

In [20]:
#from urllib.parse import quote_plus
conn_string = "postgresql://nachanon:1923@localhost:5432/postgres"
engine = create_engine(conn_string)
inspector = inspect(engine)

hive_schema = convert_schema_to_hive(engine, inspector, 'public','employees')

In [46]:
sql_query = """
    SELECT *
    FROM employees;
"""
with engine.connect() as connection:
    result = connection.execute(text("SELECT * FROM employees;"))
    result.fetchall()

In [50]:
import pandas as pd
query  = "select * from public.employees"
df= pd.read_sql(query , con = engine)

In [51]:
df

Unnamed: 0,employee_id,first_name,last_name,email,salary,hire_date
0,1,tete,tete2,tete@hotmail.com,1923.0,2024-10-30
1,2,nachanon,aimsricharoen,nachanon@hotmail.com,1234.0,2024-10-30


In [63]:
columns = [col for col in df.columns]
 

In [74]:
values_list = []
for index, row in df.iterrows():
    for i,val in enumertae(row):
        val_tuple = (row.iloc[i],row.iloc[i],row['last_name'],row['email'],row['salary'])
    print(row.iloc[0])
    values_list.append(val_tuple)

1
2


In [26]:
hive_schema

{'test': [{'name': 'id', 'hive_type': 'STRING', 'comment': None}],
 'employees': [{'name': 'employee_id', 'hive_type': 'STRING', 'comment': None},
  {'name': 'first_name', 'hive_type': 'STRING', 'comment': None},
  {'name': 'last_name', 'hive_type': 'STRING', 'comment': None},
  {'name': 'email', 'hive_type': 'STRING', 'comment': None},
  {'name': 'salary', 'hive_type': 'DECIMAL(10,2)', 'comment': None},
  {'name': 'hire_date', 'hive_type': 'STRING', 'comment': None}],
 'testtest': [{'name': 'id', 'hive_type': 'STRING', 'comment': None},
  {'name': 'ingyer', 'hive_type': 'DECIMAL(4,0)', 'comment': None},
  {'name': 'ingmth', 'hive_type': 'DECIMAL(2,0)', 'comment': None},
  {'name': 'ingday', 'hive_type': 'DECIMAL(2,0)', 'comment': None}]}

In [21]:
inspector.get_schema_names()

['information_schema', 'public']

In [23]:
[schema for schema in inspector.get_schema_names() if schema != 'information_schema']

['public']

In [24]:
date_cols = ['timestamp','date','datetime','timestamp with time zone','timestamp without time zone','time','time with time zone']


In [27]:
l = "set mapred.reduce.tasks=-1;
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions=2048;
set hive.exec.max.dynamic.partitions.pernode=512;
set mapreduce.map.memory.mb = 3072;
set mapreduce.reduce.memory.mb = 3072;
set hive.merge.mapredfiles = true;
set hive.merge.smallfiles.avgsize=1280000000;
set hive.exec.max.created.files=200000;"

SyntaxError: unterminated string literal (detected at line 1) (3801399667.py, line 1)

In [33]:
f = 10
a = [1,2,3,4,6,7,8,9,9,9]
round(f*1/3)

3

In [34]:
a.insert(3,'\n')

In [35]:
a

[1, 2, 3, '\n', 4, 6, 7, 8, 9, 9, 9]

In [1]:
list_cols = {'ingdte':{'type':'STRING','comment':'วันเวลาที่ถ่ายโอนข้อมูลสู่ Big Data Platform'},'ingyer':{'type':'DECIMAL(4,0)','comment':'ปีที่ถ่ายโอนข้อมูลสู่ Big Data Platform'},'ingmth': {'type':'DECIMAL(2,0)','comment':'เดือนที่ถ่ายโอนข้อมูลสู่ Big Data Platform'},'ingday': {'type':'DECIMAL(2,0)','comment':'วันที่ถ่ายโอนข้อมูลสู่ Big Data Platform'}}
for col in list_cols:
    print(list_cols[col]['type'])

STRING
DECIMAL(4,0)
DECIMAL(2,0)
DECIMAL(2,0)


In [6]:
host = '192.168.170.224'
port = 1433
username = 'sa'
password = 'Password_123#'
database = 'master'
conn_string = f"mssql+pymssql://{username}:{password}@{host}:{port}/{database}"

In [7]:
conn_string

'mssql+pymssql://sa:Password_123#@192.168.170.224:1433/master'

In [8]:
engine = create_engine(conn_string)

ImportError: dlopen(/Users/nachanon/.pyenv/versions/3.11.2/lib/python3.11/site-packages/pymssql/_mssql.cpython-311-darwin.so, 0x0002): symbol not found in flat namespace '_bcp_batch'

In [9]:
conn_string = "postgresql://nachanon:1923@localhost:5432/postgres"
engine = create_engine(conn_string)
inspector = inspect(engine)
 
hive_schema = convert_schema_to_hive(engine, inspector, inspector.get_schema_names()[1],'postgres')

In [5]:
inspector.get_schema_names()

['information_schema', 'public']

In [10]:
hive_schema

{'test': [{'name': 'id',
   'hive_type': 'INT',
   'source_type': 'INTEGER',
   'comment': None}],
 'employees': [{'name': 'employee_id',
   'hive_type': 'INT',
   'source_type': 'INTEGER',
   'comment': None},
  {'name': 'first_name',
   'hive_type': 'STRING',
   'source_type': 'VARCHAR(50)',
   'comment': None},
  {'name': 'last_name',
   'hive_type': 'STRING',
   'source_type': 'VARCHAR(50)',
   'comment': None},
  {'name': 'email',
   'hive_type': 'STRING',
   'source_type': 'VARCHAR(100)',
   'comment': None},
  {'name': 'salary',
   'hive_type': 'DECIMAL(10,2)',
   'source_type': 'NUMERIC(10, 2)',
   'comment': None},
  {'name': 'hire_date',
   'hive_type': 'DATE',
   'source_type': 'DATE',
   'comment': None}],
 'testtest': [{'name': 'id',
   'hive_type': 'INT',
   'source_type': 'INTEGER',
   'comment': None},
  {'name': 'ingyer',
   'hive_type': 'DECIMAL(4,0)',
   'source_type': 'NUMERIC(4, 0)',
   'comment': None},
  {'name': 'ingmth',
   'hive_type': 'DECIMAL(2,0)',
   'sour