In [1]:
import os
import sys
import pandas as pd
sys.path.append("../ixf2json/")
import ixf2json

import pyodbc
import json
import sqlalchemy as sa

In [2]:
from IPython.core.display import display, HTML, Markdown
display(HTML("<style>.container { width:95% !important; }</style>"))

In [3]:
# # Uncomment and run these shell commands to download and extract the sample database files
# ! wget ftp://ftp.software.ibm.com/software/data/sample/GSDB_DB2_LUW_ZOS_v2r3.zip -P ../ 
# ! unzip ../GSDB_DB2_LUW_ZOS_v2r3.zip -d ../

In [4]:
os.listdir("..")

['example_notebooks',
 'json_data',
 'README.md',
 '.gitignore',
 'ixf2json',
 '.ipynb_checkpoints',
 'DB2',
 'environment.yaml',
 '.git',
 'cp4d-mortgage-artifacts',
 'creds']

In [4]:
# os.listdir("../json_data/")

In [22]:
log = {
    "json_conversion_success": [],
    "file_name_errors": [], 
    "file_conversion_errors": [],
    "file_extension_errors": [],
    "json_read_errors": [],
    "sql_insert_errors": []
    }


In [5]:
log = {
    "json_conversion_success": [],
    "file_name_errors": [], 
    "file_conversion_errors": [],
    "file_extension_errors": [],
    "json_read_errors": [],
    "sql_insert_errors": []
    }

def convert_ixf():
    for file in sorted(os.listdir("../DB2/data")):
        file_list = file.split(".")
        if len(file_list) > 2:
            print("file name error: ", file)
            log["file_name_errors"].append(file)
        else:
            filename = file_list[0]
            extension = file_list[-1]
            if extension != "ixf":
                print("file extension error: ", file)
                log["file_extension_errors"].append(file)
            else:
                try:
                    ixf2json.convert(f'../DB2/data/{file}', f'../json_data/{filename}.json')
                    log["json_conversion_success"].append(file)
                except Exception as e:
                    log["file_conversion_errors"].append({"file": file, "error_mssg": str(e)})
                    pass
        
    with open("log.json", "w") as f:
        json.dump(log, f)

In [6]:
convert_ixf()

file extension error:  EXPORT.out
file extension error:  GOSALES_VERSION.txt
file extension error:  db2move.lst
file extension error:  gsdb_constraints.sql
file extension error:  gsdb_readme.txt
file name error:  tab110a.001.xml
file name error:  tab9a.001.lob


In [15]:
with open('log.json', 'r') as f:
     log = json.load(f)

In [16]:
log.keys()

dict_keys(['json_conversion_success', 'file_name_errors', 'file_conversion_errors', 'file_extension_errors', 'json_read_errors', 'sql_insert_errors'])

In [17]:
log["file_name_errors"]

[]

In [18]:
len(log["json_conversion_success"])

0

In [19]:
log["file_conversion_errors"]

[]

In [20]:
json_tables = os.listdir("../json_data/")

In [13]:
ixf_tables = os.listdir("../DB2/data")

In [14]:
len(ixf_tables)

158

In [15]:
len(json_tables)

151

In [16]:
for record in log["file_conversion_errors"]:
    file_lst = record["file"].split(".")
    extension = file_lst[-1]

    if extension == "ixf":
        filename = file_lst[0]
        print(filename)
        with open(f"../json_data/{filename}.json", "r") as f:
            json_obj = json.load(f)
        print(filename, len(json_obj))

## Parsing list that maps filenames to table/schema names

In [5]:
ref_lst = []

schema_set = set()

with open("../DB2/data/db2move.lst", "r") as f:
    for line in f.readlines():
        lst = line.split("!")
        schema, table = lst[1].split(".")
        schema = schema.replace('"', '' ).strip()
        table = table.replace('"', '' ).strip()
        file = lst[2]
        filename = lst[2].split(".")[0]
        result = dict(
            schema=schema,
            table=table,
            file=file,
            filename=filename
        )
        schema_set.add(schema)
        ref_lst.append(result)

In [6]:
ref_df = pd.DataFrame(ref_lst)

In [7]:
ref_df.shape

(151, 4)

In [8]:
ref_df.head()

Unnamed: 0,schema,table,file,filename
0,GOSALESRT,ACTIVITY_STATUS_LOOKUP,tab1.ixf,tab1
1,GOSALES,BRANCH,tab2.ixf,tab2
2,GOSALES,CONVERSION_RATE,tab3.ixf,tab3
3,GOSALES,COUNTRY,tab4.ixf,tab4
4,GOSALES,CURRENCY_LOOKUP,tab5.ixf,tab5


In [9]:
json_tables = os.listdir("../json_data/")

In [10]:
json_tables.sort(key=lambda x: int(x.strip("tab").strip(".json")))

In [11]:
len(json_tables)

151

In [12]:
schema_set

{'GOSALES', 'GOSALESCT', 'GOSALESDW', 'GOSALESHR', 'GOSALESMR', 'GOSALESRT'}

In [13]:
ref_df

Unnamed: 0,schema,table,file,filename
0,GOSALESRT,ACTIVITY_STATUS_LOOKUP,tab1.ixf,tab1
1,GOSALES,BRANCH,tab2.ixf,tab2
2,GOSALES,CONVERSION_RATE,tab3.ixf,tab3
3,GOSALES,COUNTRY,tab4.ixf,tab4
4,GOSALES,CURRENCY_LOOKUP,tab5.ixf,tab5
...,...,...,...,...
146,GOSALES,TIME_QUARTER_LOOKUP,tab147.ixf,tab147
147,GOSALESHR,TRAINING,tab148.ixf,tab148
148,GOSALESHR,TRAINING_DETAILS,tab149.ixf,tab149
149,GOSALES,XGOREV,tab150.ixf,tab150


## Read in JSONs into pandas, display and record metadata, log errors

In [23]:
ref_df["col_count"] = pd.NA
ref_df["row_count"] = pd.NA

for json_table in json_tables:
    filename = json_table.split(".")[0]
#     display(Markdown(f"### {filename}"))
    try:
        df = pd.read_json(f"../json_data/{json_table}", lines=False)
        row_ct, col_ct = df.shape
#         display(Markdown(f"#### Size: {df_shape}"))
        ref_df.loc[ref_df["filename"] == filename, ["row_count"]] = row_ct
        ref_df.loc[ref_df["filename"] == filename, ["col_count"]] = col_ct

    except:
        display(Markdown(f"#### Error reading file {json_table}"))
        log["json_read_errors"].append(json_table)
        pass

#### Error reading file tab110.json

In [24]:
ref_df.sort_values("row_count", ascending=False).head(10)

Unnamed: 0,schema,table,file,filename,col_count,row_count
140,GOSALESDW,SLS_SALES_ORDER_DIM,tab141.ixf,tab141,4,446023
87,GOSALES,ORDER_DETAILS,tab88.ixf,tab88,9,446023
139,GOSALESDW,SLS_SALES_FACT,tab140.ixf,tab140,17,446023
141,GOSALESDW,SLS_SALES_TARG_FACT,tab142.ixf,tab142,8,233625
128,GOSALES,SALES_TARGET,tab129.ixf,tab129,9,233625
79,GOSALESDW,MRK_PRODUCT_SURVEY_FACT,tab80.ixf,tab80,6,165074
101,GOSALESMR,PRODUCT_SURVEY_RESULTS,tab102.ixf,tab102,6,165074
59,GOSALESDW,FIN_FINANCE_FACT,tab60.ixf,tab60,6,164132
97,GOSALES,PRODUCT_FORECAST,tab98.ixf,tab98,7,129096
19,GOSALESDW,DIST_PRODUCT_FORECAST_FACT,tab20.ixf,tab20,7,129096


In [21]:
bad_jsons = set(log["json_read_errors"])

In [22]:
bad_jsons

{'tab110.json'}

## Insert data into SQL DB

### Make sql connection
You will need to change the connection details in `creds.json` in order to connect to your own SQL instance.

In [27]:
! ls ../

[34mDB2[m[m                     [34mcreds[m[m                   [34mixf2json[m[m
README.md               environment.yaml        [34mjson_data[m[m
[34mcp4d-mortgage-artifacts[m[m [34mexample_notebooks[m[m


In [45]:
with open("../creds/creds.json", "r") as f:
    all_creds = json.load(f)

all_creds.keys()

creds = all_creds["ms-sqlserver-openshift"]

In [35]:
creds = dict(
    username="sa",
    password="P@ssw0rd",
    host="127.0.0.1",
    port=1433,
    db="tempdb"
)

In [46]:
drivers = pyodbc.drivers()
driver = drivers[0]

with open("../creds/creds.json", "r") as f:
    all_creds = json.load(f)

# creds = all_creds["ms-sqlserver"]
locals().update(creds)  # Creating local variables from the JSON entries for this database

connxn_string = f"mssql+pyodbc://{username}:{password}@{host}:{port}/{db}?driver={driver}"

In [47]:
connxn_string

'mssql+pyodbc://sa:P@ssw0rd@mssql-service-mssql.apps.test-dbs.ocp.csplab.local:31433/tempdb?driver=ODBC Driver 17 for SQL Server'

In [48]:
# import sqlalchemy_utils as sa_utils
# sa_utils.functions.drop_database(connxn_string)

In [49]:
engine = sa.create_engine(
    connxn_string, 
    connect_args={'autocommit':True},  # This is necessary for some reason.
    fast_executemany=True  # This provides a substantial speed-up for query execution.
)

In [50]:
insp = sa.inspect(engine)


In [51]:
db_list = insp.get_schema_names()
print(db_list)

['db_accessadmin', 'db_backupoperator', 'db_datareader', 'db_datawriter', 'db_ddladmin', 'db_denydatareader', 'db_denydatawriter', 'db_owner', 'db_securityadmin', 'dbo', 'guest', 'INFORMATION_SCHEMA', 'sys']


In [52]:
with engine.connect() as con:
    con.execute("USE master;") 
    con.execute("CREATE DATABASE GSDB;")
    con.execute("USE GSDB;")

In [53]:
# Create Schemata
with engine.connect() as con:
    for schema in schema_set:  
        con.execute(f"CREATE SCHEMA {schema};")

### Insert data using SQL Alchemy and Pandas

In [54]:
def insert_tables():
    log["sql_insert_errors"] = []
    for json_table in json_tables:
        # read in JSON into pandas, display metadata
        filename = json_table.split(".")[0]
        display(Markdown(f"### {filename}"))
        if json_table not in bad_jsons:
            df = pd.read_json(f"../json_data/{json_table}")
        
        num_cols = df.shape[1]  
        tsql_chunksize = 2097 // num_cols  # limit based on sp_prepexec parameter count
        tsql_chunksize = 1000 if tsql_chunksize > 1000 else tsql_chunksize  # cap at 1000

        # Lookup table name and schema name from reference df
        ref_row = ref_df.query("filename == @filename")
        schema = ref_row["schema"].values[0]
        table = ref_row["table"].values[0]
        print(f"{schema}.{table}")
        print(df.shape)
        
        type_mapping = {col : sa.types.NVARCHAR for col in df.select_dtypes("O")}
        
        # Write df to sql
        try:
            df.to_sql(
                con=engine,
                name=table, 
                schema=schema,
                if_exists="replace",
                method="multi",
                index=False,
                dtype=type_mapping,
                chunksize=tsql_chunksize
                     )
        except Exception as e:
            display(Markdown("# ERROR"))
            log["sql_insert_errors"].append({"json_table": json_table, "error_mssg": str(e)})
            pass

        print("*" * 20)

    with open("log.json", "w") as f:
            json.dump(log, f)

In [55]:
insert_tables()

### tab83

GOSALESDW.MRK_PROMOTION_FACT
(11034, 13)
********************


### tab101

GOSALES.PRODUCT_SIZE_LOOKUP
(55, 24)
********************


### tab140

GOSALESDW.SLS_SALES_FACT
(446023, 17)
********************


### tab95

GOSALES.PRODUCT
(274, 10)
********************


### tab117

GOSALESRT.RETAILER
(562, 5)
********************


### tab4

GOSALES.COUNTRY
(21, 30)
********************


### tab56

GOSALESDW.FIN_ACCOUNT_CLASS_LOOKUP
(5, 29)
********************


### tab40

GOSALESDW.EMP_RANKING_FACT
(1897, 6)
********************


### tab17

GOSALESCT.CUST_PROFESSION
(156, 3)
********************


### tab60

GOSALESDW.FIN_FINANCE_FACT
(164132, 6)
********************


### tab37

GOSALESDW.EMP_POSITION_LOOKUP
(57, 24)
********************


### tab21

GOSALESDW.DIST_RETURNED_ITEMS_FACT
(10249, 10)
********************


### tab76

GOSALESDW.MRK_ACTIVITY_STATUS_DIM
(2, 24)
********************


### tab99

GOSALES.PRODUCT_LINE
(5, 24)
********************


### tab8

GOSALESCT.CUST_CRDT_CHECK
(900, 3)
********************


### tab137

GOSALESDW.SLS_PRODUCT_SIZE_LOOKUP
(55, 24)
********************


### tab121

GOSALESRT.RETAILER_SITE_MB
(847, 9)
********************


### tab120

GOSALESRT.RETAILER_SITE
(847, 9)
********************


### tab136

GOSALESDW.SLS_PRODUCT_LOOKUP
(6302, 4)
********************


### tab9

GOSALESCT.CUST_CUSTOMER
(31255, 22)
********************


### tab77

GOSALESDW.MRK_BUNDLE_GROUP_LOOKUP
(15, 24)
********************


### tab98

GOSALES.PRODUCT_FORECAST
(129096, 7)
********************


### tab20

GOSALESDW.DIST_PRODUCT_FORECAST_FACT
(129096, 7)
********************


### tab36

GOSALESDW.EMP_POSITION_DIM
(57, 9)
********************


### tab61

GOSALESDW.FIN_SUBM_CURRENCY_LOOKUP
(7, 24)
********************


### tab16

GOSALESCT.CUST_PRICE
(8742, 11)
********************


### tab41

GOSALESDW.EMP_RECRUITMENT_DIM
(14, 49)
********************


### tab57

GOSALESDW.FIN_ACCOUNT_DIM
(242, 24)
********************


### tab5

GOSALES.CURRENCY_LOOKUP
(21, 24)
********************


### tab116

GOSALESHR.RECRUITMENT_TYPE
(7, 24)
********************


### tab94

GOSALESHR.POSITION_SUMMARY
(15050, 9)
********************


### tab141

GOSALESDW.SLS_SALES_ORDER_DIM
(446023, 4)
********************


### tab100

GOSALES.PRODUCT_NAME_LOOKUP
(6302, 4)
********************


### tab82

GOSALESDW.MRK_PROMOTION_DIM
(112, 27)
********************


### tab66

GOSALESCT.GO_CRDT_METHOD
(6, 3)
********************


### tab89

GOSALES.ORDER_HEADER
(53267, 10)
********************


### tab31

GOSALESDW.EMP_EMPLOYEE_DIM
(972, 50)
********************


### tab27

GOSALESHR.EMPLOYEE_SUMMARY
(24233, 3)
********************


### tab70

GOSALESDW.GO_REGION_DIM
(21, 54)
********************


### tab131

GOSALESDW.SLS_ORDER_METHOD_DIM
(7, 25)
********************


### tab127

GOSALES.RETURN_REASON
(5, 24)
********************


### tab85

GOSALESDW.MRK_RTL_SURVEY_DIM
(9, 25)
********************


### tab107

GOSALESMR.PROMOTION_BUNDLE_GROUP
(15, 24)
********************


### tab150

GOSALES.XGOREV
(17, 9)
********************


### tab146

GOSALES.TIME_DIMENSION
(1465, 50)
********************


### tab93

GOSALESHR.POSITION_LOOKUP
(45, 24)
********************


### tab111

GOSALESCT.PTNR_CONTACT
(3, 21)
********************


### tab2

GOSALES.BRANCH
(29, 13)
********************


### tab50

GOSALESDW.EMP_TRAINING_DIM
(42, 26)
********************


### tab46

GOSALESDW.EMP_SURVEY_FACT
(5725, 5)
********************


### tab11

GOSALESCT.CUST_INTEREST_LOOKUP
(338, 3)
********************


### tab10

GOSALESCT.CUST_INTEREST
(31255, 3)
********************


### tab47

GOSALESDW.EMP_SURVEY_TARG_FACT
(20, 2)
********************


### tab51

GOSALESDW.EMP_TRAINING_FACT
(4465, 7)
********************


### tab3

GOSALES.CONVERSION_RATE
(624, 1)
********************


### tab110

GOSALESCT.PTNR_ACTIVITY
(624, 1)
********************


### tab92

GOSALESHR.POSITION_DEPARTMENT
(45, 5)
********************


### tab147

GOSALES.TIME_QUARTER_LOOKUP
(20, 24)
********************


### tab151

GOSALESDW.XGOREV
(28, 9)
********************


### tab106

GOSALESMR.PROMOTIONS
(112, 26)
********************


### tab84

GOSALESDW.MRK_PROMOTION_PLAN_FACT
(8652, 11)
********************


### tab126

GOSALES.RETURNED_ITEM
(10249, 9)
********************


### tab130

GOSALESHR.SATISFACTION_INDEX
(5, 24)
********************


### tab71

GOSALESCT.GO_SALES_TAX
(94, 3)
********************


### tab26

GOSALESHR.EMPLOYEE_HISTORY
(972, 12)
********************


### tab30

GOSALESHR.EMPLOYEE_SURVEY_TOPIC
(5, 24)
********************


### tab67

GOSALESDW.GO_GENDER_LOOKUP
(2, 23)
********************


### tab88

GOSALES.ORDER_DETAILS
(446023, 9)
********************


### tab48

GOSALESDW.EMP_SURVEY_TOPIC_DIM
(5, 25)
********************


### tab125

GOSALESRT.RETAILER_TYPE
(8, 24)
********************


### tab133

GOSALESDW.SLS_PRODUCT_COLOR_LOOKUP
(27, 24)
********************


### tab72

GOSALESDW.GO_SATISFACTION_DIM
(5, 25)
********************


### tab25

GOSALESHR.EMPLOYEE_EXPENSE_PLAN
(37317, 5)
********************


### tab148

GOSALESHR.TRAINING
(42, 25)
********************


### tab33

GOSALESDW.EMP_EXPENSE_PLAN_FACT
(30150, 5)
********************


### tab64

GOSALESHR.GENDER_LOOKUP
(2, 23)
********************


### tab109

GOSALESMR.PROMOTION_PLAN
(8652, 12)
********************


### tab13

GOSALESCT.CUST_ORDER_DETAIL
(60252, 9)
********************


### tab129

GOSALES.SALES_TARGET
(233625, 9)
********************


### tab44

GOSALESDW.EMP_SUCCESSION_STATUS_DIM
(5, 25)
********************


### tab52

GOSALES.EURO_CONVERSION
(8, 1)
********************


### tab113

GOSALESHR.RANKING_RESULTS
(1898, 4)
********************


### tab91

GOSALESHR.ORGANIZATION
(123, 25)
********************


### tab144

GOSALESHR.SUCCESSOR_STATUS
(5, 24)
********************


### tab29

GOSALESHR.EMPLOYEE_SURVEY_TARGETS
(20, 3)
********************


### tab105

GOSALES.PRODUCT_TYPE
(21, 25)
********************


### tab87

GOSALESDW.MRK_RTL_SURVEY_TARG_FACT
(64, 3)
********************


### tab68

GOSALESDW.GO_ORG_DIM
(123, 7)
********************


### tab86

GOSALESDW.MRK_RTL_SURVEY_FACT
(22508, 6)
********************


### tab69

GOSALESDW.GO_ORG_NAME_LOOKUP
(123, 24)
********************


### tab104

GOSALESMR.PRODUCT_SURVEY_TOPIC
(7, 24)
********************


### tab28

GOSALESHR.EMPLOYEE_SURVEY_RESULTS
(5725, 5)
********************


### tab145

GOSALESHR.TERMINATION_LOOKUP
(6, 24)
********************


### tab90

GOSALES.ORDER_METHOD
(7, 24)
********************


### tab112

GOSALESHR.RANKING
(5, 24)
********************


### tab1

GOSALESRT.ACTIVITY_STATUS_LOOKUP
(2, 23)
********************


### tab53

GOSALESHR.EXPENSE_GROUP
(10, 24)
********************


### tab45

GOSALESDW.EMP_SUMMARY_FACT
(24233, 5)
********************


### tab128

GOSALES.SALES_REGION
(5, 24)
********************


### tab12

GOSALESCT.CUST_MARITAL_STATUS
(39, 3)
********************


### tab108

GOSALESMR.PROMOTION_CAMPAIGN
(12, 24)
********************


### tab65

GOSALESDW.GO_BRANCH_DIM
(29, 13)
********************


### tab32

GOSALESDW.EMP_EXPENSE_FACT
(127984, 7)
********************


### tab149

GOSALESHR.TRAINING_DETAILS
(4471, 4)
********************


### tab24

GOSALESHR.EMPLOYEE_EXPENSE_DETAIL
(127997, 6)
********************


### tab73

GOSALESDW.GO_TIME_DIM
(1465, 51)
********************


### tab132

GOSALESDW.SLS_PRODUCT_BRAND_LOOKUP
(28, 24)
********************


### tab124

GOSALESMR.RETAILER_SURVEY_TOPIC
(9, 24)
********************


### tab49

GOSALESDW.EMP_TERMINATION_LOOKUP
(6, 24)
********************


### tab15

GOSALESCT.CUST_ORDER_STATUS
(65, 3)
********************


### tab42

GOSALESDW.EMP_RECRUITMENT_FACT
(416, 9)
********************


### tab139

GOSALESDW.SLS_RTL_DIM
(847, 73)
********************


### tab54

GOSALESHR.EXPENSE_TYPE
(39, 27)
********************


### tab6

GOSALESCT.CUST_COUNTRY
(23, 30)
********************


### tab115

GOSALESHR.RECRUITMENT_MEDIUM
(14, 25)
********************


### tab97

GOSALES.PRODUCT_COLOR_LOOKUP
(27, 24)
********************


### tab78

GOSALESDW.MRK_CAMPAIGN_LOOKUP
(12, 24)
********************


### tab142

GOSALESDW.SLS_SALES_TARG_FACT
(233625, 8)
********************


### tab39

GOSALESDW.EMP_RANKING_DIM
(5, 25)
********************


### tab103

GOSALESMR.PRODUCT_SURVEY_TARGETS
(5824, 4)
********************


### tab81

GOSALESDW.MRK_PROD_SURVEY_TARG_FACT
(5824, 3)
********************


### tab19

GOSALESDW.DIST_INVENTORY_FACT
(53837, 10)
********************


### tab123

GOSALESMR.RETAILER_SURVEY_TARGETS
(64, 2)
********************


### tab58

GOSALESDW.FIN_ACCOUNT_NAME_LOOKUP
(242, 24)
********************


### tab135

GOSALESDW.SLS_PRODUCT_LINE_LOOKUP
(5, 24)
********************


### tab74

GOSALESDW.GO_TIME_QUARTER_LOOKUP
(20, 24)
********************


### tab119

GOSALESRT.RETAILER_CONTACT
(847, 33)
********************


### tab23

GOSALESHR.EMPLOYEE
(766, 13)
********************


### tab35

GOSALESDW.EMP_EXPENSE_UNIT_LOOKUP
(3, 24)
********************


### tab62

GOSALESDW.FIN_SUBM_DIM
(52, 28)
********************


### tab63

GOSALESDW.FIN_SUBM_TYPE_LOOKUP
(3, 24)
********************


### tab34

GOSALESDW.EMP_EXPENSE_TYPE_DIM
(39, 50)
********************


### tab22

GOSALESDW.DIST_RETURN_REASON_DIM
(5, 25)
********************


### tab118

GOSALESRT.RETAILER_ACTIVITY
(17754, 2)
********************


### tab75

GOSALES.INVENTORY_LEVELS
(53837, 8)
********************


### tab134

GOSALESDW.SLS_PRODUCT_DIM
(274, 14)
********************


### tab59

GOSALESDW.FIN_ACCOUNT_TYPE_LOOKUP
(4, 29)
********************


### tab122

GOSALESMR.RETAILER_SURVEY_RESULTS
(22508, 6)
********************


### tab18

GOSALESHR.DEPARTMENT_LOOKUP
(12, 24)
********************


### tab80

GOSALESDW.MRK_PRODUCT_SURVEY_FACT
(165074, 6)
********************


### tab102

GOSALESMR.PRODUCT_SURVEY_RESULTS
(165074, 6)
********************


### tab38

GOSALESDW.EMP_POSITION_SUMMARY_FACT
(15050, 8)
********************


### tab143

GOSALESHR.SUCCESSION_DETAILS
(182, 6)
********************


### tab96

GOSALES.PRODUCT_BRAND
(28, 24)
********************


### tab79

GOSALESDW.MRK_PRODUCT_SURVEY_DIM
(7, 25)
********************


### tab114

GOSALESHR.RECRUITMENT
(416, 8)
********************


### tab7

GOSALESCT.CUST_CRDT_CARD
(31255, 6)
********************


### tab55

GOSALESHR.EXPENSE_UNIT
(3, 24)
********************


### tab138

GOSALESDW.SLS_PRODUCT_TYPE_LOOKUP
(21, 24)
********************


### tab43

GOSALESDW.EMP_SUCCESSION_FACT
(181, 7)
********************


### tab14

GOSALESCT.CUST_ORDER_HEADER
(39389, 13)
********************


## Check to make sure it worked

In [56]:
for json_table in json_tables:
    # read in JSON into pandas, display metadata
    filename = json_table.split(".")[0]
    display(Markdown(f"#### {filename}"))
    # if json_table not in bad_jsons:
    # Lookup table name and schema name from reference df
    ref_row = ref_df.query("filename == @filename")
    schema = ref_row["schema"].values[0]
    table = ref_row["table"].values[0]
    print(f"{schema}.{table}")
    if json_table not in bad_jsons:
        df = pd.read_sql_table(table, con=engine, schema=schema)
        print(df.shape)
        display(df.head())
    print("*" * 90)

#### tab83

GOSALESDW.MRK_PROMOTION_FACT


OperationalError: (pyodbc.OperationalError) ('08S01', '[08S01] [Microsoft][ODBC Driver 17 for SQL Server]Communication link failure (0) (SQLPrepare)')
[SQL: SELECT [INFORMATION_SCHEMA].[TABLES].[TABLE_NAME] 
FROM [INFORMATION_SCHEMA].[TABLES] 
WHERE [INFORMATION_SCHEMA].[TABLES].[TABLE_SCHEMA] = CAST(? AS NVARCHAR(max)) AND [INFORMATION_SCHEMA].[TABLES].[TABLE_TYPE] = CAST(? AS NVARCHAR(max)) ORDER BY [INFORMATION_SCHEMA].[TABLES].[TABLE_NAME]]
[parameters: ('GOSALESDW', 'BASE TABLE')]
(Background on this error at: http://sqlalche.me/e/13/e3q8)

## Troubleshooting

In [61]:
log.keys()

dict_keys(['json_conversion_success', 'file_name_errors', 'file_conversion_errors', 'file_extension_errors', 'json_read_errors', 'sql_insert_errors'])

In [62]:
for record in log["sql_insert_errors"]:
    print(record["json_table"], record["error_mssg"][:200])
    print("\n")

In [63]:
log["file_conversion_errors"]

[]

In [170]:
filename = "tab129"

In [171]:
json_table = filename + ".json"

In [156]:
df = pd.read_json(f"../json_data/{json_table}")

In [78]:
df.head()

Unnamed: 0,PRODUCT_LINE_CODE,PRODUCT_LINE_EN,PRODUCT_LINE_FR,PRODUCT_LINE_DE,PRODUCT_LINE_NL,PRODUCT_LINE_SC,PRODUCT_LINE_KO,PRODUCT_LINE_JA,PRODUCT_LINE_CS,PRODUCT_LINE_HU,...,PRODUCT_LINE_FI,PRODUCT_LINE_IT,PRODUCT_LINE_NO,PRODUCT_LINE_PL,PRODUCT_LINE_RU,PRODUCT_LINE_SV,PRODUCT_LINE_ES,PRODUCT_LINE_TH,PRODUCT_LINE_MS,PRODUCT_LINE_ID
0,994,Outdoor Protection,Articles de protection,Outdoor-Schutzausrüstung,Buitensport - preventie,户外防护用品,야외 보호 장비,アウトドア用保護用品,Vybavení do přírody,Védőfelszerelés,...,Ulkoiluvarusteet,Protezione personale,Utendørs beskyttelse,Wyposażenie ochronne,Средства защиты,Skyddsartiklar,Protección aire libre,สิ่งป้องกันเมื่ออยู่กลางแจ้ง,Perlindungan Luar Bangunan,Perlindungan Luar Ruang
1,991,Camping Equipment,Matériel de camping,Campingausrüstung,Kampeerbenodigdheden,露营装备,캠핑 장비,キャンプ用品,Vybavení pro kempování,Kempingfelszerelés,...,Retkeilyvarusteet,Attrezzatura per campeggio,Campingutstyr,Ekwipunek kempingowy,Снаряжение для туризма,Campingutrustning,Equipo de acampada,อุปกรณ์ตั้งแคมป์,Kelengkapan Berkhemah,Perlengkapan Berkemah
2,995,Golf Equipment,Matériel de golf,Golfausrüstung,Golfartikelen,高尔夫球装备,골프 장비,ゴルフ用品,Golfové potřeby,Golffelszerelés,...,Golf-varusteet,Attrezzatura da golf,Golfutstyr,Ekwipunek golfowy,Снаряжение для гольфа,Golfutrustning,Equipo de golf,อุปกรณ์กอล์ฟ,Kelengkapan Golf,Perlengkapan Golf
3,993,Personal Accessories,Accessoires personnels,Accessoires,Persoonlijke accessoires,个人附件,개인 용품,個人装備,Věci osobní potřeby,Személyes kiegészítők,...,Henkilökohtaiset tarvikkeet,Accessori personali,Personlig utrustning,Akcesoria osobiste,Личные принадлежности,Personliga tillbehör,Accesorios personales,อุปกรณ์ส่วนตัว,Aksesori Diri,Aksesori pribadi
4,992,Mountaineering Equipment,Matériel de montagne,Bergsteigerausrüstung,Bergsportartikelen,登山装备,등산 장비,登山用品,Horolezecké vybavení,Hegymászó-felszerelés,...,Vuorikiipeilyvarusteet,Attrezzatura per alpinismo,Klatreutstyr,Sprzęt wspinaczkowy,Горное снаряжение,Klätterutrustning,Equipo de montañismo,อุปกรณ์ปีนเขา,Kelengkapan Mendaki Gunung,Perlengkapan Pendaki Gunung


In [174]:
df.shape

(233625, 9)

In [176]:
ref_row = ref_df.query("filename == @filename")
schema = ref_row["schema"].values[0]
display(Markdown(f"#### Schema: {schema}"))
table = ref_row["table"].values[0]
display(Markdown(f"#### Table Name: {table}"))

#### Schema: GOSALES

#### Table Name: SALES_TARGET