In [15]:
import sys
import os

# Make the parent of the working directory importable (presumably so the
# `ddtools` imports in the next cell resolve when the notebook is run from
# its own folder -- confirm against the repository layout).
current_dir = os.getcwd()

# The parent directory is one level above the notebook's working directory.
parent_dir = os.path.dirname(current_dir)

# Add the parent directory only once: notebook cells are frequently re-run,
# and an unconditional append would grow sys.path with duplicate entries.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

In [21]:
from ddtools.json_excel_conversion import standardize_excel, dd_json_to_excel, dd_excel_to_json, standardize_json
from ddtools.custom_cols import get_col_headers
from ddtools.fetch_table_info import initialize_data_dict, list_files, update_data_dict
from ddtools.search import search_data_dicts
import shutil

# Example Usage of ddtools' Data Dictionary Functions
This notebook contains some of the most common use cases of the ddtools module. The module also provides additional functions that may be helpful; please review the function documentation in the module code if you would like to learn more.

You must have SQL access to the specified servers/tables. These scripts use data that is not contained in this repository, and therefore the file paths and SQL tables should be replaced with what is appropriate for your use case.

Please do not commit edits to this notebook unless authorized. Instead, make a copy in your own file for your own usage. Make sure to list that file in .gitignore if you plan on committing.

## Standardizing Excel Data Dictionaries
The new standardized Excel files will be placed in the output_file path.

In [18]:
# Source folders whose files are standardized (or copied) into a sibling
# "RPT2" folder.
directories = [
    '..\\..\\data\\RPT',
]

# Collect every file found under the source directories.
files = []
for directory in directories:
    files.extend(list_files(directory))

for file in files:
    print(file)

    # Mirror the source path into the RPT2 tree.
    output_file = file.replace('RPT\\', 'RPT2\\')

    # Create the destination folder. A dedicated name is used so the
    # `directory` loop variable above is not overwritten, exist_ok=True
    # replaces the separate existence check, and the guard skips paths that
    # have no folder component (os.makedirs('') raises).
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    if '_data_dict.xlsx' in file:
        # The first dot-separated part of the file name is used as the
        # database name when looking up the custom column headers.
        names = file.split("\\")[-1].replace("_data_dict.xlsx", "").split(".")
        database_name = names[0]
        standardize_excel(
            file,
            output_file,
            make_json=False,
            find_codes=True,
            order_codes=True,
            maintain_columns=True,
            custom_col_names=get_col_headers(database_name),
            include_web_sleds_info=False,
        )
    else:
        # Anything that is not a data dictionary workbook is copied as-is.
        shutil.copy(file, output_file)

..\..\data\RPT\RDMEnrollment.RPT.uvw_Attendance_data_dict.xlsx


                                                                                                                               

## Initializing Data Dictionaries
This will create the data dictionary skeleton populated by the column names and metadata that can be pulled directly from SQL.

In [19]:
# Read in the table names, one per line. Blank lines are skipped so that an
# empty table name is never passed to initialize_data_dict.
with open('..\\..\\data\\rdm_assessments_tables.txt') as f:
    tables = [line.strip() for line in f if line.strip()]

server = 'EDU-SQLPROD01'
database = 'Assessments'
# NOTE(review): 'dbo' looks like a SQL schema name rather than a view;
# the variable name is kept as-is -- confirm the intended terminology.
view = 'dbo'
for table in tables:
    # Build the data dictionary skeleton for this table.
    data_dict = initialize_data_dict(server, database, view, table)

    # One workbook per table, grouped in a folder named after the database.
    file_name = f"..\\..\\data\\initialized\\{database}\\{database}.{view}.{table}_data_dict.xlsx"
    os.makedirs(os.path.dirname(file_name), exist_ok=True)
    dd_json_to_excel(data_dict, file_name)

Connection successful!
Connection successful!
uvw_ParticipationDetail


## Update Data Dictionaries
This will check for any new columns in the SQL table and add them to the data dictionary. Note that if you want to add new codes as well, you must also standardize the data dictionary.

In [22]:
# Folder(s) holding the data dictionaries to refresh; results are written to
# a parallel "updated" folder.
directories = ['data\\update']

# Gather every file found under the source directories.
files = []
for directory in directories:
    files.extend(list_files(directory))

server_name = 'EDU-SQLPROD01'
for file in files:
    # The file name (minus its data dictionary suffix) is split on "." and
    # read as <database>.<view>.<table>; any further parts are ignored.
    stem = file.split("\\")[-1].replace(
        "_data_dict.xlsx", "").replace("_Data_Dictionary.xlsx", "")
    name_parts = stem.split('.')
    if len(name_parts) < 3:
        raise ValueError(
            f"Cannot parse database/view/table from file name: {file}")
    database_name, view_name, table_name = name_parts[:3]

    print(table_name)
    dd = dd_excel_to_json(file)
    standardized_dd = standardize_json(dd,
                                       find_codes=True,
                                       order_codes=True)
    # NOTE(review): the return value is not used, so update_data_dict is
    # presumably expected to modify standardized_dd in place -- confirm.
    update_data_dict(server_name, database_name, view_name, table_name,
                     standardized_dd)

    # Replace only the first "update" (the source folder) so that a file
    # name that itself contains "update" is left unchanged.
    output_file = file.replace('update', 'updated', 1)

    # Make sure the destination folder exists before writing, matching how
    # the initialization cell prepares its output folder.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    dd_json_to_excel(standardized_dd, output_file)

## Search Data Dictionaries

In [34]:
# Folders whose data dictionaries are searched.
search_dirs = [
    '..\\..\\data\\EDU-SQLPROD01\\Assessments',
    '..\\..\\data\\EDU-SQLPROD01\\MARSS',
]

# Terms to look for, and the data dictionary column(s) to look in.
# NOTE(review): with search_term_op='OR' a match on either term presumably
# counts as a hit -- confirm against the ddtools.search documentation.
terms = ['EL', 'English learner']
searched_columns = ['Description']

search_results = search_data_dicts(
    directories=search_dirs,
    search_terms=terms,
    column_names=searched_columns,
    search_term_op='OR',
    match_case=True,
)

# Print each hit on its own line.
for result in search_results:
    print(result)

[EDU-SQLPROD01].[Assessments].[dbo].[Assessment_WIDA_VERIFIED_ACCESS].[timeInProgram]
[EDU-SQLPROD01].[Assessments].[dbo].[Assessment_WIDA_VERIFIED_WIDAALT].[timeInProgram]
[EDU-SQLPROD01].[Assessments].[dbo].[Assessment_WIDA_VERIFIED_WIDAALT].[StudentPerformanceELA]
[EDU-SQLPROD01].[Assessments].[dbo].[Assessment_WIDA_VERIFIED_WIDAALT].[ELServicesBilingual]
[EDU-SQLPROD01].[Assessments].[dbo].[Assessment_WIDA_VERIFIED_WIDAALT].[ELServicesConsultativeServices]
[EDU-SQLPROD01].[Assessments].[dbo].[Assessment_WIDA_VERIFIED_WIDAALT].[ELServicesCoteaching]
[EDU-SQLPROD01].[Assessments].[dbo].[Assessment_WIDA_VERIFIED_WIDAALT].[ELServicesESLClassPeriod]
[EDU-SQLPROD01].[Assessments].[dbo].[Assessment_WIDA_VERIFIED_WIDAALT].[ELServicesPulloutServices]
[EDU-SQLPROD01].[Assessments].[dbo].[Assessment_WIDA_VERIFIED_WIDAALT].[ELServicesPushinServices]
[EDU-SQLPROD01].[Assessments].[dbo].[Assessment_WIDA_VERIFIED_WIDAALT].[ELServicesShelteredorContentBasedInstruction]
[EDU-SQLPROD01].[Assessments