In [0]:
%run ../parse_sql

In [0]:
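# Commented-out variant of the sample query: same CTEs and UNION ALL as the
# active query in the next cell, but without the WHERE ... IN filter on the
# first SELECT.
# query_string = """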
# WITH
# -- First CTE: Calculate total salary and employee count by department
# DeptStats AS (
#     SELECT
#         DepartmentID,
#         SUM(Salary) AS TotalDeptSalary,
#         COUNT(*) AS NumEmployees,
#         AVG(Salary) AS AvgDeptSalary
#     FROM
#         Employees
#     GROUP BY
#         DepartmentID
# ),
# -- Second CTE: Count completed projects by employee and get latest project completion year
# EmpProjects AS (
#     SELECT
#         EmployeeID,
#         COUNT(ProjectID) AS ProjectsCompleted,
#         MAX(CompletedDate) AS LastProjectDate,
#         YEAR(MAX(CompletedDate)) AS LastProjectYear
#     FROM
#         Projects
#     WHERE
#         Status = 'Completed'
#     GROUP BY
#         EmployeeID
# )

# -- Main query with UNION ALL
# SELECT
#     e.EmployeeID,
#     UPPER(e.Name) AS Name, -- String function
#     d.Name AS Department,
#     -- CASE with subquery and function: Check if employee's salary is above department average
#     CASE
#         WHEN e.Salary > (
#             SELECT AVG(Salary)
#             FROM Employees
#             WHERE DepartmentID = e.DepartmentID
#         ) THEN CONCAT('Above Average (', CAST(e.Salary AS VARCHAR), ')')
#         ELSE 'Average or Below'
#     END AS SalaryStatus,
#     ep.ProjectsCompleted,
#     -- Date function: Extract hire year and month
#     YEAR(e.HireDate) AS HireYear,
#     MONTH(e.HireDate) AS HireMonth,
#     -- Use COALESCE to handle NULLs in LastProjectYear
#     COALESCE(ep.LastProjectYear, 'N/A') AS LastProjectYear
# FROM
#     Employees e
#     JOIN Departments d ON e.DepartmentID = d.DepartmentID
#     LEFT JOIN EmpProjects ep ON e.EmployeeID = ep.EmployeeID
# UNION ALL
# SELECT
#     NULL AS EmployeeID,
#     NULL AS Name,
#     d.Name AS Department,
#     CONCAT('Department Total: ', CAST(ds.TotalDeptSalary AS VARCHAR)) AS SalaryStatus,
#     ds.NumEmployees AS ProjectsCompleted,
#     NULL AS HireYear,
#     NULL AS HireMonth,
#     NULL AS LastProjectYear
# FROM
#     DeptStats ds
#     JOIN Departments d ON ds.DepartmentID = d.DepartmentID;
# """

# print_full_queries(query_string, extract_columns_flag=True)

In [0]:
# Sample SQL text used as input for the parsing helpers called in this notebook.
# It combines two CTEs (DeptStats, EmpProjects), a correlated scalar subquery
# inside a CASE expression, a WHERE ... IN subquery that itself contains a
# correlated subquery, and a UNION ALL branch that adds department-total rows.
query_string = """
WITH
DeptStats AS (
    SELECT
        DepartmentID,
        SUM(Salary) AS TotalDeptSalary,
        COUNT(*) AS NumEmployees,
        AVG(Salary) AS AvgDeptSalary
    FROM
        Employees
    GROUP BY
        DepartmentID
),
EmpProjects AS (
    SELECT
        EmployeeID,
        COUNT(ProjectID) AS ProjectsCompleted,
        MAX(CompletedDate) AS LastProjectDate,
        YEAR(MAX(CompletedDate)) AS LastProjectYear
    FROM
        Projects
    WHERE
        Status = 'Completed'
    GROUP BY
        EmployeeID
)

SELECT
    e.EmployeeID,
    UPPER(e.Name) AS Name,
    d.Name AS Department,
    CASE
        WHEN e.Salary > (
            SELECT AVG(Salary)
            FROM Employees
            WHERE DepartmentID = e.DepartmentID
        ) THEN CONCAT('Above Average (', CAST(e.Salary AS VARCHAR), ')')
        ELSE 'Average or Below'
    END AS SalaryStatus,
    ep.ProjectsCompleted,
    YEAR(e.HireDate) AS HireYear,
    MONTH(e.HireDate) AS HireMonth,
    COALESCE(ep.LastProjectYear, 'N/A') AS LastProjectYear
FROM
    Employees e
    JOIN Departments d ON e.DepartmentID = d.DepartmentID
    LEFT JOIN EmpProjects ep ON e.EmployeeID = ep.EmployeeID
WHERE
    e.EmployeeID IN (
        -- Subquery returning EmployeeIDs that meet some criteria
        SELECT
            e2.EmployeeID
        FROM
            Employees e2
            JOIN Departments d2 ON e2.DepartmentID = d2.DepartmentID
            LEFT JOIN EmpProjects ep2 ON e2.EmployeeID = ep2.EmployeeID
        WHERE
            -- Example filter: employees with salary above their department average
            e2.Salary > (
                SELECT AVG(Salary)
                FROM Employees
                WHERE DepartmentID = e2.DepartmentID
            )
    )
UNION ALL
SELECT
    NULL AS EmployeeID,
    NULL AS Name,
    d.Name AS Department,
    CONCAT('Department Total: ', CAST(ds.TotalDeptSalary AS VARCHAR)) AS SalaryStatus,
    ds.NumEmployees AS ProjectsCompleted,
    NULL AS HireYear,
    NULL AS HireMonth,
    NULL AS LastProjectYear
FROM
    DeptStats ds
    JOIN Departments d ON ds.DepartmentID = d.DepartmentID;

"""

# Pass the raw query text to print_full_queries with column extraction enabled.
# NOTE(review): print_full_queries is not defined in the visible cells -
# presumably it is provided by the `%run ../parse_sql` cell; confirm what it prints.
print_full_queries(query_string, extract_columns_flag=True)

In [0]:
# Split the sample query into a DataFrame (with column extraction enabled),
# then show only the name / original / columns fields of the result.
subquery_df = get_split_sql_as_dataframe(
    query_string,
    extract_columns_flag=True,
)
display(
    subquery_df.select("name", "original", "columns")
)

In [0]:
# Run the conversion helper on the sample query, one keyword argument per line
# so the endpoint, flags and target table are easy to scan and tweak.
converted_df = convert_and_get_dataframe(
    query_string,
    endpoint_name="databricks-claude-3-7-sonnet",
    extract_columns_flag=True,
    test_mode=True,
    target_table="users.paul_signorelli.sql_parsing_log",
)
# Optional inspection of the conversion result (left disabled):
# display(
#     converted_df.select("name", "original", "columns", "converted", "converted_columns", "response_error", "status")
# )

In [0]:
%sql
-- Inspect the table that was passed as target_table to convert_and_get_dataframe.
SELECT *
FROM users.paul_signorelli.sql_parsing_log;

In [0]:
# Assemble the final query from the conversion result, passing the same
# target table that was given to the conversion step.
assembled_df = assemble_final_query(
    converted_df,
    target_table="users.paul_signorelli.sql_parsing_log",
)

In [0]:
%sql
-- Inspect the table that was passed as target_table to assemble_final_query.
SELECT *
FROM users.paul_signorelli.sql_parsing_log;

In [0]:
# Take the main entry from the assembled result, prettify it, and print it.
prettified_value = prettify_final(
    get_main(assembled_df)
)
print(prettified_value)