Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31,448 changes: 15,724 additions & 15,724 deletions __fixtures__/generated/generated.json

Large diffs are not rendered by default.

1,594 changes: 1,594 additions & 0 deletions __fixtures__/generated/upstream-diff.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion __fixtures__/kitchen-sink/original/copy.sql
Original file line number Diff line number Diff line change
@@ -1 +1 @@
COPY (SELECT 1) TO 'test.csv' WITH (FORMAT 'CSV');
COPY (SELECT 1) TO '/test.csv' WITH (FORMAT CSV);
1 change: 1 addition & 0 deletions packages/deparser/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
"fixtures:ast": "ts-node scripts/make-fixtures-ast.ts",
"fixtures:sql": "ts-node scripts/make-fixtures-sql.ts",
"fixtures": "ts-node scripts/make-fixtures.ts",
"fixtures:upstream-diff": "ts-node scripts/make-upstream-diff.ts",
"lint": "eslint . --fix",
"test": "jest",
"test:watch": "jest --watch"
Expand Down
37 changes: 19 additions & 18 deletions packages/deparser/scripts/make-fixtures.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
import * as path from 'path';
import * as fs from 'fs';
import { sync as globSync } from 'glob';
import { parse, deparse } from 'libpg-query';
import { ParseResult, RawStmt } from '@pgsql/types';
import { parse } from 'libpg-query';
import { splitStatements, generateStatementKey } from '../src/utils/statement-splitter';

const FIXTURE_DIR = path.join(__dirname, '../../../__fixtures__/kitchen-sink');
const OUT_DIR = path.join(__dirname, '../../../__fixtures__/generated');
Expand All @@ -19,32 +19,33 @@ ensureDir(OUT_DIR);
const fixtures = globSync(path.join(FIXTURE_DIR, '**/*.sql'));

async function main() {
// Collect deparsed SQL in a single JSON
// Collect original SQL in a single JSON
const results: Record<string, string> = {};

for (const fixturePath of fixtures) {
const relPath = path.relative(FIXTURE_DIR, fixturePath);
const sql = fs.readFileSync(fixturePath, 'utf-8');
let parseResult: ParseResult;

try {
parseResult = await parse(sql);
const statements = await splitStatements(sql);

for (const stmt of statements) {
const key = generateStatementKey(relPath, stmt.index);

// Validate that the extracted statement parses correctly on its own
try {
await parse(stmt.statement);
results[key] = stmt.statement;
} catch (parseErr: any) {
console.error(`Failed to parse extracted statement ${key}:`, parseErr.message);
console.error(`Statement: ${stmt.statement.substring(0, 200)}${stmt.statement.length > 200 ? '...' : ''}`);
// Skip this statement - don't add it to results
}
}
} catch (err: any) {
console.error(`Failed to parse ${relPath}:`, err);
continue;
}

for (let idx = 0; idx < parseResult.stmts.length; idx++) {
const stmt = parseResult.stmts[idx];
let deparsedSql: string;
try {
deparsedSql = await deparse({ version: 170000, stmts: [stmt] });
} catch (err: any) {
console.error(`Failed to deparse statement ${idx + 1} in ${relPath}:`, err);
continue;
}
const key = `${relPath.replace(/\.sql$/, '')}-${idx + 1}.sql`;
results[key] = deparsedSql;
}
}

// Write aggregated JSON to output file
Expand Down
105 changes: 105 additions & 0 deletions packages/deparser/scripts/make-upstream-diff.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
#!/usr/bin/env ts-node
import * as path from 'path';
import * as fs from 'fs';
import { sync as globSync } from 'glob';
import { parse, deparse } from 'libpg-query';
import { ParseResult, RawStmt } from '@pgsql/types';
import { deparse as ourDeparse } from '../src';
import { cleanTree } from '../src/utils';
import { splitStatements, generateStatementKey } from '../src/utils/statement-splitter';

// Kitchen-sink SQL fixtures live three directory levels above this script
const FIXTURE_DIR = path.join(__dirname, '../../../__fixtures__/kitchen-sink');
// Aggregated JSON output is written into the shared generated-fixtures dir
const OUT_DIR = path.join(__dirname, '../../../__fixtures__/generated');

/** Create `dir` (including any missing parent directories) if it does not already exist. */
function ensureDir(dir: string): void {
  if (fs.existsSync(dir)) return;
  fs.mkdirSync(dir, { recursive: true });
}

// Make sure the output directory exists before any writes below
ensureDir(OUT_DIR);

// All .sql fixture files under the kitchen-sink tree, recursively
const fixtures = globSync(path.join(FIXTURE_DIR, '**/*.sql'));

/**
 * Builds __fixtures__/generated/upstream-diff.json.
 *
 * For every statement in every kitchen-sink fixture, this round-trips the
 * statement through both the upstream (libpg-query) deparser and our deparser,
 * re-parses each result, and compares the cleaned ASTs against the cleaned AST
 * of the original statement. Only statements where either deparser diverges
 * from the original (or where our deparser's output fails to parse) are
 * recorded in the output JSON.
 */
async function main() {
  // Collect only files with differences between deparsers
  const results: Record<string, { upstream?: string; deparsed?: string; original: string }> = {};

  for (const fixturePath of fixtures) {
    const relPath = path.relative(FIXTURE_DIR, fixturePath);
    const sql = fs.readFileSync(fixturePath, 'utf-8');

    try {
      const statements = await splitStatements(sql);
      // Parse the whole file ONCE up front. Previously this parse ran inside
      // the per-statement loop, re-parsing the entire file for every single
      // statement (O(statements * file size)) for an identical result.
      const parseResult = await parse(sql);

      for (const stmt of statements) {
        // The RawStmt for this statement — the input both deparsers consume
        const rawStmt = parseResult.stmts[stmt.index];

        // Source of truth: cleanTree(parse(original statement))
        let sourceOfTruthAst: any;
        try {
          const originalParsed = await parse(stmt.statement);
          sourceOfTruthAst = cleanTree(originalParsed.stmts?.[0]?.stmt);
        } catch (err: any) {
          console.error(`Failed to parse original SQL for statement ${stmt.index + 1} in ${relPath}:`, err);
          continue;
        }

        // Upstream deparse, then re-parse its output to get a comparable AST
        let upstreamSql: string | undefined;
        let upstreamAst: any;
        try {
          upstreamSql = await deparse({ version: 170000, stmts: [rawStmt] });
          const upstreamParsed = await parse(upstreamSql);
          upstreamAst = cleanTree(upstreamParsed.stmts?.[0]?.stmt);
        } catch (err: any) {
          console.error(`Failed to process upstream deparse for statement ${stmt.index + 1} in ${relPath}:`, err);
          continue;
        }

        // Our deparse, then re-parse its output to get a comparable AST
        let ourDeparsedSql: string | undefined;
        let ourAst: any;
        let ourDeparseError = false;
        try {
          ourDeparsedSql = ourDeparse(rawStmt.stmt);
          const ourParsed = await parse(ourDeparsedSql);
          ourAst = cleanTree(ourParsed.stmts?.[0]?.stmt);
        } catch (err: any) {
          console.error(`Failed to process our deparse for statement ${stmt.index + 1} in ${relPath}:`, err);
          ourDeparseError = true;
          // Keep ourDeparsedSql so we can still show it in results even if it doesn't parse
        }

        // Compare each deparser's AST to the source of truth only
        const upstreamMatches = JSON.stringify(upstreamAst) === JSON.stringify(sourceOfTruthAst);
        const ourMatches = ourAst ? JSON.stringify(ourAst) === JSON.stringify(sourceOfTruthAst) : false;

        // Only include if either deparser differs from original OR our deparser failed to parse
        if (!upstreamMatches || !ourMatches || ourDeparseError) {
          const key = generateStatementKey(relPath, stmt.index);
          results[key] = {
            original: stmt.statement,
            // Show upstream only if it differs from original
            ...(!upstreamMatches && upstreamSql && { upstream: upstreamSql }),
            // Show our deparser if it differs from original OR if it failed to parse (both indicate issues)
            ...((!ourMatches || ourDeparseError) && ourDeparsedSql && { deparsed: ourDeparsedSql })
          };
        }
      }
    } catch (err: any) {
      console.error(`Failed to parse ${relPath}:`, err);
      continue;
    }
  }

  // Write aggregated JSON to output file
  const outputFile = path.join(OUT_DIR, 'upstream-diff.json');
  fs.writeFileSync(outputFile, JSON.stringify(results, null, 2));
  console.log(`Wrote JSON to ${outputFile}`);
}

main().catch(console.error);
15 changes: 14 additions & 1 deletion packages/deparser/src/deparser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5461,7 +5461,20 @@ export class Deparser implements DeparserVisitor {
: argValue;
return `${node.defname} = ${quotedValue}`;
}


// Handle CopyStmt WITH clause options - uppercase format without quotes
if (context.parentNodeTypes.includes('CopyStmt')) {
if (node.defname === 'format' && node.arg && this.getNodeType(node.arg) === 'String') {
const stringData = this.getNodeData(node.arg);
return `FORMAT ${stringData.sval.toUpperCase()}`;
}
// Handle other COPY options with uppercase defname
if (node.arg) {
return `${node.defname.toUpperCase()} ${argValue}`;
}
return node.defname.toUpperCase();
}

// Handle CREATE OPERATOR and CREATE TYPE context
if (context.parentNodeTypes.includes('DefineStmt')) {
const preservedName = this.preserveOperatorDefElemCase(node.defname);
Expand Down
177 changes: 177 additions & 0 deletions packages/deparser/src/utils/statement-splitter.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
import { parse } from 'libpg-query';
import { ParseResult, RawStmt } from '@pgsql/types';

/** A single statement extracted from a larger SQL source file. */
export interface ExtractedStatement {
  /** The extracted statement text (leading comment lines stripped by default). */
  statement: string;
  /** Zero-based index of the statement within the source file's statement list. */
  index: number;
  /** Statement start offset as reported by the parser (byte-based), when available. */
  location?: number;
  /** Statement length as reported by the parser (byte-based), when available. */
  length?: number;
}

/** Options controlling statement extraction behavior. */
export interface StatementSplitterOptions {
  /** Skip validation for malformed statements */
  skipValidation?: boolean;
  /** Strip leading comments from extracted statements */
  stripComments?: boolean;
}

/**
* Extracts a single statement from SQL using PostgreSQL's location information.
* Handles Unicode properly by using byte positions instead of character positions.
*/
/**
 * Extracts a single statement from SQL using PostgreSQL's location information.
 * Handles Unicode properly by slicing on byte positions (stmt_location and
 * stmt_len are byte offsets) rather than JavaScript character positions.
 *
 * @param originalSQL - The full SQL source the statement was parsed from.
 * @param rawStmt - Parser output carrying optional stmt_location / stmt_len.
 * @param isFirst - True for the first statement; enables fallbacks when the
 *   parser omits location information for it.
 * @param options - Extraction behavior flags (see StatementSplitterOptions).
 * @returns The extracted statement text, or null when the statement cannot be
 *   located or fails the basic sanity validation.
 */
export function extractStatement(
  originalSQL: string,
  rawStmt: RawStmt,
  isFirst: boolean = false,
  options: StatementSplitterOptions = {}
): string | null {
  let extracted: string | null = null;

  // Work on a byte buffer so multi-byte UTF-8 characters earlier in the input
  // do not shift the offsets (the parser reports bytes, not characters).
  const sqlBuffer = Buffer.from(originalSQL, 'utf8');

  if (rawStmt.stmt_location !== undefined && rawStmt.stmt_len !== undefined) {
    // Both offsets present: take exactly [location, location + len)
    const startByte = rawStmt.stmt_location;
    const endByte = rawStmt.stmt_location + rawStmt.stmt_len;
    // subarray (Buffer#slice is deprecated), then decode back to a string
    extracted = sqlBuffer.subarray(startByte, endByte).toString('utf8');
  } else if (rawStmt.stmt_location !== undefined && rawStmt.stmt_len === undefined) {
    // Location but no length: extract from location to end of input
    extracted = sqlBuffer.subarray(rawStmt.stmt_location).toString('utf8');
  } else if (isFirst && rawStmt.stmt_len !== undefined) {
    // First statement with a length but no location: assume it starts at byte 0
    extracted = sqlBuffer.subarray(0, rawStmt.stmt_len).toString('utf8');
  } else if (isFirst && rawStmt.stmt_location === undefined && rawStmt.stmt_len === undefined) {
    // First statement with no offsets at all: the entire input is the statement
    extracted = originalSQL;
  }

  if (extracted && options.stripComments !== false) {
    // Drop leading blank lines and `--` comment-only lines so the statement
    // begins at its first line of actual SQL.
    const lines = extracted.split('\n');
    let startLineIndex = 0;

    for (let i = 0; i < lines.length; i++) {
      const line = lines[i].trim();
      // Skip empty lines and comment-only lines
      if (line === '' || line.startsWith('--')) {
        continue;
      }
      startLineIndex = i;
      break;
    }
    // NOTE(review): if EVERY line is blank or comment-only, startLineIndex
    // stays 0 and the (trimmed) comment text is returned as-is — confirm that
    // is the intended behavior for comment-only statements.

    // Rejoin from the first SQL line and trim surrounding whitespace.
    if (startLineIndex < lines.length) {
      extracted = lines.slice(startLineIndex).join('\n').trim();
    }
  }

  // Cheap sanity validation unless explicitly skipped.
  if (extracted && !options.skipValidation) {
    const trimmed = extracted.trim();

    // Only reject the most obvious truncation artifacts at the BEGINNING of
    // the statement, plus empty/whitespace-only extractions.
    if (
      trimmed.startsWith('ELECT ') || // Missing S from SELECT
      trimmed.startsWith('REATE ') || // Missing C from CREATE
      trimmed.startsWith('NSERT ') || // Missing I from INSERT
      trimmed.length === 0
    ) {
      return null; // Invalid extraction, skip this statement
    }
  }

  return extracted;
}

/**
* Splits SQL text into individual statements using PostgreSQL's parser.
* Handles Unicode characters properly and provides detailed location information.
*/
/**
 * Splits SQL text into individual statements using PostgreSQL's parser.
 * Handles Unicode characters properly and provides detailed location
 * information for each extracted statement.
 */
export async function splitStatements(
  sql: string,
  options: StatementSplitterOptions = {}
): Promise<ExtractedStatement[]> {
  const parseResult: ParseResult = await parse(sql);
  const collected: ExtractedStatement[] = [];

  // No statements parsed (e.g. empty input) — nothing to extract.
  for (const [position, rawStmt] of (parseResult.stmts ?? []).entries()) {
    const text = extractStatement(sql, rawStmt, position === 0, options);
    if (text) {
      collected.push({
        statement: text,
        index: position,
        location: rawStmt.stmt_location,
        length: rawStmt.stmt_len
      });
    }
  }

  return collected;
}

/**
* Utility to generate statement keys for fixtures
*/
/**
 * Utility to generate statement keys for fixtures: strips a trailing `.sql`
 * from the fixture's relative path, then appends the 1-based statement
 * ordinal and the requested extension (default: `sql`).
 */
export function generateStatementKey(
  relativePath: string,
  statementIndex: number,
  extension: string = 'sql'
): string {
  const basePath = relativePath.replace(/\.sql$/, '');
  const ordinal = statementIndex + 1;
  return `${basePath}-${ordinal}.${extension}`;
}

/**
* Test utility to compare byte vs character extraction for debugging Unicode issues
*/
/**
 * Test utility to compare byte vs character extraction for debugging Unicode
 * issues.
 *
 * @returns Both extractions plus size metadata. Note: `unicodeChars` is the
 *   number of EXTRA bytes contributed by multi-byte UTF-8 characters
 *   (byteLength - charLength), not a count of Unicode characters — it is 0
 *   for pure-ASCII input.
 */
export function debugUnicodeExtraction(sql: string, rawStmt: RawStmt): {
  characterBased: string;
  byteBased: string;
  matches: boolean;
  unicodeChars: number;
  byteLength: number;
  charLength: number;
} {
  const charLength = sql.length;
  // Buffer.byteLength avoids allocating a whole buffer just to measure size
  const byteLength = Buffer.byteLength(sql, 'utf8');

  // Character-based extraction (the old, Unicode-unsafe way)
  let characterBased = '';
  if (rawStmt.stmt_location !== undefined && rawStmt.stmt_len !== undefined) {
    characterBased = sql.substring(rawStmt.stmt_location, rawStmt.stmt_location + rawStmt.stmt_len);
  }

  // Byte-based extraction (the new way, matching PostgreSQL's byte offsets)
  let byteBased = '';
  if (rawStmt.stmt_location !== undefined && rawStmt.stmt_len !== undefined) {
    const sqlBuffer = Buffer.from(sql, 'utf8');
    // subarray instead of the deprecated Buffer#slice
    byteBased = sqlBuffer
      .subarray(rawStmt.stmt_location, rawStmt.stmt_location + rawStmt.stmt_len)
      .toString('utf8');
  }

  return {
    characterBased,
    byteBased,
    matches: characterBased === byteBased,
    unicodeChars: byteLength - charLength,
    byteLength,
    charLength
  };
}