Skip to content
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ The following switches have different behavior in this version of `sqlcmd` compa
- To provide the value of the host name in the server certificate when using strict encryption, pass the host name with `-F`. Example: `-Ns -F myhost.domain.com`
- More information about client/server encryption negotiation can be found at <https://docs.microsoft.com/openspecs/windows_protocols/ms-tds/60f56408-0188-4cd5-8b90-25c6f2423868>
- `-u` The generated Unicode output file will have the UTF16 Little-Endian Byte-order mark (BOM) written to it.
- `-f` Specifies the code page for input and output files. Format: `codepage | i:codepage[,o:codepage] | o:codepage[,i:codepage]`. Use `65001` for UTF-8. Supported code pages include Unicode (65001, 1200, 1201), Windows (874, 1250-1258), OEM/DOS (437, 850, etc.), ISO-8859 (28591-28600, 28603-28606), CJK (932, 936, 949, 950), and EBCDIC (37, 1047, 1140). Use `--list-codepages` to see all supported code pages.
- Some behaviors that were kept to maintain compatibility with `OSQL` may be changed, such as alignment of column headers for some data types.
- All commands must fit on one line, even `EXIT`. Interactive mode will not check for open parentheses or quotes for commands and prompt for successive lines. The ODBC sqlcmd allows the query run by `EXIT(query)` to span multiple lines.
- `-i` doesn't handle a comma `,` in a file name correctly unless the file name argument is triple quoted. For example:
Expand Down
28 changes: 28 additions & 0 deletions cmd/sqlcmd/sqlcmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ type SQLCmdArguments struct {
ChangePassword string
ChangePasswordAndExit string
TraceFile string
CodePage string
ListCodePages bool
// Keep Help at the end of the list
Help bool
}
Expand Down Expand Up @@ -171,6 +173,10 @@ func (a *SQLCmdArguments) Validate(c *cobra.Command) (err error) {
err = rangeParameterError("-t", fmt.Sprint(a.QueryTimeout), 0, 65534, true)
case a.ServerCertificate != "" && !encryptConnectionAllowsTLS(a.EncryptConnection):
err = localizer.Errorf("The -J parameter requires encryption to be enabled (-N true, -N mandatory, or -N strict).")
case a.CodePage != "":
if _, parseErr := sqlcmd.ParseCodePage(a.CodePage); parseErr != nil {
err = localizer.Errorf(`'-f %s': %v`, a.CodePage, parseErr)
}
}
}
if err != nil {
Expand Down Expand Up @@ -239,6 +245,17 @@ func Execute(version string) {
listLocalServers()
os.Exit(0)
}
// List supported codepages
if args.ListCodePages {
fmt.Println(localizer.Sprintf("Supported Code Pages:"))
fmt.Println()
fmt.Printf("%-8s %-20s %s\n", "Code", "Name", "Description")
fmt.Printf("%-8s %-20s %s\n", "----", "----", "-----------")
for _, cp := range sqlcmd.SupportedCodePages() {
fmt.Printf("%-8d %-20s %s\n", cp.CodePage, cp.Name, cp.Description)
}
os.Exit(0)
}
if len(argss) > 0 {
fmt.Printf("%s'%s': Unknown command. Enter '--help' for command help.", sqlcmdErrorPrefix, argss[0])
os.Exit(1)
Expand Down Expand Up @@ -479,6 +496,8 @@ func setFlags(rootCmd *cobra.Command, args *SQLCmdArguments) {
rootCmd.Flags().BoolVarP(&args.EnableColumnEncryption, "enable-column-encryption", "g", false, localizer.Sprintf("Enable column encryption"))
rootCmd.Flags().StringVarP(&args.ChangePassword, "change-password", "z", "", localizer.Sprintf("New password"))
rootCmd.Flags().StringVarP(&args.ChangePasswordAndExit, "change-password-exit", "Z", "", localizer.Sprintf("New password and exit"))
rootCmd.Flags().StringVarP(&args.CodePage, "code-page", "f", "", localizer.Sprintf("Specifies the code page for input/output. Use 65001 for UTF-8. Format: codepage | i:codepage[,o:codepage] | o:codepage[,i:codepage]"))
rootCmd.Flags().BoolVar(&args.ListCodePages, "list-codepages", false, localizer.Sprintf("List supported code pages and exit"))
}

func setScriptVariable(v string) string {
Expand Down Expand Up @@ -813,6 +832,15 @@ func run(vars *sqlcmd.Variables, args *SQLCmdArguments) (int, error) {
defer s.StopCloseHandler()
s.UnicodeOutputFile = args.UnicodeOutputFile

// Parse and apply codepage settings
if args.CodePage != "" {
codePageSettings, err := sqlcmd.ParseCodePage(args.CodePage)
if err != nil {
return 1, localizer.Errorf("Invalid code page: %v", err)
}
s.CodePage = codePageSettings
}

if args.DisableCmd != nil {
s.Cmd.DisableSysCommands(args.errorOnBlockedCmd())
}
Expand Down
21 changes: 21 additions & 0 deletions cmd/sqlcmd/sqlcmd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,22 @@ func TestValidCommandLineToArgsConversion(t *testing.T) {
{[]string{"-N", "true", "-J", "/path/to/cert2.pem"}, func(args SQLCmdArguments) bool {
return args.EncryptConnection == "true" && args.ServerCertificate == "/path/to/cert2.pem"
}},
// Codepage flag tests
{[]string{"-f", "65001"}, func(args SQLCmdArguments) bool {
return args.CodePage == "65001"
}},
{[]string{"-f", "i:1252,o:65001"}, func(args SQLCmdArguments) bool {
return args.CodePage == "i:1252,o:65001"
}},
{[]string{"-f", "o:65001,i:1252"}, func(args SQLCmdArguments) bool {
return args.CodePage == "o:65001,i:1252"
}},
{[]string{"--code-page", "1252"}, func(args SQLCmdArguments) bool {
return args.CodePage == "1252"
}},
{[]string{"--list-codepages"}, func(args SQLCmdArguments) bool {
return args.ListCodePages
}},
}

for _, test := range commands {
Expand Down Expand Up @@ -178,6 +194,11 @@ func TestInvalidCommandLine(t *testing.T) {
{[]string{"-N", "optional", "-J", "/path/to/cert.pem"}, "The -J parameter requires encryption to be enabled (-N true, -N mandatory, or -N strict)."},
{[]string{"-N", "disable", "-J", "/path/to/cert.pem"}, "The -J parameter requires encryption to be enabled (-N true, -N mandatory, or -N strict)."},
{[]string{"-N", "strict", "-F", "myserver.domain.com", "-J", "/path/to/cert.pem"}, "The -F and the -J options are mutually exclusive."},
// Codepage validation tests
{[]string{"-f", "invalid"}, `'-f invalid': invalid codepage: invalid`},
{[]string{"-f", "99999"}, `'-f 99999': unsupported codepage 99999`},
{[]string{"-f", "i:invalid"}, `'-f i:invalid': invalid input codepage: i:invalid`},
{[]string{"-f", "x:1252"}, `'-f x:1252': invalid codepage: x:1252`},
}

for _, test := range commands {
Expand Down
203 changes: 203 additions & 0 deletions pkg/sqlcmd/codepage.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

package sqlcmd

import (
"sort"
"strconv"
"strings"

"github.com/microsoft/go-sqlcmd/internal/localizer"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/charmap"
"golang.org/x/text/encoding/japanese"
"golang.org/x/text/encoding/korean"
"golang.org/x/text/encoding/simplifiedchinese"
"golang.org/x/text/encoding/traditionalchinese"
"golang.org/x/text/encoding/unicode"
)

// codepageEntry describes one supported codepage: the encoding used to
// convert text, plus a short name and a human-readable description.
//
// codepageRegistry fills these entries with positional (unkeyed) literals, so
// the order of the fields below must not change.
type codepageEntry struct {
encoding encoding.Encoding // nil for UTF-8 (Go's native encoding)
// name is the short identifier of the codepage, for example "UTF-8".
name string
// description is the longer label, for example "Unicode (UTF-8)".
description string
}

// codepageRegistry is the single source of truth for all supported codepages.
// Both GetEncoding and SupportedCodePages use this registry.
//
// The map key is the numeric codepage identifier. Each value is a positional
// codepageEntry literal in the order {encoding, name, description}.
var codepageRegistry = map[int]codepageEntry{
// Unicode
// 65001 carries a nil encoding: UTF-8 needs no conversion.
65001: {nil, "UTF-8", "Unicode (UTF-8)"},
// Both UTF-16 variants are configured with unicode.IgnoreBOM.
1200: {unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM), "UTF-16LE", "Unicode (UTF-16 Little-Endian)"},
1201: {unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM), "UTF-16BE", "Unicode (UTF-16 Big-Endian)"},

// OEM/DOS codepages
437: {charmap.CodePage437, "CP437", "OEM United States"},
850: {charmap.CodePage850, "CP850", "OEM Multilingual Latin 1"},
852: {charmap.CodePage852, "CP852", "OEM Latin 2"},
855: {charmap.CodePage855, "CP855", "OEM Cyrillic"},
858: {charmap.CodePage858, "CP858", "OEM Multilingual Latin 1 + Euro"},
860: {charmap.CodePage860, "CP860", "OEM Portuguese"},
862: {charmap.CodePage862, "CP862", "OEM Hebrew"},
863: {charmap.CodePage863, "CP863", "OEM Canadian French"},
865: {charmap.CodePage865, "CP865", "OEM Nordic"},
866: {charmap.CodePage866, "CP866", "OEM Russian"},

// Windows codepages
874: {charmap.Windows874, "Windows-874", "Thai"},
1250: {charmap.Windows1250, "Windows-1250", "Central European"},
1251: {charmap.Windows1251, "Windows-1251", "Cyrillic"},
1252: {charmap.Windows1252, "Windows-1252", "Western European"},
1253: {charmap.Windows1253, "Windows-1253", "Greek"},
1254: {charmap.Windows1254, "Windows-1254", "Turkish"},
1255: {charmap.Windows1255, "Windows-1255", "Hebrew"},
1256: {charmap.Windows1256, "Windows-1256", "Arabic"},
1257: {charmap.Windows1257, "Windows-1257", "Baltic"},
1258: {charmap.Windows1258, "Windows-1258", "Vietnamese"},

// ISO-8859 codepages
// Note: 28601 and 28602 have no entry, so the range below is not contiguous.
28591: {charmap.ISO8859_1, "ISO-8859-1", "Latin 1 (Western European)"},
28592: {charmap.ISO8859_2, "ISO-8859-2", "Latin 2 (Central European)"},
28593: {charmap.ISO8859_3, "ISO-8859-3", "Latin 3 (South European)"},
28594: {charmap.ISO8859_4, "ISO-8859-4", "Latin 4 (North European)"},
28595: {charmap.ISO8859_5, "ISO-8859-5", "Cyrillic"},
28596: {charmap.ISO8859_6, "ISO-8859-6", "Arabic"},
28597: {charmap.ISO8859_7, "ISO-8859-7", "Greek"},
28598: {charmap.ISO8859_8, "ISO-8859-8", "Hebrew"},
28599: {charmap.ISO8859_9, "ISO-8859-9", "Turkish"},
28600: {charmap.ISO8859_10, "ISO-8859-10", "Nordic"},
28603: {charmap.ISO8859_13, "ISO-8859-13", "Baltic"},
28604: {charmap.ISO8859_14, "ISO-8859-14", "Celtic"},
28605: {charmap.ISO8859_15, "ISO-8859-15", "Latin 9 (Western European with Euro)"},
28606: {charmap.ISO8859_16, "ISO-8859-16", "Latin 10 (South-Eastern European)"},

// Cyrillic
20866: {charmap.KOI8R, "KOI8-R", "Russian"},
21866: {charmap.KOI8U, "KOI8-U", "Ukrainian"},

// Macintosh
10000: {charmap.Macintosh, "Macintosh", "Mac Roman"},
10007: {charmap.MacintoshCyrillic, "x-mac-cyrillic", "Mac Cyrillic"},

// EBCDIC
37: {charmap.CodePage037, "IBM037", "EBCDIC US-Canada"},
1047: {charmap.CodePage1047, "IBM1047", "EBCDIC Latin 1/Open System"},
1140: {charmap.CodePage1140, "IBM01140", "EBCDIC US-Canada with Euro"},

// Japanese
932: {japanese.ShiftJIS, "Shift_JIS", "Japanese (Shift-JIS)"},
20932: {japanese.EUCJP, "EUC-JP", "Japanese (EUC)"},
// 50220, 50221 and 50222 all share japanese.ISO2022JP; only the name and
// description differ.
// NOTE(review): the 1-byte Kana variants are not distinguished by the
// encoder used here — confirm this approximation is acceptable.
50220: {japanese.ISO2022JP, "ISO-2022-JP", "Japanese (JIS)"},
50221: {japanese.ISO2022JP, "csISO2022JP", "Japanese (JIS-Allow 1 byte Kana)"},
50222: {japanese.ISO2022JP, "ISO-2022-JP", "Japanese (JIS-Allow 1 byte Kana SO/SI)"},

// Korean
// 949 and 51949 are both backed by korean.EUCKR.
949: {korean.EUCKR, "EUC-KR", "Korean"},
51949: {korean.EUCKR, "EUC-KR", "Korean (EUC)"},

// Simplified Chinese
936: {simplifiedchinese.GBK, "GBK", "Chinese Simplified (GBK)"},
54936: {simplifiedchinese.GB18030, "GB18030", "Chinese Simplified (GB18030)"},
52936: {simplifiedchinese.HZGB2312, "HZ-GB-2312", "Chinese Simplified (HZ)"},

// Traditional Chinese
950: {traditionalchinese.Big5, "Big5", "Chinese Traditional (Big5)"},
}

// CodePageSettings holds the input and output codepage settings produced by
// ParseCodePage from the -f argument.
//
// The zero value of a field is what ParseCodePage leaves behind when the
// corresponding codepage was not given.
type CodePageSettings struct {
// InputCodePage is the codepage number set by a bare number or an "i:" part.
InputCodePage int
// OutputCodePage is the codepage number set by a bare number or an "o:" part.
OutputCodePage int
}

// ParseCodePage parses the -f codepage argument
// Format: codepage | i:codepage[,o:codepage] | o:codepage[,i:codepage]
func ParseCodePage(arg string) (*CodePageSettings, error) {
if arg == "" {
return nil, nil
}

settings := &CodePageSettings{}
parts := strings.Split(arg, ",")

for _, part := range parts {
part = strings.TrimSpace(part)
if part == "" {
continue
}

if strings.HasPrefix(strings.ToLower(part), "i:") {
// Input codepage
cp, err := strconv.Atoi(strings.TrimPrefix(strings.ToLower(part), "i:"))
if err != nil {
return nil, localizer.Errorf("invalid input codepage: %s", part)
}
settings.InputCodePage = cp
} else if strings.HasPrefix(strings.ToLower(part), "o:") {
// Output codepage
cp, err := strconv.Atoi(strings.TrimPrefix(strings.ToLower(part), "o:"))
if err != nil {
return nil, localizer.Errorf("invalid output codepage: %s", part)
}
settings.OutputCodePage = cp
} else {
// Both input and output
cp, err := strconv.Atoi(part)
if err != nil {
return nil, localizer.Errorf("invalid codepage: %s", part)
}
settings.InputCodePage = cp
settings.OutputCodePage = cp
}
}

Copy link

Copilot AI Jan 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ParseCodePage currently accepts inputs that contain no actual codepage value (e.g. "," or only whitespace) and returns a non-nil CodePageSettings with both InputCodePage/OutputCodePage left as 0. That silently disables codepage handling even though the user supplied -f. Consider detecting this case (arg != "" but neither codepage parsed) and returning an error (and add a unit test for it).

Suggested change
// If a non-empty argument was provided but no codepage was parsed,
// treat this as an error rather than silently disabling codepage handling.
if arg != "" && settings.InputCodePage == 0 && settings.OutputCodePage == 0 {
return nil, localizer.Errorf("invalid codepage: %s", arg)
}

Copilot uses AI. Check for mistakes.
// Validate codepages
if settings.InputCodePage != 0 {
if _, err := GetEncoding(settings.InputCodePage); err != nil {
return nil, err
}
}
if settings.OutputCodePage != 0 {
if _, err := GetEncoding(settings.OutputCodePage); err != nil {
return nil, err
}
}

return settings, nil
}

// GetEncoding looks up the encoding registered for a Windows codepage number.
//
// The returned encoding is nil for UTF-8 (65001), because Go strings are
// already UTF-8 and need no conversion. A codepage that is missing from
// codepageRegistry yields an "unsupported codepage" error.
func GetEncoding(codepage int) (encoding.Encoding, error) {
	if registered, found := codepageRegistry[codepage]; found {
		return registered.encoding, nil
	}
	// The number is rendered with strconv.Itoa and passed as %s, presumably
	// so the localizing printer does not apply locale-specific digit
	// grouping to it — confirm before switching to %d.
	return nil, localizer.Errorf("unsupported codepage %s", strconv.Itoa(codepage))
}

// CodePageInfo describes a supported codepage as returned by
// SupportedCodePages.
type CodePageInfo struct {
// CodePage is the numeric codepage identifier (the codepageRegistry key).
CodePage int
// Name is the short name of the codepage, copied from the registry entry.
Name string
// Description is the human-readable label, copied from the registry entry.
Description string
}

// SupportedCodePages lists every codepage in codepageRegistry together with
// its name and description, ordered by ascending codepage number.
func SupportedCodePages() []CodePageInfo {
	// Map iteration order is random, so collect and sort the keys first to
	// get stable output.
	numbers := make([]int, 0, len(codepageRegistry))
	for number := range codepageRegistry {
		numbers = append(numbers, number)
	}
	sort.Ints(numbers)

	pages := make([]CodePageInfo, len(numbers))
	for i, number := range numbers {
		entry := codepageRegistry[number]
		pages[i] = CodePageInfo{
			CodePage:    number,
			Name:        entry.name,
			Description: entry.description,
		}
	}
	return pages
}
Loading
Loading