2121import lit .ShUtil as ShUtil
2222import lit .Test as Test
2323import lit .util
24- from lit .util import to_bytes , to_string , to_unicode
2524from lit .BooleanExpression import BooleanExpression
2625
2726
@@ -391,18 +390,14 @@ def executeBuiltinEcho(cmd, shenv):
391390 # Some tests have un-redirected echo commands to help debug test failures.
392391 # Buffer our output and return it to the caller.
393392 is_redirected = True
394- encode = lambda x : x
395393 if stdout == subprocess .PIPE :
396394 is_redirected = False
397395 stdout = StringIO ()
398396 elif kIsWindows :
399- # Reopen stdout in binary mode to avoid CRLF translation. The versions
400- # of echo we are replacing on Windows all emit plain LF, and the LLVM
401- # tests now depend on this.
402- # When we open as binary, however, this also means that we have to write
403- # 'bytes' objects to stdout instead of 'str' objects.
404- encode = lit .util .to_bytes
405- stdout = open (stdout .name , stdout .mode + "b" )
397+ # Reopen stdout with `newline=""` to avoid CRLF translation.
398+ # The versions of echo we are replacing on Windows all emit plain LF,
399+ # and the LLVM tests now depend on this.
400+ stdout = open (stdout .name , stdout .mode , encoding = "utf-8" , newline = "" )
406401 opened_files .append ((None , None , stdout , None ))
407402
408403 # Implement echo flags. We only support -e and -n, and not yet in
@@ -423,16 +418,15 @@ def maybeUnescape(arg):
423418 if not interpret_escapes :
424419 return arg
425420
426- arg = lit .util .to_bytes (arg )
427- return arg .decode ("unicode_escape" )
421+ return arg .encode ("utf-8" ).decode ("unicode_escape" )
428422
429423 if args :
430424 for arg in args [:- 1 ]:
431- stdout .write (encode ( maybeUnescape (arg ) ))
432- stdout .write (encode ( " " ) )
433- stdout .write (encode ( maybeUnescape (args [- 1 ]) ))
425+ stdout .write (maybeUnescape (arg ))
426+ stdout .write (" " )
427+ stdout .write (maybeUnescape (args [- 1 ]))
434428 if write_newline :
435- stdout .write (encode ( "\n " ) )
429+ stdout .write ("\n " )
436430
437431 for (name , mode , f , path ) in opened_files :
438432 f .close ()
@@ -463,7 +457,7 @@ def executeBuiltinMkdir(cmd, cmd_shenv):
463457 exitCode = 0
464458 for dir in args :
465459 dir = pathlib .Path (dir )
466- cwd = pathlib .Path (to_unicode ( cmd_shenv .cwd ) )
460+ cwd = pathlib .Path (cmd_shenv .cwd )
467461 if not dir .is_absolute ():
468462 dir = lit .util .abs_path_preserve_drive (cwd / dir )
469463 if parent :
@@ -508,8 +502,6 @@ def on_rm_error(func, path, exc_info):
508502 exitCode = 0
509503 for path in args :
510504 cwd = cmd_shenv .cwd
511- path = to_unicode (path ) if kIsWindows else to_bytes (path )
512- cwd = to_unicode (cwd ) if kIsWindows else to_bytes (cwd )
513505 if not os .path .isabs (path ):
514506 path = lit .util .abs_path_preserve_drive (os .path .join (cwd , path ))
515507 if force and not os .path .exists (path ):
@@ -718,10 +710,7 @@ def processRedirects(cmd, stdin_source, cmd_shenv, opened_files):
718710 else :
719711 # Make sure relative paths are relative to the cwd.
720712 redir_filename = os .path .join (cmd_shenv .cwd , name )
721- redir_filename = (
722- to_unicode (redir_filename ) if kIsWindows else to_bytes (redir_filename )
723- )
724- fd = open (redir_filename , mode )
713+ fd = open (redir_filename , mode , encoding = "utf-8" )
725714 # Workaround a Win32 and/or subprocess bug when appending.
726715 #
727716 # FIXME: Actually, this is probably an instance of PR6753.
@@ -1083,14 +1072,14 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper):
10831072 if out is None :
10841073 out = ""
10851074 else :
1086- out = to_string ( out .decode ("utf-8" , errors = "replace" ) )
1075+ out = out .decode ("utf-8" , errors = "replace" )
10871076 except :
10881077 out = str (out )
10891078 try :
10901079 if err is None :
10911080 err = ""
10921081 else :
1093- err = to_string ( err .decode ("utf-8" , errors = "replace" ) )
1082+ err = err .decode ("utf-8" , errors = "replace" )
10941083 except :
10951084 err = str (err )
10961085
@@ -1284,7 +1273,7 @@ def executeScriptInternal(
12841273
12851274 # Add the command output, if redirected.
12861275 for (name , path , data ) in result .outputFiles :
1287- data = to_string ( data .decode ("utf-8" , errors = "replace" ) )
1276+ data = data .decode ("utf-8" , errors = "replace" )
12881277 out += formatOutput (f"redirected output from '{ name } '" , data , limit = 1024 )
12891278 if result .stdout .strip ():
12901279 out += formatOutput ("command stdout" , result .stdout )
@@ -1340,13 +1329,6 @@ def executeScript(test, litConfig, tmpBase, commands, cwd):
13401329 script += ".bat"
13411330
13421331 # Write script file
1343- mode = "w"
1344- open_kwargs = {}
1345- if litConfig .isWindows and not isWin32CMDEXE :
1346- mode += "b" # Avoid CRLFs when writing bash scripts.
1347- else :
1348- open_kwargs ["encoding" ] = "utf-8"
1349- f = open (script , mode , ** open_kwargs )
13501332 if isWin32CMDEXE :
13511333 for i , ln in enumerate (commands ):
13521334 match = re .fullmatch (kPdbgRegex , ln )
@@ -1355,8 +1337,9 @@ def executeScript(test, litConfig, tmpBase, commands, cwd):
13551337 commands [i ] = match .expand (
13561338 "echo '\\ 1' > nul && " if command else "echo '\\ 1' > nul"
13571339 )
1358- f .write ("@echo on\n " )
1359- f .write ("\n @if %ERRORLEVEL% NEQ 0 EXIT\n " .join (commands ))
1340+ with open (script , "w" , encoding = "utf-8" ) as f :
1341+ f .write ("@echo on\n " )
1342+ f .write ("\n @if %ERRORLEVEL% NEQ 0 EXIT\n " .join (commands ))
13601343 else :
13611344 for i , ln in enumerate (commands ):
13621345 match = re .fullmatch (kPdbgRegex , ln )
@@ -1395,8 +1378,6 @@ def executeScript(test, litConfig, tmpBase, commands, cwd):
13951378 # seen the latter manage to terminate the shell running lit.
13961379 if command :
13971380 commands [i ] += f" && {{ { command } ; }}"
1398- if test .config .pipefail :
1399- f .write (b"set -o pipefail;" if mode == "wb" else "set -o pipefail;" )
14001381
14011382 # Manually export any DYLD_* variables used by dyld on macOS because
14021383 # otherwise they are lost when the shell executable is run, before the
@@ -1406,14 +1387,14 @@ def executeScript(test, litConfig, tmpBase, commands, cwd):
14061387 for k , v in test .config .environment .items ()
14071388 if k .startswith ("DYLD_" )
14081389 )
1409- f .write (bytes (env_str , "utf-8" ) if mode == "wb" else env_str )
1410- f .write (b"set -x;" if mode == "wb" else "set -x;" )
1411- if mode == "wb" :
1412- f .write (bytes ("{ " + "; } &&\n { " .join (commands ) + "; }" , "utf-8" ))
1413- else :
1390+
1391+ with open (script , "w" , encoding = "utf-8" , newline = "" ) as f :
1392+ if test .config .pipefail :
1393+ f .write ("set -o pipefail;" )
1394+ f .write (env_str )
1395+ f .write ("set -x;" )
14141396 f .write ("{ " + "; } &&\n { " .join (commands ) + "; }" )
1415- f .write (b"\n " if mode == "wb" else "\n " )
1416- f .close ()
1397+ f .write ("\n " )
14171398
14181399 if isWin32CMDEXE :
14191400 command = ["cmd" , "/c" , script ]
@@ -1449,19 +1430,11 @@ def parseIntegratedTestScriptCommands(source_path, keywords):
14491430 (line_number, command_type, line).
14501431 """
14511432
1452- # This code is carefully written to be dual compatible with Python 2.5+ and
1453- # Python 3 without requiring input files to always have valid codings. The
1454- # trick we use is to open the file in binary mode and use the regular
1455- # expression library to find the commands, with it scanning strings in
1456- # Python2 and bytes in Python3.
1457- #
1458- # Once we find a match, we do require each script line to be decodable to
1459- # UTF-8, so we convert the outputs to UTF-8 before returning. This way the
1460- # remaining code can work with "strings" agnostic of the executing Python
1461- # version.
1433+ # We use `bytes` for scanning input files to avoid requiring them to always
1434+ # have valid codings.
14621435
14631436 keywords_re = re .compile (
1464- to_bytes ( "(%s)(.*)\n " % ("|" .join (re .escape (k ) for k in keywords ),) )
1437+ b "(%s)(.*)\n " % (b "|" .join (re .escape (k . encode ( "utf-8" )) for k in keywords ),)
14651438 )
14661439
14671440 f = open (source_path , "rb" )
@@ -1470,8 +1443,8 @@ def parseIntegratedTestScriptCommands(source_path, keywords):
14701443 data = f .read ()
14711444
14721445 # Ensure the data ends with a newline.
1473- if not data .endswith (to_bytes ( "\n " ) ):
1474- data = data + to_bytes ( "\n " )
1446+ if not data .endswith (b "\n " ):
1447+ data = data + b "\n "
14751448
14761449 # Iterate over the matches.
14771450 line_number = 1
@@ -1480,24 +1453,20 @@ def parseIntegratedTestScriptCommands(source_path, keywords):
14801453 # Compute the updated line number by counting the intervening
14811454 # newlines.
14821455 match_position = match .start ()
1483- line_number += data .count (
1484- to_bytes ("\n " ), last_match_position , match_position
1485- )
1456+ line_number += data .count (b"\n " , last_match_position , match_position )
14861457 last_match_position = match_position
14871458
14881459 # Convert the keyword and line to UTF-8 strings and yield the
1489- # command. Note that we take care to return regular strings in
1490- # Python 2, to avoid other code having to differentiate between the
1491- # str and unicode types.
1460+ # command.
14921461 #
14931462 # Opening the file in binary mode prevented Windows \r newline
14941463 # characters from being converted to Unix \n newlines, so manually
14951464 # strip those from the yielded lines.
14961465 keyword , ln = match .groups ()
14971466 yield (
14981467 line_number ,
1499- to_string ( keyword .decode ("utf-8" ) ),
1500- to_string ( ln .decode ("utf-8" ).rstrip ("\r " ) ),
1468+ keyword .decode ("utf-8" ),
1469+ ln .decode ("utf-8" ).rstrip ("\r " ),
15011470 )
15021471 finally :
15031472 f .close ()
0 commit comments