In [7]:
import pandas as pd
import urllib
# see https://www.utf8-chartable.de/unicode-utf8-table.pl?start=9728&number=128&names=-&utf8=dec
import unicodedata

def normalizeC24String(c24String):
  """Rewrite chess24-style move text using two remote replacement tables.

  Two passes are applied, in this order:

  1. Every entry of the ``UTF`` column of ``mapUTF2PGN.csv`` is replaced by
     the matching ``pgnStr`` value. A ``UTF`` cell may hold the character
     itself or a backslash-u escape followed by four hex digits.
  2. Every entry of the ``string`` column of ``pgntoken.csv`` is replaced by
     the matching ``pgnStr`` value, longest token first, so a short token
     cannot break up a longer one that contains it.

  Both tables are downloaded on every call, so network access is required.

  Args:
    c24String: The text to normalize.

  Returns:
    The text after both replacement passes.
  """
  utfMapFileUrl = "https://raw.githubusercontent.com/littlecapa/chess24Parser/main/config/mapUTF2PGN.csv"
  pgnTokenFileUrl = "https://raw.githubusercontent.com/littlecapa/chess24Parser/main/config/pgntoken.csv"

  def loadTable(url):
    """Read a ';'-separated table with all cells as text and blanks as ''."""
    # dtype=str keeps numeric-looking cells as strings so str.replace accepts them.
    return pd.read_csv(url, sep=';', dtype=str).fillna("")

  def convertUnicode(string):
    """Decode a leading backslash-u escape; return other input unchanged."""
    # Only the four hex digits after the leading escape are used.
    if string[0:2] == "\\u":
      return chr(int(string[2:6], 16))
    return string

  def replaceUnicode(string):
    """Apply the unicode-symbol table to ``string``."""
    mapList = loadTable(utfMapFileUrl)
    for utf, pgnStr in zip(mapList["UTF"], mapList["pgnStr"]):
      symbol = convertUnicode(utf)
      # A blank key must be skipped: str.replace("", x) would insert x
      # between every character of the text.
      if not symbol:
        continue
      string = string.replace(symbol, pgnStr)
    return string

  def findPgnToken(string):
    """Apply the token table to ``string``, longest token first."""
    pgnList = loadTable(pgnTokenFileUrl)
    # sorted() is stable, so tokens of equal length keep their file order.
    tokenPairs = sorted(
      zip(pgnList["string"], pgnList["pgnStr"]),
      key=lambda pair: len(pair[0]),
      reverse=True,
    )
    for token, pgnStr in tokenPairs:
      if token:  # same empty-key hazard as in replaceUnicode
        string = string.replace(token, pgnStr)
    return string

  c24String = replaceUnicode(c24String)
  c24String = findPgnToken(c24String)

  return c24String


In [2]:
def Test_normalizeC24String():
  """Smoke-test normalizeC24String on one annotated line and print the verdict.

  Prints "OK" when the normalized text equals the expected PGN string and
  "Error" otherwise. Nothing is returned or raised by the check itself.
  """
  # Test Case 1: figurine pieces plus evaluation and move-quality suffixes.
  annotated = "1. d4 d5+/= 2. c4+- c6-+ 3. ♘f3=/+ ♘f6 4. ♘c3 dxc4 5. a4! ♗f5? 6. e3!! e6?? 7. ♗xc4!? ♗b4?! 8. O-O= O-O 9. ♕e2"
  expected = "1. d4 d5$14 2. c4$18 c6$19 3. Nf3$15 Nf6 4. Nc3 dxc4 5. a4$28 Bf5$29 6. e3$3 e6$4 7. Bxc4$24 Bb4$25 8. O-O$10 O-O 9. Qe2"

  verdict = "OK" if normalizeC24String(annotated) == expected else "Error"
  print(verdict)

In [3]:
# Run the smoke test; it prints "OK" on a match and "Error" otherwise.
Test_normalizeC24String()

OK


In [11]:
def testLeningrad():
  """Download a sample chess24 file, normalize it and print the result.

  The file is fetched over HTTPS and decoded as UTF-8, so network access is
  required. Nothing is returned; the normalized text is printed.
  """
  # A bare `import urllib` does not load the `request` submodule, so import
  # it explicitly here instead of relying on another package having done so.
  import urllib.request

  url = "https://raw.githubusercontent.com/littlecapa/chess24Parser/main/testdata/nimzoOddsEnds.c24"

  # The context manager closes the HTTP response even if reading fails.
  with urllib.request.urlopen(url) as response:
    c24String = response.read().decode('utf-8')

  normString = normalizeC24String(c24String)
  print (normString)

# Fetch the sample file and print its normalized form (needs network access).
testLeningrad()

1. d4  ♘f6 2. c4 e6 3. ♘c3 ♗b4 4. ♕b3

4. ♗d2 O-O 5. e3

a) 5... b6 6. ♘f3 ♗b7 7. ♗d3 d5 ( 7... d6 8. O-O ♘bd7 9. a3 ♗xc3 10. ♗xc3 ♘e4 ) 8. O-O

b) 5... d5 6. ♘f3 b6 ( 6... ♖e8 7. ♗d3 dxc4 8. ♗xc4 ) 7. a3 ♗e7

4. ♗g5 c5

a) 5. dxc5

a1) 5... h6 6. ♗h4 ( 6. ♗d2 ) 6... ♕a5 7. ♗xf6

a2) 5... ♘a6 6. ♕c2 ♘xc5 7. a3 ♗xc3+ 8. ♕xc3 b6

b) 5. d5 d6

b1) 6. e3 exd5 7. cxd5 ♘bd7

b11) 8. ♘f3 ♕a5

b12) 8. ♗d3 ♕a5 9. ♘e2 ♘xd5 10. O-O ♗xc3 11. bxc3 c4 12. ♗c2 ( 12. ♗e4 ♘5f6 13. ♗xf6 ♘xf6 14. ♗f3 ♕c5 ; 12. ♗xc4 ♘5b6 ) 12... O-O

b121) 13. ♕d4 ♘xc3

b122) 13. ♘g3 ♘xc3 14. ♕h5 f5 15. e4 ( 15. ♗e7 ♖f7 16. ♘xf5 ♘e5 17. ♗xd6 ♗xf5 18. ♗xf5 ♘f3+ 19. ♕xf3 ♕xf5 20. ♕xf5 ♘e2+ 21. ♔h1 ♖xf5 22. ♖ae1 ♘c3 ) 15... ♘xe4

b123) 13. ♗h4 13... ♘xc3 14. ♘xc3 ♕xc3 15. ♖c1 ♕a3 ( 15... ♕e5 16. ♗g3 ♕e6 17. ♕xd6 ♕xd6 18. ♗xd6 ♖d8 19. ♗e4 ♘f6 ) 16. ♗e7 ♖e8 17. ♗xd6 ♕a6 ( 17... ♕xa2 ) 18. e4 ♕c6

b13) 8. ♗b5 8... h6 ( 8... a6 9. ♗xd7+ ♗xd7 10. ♕f3 ) 9. ♗h4 ♗xc3+ 10. bxc3 O-O 11. ♘e2 ♘b6 12. O-O ♘bxd5

b2) 6. f3 6... h6 7. ♗h4 