In [1]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import json
import chess.pgn
import re
import sys
import os.path
import pathlib
import logging
from datetime import datetime
import sys, traceback


def get_file_list(local_path):
    """Return the paths of the PGN files found under ``local_path``.

    The directory tree is walked recursively and every matching file is
    reported with the directory it actually lives in. Directories and file
    names are visited in sorted order so the result is deterministic
    (callers slice this list, e.g. ``file_list[-3:]``).

    Args:
        local_path: Directory to search, given as ``str`` or ``pathlib.Path``.

    Returns:
        list[str]: Full paths of all files whose name ends in ``pgn``
        (same name pattern as before: at least one character followed by
        ``pgn`` at the end of the name).
    """
    pattern = re.compile(r'.+pgn$')
    out = []
    for root, dirs, names in os.walk(str(local_path)):
        # Sorting in place fixes the order in which os.walk descends.
        dirs.sort()
        for name in sorted(names):
            if pattern.search(name):
                # Join with the directory being visited, not the search root,
                # so files in subfolders get a valid path.
                out.append(os.path.join(root, name))
    return out


def get_data(pgn_file):
    """Yield one dict per game read from an open PGN file.

    Each yielded dict contains every header of the game (as returned by the
    game's ``headers`` mapping) plus a ``"moves"`` key holding the list of
    main-line moves in SAN notation.

    Args:
        pgn_file: Open text handle positioned at the start of a PGN game;
            it is consumed game by game with ``chess.pgn.read_game``.

    Yields:
        dict: Header name -> header value, plus ``"moves"`` -> list of SAN
        strings for the main line (first variation at every node).
    """
    # Raw string: "\{" in a normal string literal is an invalid escape sequence.
    comment_pattern = re.compile(r"\{.*?\}")

    game = chess.pgn.read_game(pgn_file)
    while game is not None:
        # Keep a single board and push moves onto it instead of calling
        # node.board() for every node, which rebuilds the position from the root.
        board = game.board()
        moves = []
        node = game
        while node.variations:
            next_node = node.variation(0)
            moves.append(comment_pattern.sub("", board.san(next_node.move)))
            board.push(next_node.move)
            node = next_node

        # Copy the headers into a plain dict rather than storing the move
        # list inside the parsed game's header object.
        headers = game.headers
        out_dict = {key: headers.get(key) for key in headers.keys()}
        out_dict["moves"] = moves

        yield out_dict

        game = chess.pgn.read_game(pgn_file)


def convert_file(file_path):
    """Convert one PGN file into a JSON file containing a list of games.

    The output is written to the module-level ``out_dir`` under the input
    file's name with its extension replaced by ``.json``. Progress and errors
    are reported through the module-level ``log`` callable. Any ``Exception``
    raised during conversion is logged (with traceback) and swallowed so that
    a batch run can continue with the next file.

    Args:
        file_path: ``pathlib.Path`` of the PGN file to convert.
    """
    # .stem drops only the final extension; str.replace on the suffix would
    # also remove the same text if it occurred elsewhere in the name.
    file_name = file_path.stem + '.json'
    log('convert file '+file_path.name)
    out_list = []
    try:
        # Context managers close both handles even on errors or interrupts.
        with open(str(file_path), encoding='ISO-8859-1') as pgn_file:
            for count_d, data in enumerate(get_data(pgn_file), start=0):
                log(file_path.name+' '+str(count_d))
                out_list.append(data)

        log(' save '+file_path.name)
        # The output file is opened only after parsing succeeded, so a failed
        # or interrupted run does not leave a truncated/empty JSON file behind.
        with open(os.path.join(out_dir, file_name), 'w') as json_file:
            json.dump(out_list, json_file)
        log('done')
    except Exception:
        log(traceback.format_exc(10))
        log('ERROR file '+file_name+' not converted')


def create_join_file(file_list):
    """Write the games of several PGN files into one ``join_data.json`` array.

    Games are streamed one at a time into ``join_data.json`` inside the
    module-level ``out_dir``, so the full list is never held in memory.
    Progress is reported through the module-level ``log`` callable.

    Args:
        file_list: Iterable of paths to the PGN files to merge, in order.
    """
    log(' create_join_file ')
    name = os.path.join(out_dir, 'join_data.json')
    # Track whether anything has been written yet. Deciding on the file/game
    # counters instead emits a leading comma when the first file(s) contain
    # no games, which produces invalid JSON.
    first_item = True
    # Mode 'w' truncates any previous content; both handles are closed by the
    # context managers even if parsing fails midway.
    with open(name, 'w') as json_file:
        json_file.write('[')
        for count_f, file in enumerate(file_list, start=0):
            with open(file, encoding='ISO-8859-1') as pgn_file:
                for count_d, data in enumerate(get_data(pgn_file), start=0):
                    log(str(count_f)+' '+str(count_d))
                    if not first_item:
                        json_file.write(',')
                    json_file.write(json.dumps(data))
                    first_item = False
            log(pathlib.Path(file).name)
        json_file.write(']')



In [2]:
def get_file_list_json(local_path):
    """Return the paths of the JSON files found under ``local_path``.

    The directory tree is walked recursively and every matching file is
    reported with the directory it actually lives in. Directories and file
    names are visited in sorted order so the result is deterministic.

    Args:
        local_path: Directory to search, given as ``str`` or ``pathlib.Path``.

    Returns:
        list[str]: Full paths of all files whose name ends in ``json``
        (same name pattern as before: at least one character followed by
        ``json`` at the end of the name).
    """
    pattern = re.compile(r'.+json$')
    out = []
    for root, dirs, names in os.walk(str(local_path)):
        # Sorting in place fixes the order in which os.walk descends.
        dirs.sort()
        for name in sorted(names):
            if pattern.search(name):
                # Join with the directory being visited, not the search root,
                # so files in subfolders get a valid path.
                out.append(os.path.join(root, name))
    return out

In [3]:
# Directory that is searched for input files (absolute, machine-specific path).
inp_dir = r'C:\Users\Giorgio\Documents\Python Scripts\Scacchi chessbase DB\data\Lichess Elite Database'
# Output files are written into the same directory as the inputs.
out_dir = inp_dir
# `log` is the root logger's error() method, so every message passed to it
# is emitted at ERROR level.
log = logging.getLogger().error

In [4]:
# Collect the JSON paths reported by get_file_list_json for inp_dir and
# display them as the cell output.
file_list_json = get_file_list_json(inp_dir)
file_list_json

['C:\\Users\\Giorgio\\Documents\\Python Scripts\\Scacchi chessbase DB\\data\\Lichess Elite Database\\lichess_elite_2013-09.json',
 'C:\\Users\\Giorgio\\Documents\\Python Scripts\\Scacchi chessbase DB\\data\\Lichess Elite Database\\lichess_elite_2013-11.json',
 'C:\\Users\\Giorgio\\Documents\\Python Scripts\\Scacchi chessbase DB\\data\\Lichess Elite Database\\lichess_elite_2014-01.json',
 'C:\\Users\\Giorgio\\Documents\\Python Scripts\\Scacchi chessbase DB\\data\\Lichess Elite Database\\lichess_elite_2014-02.json',
 'C:\\Users\\Giorgio\\Documents\\Python Scripts\\Scacchi chessbase DB\\data\\Lichess Elite Database\\lichess_elite_2014-03.json',
 'C:\\Users\\Giorgio\\Documents\\Python Scripts\\Scacchi chessbase DB\\data\\Lichess Elite Database\\lichess_elite_2014-04.json',
 'C:\\Users\\Giorgio\\Documents\\Python Scripts\\Scacchi chessbase DB\\data\\Lichess Elite Database\\lichess_elite_2014-05.json',
 'C:\\Users\\Giorgio\\Documents\\Python Scripts\\Scacchi chessbase DB\\data\\Lichess Elite

In [5]:

# Keep only the last three PGN paths returned for inp_dir, then display them.
file_list = get_file_list(inp_dir)[-3:]
file_list

['C:\\Users\\Giorgio\\Documents\\Python Scripts\\Scacchi chessbase DB\\data\\Lichess Elite Database\\lichess_elite_2020-03.pgn',
 'C:\\Users\\Giorgio\\Documents\\Python Scripts\\Scacchi chessbase DB\\data\\Lichess Elite Database\\lichess_elite_2020-04.pgn',
 'C:\\Users\\Giorgio\\Documents\\Python Scripts\\Scacchi chessbase DB\\data\\Lichess Elite Database\\lichess_elite_2020-05.pgn']

In [6]:
# Uncomment to rebuild file_list from every PGN path found for inp_dir:
# file_list = get_file_list(inp_dir)

# Mode switch: True merges all files into one JSON array,
# False converts each PGN file into its own JSON file.
is_join = False

start_time = datetime.now()
if is_join:
    create_join_file(file_list)
else:
    for file in file_list:
        convert_file(pathlib.Path(file))
end_time = datetime.now()

# Report the total wall-clock duration of the run.
log('time '+str(end_time-start_time))

convert file lichess_elite_2020-03.pgn
lichess_elite_2020-03.pgn 0
lichess_elite_2020-03.pgn 1
lichess_elite_2020-03.pgn 2
lichess_elite_2020-03.pgn 3
lichess_elite_2020-03.pgn 4
lichess_elite_2020-03.pgn 5
lichess_elite_2020-03.pgn 6
lichess_elite_2020-03.pgn 7
lichess_elite_2020-03.pgn 8
lichess_elite_2020-03.pgn 9
lichess_elite_2020-03.pgn 10
lichess_elite_2020-03.pgn 11
lichess_elite_2020-03.pgn 12
lichess_elite_2020-03.pgn 13
lichess_elite_2020-03.pgn 14
lichess_elite_2020-03.pgn 15
lichess_elite_2020-03.pgn 16
lichess_elite_2020-03.pgn 17
lichess_elite_2020-03.pgn 18
lichess_elite_2020-03.pgn 19
lichess_elite_2020-03.pgn 20
lichess_elite_2020-03.pgn 21
lichess_elite_2020-03.pgn 22
lichess_elite_2020-03.pgn 23
lichess_elite_2020-03.pgn 24
lichess_elite_2020-03.pgn 25
lichess_elite_2020-03.pgn 26
lichess_elite_2020-03.pgn 27
lichess_elite_2020-03.pgn 28
lichess_elite_2020-03.pgn 29
lichess_elite_2020-03.pgn 30
lichess_elite_2020-03.pgn 31
lichess_elite_2020-03.pgn 32
lichess_elite_

KeyboardInterrupt: 

In [None]:
# WARNING: running this cell powers off the machine.
# The original used typographic quotes, which is a SyntaxError in Python.
# "shutdown /s /t 1" is Windows syntax (/s = shut down, /t 1 = after a
# one-second timeout), so the call is skipped on any other platform.
if os.name == 'nt':
    os.system("shutdown /s /t 1")

In [None]:
import json

# Open data.json and parse its content; the context manager closes the
# file even if parsing fails.
with open('data.json') as f:
    data = json.load(f)

# Print every entry stored under the 'emp_details' key of the parsed data.
for i in data['emp_details']:
    print(i)