## Extracting and Formatting Puzzle Data from Lichess

In [1]:
import io
import time
from random import shuffle

import chess.pgn
import pandas as pd
import requests

def get_last_fen(pgn):
    """Return the FEN of the position reached after the PGN's mainline moves.

    Args:
        pgn: PGN text of a single game, as a string.

    Returns:
        The full FEN string of the board after every mainline move has
        been played from the game's starting position.

    Raises:
        ValueError: If no game can be read from ``pgn`` (e.g. empty text).
    """
    game = chess.pgn.read_game(io.StringIO(pgn))
    if game is None:
        # read_game returns None when there is nothing to parse; fail with a
        # clear message instead of an AttributeError on game.board().
        raise ValueError("no game found in PGN text")
    board = game.board()
    for move in game.mainline_moves():
        board.push(move)
    return board.fen()

# Puzzle ids are read from the second column of the CSV.
# Credit --> https://database.lichess.org/#puzzles
df = pd.read_csv("puzzle_ids.csv")
df = df[df.columns[1]]

puzzle_list = []

# One session is shared by every request instead of opening a new one each time.
with requests.Session() as session:
    for puzzle_id in df[:10000]:
        url = f"https://lichess.org/api/puzzle/{puzzle_id}"
        # A timeout keeps one stalled request from hanging the whole run.
        response = session.get(url, timeout=30)
        while response.status_code == 429:
            # Rate limited: the Lichess API asks clients to wait a full
            # minute before resuming.
            time.sleep(60)
            response = session.get(url, timeout=30)
        # Surface HTTP errors here rather than as a JSON/KeyError below.
        response.raise_for_status()
        puzzle = response.json()

        # Full FEN of the position after replaying the game's PGN.
        starting_position = get_last_fen(puzzle['game']['pgn'])
        # Only the piece-placement field of each FEN is stored.
        positions = [starting_position.split()[0]]
        board = chess.Board(starting_position)
        for move in puzzle['puzzle']['solution']:
            # Solution moves are parsed as UCI strings.
            board.push(chess.Move.from_uci(move))
            positions.append(board.fen().split()[0])
        # Record: (rating, side to move in the starting FEN, all positions).
        fen = (puzzle['puzzle']['rating'], starting_position.split()[1], positions)
        puzzle_list.append(fen)

new_puzzle_format = pd.DataFrame(columns = ['rating', 'color', 'positions'], data = puzzle_list)
# Expand the FEN side-to-move flag into a readable color name.
new_puzzle_format['color'] = new_puzzle_format['color'].apply(lambda x: 'white' if x == 'w' else 'black')
new_puzzle_format.to_csv('puzzles.csv')