In [21]:
import pandas as pd
import chess
import io
from tqdm import tqdm
tqdm.pandas()
import swifter
import numpy as np
import os

In [22]:
# Extend the `chess` module with the constants the feature extractors below
# rely on.

# Piece symbols: upper case is white, lower case is black. Within a colour
# the symbols are listed in ascending order of piece value.
chess.WHITE_PIECES = ('P', 'N', 'B', 'R', 'Q', 'K')
chess.BLACK_PIECES = ('p', 'n', 'b', 'r', 'q', 'k')
chess.PIECES = chess.WHITE_PIECES + chess.BLACK_PIECES

# Pieces that keep travelling along a direction until something blocks them.
chess.SLIDING_PIECES = ('B', 'R', 'Q', 'b', 'r', 'q')

# Max number of pieces of each type (extra knight and queen)
#!TODO: Check if this changes values
chess.PIECE_CAPACITY = {
    symbol : capacity
    for symbols in (chess.WHITE_PIECES, chess.BLACK_PIECES)
    for symbol, capacity in zip(symbols, (8, 3, 2, 2, 2, 1))
}

# Placeholder square for a piece slot whose piece is not on the board.
chess.MISSING_PIECE_SQUARE = -1

# Attack steps of each piece as (row delta, column delta) pairs, in the
# row-major coordinates produced by `__to_coord` (row 0 is the eighth rank,
# so white pawns attack towards smaller rows and black pawns towards larger
# ones).
chess.PIECE_MOVEMENTS = {
    'R' : ((+1, +0), (+0, +1), (-1, +0), (+0, -1)),
    'B' : ((+1, +1), (-1, +1), (-1, -1), (+1, -1)),
    'N' : (
        (+2, +1), (+1, +2), (-1, +2), (-2, +1),
        (-2, -1), (-1, -2), (+1, -2), (+2, -1)
    ),
    'P' : ((-1, -1), (-1, +1)),
    'p' : ((+1, -1), (+1, +1))
}
# Queen and king share the same eight directions: the bishop's diagonals
# followed by the rook's straight lines.
chess.PIECE_MOVEMENTS['Q'] = (
    chess.PIECE_MOVEMENTS['B'] + chess.PIECE_MOVEMENTS['R']
)
chess.PIECE_MOVEMENTS['K'] = chess.PIECE_MOVEMENTS['Q']
# Black pieces other than the pawn move exactly like their white twins.
for piece in 'kqrbn':
    chess.PIECE_MOVEMENTS[piece] = chess.PIECE_MOVEMENTS[piece.upper()]

In [23]:
file_name = '../data/4_lichess_csv_processed/filtered_lichess_db_standard_rated_2013-01_parsed.csv'

# Read the parsed games, keep a reproducible 1% sample of the rows and turn
# the index (the first CSV column) back into an ordinary column, which leaves
# a fresh 0..n-1 index behind.
df = (
    pd.read_csv(file_name, index_col=0, header=0)
    .sample(frac=0.01, random_state=1)
    .reset_index(drop=False)
)

#print(df.head())

In [45]:
#df.head()
#df.info()

In [25]:
def initialize_dataframe(df):
    '''
    Returns a copy of `df` extended with one default-valued column per
    engineered feature. `df` itself is left untouched.

    Columns appended, in this order:
        * piece counts: 'K', 'Q', 'R', 'B', 'N', 'P' and lower case;
        * 'SideToMove' and the four castling-right flags;
        * piece lists: for each of the 16 slots of each colour,
          `{slot}_{colour}_type`, `_row`, `_col`, `{slot}_{colour}`
          (presence flag), `_min_attacker`, `_min_defender`;
        * sliding mobility: `{slot}_{colour}_D{0..7}`;
        * attack/defend maps: `{row}_{col}_min_attacker` and
          `{row}_{col}_min_defender` for the 8x8 grid.

    Every new column is filled with `np.int8(-1)`, except the presence
    flags, 'SideToMove' and the castling flags, whose -1 values are
    replaced by `False`.
    '''
    colours = ('white', 'black')
    piece_slots = range(16)

    piece_count_columns = [
        'K', 'Q', 'R', 'B', 'N', 'P', 'k', 'q', 'r', 'b', 'n', 'p'
    ]
    state_flag_columns = [
        'SideToMove',
        'white_kingside_castling_rights', 'white_queenside_castling_rights',
        'black_kingside_castling_rights', 'black_queenside_castling_rights'
    ]
    # Piece type, position, presence flag and cheapest attacker/defender.
    # The empty field name stands for the presence flag `{slot}_{colour}`.
    piece_list_columns = [
        f'{slot}_{colour}_{field}' if field != '' else f'{slot}_{colour}'
        for slot in piece_slots
        for colour in colours
        for field in ('type', 'row', 'col', '', 'min_attacker', 'min_defender')
    ]
    # Sliding pieces mobility (D = direction).
    mobility_columns = [
        f'{slot}_{colour}_D{direction}'
        for slot in piece_slots
        for colour in colours
        for direction in range(8)
    ]
    # Attack and defense maps.
    map_columns = [
        f'{row}_{col}_{kind}'
        for row in range(8)
        for col in range(8)
        for kind in ('min_attacker', 'min_defender')
    ]

    df = df.reindex(
        df.columns.tolist()
        + piece_count_columns
        + state_flag_columns
        + piece_list_columns
        + mobility_columns
        + map_columns,
        axis=1,
        fill_value=np.int8(-1)
    )

    # Boolean features default to False instead of -1. The names are built
    # directly; there is no need to search the column index for them.
    boolean_columns = (
        [f'{slot}_{colour}' for slot in piece_slots for colour in colours]
        + state_flag_columns
    )
    return df.replace(
        to_replace=np.int8(-1),
        value={column: False for column in boolean_columns}
    )

# Append the default-valued feature columns to the sampled games; the values
# are filled in later by `extract_features_from_board`.
df = initialize_dataframe(df)


In [46]:
#df.info()
#df.dtypes.to_csv('dtypes.csv')

In [47]:
#df.head()
#len(df.columns)

: 

In [28]:
def __init_attackers_and_scope(board, piece_squares):
    '''
    Works out by hand the cheapest attacker of every square for each
    colour and the reach of every sliding piece, and attaches both to
    `board`:

        `board.min_attacker_of`:
            A list of 64 `(black_value, white_value)` tuples holding
            the value of the lowest-valued black / white piece that
            attacks a square, or 0 when no piece of that colour does.
            The list is the row-major flattening of the `__to_coord`
            grid: the entry for a square with coordinate `(row, col)`
            sits at position `row * 8 + col`, which is NOT the
            `chess` square number.

        `board.sliding_piece_scopes`:
            Maps `(piece, square)` of every sliding piece to a list
            with one entry per direction of
            `chess.PIECE_MOVEMENTS[piece]`: how many squares the piece
            can slide that way before hitting a piece or the edge of
            the board. A blocking piece of the opposite colour counts
            as a reachable square, one of the same colour does not.

    `piece_squares` maps each piece symbol to the squares of the pieces
    of that kind that are actually on the board.
    '''
    # Colour of the piece standing on each coordinate; -1 for an empty
    # square.
    piece_colors = np.full(shape=(8, 8), fill_value=-1, dtype=int)
    for symbol, squares in piece_squares.items():
        symbol_color = (
            chess.WHITE if symbol in chess.WHITE_PIECES else chess.BLACK
        )
        for square in squares:
            piece_colors[__to_coord(square)] = symbol_color

    # Value of the lowest-valued attacker of each coordinate, per colour.
    min_white_attacker_of = np.zeros((8, 8))
    min_black_attacker_of = np.zeros((8, 8))

    def mark(grid, row, col, value):
        '''
        Writes `value` at `(row, col)` of `grid` if that coordinate is
        on the board.

        Returns:
            3-tuple
                Whether the coordinate exists, whether a piece stands
                on it, and the colour of that piece (`None` when there
                is no piece).
        '''
        if not ((0 <= row < 8) and (0 <= col < 8)):
            return False, False, None
        grid[row, col] = value
        if piece_colors[row, col] == -1:
            return True, False, None
        return True, True, piece_colors[row, col]

    def slide(grid, mover_color, row, d_row, col, d_col, value):
        '''
        Simulates a sliding piece leaving `(row, col)` in steps of
        `(d_row, d_col)`, marking every coordinate it visits until it
        runs into a piece or off the board.

        Returns:
            `int`
                The scope of the piece in that direction: the empty
                squares visited, plus one if the blocker is an
                opposing piece.
        '''
        reach = 0
        while True:
            row, col = row + d_row, col + d_col
            exists, occupied, occupant_color = mark(grid, row, col, value)
            reach += (
                (exists and not occupied)
                or (occupied and not occupant_color == mover_color)
            )
            if not exists or occupied:
                return reach

    # The relative value of each piece.
    piece_values = dict(zip('PNBRQKpnbrqk', (1, 2, 3, 4, 5, 6) * 2))

    # One (initially empty) list of per-direction scopes for every sliding
    # piece on the board.
    board.sliding_piece_scopes = {}
    for symbol in chess.SLIDING_PIECES:
        for square in piece_squares[symbol]:
            board.sliding_piece_scopes[(symbol, square)] = []

    # Visit the pieces of each colour from the most to the least valuable
    # one, so that a cheaper attacker overwrites a more expensive one and
    # each grid ends up holding the lowest-valued attacker.
    for symbol in reversed(chess.PIECES):
        if symbol in chess.WHITE_PIECES:
            mover_color, grid = chess.WHITE, min_white_attacker_of
        else:
            mover_color, grid = chess.BLACK, min_black_attacker_of
        value = piece_values[symbol]
        steps = chess.PIECE_MOVEMENTS[symbol]
        slides = symbol in chess.SLIDING_PIECES

        for square in piece_squares[symbol]:
            row, col = __to_coord(square)
            for d_row, d_col in steps:
                if slides:
                    # Keep going in this direction and remember how far
                    # the piece got.
                    board.sliding_piece_scopes[(symbol, square)].append(
                        slide(
                            grid, mover_color,
                            row, d_row,
                            col, d_col,
                            value
                        )
                    )
                else:
                    # A single step per movement is enough.
                    mark(grid, row + d_row, col + d_col, value)

    white_values = min_white_attacker_of.flatten().astype(int).tolist()
    black_values = min_black_attacker_of.flatten().astype(int).tolist()
    board.min_attacker_of = list(zip(black_values, white_values))

In [29]:
def _init_square_data(board):
    '''
    Gathers basic square information about the position and stores it on
    `board`. Besides `board.piece_squares` (described below) it calls
    `__init_attackers_and_scope`, which attaches
    `board.min_attacker_of` and `board.sliding_piece_scopes`.

    `board.piece_squares`:

        A list of `(piece, square)` tuples. Pieces are grouped in the
        order of `chess.PIECES` -- 'P', 'N', 'B' ... 'p', 'n', 'b' ...
        Each kind is padded with `chess.MISSING_PIECE_SQUARE` up to
        `chess.PIECE_CAPACITY[piece]` entries (8 pawns, 3 knights,
        2 queens, etc.); a kind with more pieces on the board than its
        capacity keeps all of them.

        Within a kind the squares are randomly permuted with
        `np.random.permutation`, so the first entries are guaranteed to
        be 'P' but their squares come in random order.
    '''
    # Squares of the pieces that are really on the board, per symbol.
    on_board = {symbol : [] for symbol in chess.PIECES}
    for square in chess.SQUARES:
        occupant = board.piece_at(square)
        if occupant is None:
            continue
        on_board[occupant.symbol()].append(square)

    # The attacker/scope computation must only see real pieces, so it runs
    # before any placeholder square is added.
    __init_attackers_and_scope(board, on_board)

    # Pad each kind up to its capacity with the placeholder square, shuffle
    # the squares of that kind and emit them in `chess.PIECES` order.
    ordered_slots = []
    for symbol in chess.PIECES:
        squares = on_board[symbol]
        padding = (
            [chess.MISSING_PIECE_SQUARE]
            * (chess.PIECE_CAPACITY[symbol] - len(squares))
        )
        shuffled = np.random.permutation(squares + padding).tolist()
        ordered_slots.extend((symbol, square) for square in shuffled)

    board.piece_squares = ordered_slots

In [30]:
def __to_coord(square):
    '''
    The row-major coordinate of square. Used by `_piece_lists()`.
    Example: `__to_coord(chess.A1) == (0, 0)`.
            `__to_coord(chess.H8) == (7, 7)`.
    '''
    return (8 - square // 8 - 1, square % 8)

In [31]:
def __direction(from_square, to_square):
    '''
    The direction travelled when going from `from_square` to
    `to_square`, as an index v in 0..7 such that the direction is
    <cos(v * pi / 4), sin(v * pi / 4)>, with x growing with the column
    and y growing towards row 0 of `__to_coord`.

    The displacement is reduced to a unit step by floor-dividing both
    components by the larger absolute component. Identical squares
    therefore raise `ZeroDivisionError`, and a reduced step of (0, 0)
    raises `KeyError`.
    '''
    (from_row, from_col), (to_row, to_col) = (
        __to_coord(from_square), __to_coord(to_square)
    )
    d_row, d_col = to_row - from_row, to_col - from_col
    longest = max(abs(d_row), abs(d_col))
    unit_step = (d_row // longest, d_col // longest)

    # Unit steps listed counter-clockwise, starting with "one column to the
    # right"; the position in the list is the direction index.
    direction_of = {
        step : index
        for index, step in enumerate((
            (0, 1), (-1, 1), (-1, 0), (-1, -1),
            (0, -1), (1, -1), (1, 0), (1, 1)
        ))
    }
    return direction_of[unit_step]

In [32]:
def attack_and_defend_maps(board):
    '''
    The value of the lowest-valued attacker and defender of each square;
    by default, 0.
    The maps are from white's point of view: the attack map holds the
    value of the lowest-valued black piece attacking the square, the
    defend map the value of the lowest-valued white piece defending it.
    Sliding attacks stop at the first piece they meet.

    For the following 4x4 board, R B . .
                                 . . b .
                                 q k . .
                                 . . . P,
    the attack map would be  5 3 5 3
                             5 5 6 0
                             6 3 6 3
                             5 5 6 0,
    and the defend map  0 4 0 0
                        3 0 3 0
                        4 0 1 0
                        0 0 0 0.

    Returns a dict with the keys `{r}_{c}_min_attacker` and
    `{r}_{c}_min_defender`, where `r, c = divmod(k, 8)` for each entry
    `k` of `board.min_attacker_of`; the values are `np.int8`. Example:

        {
            '0_0_min_attacker': 5,
            '0_0_min_defender': 4,
            '0_1_min_attacker': 3,
            '0_1_min_defender': 4,
            ...
        }

    Number of features contributed: 128. Sixty-four integers for each
    the attack and defend maps.
    '''
    maps = dict()
    for square in chess.SQUARES:
        row, col = divmod(square, 8)
        cheapest = board.min_attacker_of[square]
        maps[f'{row}_{col}_min_attacker'] = np.int8(cheapest[chess.BLACK])
        maps[f'{row}_{col}_min_defender'] = np.int8(cheapest[chess.WHITE])
    return maps

In [33]:
def sliding_pieces_mobility(board):
    '''
    How far each {white, black} {bishop, rook, queen} can slide in each
    of its directions.

    Every piece on the board takes the next free slot of its colour, in
    the order of `board.piece_squares`. A slot contributes eight
    `np.int8` values under the keys `{slot}_{colour}_D0` ..
    `{slot}_{colour}_D7`:
        * for a sliding piece, the scopes stored in
          `board.sliding_piece_scopes`, in that order, followed by -1
          for the directions the piece does not have;
        * -1 everywhere for a non-sliding piece and for the slots left
          over once all pieces have been placed (16 slots per colour).

    Example (a queen in white's first slot):

        {
            '0_white_D0': 5,
            '0_white_D1': 2,
            ...
            '0_white_D7': 2,
            '0_black_D0': 3,
            ...
        }

    Number of features contributed: 16 * 8 * 2 = 256.
    '''
    mobilities = dict()
    slots_used = {'white': 0, 'black': 0}

    for piece, square in board.piece_squares:
        if square == chess.MISSING_PIECE_SQUARE:
            continue

        colour = (
            'white' if board.piece_at(square).color == chess.WHITE
            else 'black'
        )
        prefix = f'{slots_used[colour]}_{colour}'

        if piece in chess.SLIDING_PIECES:
            scopes = board.sliding_piece_scopes[(piece, square)]
        else:
            scopes = ()

        # Real scopes first, then -1 up to eight directions.
        directions_done = 0
        for reach in scopes:
            mobilities[f'{prefix}_D{directions_done}'] = np.int8(reach)
            directions_done += 1
        for direction in range(directions_done, 8):
            mobilities[f'{prefix}_D{direction}'] = np.int8(-1)

        slots_used[colour] += 1

    # Slots that no piece claimed.
    for colour in ('white', 'black'):
        for slot in range(slots_used[colour], 16):
            for direction in range(8):
                mobilities[f'{slot}_{colour}_D{direction}'] = np.int8(-1)

    return mobilities

In [34]:
def piece_lists(board):
    '''
    For each possible piece slot (16 per colour):
    1. Its type ('P', 'N', 'B', 'R', 'Q', 'K' or lower case). By
       default, -1. (This is a dummy value.)
    2. Its row-major, zero-indexed coordinate. By default, (-1, -1).
    3. Whether the piece is on the board.
    4. The values of the minimum-valued attacker and defender of the
       piece. By default, (-1, -1). The attacker is the cheapest piece
       of the opposite colour attacking the piece's square, the
       defender the cheapest piece of its own colour doing so; 0 means
       there is none.

    Pieces on the board take the next free slot of their colour, in the
    order of `board.piece_squares`; the remaining slots keep the
    defaults.

    Returns a dict, for example (a pawn in white's first slot):

        {
            '0_white_type' : 'P',
            '0_white_row' : 4,
            '0_white_col' : 2,
            '0_white' : True,
            '0_white_min_attacker' : 0,
            '0_white_min_defender' : 2,
            ...
        }

    Number of features contributed: 16 * (1 + 2 + 1 + 2) * 2 = 192
    6 features for each one of the 16 pieces of each color.
    '''
    features = dict()
    slots_used = {'white': 0, 'black': 0}

    for piece, square in board.piece_squares:
        if square == chess.MISSING_PIECE_SQUARE:
            continue

        row, col = __to_coord(square)
        # `board.min_attacker_of` is flattened in `__to_coord` row-major
        # order, so it has to be indexed with `row * 8 + col`. Indexing it
        # with the chess square number would read the values of the
        # vertically mirrored square.
        black_value, white_value = board.min_attacker_of[row * 8 + col]

        if board.piece_at(square).color == chess.WHITE:
            colour, attacker, defender = 'white', black_value, white_value
        else:
            colour, attacker, defender = 'black', white_value, black_value

        feature_key = f'{slots_used[colour]}_{colour}'
        slots_used[colour] += 1

        features[feature_key + '_type'] = piece
        features[feature_key + '_row'] = np.int8(row)
        features[feature_key + '_col'] = np.int8(col)
        features[feature_key] = True
        features[feature_key + '_min_attacker'] = np.int8(attacker)
        features[feature_key + '_min_defender'] = np.int8(defender)

    # Default values for the slots that no piece claimed.
    for colour in ('black', 'white'):
        for slot in range(slots_used[colour], 16):
            feature_key = f'{slot}_{colour}'
            features[feature_key + '_type'] = -1
            features[feature_key + '_row'] = np.int8(-1)
            features[feature_key + '_col'] = np.int8(-1)
            features[feature_key] = False
            features[feature_key + '_min_attacker'] = np.int8(-1)
            features[feature_key + '_min_defender'] = np.int8(-1)

    return features

In [35]:
def count_pieces(fen: str) -> dict:
    '''
    The number of each piece on the board.

    Only the first field of `fen` (the piece placement) is read: rank
    separators and empty-square digits are skipped and every other
    character is counted as a piece.

    Returns:
        A dict keyed 'K', 'Q', 'R', 'B', 'N', 'P', 'k', 'q', 'r', 'b',
        'n', 'p' (in that order) whose values are `np.int8` counts.

    Raises:
        IndexError: if `fen` is empty or only whitespace.
        KeyError: if the placement contains a character that is
            neither a piece symbol, a digit nor '/'.

    Number of features contributed: 12. Six types of pieces for each
    side.
    '''
    piece_count = dict.fromkeys(
        ('K', 'Q', 'R', 'B', 'N', 'P', 'k', 'q', 'r', 'b', 'n', 'p'), 0
    )

    placement = fen.split()[0]
    for symbol in placement:
        if symbol == '/' or symbol.isdigit():
            continue
        piece_count[symbol] += 1

    return {piece: np.int8(count) for piece, count in piece_count.items()}

In [36]:
def side_to_move(board: chess.Board):
    '''
    Whose turn it is, taken straight from `board.turn`: True when White
    is to move.

    Number of features contributed: 1.
    '''
    return board.turn

In [37]:
def castling_rights(board:chess.Board):
    '''
    Castling rights of both players as a list of four booleans, in the
    order: White kingside, White queenside, Black kingside, Black
    queenside. An entry is True if that player still has the right to
    castle on that side.

    Number of features contributed: 4.
    '''
    return [
        has_right(color)
        for color in (chess.WHITE, chess.BLACK)
        for has_right in (
            board.has_kingside_castling_rights,
            board.has_queenside_castling_rights
        )
    ]

In [38]:
def extract_features_from_board(row: pd.Series) -> pd.Series:
    '''
    Fills the feature columns of one game row from the FEN stored in
    `row['Board']`.

    The row is modified in place and also returned, so the function can
    be passed to a row-wise `apply`. 'SideToMove' and the four castling
    flags are assigned directly; the piece counts, piece lists, sliding
    mobilities and attack/defend maps are written with `row.update`,
    which only touches labels that already exist in `row`.
    '''
    fen_board = row['Board']
    board = chess.Board(fen_board)

    # Attaches `piece_squares`, `min_attacker_of` and `sliding_piece_scopes`
    # to `board`; the extractors below read them.
    _init_square_data(board)

    row['SideToMove'] = side_to_move(board)
    (
        row['white_kingside_castling_rights'],
        row['white_queenside_castling_rights'],
        row['black_kingside_castling_rights'],
        row['black_queenside_castling_rights']
    ) = castling_rights(board)

    # Collect the dict-shaped feature groups into one mapping; on a key
    # clash the later group wins.
    merged_features = {}
    for feature_group in (
        count_pieces(fen_board),
        piece_lists(board),
        sliding_pieces_mobility(board),
        attack_and_defend_maps(board)
    ):
        merged_features.update(feature_group)

    row.update(merged_features)
    return row

In [39]:
n = 100000  # chunk size: number of rows handed to the extraction loop per pass
#list_df = [df[i:i+n] for i in range(0,df.shape[0],n)]

In [40]:
# Extract the features `n` rows at a time, printing the first row of each
# chunk as a coarse progress indicator.
for chunk_start in range(0, df.shape[0], n):
    print(chunk_start)
    chunk_rows = slice(chunk_start, chunk_start + n)
    df[chunk_rows] = df[chunk_rows].swifter.apply(
        extract_features_from_board, axis=1
    )

0


Pandas Apply: 100%|██████████| 45419/45419 [03:01<00:00, 249.70it/s]


In [41]:
df.head()

Unnamed: 0,Id,MoveNumber,WhiteElo,BlackElo,Result,Board,K,Q,R,B,...,7_3_min_attacker,7_3_min_defender,7_4_min_attacker,7_4_min_defender,7_5_min_attacker,7_5_min_defender,7_6_min_attacker,7_6_min_defender,7_7_min_attacker,7_7_min_defender
0,https://lichess.org/hh07xjwa-32,32,1617,1614,1-0,r2r2k1/pp3ppp/8/8/8/2N2nP1/PP3P1P/R1B2RK1 w - ...,1,0,2,1,...,4,2,2,4,0,6,2,4,0,6
1,https://lichess.org/14zvqvvn-83,83,1514,1862,0-1,4r1k1/5pp1/1R5p/8/7P/1pB3P1/5PK1/1r6 b - - 1 42,1,0,1,1,...,4,0,4,3,4,6,4,6,4,6
2,https://lichess.org/5zd3wjlp-56,56,1786,1714,1-0,4R3/5ppk/p4b1p/1p3q2/2nB4/Q7/PPP2P1P/1K6 w - -...,1,1,1,1,...,0,0,0,4,0,0,0,0,0,0
3,https://lichess.org/9biaj2oe-88,88,1339,1665,1-0,8/3r2Q1/4kP2/pp5R/P7/8/6PP/6K1 w - - 9 45,1,1,1,0,...,4,0,0,0,0,6,0,0,0,6
4,https://lichess.org/f3eb85pu-48,48,1515,1617,0-1,r3k3/5p1p/p3p3/1pb1q3/3p4/1B1P2r1/PPPN2K1/R7 w...,1,0,1,1,...,0,4,5,4,0,2,0,4,0,4


In [42]:
# Downcast every int64 column to the smallest integer dtype that can hold its
# values (int8 where possible, a wider type otherwise).
int_columns = df.select_dtypes(include=['int64']).columns
df[int_columns] = df[int_columns].apply(
    lambda column: pd.to_numeric(column, downcast='integer')
)

In [44]:
df.info()
#df.dtypes.to_csv('dtypes.csv')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45419 entries, 0 to 45418
Columns: 599 entries, Id to 7_7_min_defender
dtypes: bool(37), int16(3), int8(524), object(35)
memory usage: 36.7+ MB


In [None]:
#df.head()
#show dataframe only with type column
#df[df.columns[df.columns.str.contains('type')]].head()
#df.info()

# Write the feature table to disk in two formats: a pickle and a CSV.
df.to_pickle('../data/5_1_extra_features_testes.pkl')
df.to_csv('../data/5_1_extra_features_testes.csv')

In [None]:
# show only the columns K,k,Q,q.... in df
#df[['K', 'Q', 'R', 'B', 'N', 'P', 'k', 'q', 'r', 'b', 'n', 'p',]].where(df['b'] == 3).count()

In [None]:
#len(df.columns.to_list())