In [1]:
import pandas as pd
import numpy as np
import re
from datetime import datetime, timedelta
import chess.pgn
import io
import csv

### Formatting Data
Pandas dataframe. Currently, data is stored as a list of strings.

Game Dataframe:
- Player names and elos
- PGN
- Opening

Move Dataframe:
- Player name and elo
- Who is to move
- White time remaining
- Black time remaining
- White total material
- Black total material
- Current position
- Number of reasonable engine recommended moves (maybe?)
- Position complexity
- Move time (response)

In [2]:
games = pd.DataFrame(columns=["White Name", "Black Name", "ECO Opening", "White Elo", "Black Elo", "PGN", "Game Length"])

with open("games_condensed.txt", "r") as file:
    for line in file:
        line = line.split(",")

        # Don't include wacky variants like Chess960
        if "Variant" in line[3]:
            continue
        
        # Remove unnecessary features
        indices=[2, 3, 4, 6, 7, 8, 11, 12, 13, 14, 15, 16, 17, 19]  # Manually found
        for i in sorted(indices, reverse=True):
            del line[i]

        # Parse strings to get valuable information
        pattern = r'\\"(.*?)\\"'  # This string pattern encloses all important info
        for i in range(0, 5):
            match = re.search(pattern, line[i])
            if match:
                line[i] = match.group(1)
        
        pattern = r'(\d+)\. '
        match = re.findall(pattern, line[5])
        line.append(int(match[-1]))
        
        if line[6] >= 5 and line[6] <= 80:
            games.loc[games.shape[0]] = line

In [3]:
games.head()

Unnamed: 0,White Name,Black Name,ECO Opening,White Elo,Black Elo,PGN,Game Length
0,colinsong1,TimeFish_576,B06,1604,1609,"""1. e4 {[%clk 0:03:00]} 1... g6 {[%clk 0:02:5...",12
1,colinsong1,Octopus6666,B22,1596,1600,"""1. e4 {[%clk 0:03:00]} 1... c5 {[%clk 0:03:0...",36
2,TimeFish_576,kimmmanhhh,A21,1564,1522,"""1. c4 {[%clk 0:03:00]} 1... e5 {[%clk 0:03:0...",64
3,TimeFish_576,Tavusheci,A15,1548,1563,"""1. c4 {[%clk 0:03:00]} 1... c5 {[%clk 0:02:5...",27
4,Octopus6666,S11chandru,D02,1625,1607,"""1. d4 {[%clk 0:03:00]} 1... d5 {[%clk 0:03:0...",42


## PGN to FEN Files

In [6]:
PGN_lst = games['PGN']

In [7]:
games['FEN'] = 0
games.head()

Unnamed: 0,White Name,Black Name,ECO Opening,White Elo,Black Elo,PGN,Game Length,FEN
0,colinsong1,TimeFish_576,B06,1604,1609,"""1. e4 {[%clk 0:03:00]} 1... g6 {[%clk 0:02:5...",12,0
1,colinsong1,Octopus6666,B22,1596,1600,"""1. e4 {[%clk 0:03:00]} 1... c5 {[%clk 0:03:0...",36,0
2,TimeFish_576,kimmmanhhh,A21,1564,1522,"""1. c4 {[%clk 0:03:00]} 1... e5 {[%clk 0:03:0...",64,0
3,TimeFish_576,Tavusheci,A15,1548,1563,"""1. c4 {[%clk 0:03:00]} 1... c5 {[%clk 0:02:5...",27,0
4,Octopus6666,S11chandru,D02,1625,1607,"""1. d4 {[%clk 0:03:00]} 1... d5 {[%clk 0:03:0...",42,0


## This chunk adds a complete FEN_lst for every game.

In [10]:
for i in range(len(PGN_lst)):
    this_game = chess.pgn.read_game(io.StringIO(PGN_lst[i]))
    
    # For each game, iterate through all moves and play them on a board.
    fen_lst = []
    board = this_game.board()
    for move in this_game.mainline_moves():
        board.push(move)
        fen_lst.append(board.fen())
    games['FEN'][i] = fen_lst

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  games['FEN'][i] = fen_lst
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)


In [11]:
games['FEN'][0]

['rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR b KQkq - 0 1',
 'rnbqkbnr/pppppp1p/6p1/8/4P3/8/PPPP1PPP/RNBQKBNR w KQkq - 0 2',
 'rnbqkbnr/pppppp1p/6p1/8/4P3/2N5/PPPP1PPP/R1BQKBNR b KQkq - 1 2',
 'rnbqk1nr/ppppppbp/6p1/8/4P3/2N5/PPPP1PPP/R1BQKBNR w KQkq - 2 3',
 'rnbqk1nr/ppppppbp/6p1/8/2B1P3/2N5/PPPP1PPP/R1BQK1NR b KQkq - 3 3',
 'rnbqk1nr/pppp1pbp/4p1p1/8/2B1P3/2N5/PPPP1PPP/R1BQK1NR w KQkq - 0 4',
 'rnbqk1nr/pppp1pbp/4p1p1/8/2B1P3/2NP4/PPP2PPP/R1BQK1NR b KQkq - 0 4',
 'rnbqk2r/ppppnpbp/4p1p1/8/2B1P3/2NP4/PPP2PPP/R1BQK1NR w KQkq - 1 5',
 'rnbqk2r/ppppnpbp/4p1p1/8/2B1P3/2NPB3/PPP2PPP/R2QK1NR b KQkq - 2 5',
 'rnbq1rk1/ppppnpbp/4p1p1/8/2B1P3/2NPB3/PPP2PPP/R2QK1NR w KQ - 3 6',
 'rnbq1rk1/ppppnpbp/4p1p1/8/2B1P3/2NPB3/PPPQ1PPP/R3K1NR b KQ - 4 6',
 'r1bq1rk1/ppppnpbp/2n1p1p1/8/2B1P3/2NPB3/PPPQ1PPP/R3K1NR w KQ - 5 7',
 'r1bq1rk1/ppppnpbp/2n1p1p1/8/2B1P3/2NPB3/PPPQ1PPP/2KR2NR b - - 6 7',
 'r1bq1rk1/ppp1npbp/2n1p1p1/3p4/2B1P3/2NPB3/PPPQ1PPP/2KR2NR w - - 0 8',
 'r1bq1rk1/ppp1npbp/2n1p1p1/3P4/2B5/

In [12]:
# Use the explode() method to split the FEN list into separate rows
new_games = games.explode('FEN')
new_games

Unnamed: 0,White Name,Black Name,ECO Opening,White Elo,Black Elo,PGN,Game Length,FEN
0,colinsong1,TimeFish_576,B06,1604,1609,"""1. e4 {[%clk 0:03:00]} 1... g6 {[%clk 0:02:5...",12,rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR ...
0,colinsong1,TimeFish_576,B06,1604,1609,"""1. e4 {[%clk 0:03:00]} 1... g6 {[%clk 0:02:5...",12,rnbqkbnr/pppppp1p/6p1/8/4P3/8/PPPP1PPP/RNBQKBN...
0,colinsong1,TimeFish_576,B06,1604,1609,"""1. e4 {[%clk 0:03:00]} 1... g6 {[%clk 0:02:5...",12,rnbqkbnr/pppppp1p/6p1/8/4P3/2N5/PPPP1PPP/R1BQK...
0,colinsong1,TimeFish_576,B06,1604,1609,"""1. e4 {[%clk 0:03:00]} 1... g6 {[%clk 0:02:5...",12,rnbqk1nr/ppppppbp/6p1/8/4P3/2N5/PPPP1PPP/R1BQK...
0,colinsong1,TimeFish_576,B06,1604,1609,"""1. e4 {[%clk 0:03:00]} 1... g6 {[%clk 0:02:5...",12,rnbqk1nr/ppppppbp/6p1/8/2B1P3/2N5/PPPP1PPP/R1B...
...,...,...,...,...,...,...,...,...
4573,vinicius-valle,bayesschack,B23,1557,1574,"""1. e4 {[%clk 0:03:00]} 1... c5 {[%clk 0:02:5...",24,r2qr1k1/3b2b1/4pnp1/2pP2Np/7P/pP3Q2/P1B2PP1/BR...
4573,vinicius-valle,bayesschack,B23,1557,1574,"""1. e4 {[%clk 0:03:00]} 1... c5 {[%clk 0:02:5...",24,r2qr1k1/3b2b1/4p1p1/2pn2Np/7P/pP3Q2/P1B2PP1/BR...
4573,vinicius-valle,bayesschack,B23,1557,1574,"""1. e4 {[%clk 0:03:00]} 1... c5 {[%clk 0:02:5...",24,r2qr1k1/3b2b1/4p1B1/2pn2Np/7P/pP3Q2/P4PP1/BR3R...
4573,vinicius-valle,bayesschack,B23,1557,1574,"""1. e4 {[%clk 0:03:00]} 1... c5 {[%clk 0:02:5...",24,r2q1rk1/3b2b1/4p1B1/2pn2Np/7P/pP3Q2/P4PP1/BR3R...


In [13]:
new_games["Game"] = new_games.index
new_games

Unnamed: 0,White Name,Black Name,ECO Opening,White Elo,Black Elo,PGN,Game Length,FEN,Game
0,colinsong1,TimeFish_576,B06,1604,1609,"""1. e4 {[%clk 0:03:00]} 1... g6 {[%clk 0:02:5...",12,rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR ...,0
0,colinsong1,TimeFish_576,B06,1604,1609,"""1. e4 {[%clk 0:03:00]} 1... g6 {[%clk 0:02:5...",12,rnbqkbnr/pppppp1p/6p1/8/4P3/8/PPPP1PPP/RNBQKBN...,0
0,colinsong1,TimeFish_576,B06,1604,1609,"""1. e4 {[%clk 0:03:00]} 1... g6 {[%clk 0:02:5...",12,rnbqkbnr/pppppp1p/6p1/8/4P3/2N5/PPPP1PPP/R1BQK...,0
0,colinsong1,TimeFish_576,B06,1604,1609,"""1. e4 {[%clk 0:03:00]} 1... g6 {[%clk 0:02:5...",12,rnbqk1nr/ppppppbp/6p1/8/4P3/2N5/PPPP1PPP/R1BQK...,0
0,colinsong1,TimeFish_576,B06,1604,1609,"""1. e4 {[%clk 0:03:00]} 1... g6 {[%clk 0:02:5...",12,rnbqk1nr/ppppppbp/6p1/8/2B1P3/2N5/PPPP1PPP/R1B...,0
...,...,...,...,...,...,...,...,...,...
4573,vinicius-valle,bayesschack,B23,1557,1574,"""1. e4 {[%clk 0:03:00]} 1... c5 {[%clk 0:02:5...",24,r2qr1k1/3b2b1/4pnp1/2pP2Np/7P/pP3Q2/P1B2PP1/BR...,4573
4573,vinicius-valle,bayesschack,B23,1557,1574,"""1. e4 {[%clk 0:03:00]} 1... c5 {[%clk 0:02:5...",24,r2qr1k1/3b2b1/4p1p1/2pn2Np/7P/pP3Q2/P1B2PP1/BR...,4573
4573,vinicius-valle,bayesschack,B23,1557,1574,"""1. e4 {[%clk 0:03:00]} 1... c5 {[%clk 0:02:5...",24,r2qr1k1/3b2b1/4p1B1/2pn2Np/7P/pP3Q2/P4PP1/BR3R...,4573
4573,vinicius-valle,bayesschack,B23,1557,1574,"""1. e4 {[%clk 0:03:00]} 1... c5 {[%clk 0:02:5...",24,r2q1rk1/3b2b1/4p1B1/2pn2Np/7P/pP3Q2/P4PP1/BR3R...,4573


In [2]:
new_games = pd.read_csv("new_games.csv")
new_games.head()

Unnamed: 0,White Name,Black Name,ECO Opening,White Elo,Black Elo,PGN,Game Length,FEN,Game
0,colinsong1,TimeFish_576,B06,1604,1609,"""1. e4 {[%clk 0:03:00]} 1... g6 {[%clk 0:02:5...",12,rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR ...,0
1,colinsong1,TimeFish_576,B06,1604,1609,"""1. e4 {[%clk 0:03:00]} 1... g6 {[%clk 0:02:5...",12,rnbqkbnr/pppppp1p/6p1/8/4P3/8/PPPP1PPP/RNBQKBN...,0
2,colinsong1,TimeFish_576,B06,1604,1609,"""1. e4 {[%clk 0:03:00]} 1... g6 {[%clk 0:02:5...",12,rnbqkbnr/pppppp1p/6p1/8/4P3/2N5/PPPP1PPP/R1BQK...,0
3,colinsong1,TimeFish_576,B06,1604,1609,"""1. e4 {[%clk 0:03:00]} 1... g6 {[%clk 0:02:5...",12,rnbqk1nr/ppppppbp/6p1/8/4P3/2N5/PPPP1PPP/R1BQK...,0
4,colinsong1,TimeFish_576,B06,1604,1609,"""1. e4 {[%clk 0:03:00]} 1... g6 {[%clk 0:02:5...",12,rnbqk1nr/ppppppbp/6p1/8/2B1P3/2N5/PPPP1PPP/R1B...,0


In [45]:
test_pgn = new_games['PGN'][0]
test_pgn # move {clk after}

' "1. e4 {[%clk 0:03:00]} 1... g6 {[%clk 0:02:59.9]} 2. Nc3 {[%clk 0:02:59]} 2... Bg7 {[%clk 0:02:59.6]} 3. Bc4 {[%clk 0:02:58.3]} 3... e6 {[%clk 0:02:59.3]} 4. d3 {[%clk 0:02:57.8]} 4... Ne7 {[%clk 0:02:59.1]} 5. Be3 {[%clk 0:02:57.2]} 5... O-O {[%clk 0:02:58.8]} 6. Qd2 {[%clk 0:02:56.7]} 6... Nbc6 {[%clk 0:02:58.5]} 7. O-O-O {[%clk 0:02:56.3]} 7... d5 {[%clk 0:02:58.2]} 8. exd5 {[%clk 0:02:55]} 8... exd5 {[%clk 0:02:57.5]} 9. Bb5 {[%clk 0:02:51.1]} 9... d4 {[%clk 0:02:56.9]} 10. Bxc6 {[%clk 0:02:50.5]} 10... Nxc6 {[%clk 0:02:55.8]} 11. Bh6 {[%clk 0:02:47.6]} 11... dxc3 {[%clk 0:02:54.5]} 12. b3 {[%clk 0:02:46.6]} 12... cxd2+ {[%clk 0:02:53.7]} 0-1"'

In [83]:
list_of_pgns = new_games['PGN'].unique()
len(list_of_pgns)

times_to_move = [] # the list of deltas.
for i in range(len(list_of_pgns)):
    delta_lst = get_game_time(list_of_pgns[i])
    times_to_move.append(delta_lst)
len(times_to_move)

3934

In [90]:
times_to_move

[[0,
  100,
  1000,
  300,
  700,
  300,
  500,
  200,
  600,
  300,
  500,
  300,
  400,
  300,
  1300,
  700,
  3900,
  600,
  600,
  1100,
  2900,
  1300,
  1000,
  800],
 [0,
  0,
  1100,
  100,
  1000,
  100,
  600,
  100,
  1400,
  500,
  2400,
  2100,
  1900,
  1600,
  100,
  2900,
  2000,
  1600,
  900,
  2100,
  1900,
  3500,
  1400,
  2100,
  4400,
  2500,
  1600,
  15700,
  7700,
  3600,
  3200,
  13400,
  3300,
  5000,
  6000,
  13100,
  2100,
  4100,
  2000,
  100,
  9200,
  1200,
  2200,
  2000,
  13700,
  11500,
  1700,
  9800,
  11200,
  2100,
  1300,
  18700,
  1900,
  4900,
  1100,
  6600,
  4500,
  6200,
  1800,
  300,
  9200,
  2000,
  7000,
  2200,
  600,
  2000,
  2700,
  4000,
  1900,
  13500,
  5600,
  1200],
 [0,
  0,
  300,
  800,
  500,
  500,
  600,
  1600,
  1200,
  2000,
  1100,
  1600,
  600,
  1200,
  500,
  3200,
  2100,
  3400,
  5000,
  6000,
  3400,
  6400,
  2300,
  1600,
  1600,
  2900,
  2200,
  6000,
  2300,
  9000,
  6100,
  2900,
  1300,
  8300

In [91]:
times_to_move[0]
flat_list2 = sum(times_to_move, [])
flat_list2

[0,
 100,
 1000,
 300,
 700,
 300,
 500,
 200,
 600,
 300,
 500,
 300,
 400,
 300,
 1300,
 700,
 3900,
 600,
 600,
 1100,
 2900,
 1300,
 1000,
 800,
 0,
 0,
 1100,
 100,
 1000,
 100,
 600,
 100,
 1400,
 500,
 2400,
 2100,
 1900,
 1600,
 100,
 2900,
 2000,
 1600,
 900,
 2100,
 1900,
 3500,
 1400,
 2100,
 4400,
 2500,
 1600,
 15700,
 7700,
 3600,
 3200,
 13400,
 3300,
 5000,
 6000,
 13100,
 2100,
 4100,
 2000,
 100,
 9200,
 1200,
 2200,
 2000,
 13700,
 11500,
 1700,
 9800,
 11200,
 2100,
 1300,
 18700,
 1900,
 4900,
 1100,
 6600,
 4500,
 6200,
 1800,
 300,
 9200,
 2000,
 7000,
 2200,
 600,
 2000,
 2700,
 4000,
 1900,
 13500,
 5600,
 1200,
 0,
 0,
 300,
 800,
 500,
 500,
 600,
 1600,
 1200,
 2000,
 1100,
 1600,
 600,
 1200,
 500,
 3200,
 2100,
 3400,
 5000,
 6000,
 3400,
 6400,
 2300,
 1600,
 1600,
 2900,
 2200,
 6000,
 2300,
 9000,
 6100,
 2900,
 1300,
 8300,
 1200,
 9200,
 4100,
 11500,
 1400,
 4100,
 3400,
 3000,
 2400,
 9400,
 10200,
 6700,
 800,
 5200,
 8200,
 4300,
 1700,
 1000,
 48

In [16]:
# Group by Game
# games_df = new_games.groupby('Game')
new_games.head()
grouped_by_game = new_games.groupby('Game')

In [26]:
grouped_by_game.count().head(100) # compare the Game length to the number of times per game.

Unnamed: 0_level_0,White Name,Black Name,ECO Opening,White Elo,Black Elo,PGN,Game Length,FEN
Game,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,24,24,24,24,24,24,24,24
1,72,72,72,72,72,72,72,72
2,128,128,128,128,128,128,128,128
3,54,54,54,54,54,54,54,54
4,84,84,84,84,84,84,84,84
5,97,97,97,97,97,97,97,97
6,65,65,65,65,65,65,65,65
7,81,81,81,81,81,81,81,81
8,52,52,52,52,52,52,52,52
9,66,66,66,66,66,66,66,66


In [88]:
for e in times_to_move:
    print(len(e)) # some of the moves, such as in game 5, do not have corresponding delta values. 
    # therefore, you need a way to skip some rows.

24
72
128
54
84
97
65
81
52
66
46
25
61
59
39
75
83
80
81
71
103
135
41
54
68
39
43
78
41
69
39
15
54
146
73
13
17
48
48
91
70
72
80
77
26
73
88
111
83
50
36
126
85
59
94
61
77
25
39
77
36
72
92
155
110
21
54
94
58
114
93
75
90
117
47
35
112
43
44
38
97
17
48
105
76
40
108
123
139
140
57
62
40
107
61
40
44
35
65
70
51
42
87
105
63
115
32
67
39
133
79
51
56
43
95
44
37
93
109
43
66
132
92
97
94
61
60
119
62
65
78
104
41
69
53
107
118
63
71
120
149
27
75
88
91
85
46
18
65
128
79
68
75
38
129
109
66
45
35
23
43
51
115
64
40
55
51
47
75
128
67
81
101
67
84
106
70
91
101
115
123
104
156
68
59
50
65
78
88
96
70
51
42
102
142
113
64
38
32
63
45
98
77
109
90
108
69
74
112
62
49
82
49
119
75
48
151
130
66
51
79
39
126
63
87
58
65
70
100
155
59
78
63
82
61
32
139
83
53
74
60
54
77
102
84
65
50
122
40
36
54
109
41
64
116
120
46
103
68
74
108
97
79
66
90
81
39
47
55
60
42
37
61
70
19
68
37
80
97
41
58
60
49
33
52
151
103
23
48
82
54
61
141
40
40
31
78
71
64
64
47
46
35
24
38
61
94
104
61
24
63
39


In [41]:
new_games[new_games['Game'] == 5]

Unnamed: 0,White Name,Black Name,ECO Opening,White Elo,Black Elo,PGN,Game Length,FEN,Game
362,Octopus6666,Basel,D02,1641,1565,"""1. d4 {[%clk 0:03:00]} 1... d5 {[%clk 0:03:0...",49,rnbqkbnr/pppppppp/8/8/3P4/8/PPP1PPPP/RNBQKBNR ...,5
363,Octopus6666,Basel,D02,1641,1565,"""1. d4 {[%clk 0:03:00]} 1... d5 {[%clk 0:03:0...",49,rnbqkbnr/ppp1pppp/8/3p4/3P4/8/PPP1PPPP/RNBQKBN...,5
364,Octopus6666,Basel,D02,1641,1565,"""1. d4 {[%clk 0:03:00]} 1... d5 {[%clk 0:03:0...",49,rnbqkbnr/ppp1pppp/8/3p4/3P4/5N2/PPP1PPPP/RNBQK...,5
365,Octopus6666,Basel,D02,1641,1565,"""1. d4 {[%clk 0:03:00]} 1... d5 {[%clk 0:03:0...",49,rn1qkbnr/ppp1pppp/8/3p1b2/3P4/5N2/PPP1PPPP/RNB...,5
366,Octopus6666,Basel,D02,1641,1565,"""1. d4 {[%clk 0:03:00]} 1... d5 {[%clk 0:03:0...",49,rn1qkbnr/ppp1pppp/8/3p1b2/3P1B2/5N2/PPP1PPPP/R...,5
367,Octopus6666,Basel,D02,1641,1565,"""1. d4 {[%clk 0:03:00]} 1... d5 {[%clk 0:03:0...",49,rn1qkb1r/ppp1pppp/5n2/3p1b2/3P1B2/5N2/PPP1PPPP...,5
368,Octopus6666,Basel,D02,1641,1565,"""1. d4 {[%clk 0:03:00]} 1... d5 {[%clk 0:03:0...",49,rn1qkb1r/ppp1pppp/5n2/3p1b2/3P1B2/4PN2/PPP2PPP...,5
369,Octopus6666,Basel,D02,1641,1565,"""1. d4 {[%clk 0:03:00]} 1... d5 {[%clk 0:03:0...",49,rn1qkb1r/ppp2ppp/4pn2/3p1b2/3P1B2/4PN2/PPP2PPP...,5
370,Octopus6666,Basel,D02,1641,1565,"""1. d4 {[%clk 0:03:00]} 1... d5 {[%clk 0:03:0...",49,rn1qkb1r/ppp2ppp/4pn2/3p1b2/3P1B2/4PN2/PPPN1PP...,5
371,Octopus6666,Basel,D02,1641,1565,"""1. d4 {[%clk 0:03:00]} 1... d5 {[%clk 0:03:0...",49,rn1qk2r/ppp2ppp/3bpn2/3p1b2/3P1B2/4PN2/PPPN1PP...,5


In [46]:
new_games['PGN'][373] # 49 moves, the last move made by one player.

' "1. d4 {[%clk 0:03:00]} 1... d5 {[%clk 0:03:00]} 2. Nf3 {[%clk 0:02:59.9]} 2... Bf5 {[%clk 0:02:59.6]} 3. Bf4 {[%clk 0:02:59.3]} 3... Nf6 {[%clk 0:02:59.3]} 4. e3 {[%clk 0:02:59.2]} 4... e6 {[%clk 0:02:58.9]} 5. Nbd2 {[%clk 0:02:58.1]} 5... Bd6 {[%clk 0:02:58.1]} 6. Ne5 {[%clk 0:02:51.2]} 6... Nbd7 {[%clk 0:02:55.4]} 7. h3 {[%clk 0:02:46.3]} 7... Ne4 {[%clk 0:02:53]} 8. Be2 {[%clk 0:02:45.3]} 8... f6 {[%clk 0:02:50.8]} 9. Nxd7 {[%clk 0:02:44]} 9... Qxd7 {[%clk 0:02:49.5]} 10. Nxe4 {[%clk 0:02:42.2]} 10... Bxf4 {[%clk 0:02:43.2]} 11. exf4 {[%clk 0:02:39.2]} 11... Bxe4 {[%clk 0:02:39]} 12. O-O {[%clk 0:02:29]} 12... Qc6 {[%clk 0:02:30.1]} 13. c3 {[%clk 0:02:27.3]} 13... Qd6 {[%clk 0:02:25.5]} 14. Qd2 {[%clk 0:02:24.2]} 14... c5 {[%clk 0:02:21.8]} 15. dxc5 {[%clk 0:02:22.6]} 15... Qxc5 {[%clk 0:02:20.7]} 16. b4 {[%clk 0:02:17]} 16... Qb6 {[%clk 0:02:19]} 17. Rac1 {[%clk 0:02:12]} 17... Rc8 {[%clk 0:02:17.2]} 18. a4 {[%clk 0:02:10.6]} 18... O-O {[%clk 0:02:14.4]} 19. Rfd1 {[%clk 0:02:03.

In [47]:
new_games['PGN'][0]

' "1. e4 {[%clk 0:03:00]} 1... g6 {[%clk 0:02:59.9]} 2. Nc3 {[%clk 0:02:59]} 2... Bg7 {[%clk 0:02:59.6]} 3. Bc4 {[%clk 0:02:58.3]} 3... e6 {[%clk 0:02:59.3]} 4. d3 {[%clk 0:02:57.8]} 4... Ne7 {[%clk 0:02:59.1]} 5. Be3 {[%clk 0:02:57.2]} 5... O-O {[%clk 0:02:58.8]} 6. Qd2 {[%clk 0:02:56.7]} 6... Nbc6 {[%clk 0:02:58.5]} 7. O-O-O {[%clk 0:02:56.3]} 7... d5 {[%clk 0:02:58.2]} 8. exd5 {[%clk 0:02:55]} 8... exd5 {[%clk 0:02:57.5]} 9. Bb5 {[%clk 0:02:51.1]} 9... d4 {[%clk 0:02:56.9]} 10. Bxc6 {[%clk 0:02:50.5]} 10... Nxc6 {[%clk 0:02:55.8]} 11. Bh6 {[%clk 0:02:47.6]} 11... dxc3 {[%clk 0:02:54.5]} 12. b3 {[%clk 0:02:46.6]} 12... cxd2+ {[%clk 0:02:53.7]} 0-1"'

In [4]:
# (player 1 delta move times)
# move 1 time = 3:00 - playera[0], 3:00 - playerb[0]
# move 2 time = clk_1 - clk_2 # 3:00 - 2:59

In [5]:
import re

string = '"1. e4 {[%clk 0:03:00]} 1... g6 {[%clk 0:02:59.9]} 2. Nc3 {[%clk 0:02:59]} 2... Bg7 {[%clk 0:02:59.6]} 3. Bc4 {[%clk 0:02:58.3]} 3... e6 {[%clk 0:02:59.3]} 4. d3 {[%clk 0:02:57.8]} 4... Ne7 {[%clk 0:02:59.1]} 5. Be3 {[%clk 0:02:57.2]} 5... O-O {[%clk 0:02:58.8]} 6. Qd2 {[%clk 0:02:56.7]} 6... Nbc6 {[%clk 0:02:58.5]} 7. O-O-O {[%clk 0:02:56.3]} 7... d5 {[%clk 0:02:58.2]} 8. exd5 {[%clk 0:02:55]} 8... exd5 {[%clk 0:02:57.5]} 9. Bb5 {[%clk 0:02:51.1]} 9... d4 {[%clk 0:02:56.9]} 10. Bxc6 {[%clk 0:02:50.5]} 10... Nxc6 {[%clk 0:02:55.8]} 11. Bh6 {[%clk 0:02:47.6]} 11... dxc3 {[%clk 0:02:54.5]} 12. b3 {[%clk 0:02:46.6]} 12... cxd2+ {[%clk 0:02:53.7]} 0-1"'

pattern = r'\[%clk\s+(\d+:\d+:\d+(?:\.\d+)?)\]'

matches = re.findall(pattern, string)

matches = matches
print(matches)

playera = matches[::2]
print(playera)
print(len(playera))
playerb = matches[1::2]
print(playerb)
print(len(playerb))
# subtract the strings


['0:03:00', '0:02:59.9', '0:02:59', '0:02:59.6', '0:02:58.3', '0:02:59.3', '0:02:57.8', '0:02:59.1', '0:02:57.2', '0:02:58.8', '0:02:56.7', '0:02:58.5', '0:02:56.3', '0:02:58.2', '0:02:55', '0:02:57.5', '0:02:51.1', '0:02:56.9', '0:02:50.5', '0:02:55.8', '0:02:47.6', '0:02:54.5', '0:02:46.6', '0:02:53.7']
['0:03:00', '0:02:59', '0:02:58.3', '0:02:57.8', '0:02:57.2', '0:02:56.7', '0:02:56.3', '0:02:55', '0:02:51.1', '0:02:50.5', '0:02:47.6', '0:02:46.6']
12
['0:02:59.9', '0:02:59.6', '0:02:59.3', '0:02:59.1', '0:02:58.8', '0:02:58.5', '0:02:58.2', '0:02:57.5', '0:02:56.9', '0:02:55.8', '0:02:54.5', '0:02:53.7']
12


In [6]:
len(new_games[new_games['Game'] == 0]) # 24
print(len(times_to_move[0]))
len(new_games[new_games['Game'] == 1]) # 72
print(len(times_to_move[1]))
len(new_games[new_games['Game'] == 2]) # 128
len(times_to_move[2])

NameError: name 'times_to_move' is not defined

In [81]:
def get_player_time(list_of_clk_times):
    ### Returns the \delta times in a list
    time_format = '%H:%M:%S.%f'
    time_list = []
    
    initial = datetime.strptime('0:03:00.0', time_format) # append the first time (initial - first_item)
    
    first_item = list_of_clk_times[0]
    if "." not in first_item:
            first_item += ".0" # adding in milliseconds
    
    time_0 = datetime.strptime(first_item, time_format)
    time_diff = initial - time_0
    microseconds_int = int(time_diff.total_seconds() * 1000)
    time_list.append(microseconds_int)
    
    for i in range(len(list_of_clk_times)-1): # '0:03:00' - '0:02:59.9'
        time_str_1 = list_of_clk_times[i] # for each pair of strings
        time_str_2 = list_of_clk_times[i+1]

        if "." not in time_str_1:
            time_str_1 += ".0" # adding in milliseconds
        if "." not in time_str_2:
            time_str_2 += ".0" 

        
        time_1 = datetime.strptime(time_str_1, time_format)
        time_2 = datetime.strptime(time_str_2, time_format)

        time_diff = time_1 - time_2
        microseconds_int = int(time_diff.total_seconds() * 1000)
        time_list.append(microseconds_int)

    return time_list

In [82]:
def get_game_time(input_pgn):
    ### returns delt_time for each move.
    
    pattern = r'\[%clk\s+(\d+:\d+:\d+(?:\.\d+)?)\]'

    matches = re.findall(pattern, input_pgn)

    playera = matches[::2]
    # print(len(playera))

    playerb = matches[1::2]
    # print(len(playerb))
    
    playera_deltas = get_player_time(playera)
    # print(len(playera_deltas))
    playerb_deltas = get_player_time(playerb)
    # print(len(playerb_deltas))
    
    all_deltas = [val for pair in zip(playera_deltas, playerb_deltas) for val in pair]
    
    if len(playera_deltas) > len(playerb_deltas): # if play ends on playera
        all_deltas.append(playera_deltas[-1]) # append final delta of playera
    if len(playera_deltas) < len(playerb_deltas):
        all_deltas.append(playerb_deltas[-1]) # append final delta of playerb
    
    
    return all_deltas
    

In [76]:
input_odd = new_games['PGN'][373]
input_odd

' "1. d4 {[%clk 0:03:00]} 1... d5 {[%clk 0:03:00]} 2. Nf3 {[%clk 0:02:59.9]} 2... Bf5 {[%clk 0:02:59.6]} 3. Bf4 {[%clk 0:02:59.3]} 3... Nf6 {[%clk 0:02:59.3]} 4. e3 {[%clk 0:02:59.2]} 4... e6 {[%clk 0:02:58.9]} 5. Nbd2 {[%clk 0:02:58.1]} 5... Bd6 {[%clk 0:02:58.1]} 6. Ne5 {[%clk 0:02:51.2]} 6... Nbd7 {[%clk 0:02:55.4]} 7. h3 {[%clk 0:02:46.3]} 7... Ne4 {[%clk 0:02:53]} 8. Be2 {[%clk 0:02:45.3]} 8... f6 {[%clk 0:02:50.8]} 9. Nxd7 {[%clk 0:02:44]} 9... Qxd7 {[%clk 0:02:49.5]} 10. Nxe4 {[%clk 0:02:42.2]} 10... Bxf4 {[%clk 0:02:43.2]} 11. exf4 {[%clk 0:02:39.2]} 11... Bxe4 {[%clk 0:02:39]} 12. O-O {[%clk 0:02:29]} 12... Qc6 {[%clk 0:02:30.1]} 13. c3 {[%clk 0:02:27.3]} 13... Qd6 {[%clk 0:02:25.5]} 14. Qd2 {[%clk 0:02:24.2]} 14... c5 {[%clk 0:02:21.8]} 15. dxc5 {[%clk 0:02:22.6]} 15... Qxc5 {[%clk 0:02:20.7]} 16. b4 {[%clk 0:02:17]} 16... Qb6 {[%clk 0:02:19]} 17. Rac1 {[%clk 0:02:12]} 17... Rc8 {[%clk 0:02:17.2]} 18. a4 {[%clk 0:02:10.6]} 18... O-O {[%clk 0:02:14.4]} 19. Rfd1 {[%clk 0:02:03.

In [74]:
len(get_game_time(input_odd))

49
48
49
48


97

In [57]:
string = '"1. e4 {[%clk 0:03:00]} 1... g6 {[%clk 0:02:59.9]} 2. Nc3 {[%clk 0:02:59]} 2... Bg7 {[%clk 0:02:59.6]} 3. Bc4 {[%clk 0:02:58.3]} 3... e6 {[%clk 0:02:59.3]} 4. d3 {[%clk 0:02:57.8]} 4... Ne7 {[%clk 0:02:59.1]} 5. Be3 {[%clk 0:02:57.2]} 5... O-O {[%clk 0:02:58.8]} 6. Qd2 {[%clk 0:02:56.7]} 6... Nbc6 {[%clk 0:02:58.5]} 7. O-O-O {[%clk 0:02:56.3]} 7... d5 {[%clk 0:02:58.2]} 8. exd5 {[%clk 0:02:55]} 8... exd5 {[%clk 0:02:57.5]} 9. Bb5 {[%clk 0:02:51.1]} 9... d4 {[%clk 0:02:56.9]} 10. Bxc6 {[%clk 0:02:50.5]} 10... Nxc6 {[%clk 0:02:55.8]} 11. Bh6 {[%clk 0:02:47.6]} 11... dxc3 {[%clk 0:02:54.5]} 12. b3 {[%clk 0:02:46.6]} 12... cxd2+ {[%clk 0:02:53.7]} 0-1"'
get_game_time(string)

# len(get_game_time(string)) # 12 moves each.

12
12
12
12
12
12
12
12


24

In [86]:
flat_list = sum(times_to_move, [])

In [96]:
len(flat_list) 
# length of move times: 279238
# number of rows: 325455

flat_list

[0,
 100,
 1000,
 300,
 700,
 300,
 500,
 200,
 600,
 300,
 500,
 300,
 400,
 300,
 1300,
 700,
 3900,
 600,
 600,
 1100,
 2900,
 1300,
 1000,
 800,
 0,
 0,
 1100,
 100,
 1000,
 100,
 600,
 100,
 1400,
 500,
 2400,
 2100,
 1900,
 1600,
 100,
 2900,
 2000,
 1600,
 900,
 2100,
 1900,
 3500,
 1400,
 2100,
 4400,
 2500,
 1600,
 15700,
 7700,
 3600,
 3200,
 13400,
 3300,
 5000,
 6000,
 13100,
 2100,
 4100,
 2000,
 100,
 9200,
 1200,
 2200,
 2000,
 13700,
 11500,
 1700,
 9800,
 11200,
 2100,
 1300,
 18700,
 1900,
 4900,
 1100,
 6600,
 4500,
 6200,
 1800,
 300,
 9200,
 2000,
 7000,
 2200,
 600,
 2000,
 2700,
 4000,
 1900,
 13500,
 5600,
 1200,
 0,
 0,
 300,
 800,
 500,
 500,
 600,
 1600,
 1200,
 2000,
 1100,
 1600,
 600,
 1200,
 500,
 3200,
 2100,
 3400,
 5000,
 6000,
 3400,
 6400,
 2300,
 1600,
 1600,
 2900,
 2200,
 6000,
 2300,
 9000,
 6100,
 2900,
 1300,
 8300,
 1200,
 9200,
 4100,
 11500,
 1400,
 4100,
 3400,
 3000,
 2400,
 9400,
 10200,
 6700,
 800,
 5200,
 8200,
 4300,
 1700,
 1000,
 48

In [100]:
len(new_games)

new_games_cond = new_games[:279238]

new_games_cond['Move_Times'] = flat_list

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_games_cond['Move_Times'] = flat_list


In [104]:
# each row is a move. times (millisecs) (delta) added to last col.
new_games_cond.to_csv('new_games_cond.csv', index=False)