In [1]:
import pandas as pd
import re
from datetime import datetime
from docx import Document



In [2]:
# Pattern for a dollar amount: digits with optional thousands separators and an
# optional fractional part (6, 6.00, 1,234.56), or a bare fraction (.50).
_AMOUNT_PATTERN = r'(\d[\d,]*(?:\.\d+)?|\.\d+)'


def _amount_to_float(amount_text):
    """Convert text captured by _AMOUNT_PATTERN to a float, dropping commas."""
    return float(amount_text.replace(',', ''))


def parse_hand(hand_text):
    """Parse the text of a single hand into a one-row DataFrame.

    Parameters
    ----------
    hand_text : str
        Text of one hand, lines separated by '\\n'. The first line is searched
        for the blinds; everything after '*** SUMMARY ***' is searched for the
        seats, pot, winner and board.

    Returns
    -------
    pandas.DataFrame
        One row with the columns 'Hand number', 'Datetime', 'Game type',
        'Big blind', 'Number of players', 'Pot size', 'Winner' and 'Board'.
        Fields that cannot be found are None, except 'Winner' ("Unknown"),
        'Board' ("No flop") and 'Number of players' (0).
    """
    # Hand number: the digits following "Hand #".
    hand_number_match = re.search(r'Hand #(\d+)', hand_text)
    hand_number = int(hand_number_match.group(1)) if hand_number_match else None

    # Timestamp: the first "YYYY/MM/DD HH:MM:SS" anywhere in the hand text.
    datetime_match = re.search(r'\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}', hand_text)
    datetime_obj = (
        datetime.strptime(datetime_match.group(0), '%Y/%m/%d %H:%M:%S')
        if datetime_match
        else None
    )

    # Big blind: the dollar amount that follows a "/" on the first line.
    # Accepts whole-dollar and comma-separated amounts, not only "d.dd".
    first_line = hand_text.split('\n')[0]
    big_blind_match = re.search(r'/\$' + _AMOUNT_PATTERN, first_line)
    big_blind = _amount_to_float(big_blind_match.group(1)) if big_blind_match else None

    # Everything after the summary marker; empty when the marker is missing so
    # the searches below fall through to their defaults.
    summary_text = hand_text.split('*** SUMMARY ***')[1] if '*** SUMMARY ***' in hand_text else ""

    # Number of players: count of "Seat N:" entries in the summary section.
    player_count = len(re.findall(r'Seat \d+:', summary_text))

    # Pot size: thousands separators are accepted and a trailing period is not
    # captured, so "$1,152.72" parses as 1152.72 instead of 1.0.
    pot_size_match = re.search(r'Total pot \$' + _AMOUNT_PATTERN, summary_text)
    pot_size = _amount_to_float(pot_size_match.group(1)) if pot_size_match else None

    # Winner: first seat line containing " won". Only the text up to the first
    # space after "Seat N: " is captured as the name.
    winner_match = re.search(r'Seat \d+: ([^ ]+) .+ won', summary_text)
    winner = winner_match.group(1) if winner_match else "Unknown"

    # Board: the cards inside "Board [...]"; no board line is reported as "No flop".
    board_match = re.search(r'Board \[([^\]]+)\]', summary_text)
    board = board_match.group(1) if board_match else "No flop"

    return pd.DataFrame([{
        'Hand number': hand_number,
        'Datetime': datetime_obj,
        # The game type is a fixed label; it is not read from the hand text.
        'Game type': 'Omaha Reshuffle (Pot Limit)',
        'Big blind': big_blind,
        'Number of players': player_count,
        'Pot size': pot_size,
        'Winner': winner,
        'Board': board
    }])

In [3]:
# Initialize an empty DataFrame whose columns match the fields produced by parse_hand
df = pd.DataFrame(
    columns=[
        'Hand number',
        'Datetime',
        'Game type',
        'Big blind',
        'Number of players',
        'Pot size',
        'Winner',
        'Board',
    ]
)



In [4]:
# Path to the Word document that holds the hand histories.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs where this exact file exists.
doc_path = r'C:\Users\Gamer PC\Desktop\Project-Three\Fabian_hands.docx'
# Open the document with python-docx so its paragraphs can be read.
doc = Document(doc_path)

In [5]:
# Split the document's paragraphs into one group of lines per hand. A paragraph
# whose stripped text starts with 'Hand #' opens a new hand.
hands = []            # one list of stripped paragraph texts per hand
current_hand = None   # stays None until the first 'Hand #' paragraph is seen
for para in doc.paragraphs:
    line = para.text.strip()
    # The header test uses the stripped text, so an indented header still
    # starts a new hand.
    if line.startswith('Hand #'):
        current_hand = []
        hands.append(current_hand)
    # Paragraphs ahead of the first hand header are skipped rather than being
    # parsed into a row with no hand number.
    if current_hand is not None:
        current_hand.append(line)

# Parse every hand, then concatenate once instead of growing df inside the
# loop, which re-copies the whole frame on each iteration.
hand_frames = [parse_hand('\n'.join(lines)) for lines in hands]
if hand_frames:
    # While df is still empty it is left out of the concat, so the result takes
    # its dtypes from the parsed rows; existing rows in df are kept otherwise.
    frames = hand_frames if df.empty else [df, *hand_frames]
    df = pd.concat(frames, ignore_index=True)

# display the first few rows of the DataFrame to verify
df.head()


Unnamed: 0,Hand number,Datetime,Game type,Big blind,Number of players,Pot size,Winner,Board
0,2084361933,2024-04-10 05:57:03,Omaha Reshuffle (Pot Limit),6.0,4,152.72,Src89,Th 6c 3d 6d Ts
1,2084361940,2024-04-10 05:57:04,Omaha Reshuffle (Pot Limit),6.0,5,42.75,FartingFornicator,Js 9s 5c 9c
2,2084362019,2024-04-10 05:57:15,Omaha Reshuffle (Pot Limit),10.0,6,184.5,RungoodWIZ,3s 5h 8h Ac 5s
3,2084362159,2024-04-10 05:57:37,Omaha Reshuffle (Pot Limit),10.0,6,157.0,Keypicks,Ac 7c 5d 5h 8c
4,2084362197,2024-04-10 05:57:43,Omaha Reshuffle (Pot Limit),4.0,5,10.0,dvz7,No flop


In [6]:
# Write the parsed hands to a CSV file, leaving out the DataFrame index column
df.to_csv(
    'C:/Users/Gamer PC/Desktop/Project-Three/poker_hands_exported.csv',
    index=False,
)