In [1]:
#help from https://codereview.stackexchange.com/questions/183668/parse-complex-text-files-using-python

import re
import pandas as pd
from collections import defaultdict

In [20]:
class _RegExLib:
    """Run every known pattern against one line of chat text.

    Both patterns are applied with ``match``, i.e. anchored at the start of
    the string that is passed in.

    Attributes
    ----------
    player : re.Match or None
        Set when the line contains a colon and no ``http``; ``group(0)`` is
        the text from the start of the line up to and including the *last*
        colon on that line.
    diceroll : re.Match or None
        Set when the line starts with ``rolling `` and a ``d`` follows
        somewhere later on the same line.
    """
    # Paste a pattern into https://regexper.com to get a diagram of it.
    _reg_player = re.compile('^(?!.*http)^.*:')  # speaker labels, but never links
    _reg_diceroll = re.compile('rolling .*d.*')

    def __init__(self, line):
        # Try each compiled pattern against the line and expose the outcome
        # (a match object or None) under the corresponding attribute name.
        checks = (
            ("player", self._reg_player),
            ("diceroll", self._reg_diceroll),
        )
        for attribute, pattern in checks:
            setattr(self, attribute, pattern.match(line))

In [21]:
def parse(filepath):
    """
    Parse text at given filepath

    The file is scanned line by line. The most recent player line is
    remembered, and every all-digit line found inside a dice-roll block is
    recorded against that player.

    Parameters
    ----------
    filepath : str
        Filepath for file to be parsed

    Returns
    -------
    roll_data : collections.defaultdict of list of str
        Maps each player label (the text matched by ``_RegExLib.player``,
        trailing colon included) to the roll values recorded for that
        player, in file order. Values are kept as the digit strings read
        from the file; they are not converted to ``int``.

    """

    roll_data = defaultdict(list)
    player = None    # label of the most recent player line; None until one is seen
    in_roll = False  # True from a "rolling ..." line until a line containing ")"

    with open(filepath, 'r') as file:
        # Iterating the file yields the same lines as repeated readline() calls.
        for line in file:
            reg_match = _RegExLib(line)

            if reg_match.player:
                # Keep only the matched text; the match object is not needed later.
                player = reg_match.player.group(0)

            if reg_match.diceroll:
                # Dice rolls are often laid out as "(", "##", ")+##" on
                # separate lines, where ## is a number, so a roll spans lines.
                in_roll = True

            if in_roll:
                text = line.strip()
                # A roll that appears before any player line cannot be
                # attributed to anyone, so it is skipped rather than raising
                # UnboundLocalError on the not-yet-assigned player.
                if text.isdigit() and player is not None:
                    roll_data[player].append(text)

                if ")" in text:
                    # Closing parenthesis marks the end of the roll block.
                    in_roll = False

    return roll_data

In [22]:
# Chat log to analyse; the path is resolved against the current working directory.
filepath = 'IXChats.txt'

# Collect the recorded rolls, grouped under the player label they follow.
data = parse(filepath=filepath)

# Show the raw mapping so the result of the parse can be inspected.
print(data)

defaultdict(<class 'list'>, {'JonnyBadger H. (GM):': ['13', '2', '1', '15', '19', '6'], 'face t.:': ['5'], 'Jason M.:': ['17', '10'], 'Mitch:': ['2', '13', '11', '18', '8', '15', '7', '6', '8', '6', '2', '10', '16', '82', '14', '19', '7', '1', '20', '18', '33', '3', '12', '10', '3', '5', '1', '16', '14', '2', '2', '17', '19', '3', '20', '2', '16', '2', '16', '3', '1', '6', '5', '5', '2', '12', '6', '5', '3', '3', '1', '1', '4', '4'], 'JonnyBadger (GM):': ['17', '4', '2', '1', '7', '19', '4', '15', '20', '1', '12', '7', '15', '14', '1', '3', '12', '8', '7', '1', '2', '3', '2', '1', '3', '5', '7', '6', '5', '3', '1', '4', '8', '9', '12', '8', '4', '6', '4', '9', '2', '3', '3', '4', '2', '3', '3', '1', '2'], 'Merkatroid Skittle:': ['4', '1', '9', '18', '12', '1', '4', '20', '17', '16', '13', '19', '3', '3', '6', '8', '19', '8', '9', '1', '9', '5', '1', '10', '1', '16', '10', '12', '17', '13', '16', '19', '20', '12', '16', '7', '5', '18', '2', '17', '8', '11', '2', '3', '16', '12', '19', '