In [1]:
# Requirements
# Python 3
# Python libraries: pandas

# Python 3 can be downloaded from python.org
# pandas can be installed by running 'pip install pandas' from the terminal
# (you may need to use pip3 instead of pip)


# How To Use

# Download zip and unzip it. 
# Run setup script: hit shift and right-click in the unzipped folder and click on 'Open PowerShell window here'.
# Type `python fgstSetup.py` in the terminal and wait for it to complete. 
# Run data collection script in PowerShell after each session by running `python fallGuysData.py` 
# (May need to use python3 instead of python)

# Note:
# If you forgot to run the script after an earlier session, first run
# 'python fallGuysData.py Player-prev.log' to process that older log,
# and only then run 'python fallGuysData.py' for your new session.

In [2]:
# Things to do...

# get script to run when Player.log is updated...or just run after each session
# fix variable naming convention to use _s
# new repo for it

In [3]:
def getTZ():
    """Return the local UTC offset in whole hours.

    Uses ``time.altzone`` when ``time.localtime()`` reports a non-zero
    ``tm_isdst`` and ``time.timezone`` otherwise. Both values are seconds
    west of UTC, so the result is positive west of UTC. The fractional part
    of the hour count is dropped by ``int``.
    """
    # https://stackoverflow.com/a/10854983
    dst_flag = time.localtime().tm_isdst
    if dst_flag == 0:
        offset_seconds = time.timezone
    else:
        offset_seconds = time.altzone
    # computed on every call, so the time zone is set each time
    return int(offset_seconds / 60 / 60)

In [4]:
import os, datetime, time, json
import pandas as pd

# gets time zone
HOURS_DIFFERENTIAL = getTZ()

# counter stored in totalshows.txt; used as the 'Show ID' of the next show
with open('totalshows.txt') as f:
    total_shows = int(f.read().strip())

# number of the session being processed (used to name the archived log)
with open(os.path.join('data', 'session.txt')) as f:
    session_num = int(f.read())

In [5]:
# Notes

# find player id for round - don't think we can do anything about that
# use that as well instead of username
# only affects userEndRoundLines in case of two with same name

# check elim time for jump_showdown - seems as though personal end times are only for...
# ...finishing races early

# playing around Daylight Savings switch may result in bad times for that round

# figure out if there's a max number of shows that will be stored in the .log : seems the whole session

In [6]:
def cleanLines(lines):
    """Return a new list with '[', ']' and '>' removed from each line and
    the surrounding whitespace stripped."""
    drop_markup = str.maketrans('', '', '[]>')
    cleaned = []
    for raw in lines:
        cleaned.append(raw.translate(drop_markup).strip())
    return cleaned

# splits show into show data and rounds
def roundSplit(lines):
    """Split the lines of one show into show-level data and per-round chunks.

    The lines are first passed through ``cleanLines``. Every line containing
    'Round' starts a new round chunk, and the last chunk runs to the end of
    the list.

    Returns:
        (show_lines, rounds): ``show_lines`` are the cleaned lines after the
        first one and before the first round line (empty when no round line
        exists); ``rounds`` is a list of lists, one per round, each starting
        with its 'Round' line.
    """
    cleaned = cleanLines(lines)

    # start of each round in highlights section
    round_starts = [idx for idx, text in enumerate(cleaned) if 'Round' in text]

    # pair every round start with the next one; the last round is open-ended
    round_ends = round_starts[1:] + [None]
    rounds = [cleaned[begin:end] for begin, end in zip(round_starts, round_ends)]

    header_end = round_starts[0] if round_starts else 0
    return cleaned[1:header_end], rounds

# takes registered and connected lines
# returns start times for rounds
# (time at which game is found)
def getStartTimes(reg, conne):
    """Compute one start ``datetime`` per entry of ``conne``.

    Args:
        reg: log lines carrying a full date/time stamp, found after 'at: '
            or, failing that, after 'check - '
            (format '%m/%d/%Y %I:%M:%S %p').
        conne: log lines whose prefix before ': [' is a time of day
            ('%H:%M:%S.%f'). ``reg[i]`` is paired with ``conne[i]``.

    Returns:
        list of ``datetime.datetime``: the registered time plus the
        time-of-day gap to the connected time, shifted back one day and
        ``HOURS_DIFFERENTIAL`` hours.
        NOTE(review): this looks like it converts the log's clock to local
        time - confirm against real logs.

    Raises:
        ValueError: if a line matches neither date format.
    """
    date_format = '%m/%d/%Y %I:%M:%S %p'
    startTimes = []

    for i in range(len(conne)):
        try:
            registeredTime = datetime.datetime.strptime(reg[i].split('at: ')[-1], date_format)
        except ValueError:
            # only a failed parse should trigger the alternate line layout;
            # a bare except would also hide unrelated errors
            registeredTime = datetime.datetime.strptime(reg[i].split('check - ')[-1], date_format)
        # no date in this stamp, so strptime fills in 1900-01-01
        connectedTime = datetime.datetime.strptime(conne[i].split(': [')[0], '%H:%M:%S.%f')

        d = (connectedTime - registeredTime)
        # removing d.days keeps only the within-day part of the difference
        startTimes.append(registeredTime + (d - datetime.timedelta(days=d.days+1, hours=HOURS_DIFFERENTIAL)))

    return startTimes

def getTimeTaken(start, end):
    """Return the show duration as a string with its first two characters
    removed (so the hours field is not included).

    Args:
        start: ``datetime`` start of the show.
        end: end time-of-day string in '%H:%M:%S.%f' format.
    """
    end_stamp = datetime.datetime.strptime(end, '%H:%M:%S.%f')
    elapsed = end_stamp - start
    # start time was already adjusted for HOURS_DIFFERENTIAL, so need to again;
    # dropping elapsed.days keeps only the within-day remainder
    correction = datetime.timedelta(days=elapsed.days, hours=HOURS_DIFFERENTIAL)
    as_text = str(elapsed - correction)
    return as_text[2:]

# gets season number
# need to update with each season
def getSeason(start_time):
    """Return the season number (1-3) whose start precedes ``start_time``,
    or the string 'undetermined' when it is not after any known season start.

    ``start_time`` is shifted by ``HOURS_DIFFERENTIAL`` hours before the
    comparison (offset timezone).
    """
    stamp_format = '%m/%d/%Y %I:%M:%S %p'
    # newest season first, so the first match wins
    season_launches = (
        (3, "12/15/2020 11:00:00 AM"),
        (2, "10/08/2020 11:00:00 AM"),
        (1, "08/04/2020 11:00:00 AM"),
    )

    shifted = start_time - datetime.timedelta(hours=HOURS_DIFFERENTIAL)

    for season, launch_stamp in season_launches:
        if shifted > datetime.datetime.strptime(launch_stamp, stamp_format):
            return season

    return 'undetermined'
    

# gets lines for CompletedEpisode section of a show
def getShowLines(lines, marker):
    """Collect the lines of one show's summary, starting at index ``marker``.

    A line is kept when it starts with '>' or contains '[Round' or
    '[Complet'. Empty lines are skipped. Scanning stops at the first other
    line that contains ':'; other lines without ':' are ignored.
    """
    collected = []
    for text in lines[marker:]:
        if text == '':
            continue
        belongs_to_show = text[0] == '>' or '[Round' in text or '[Complet' in text
        if belongs_to_show:
            collected.append(text)
        elif ':' in text:
            # first unrelated log entry: the summary is over
            break

    return collected

def getExtraRoundInfoLines(possLines): # rename?
    """Pick one map-info line per played round.

    Args:
        possLines: list of ``[serverID, line, line_num]`` entries in log
            order, where the first whitespace-separated token of ``line``
            names the round.

    Returns:
        list of str: for each change of round name or server ID, the line
        that came just before the change, plus the very last line. A change
        on the log line directly after the previous entry is not recorded
        (sometimes the server changes map due to a dropout). Returns ``[]``
        for empty input instead of raising IndexError.
    """
    if not possLines:
        return []

    rnds = []
    currRnd = possLines[0][1].split()[0]
    currSID = possLines[0][0]
    prevLine = possLines[0][2]

    # get last line before map switches (and server ID)
    for i, (serverID, line, line_num) in enumerate(possLines):
        rnd = line.split()[0]

        if rnd != currRnd or serverID != currSID:
            currRnd = rnd
            currSID = serverID
            # adjacent log lines mean the previous map was replaced right away
            if (prevLine + 1) != line_num:
                rnds.append(possLines[i-1][1])
        prevLine = line_num

    rnds.append(possLines[-1][1])
    return rnds


# preprocessGrade1 has been retired
# remove lines in-between completed games
def preprocessGrade2(lines):
    """Keep only the lines between a successful login and the shutdown that
    follows a 'BadgeId:' line.

    Prints the line count before and after filtering and returns the
    filtered list.
    """
    print(len(lines))
    shutdown_markers = (
        '[ClientGlobalGameState] ShutdownNetworkManager',
        '[ClientGlobalGameState] sending graceful disconnect message',
    )
    kept = []
    logged_in = False
    badge_seen = False
    for text in lines:
        logged_in = logged_in or '[CATAPULT] Login Succeeded' in text
        badge_seen = badge_seen or 'BadgeId:' in text
        if badge_seen and any(marker in text for marker in shutdown_markers):
            # the shutdown line itself is kept, then collection stops
            badge_seen = False
            logged_in = False
            kept.append(text)

        if logged_in:
            kept.append(text)
    print(len(kept))
    return kept

# changes to the data extraction removed the need for this
def preprocessGrade3(lines):
    """Drop every stretch from a "We're connected to the server!" line up to
    and including the next 'not yet ready to accept connections' line.

    Each line is checked against all flagged ranges and kept at most once.
    Appending inside the per-range loop returned an empty list when nothing
    was flagged and duplicated lines when several ranges existed.

    Returns:
        list of str: the remaining lines in their original order.
    """
    #print(len(lines))
    start_conn = -1
    to_remove = []
    in_conn = False
    for i, line in enumerate(lines):
        if "[StateConnectToGame] We're connected to the server!" in line:
            start_conn = i
            in_conn = True
        if 'reports that it is not yet ready to accept connections.' in line:
            if in_conn:
                to_remove.append([start_conn, i])
                in_conn = False

    temp_lines = [
        line for i, line in enumerate(lines)
        if not any(first <= i <= last for first, last in to_remove)
    ]

    #print(len(temp_lines))
    return temp_lines
    
# remove spectated rounds
def preprocessGrade4(lines):
    """Return ``lines`` without the rounds that were only spectated.

    A span starts at the most recent 'server is ending a round' line (or
    before the first line if none was seen yet). It is closed, once a
    'permission=Spectator' line has been seen, by the next such round line
    or by a '[ClientGameManager] Shutdown' line. Lines inside a span are
    dropped unless they contain 'Received disconnect reason from Catapult:'.
    """
    round_marker = "Received instruction that server is ending a round, and to rejoin"
    spans = []
    span_start = -1
    spectating = False
    for idx, text in enumerate(lines):
        if round_marker in text:
            if spectating: # finals hit this text before shutdown
                spans.append((span_start, idx))
                spectating = False
            span_start = idx
        if 'permission=Spectator' in text:
            spectating = True
        if spectating and '[ClientGameManager] Shutdown' in text:
            spans.append((span_start, idx))
            spectating = False

    def in_removed_span(idx):
        return any(first <= idx <= last for first, last in spans)

    # remove selected lines
    return [
        text for idx, text in enumerate(lines)
        if 'Received disconnect reason from Catapult:' in text or not in_removed_span(idx)
    ]

# gets rid of shows in which the user got disconnected ()
def preprocessGrade5(lines):
    """Return ``lines`` without login sections that never completed an episode.

    Each '[CATAPULT] Login Succeeded' line opens a section. If the next login
    arrives before any '[CompletedEpisodeDto]' line, the earlier section is
    dropped, except for lines containing
    'Received disconnect reason from Catapult:'. The last section is always
    kept.
    """
    spans = []
    section_start = -1
    logged_in = False  # really just after first show starts
    episode_done = False

    for idx, text in enumerate(lines):
        if '[CATAPULT] Login Succeeded' in text:
            if logged_in and not episode_done:
                spans.append((section_start, idx - 1))
            section_start = idx
            logged_in = True
            episode_done = False

        if '[CompletedEpisodeDto]' in text:
            episode_done = True

    def in_removed_span(idx):
        return any(first <= idx <= last for first, last in spans)

    # remove selected lines
    return [
        text for idx, text in enumerate(lines)
        if 'Received disconnect reason from Catapult:' in text or not in_removed_span(idx)
    ]

# helper function (to account for if round wraps around midnight)
def subtractHours(a, b):
    """Return ``b - a`` as a string, where both are '%H:%M:%S.%f' times of day.

    The day component of the difference is discarded, so a ``b`` that is
    earlier on the clock than ``a`` is treated as being on the following day.
    Raises ValueError if either string does not match the format.
    """
    clock_format = '%H:%M:%S.%f'
    later = datetime.datetime.strptime(b, clock_format)
    earlier = datetime.datetime.strptime(a, clock_format)
    gap = later - earlier
    # timedelta keeps seconds/microseconds non-negative; only days goes negative
    within_day = datetime.timedelta(seconds=gap.seconds, microseconds=gap.microseconds)
    return str(within_day)

# save data in csvs
def saveData(show, roundsList):
    """Append one show and its rounds to data/shows.csv and data/rounds.csv.

    Args:
        show: dict of show-level values; must contain 'Start Time'.
        roundsList: list of dicts, one per round of the show.

    Returns:
        bool: False (nothing written) when a show with the same start time,
        compared on the first 21 characters of its string form, is already
        in shows.csv; True after both files have been written.

    Only a missing or empty CSV is treated as the first run. Any other error
    now propagates instead of silently replacing the existing files.
    ``pd.concat`` replaces ``DataFrame.append``, which was removed in
    pandas 2.0.
    """
    shows_path = os.path.join('data', 'shows.csv')
    rounds_path = os.path.join('data', 'rounds.csv')

    new_shows_df = pd.DataFrame(pd.Series(show)).T
    new_rounds_df = pd.DataFrame(roundsList)

    try:
        #load them
        shows_df = pd.read_csv(shows_path)
        rounds_df = pd.read_csv(rounds_path)
    except (FileNotFoundError, pd.errors.EmptyDataError): # first time
        shows_df = None
        rounds_df = None
        print('first time...creating csvs')

    if shows_df is None:
        shows_out = new_shows_df
        rounds_out = new_rounds_df
    else:
        if 'Start Time' in shows_df.columns:
            # astype(str) keeps the slice safe even if a stored value is missing
            known_starts = set(shows_df['Start Time'].astype(str).str[:21])
            if str(show['Start Time'])[:21] in known_starts:
                return False

        # append
        shows_out = pd.concat([shows_df, new_shows_df], ignore_index=True)
        rounds_out = pd.concat([rounds_df, new_rounds_df], ignore_index=True)

    # write them to their respective files
    shows_out.to_csv(shows_path, index=False)
    rounds_out.to_csv(rounds_path, index=False)
    return True

# gets path for each session, in order
def getSessions(archive_dir=None):
    """Return the paths of the archived session logs, ordered by session number.

    Args:
        archive_dir: directory holding the ``session<N>.txt`` files.
            Defaults to ``data/archive``, where the notebook writes its
            archives.

    Returns:
        list of str: ``<archive_dir>/session<N>.txt`` paths sorted by N.
        Files whose names do not match that pattern are ignored.

    Raises:
        FileNotFoundError: if ``archive_dir`` does not exist.

    The directory that is listed and the directory used to build the
    returned paths are the same, so every returned path refers to a file
    that was actually found.
    """
    # if running sessionX.txt, don't rerun preprocessing... (bool if session in argument path)
    if archive_dir is None:
        archive_dir = os.path.join('data', 'archive')

    def sessionNumber(name):
        # 'session12.txt' -> 12; None for anything else
        if not (name.startswith('session') and name.endswith('.txt')):
            return None
        digits = name[len('session'):-len('.txt')]
        return int(digits) if digits.isdigit() else None

    numbered = []
    for name in os.listdir(archive_dir):
        number = sessionNumber(name)
        if number is not None:
            numbered.append((number, name))

    return [os.path.join(archive_dir, name) for _, name in sorted(numbered)]

# gets round times
def getRoundTimes(startRoundLines, userEndRoundLines, actualEndRoundLines):
    """Compute, per round, the user's time in the round and the round length.

    Args:
        startRoundLines: round start times ('%H:%M:%S.%f' strings).
        userEndRoundLines: times at which the user's own run ended.
        actualEndRoundLines: times at which the round itself ended. Entries
            that are not valid times produce 'uncertain'.

    Returns:
        (roundTimes, actualRoundTimes): two lists of duration strings. Each
        is as long as the shorter of its two inputs.
    """
    roundTimes = [
        subtractHours(began, ended)
        for began, ended in zip(startRoundLines, userEndRoundLines)
    ]

    # get total round time
    actualRoundTimes = []
    for began, ended in zip(startRoundLines, actualEndRoundLines):
        try:
            length = subtractHours(began, ended)
        except ValueError:
            length = 'uncertain'
        actualRoundTimes.append(length)

    return roundTimes, actualRoundTimes


In [7]:
# log_path.txt holds the location of the game's log file
with open("log_path.txt") as f:
    # strip so a trailing newline left by an editor does not break open()
    log_path = f.read().strip()

with open(log_path) as f:
    lines = f.read()

#with open(os.path.join('C:\\Users','Joseph','Downloads','Player (1).log')) as f:
 #   lines = f.read()
    
#with open('personal\\data\\archive\\session35.txt') as f:
 #   lines = f.read()

lines = lines.split('\n')
#lines = preprocessGrade2(lines)
# line counts are printed before and after each preprocessing pass
print(len(lines))
lines = preprocessGrade4(lines)
print(len(lines))
lines = preprocessGrade5(lines)
print(len(lines))

8824
8824
8824


In [8]:
# get first line of each new show and usernames used for show

# --- flags driving the log scan ---
prevUser = '!!!!!!!!!!!!!!!'  # placeholder until a username is read
lookUser = True
inRound = inARound = False
num_players_lock = False
finished = undo_time = False
received = to_skip = False

# --- values remembered until the show they belong to is recorded ---
gameMode = 'main_show'
partySize = 'na'

# --- one entry per recorded show ---
episodeMarkers, usernames, partySizes, gameModes = [], [], [], []

# times
reg, conne = [], []
startRoundLines, userEndRoundLines, actualEndRoundLines = [], [], []

# map / server lines
possLines = []

# to find the actual number of players that qualified (for racing rounds)
prevNumLine = "green"  # sentinel: no NumPlayersAchievingObjective line seen yet
numLines = []

# **********************************************************
# go through lines, looking for certain things**************
# **********************************************************
# Single pass over the preprocessed log. The two plain `if`s below run for
# every line; after them one if/elif chain handles at most one marker per line.
# Per-show values (username, party size, game mode, registered/connected
# lines) are held in temporaries and only stored when a
# '[CompletedEpisodeDto]' line is reached.
for i, line in enumerate(lines):
    # for username
    if '[CATAPULT] Attempting login' in line:
        # new login attempt: forget what was seen for the previous connection
        finished = False
        received = False
    if 'Received disconnect reason from Catapult:' in line:
        # the next completed-episode marker will be ignored
        to_skip = True
    if lookUser and 'Sending login request' in line:
        # username is the text between ' player ' and ' networkID', commas removed
        usernames.append(line.split(' player ')[-1].split(' networkID')[0].replace(',', ''))
        prevUser = usernames[-1]
        lookUser = False   
    # for type of show (main or alternate) (also called playlist)
    elif 'Chosen Show:' in line: # appears before entering matchmaking solo/group (not as of start of s3)
        gameMode = line.split(':')[-1]
        
    # signifies start of looking for new episode
    # get size of party
    elif 'Party Size' in line or 'Begin matchmaking solo' in line:
        if 'Begin matchmaking solo' in line:
            partySize = 1
        else:
            # last space-separated token of the line is the party size
            partySize = int(line.split(' ')[-1].strip())
    # for show start time
    elif '[QosManager] Registered' in line or 'QosManager: Registered' in line or '[QosManager] Updated next periodic check' in line: # for registered time (date)
        to_add_reg = line
    elif "[StateConnectToGame] We're connected to the server!" in line: # for connection time
        to_add_conne = line
    # for playlist
    elif 'Selected show is' in line: # appears before every round as of s3
        gameMode = line.split('Selected show is')[-1]
    # for server ID and map lines
    elif 'Received NetworkGameOptions from ' in line: 
        tmp = line.split('roundID=')[-1]
        serverID = line.split(' ')[4]
        # entries whose round text contains 'Default' are not recorded
        if 'Default' not in tmp:
            possLines.append([serverID, tmp, i])
            
        received = True
    # for start round times and players that qualified from previous round
    elif 'state from Countdown to Playing' in line:
        startRoundLines.append(line.split(': [')[0])
        inRound = True
        inARound = True
        num_players_lock = False
        # append last # players achieving obj when hit new round
        # ("green" is the initial sentinel, so nothing is appended for the very first round)
        if prevNumLine != "green":
            numLines.append(prevNumLine.split('=')[-1])
            prevNumLine = ""
    elif 'Changing state from GameOver to Results' in line: # occasionally a random NumPlayers... line right before new round
        num_players_lock = True
    elif '[ClientGameSession] NumPlayersAchievingObjective=' in line: # for total number of players that qualify
        if not num_players_lock:
            prevNumLine = line
    # for end round / player active in round times
    elif '[ClientGameManager] Handling unspawn for player FallGuy' in line and prevUser in line:
        # only the first end marker per round counts (inRound is cleared here)
        if inRound:
            userEndRoundLines.append(line.split(': [')[0])
            inRound = False
    elif 'Changing local player state to: SpectatingEliminated' in line: # no longer appears as of Nov 21, 2020
        if inRound:
            userEndRoundLines.append(line.split(': C')[0])
            inRound = False
    elif 'Changing state from Playing to GameOver' in line: # 'Changing state from GameOver to Results'
        if inARound:
            inARound = False
            actualEndRoundLines.append(line.split(': [')[0])
    # overall show data
    elif '[CompletedEpisodeDto]' in line: # marker for a good show; only append show stats here
        # no NetworkGameOptions line since the last login attempt: ignore this marker
        if received == False: 
            continue
        # a disconnect reason was logged earlier: ignore this marker once
        if to_skip:
            to_skip = False
            continue
        if finished: # last one was for a disconnected show then
            # save disconnected game
            # (data/disconnected.json must already exist and hold a JSON list)
            final_lines = getShowLines(lines, episodeMarkers[-1])
            showData, rounds = roundSplit(final_lines)
            disc_json = {'session': session_num, 'show_data': showData, 'rounds': rounds}
            with open(os.path.join('data', 'disconnected.json')) as json_file: 
                data = json.load(json_file)
            data.append(disc_json)
            with open(os.path.join('data', 'disconnected.json'), 'w') as f: 
                json.dump(data, f) 
            
            # overwrite the entries recorded for the previous marker with this one
            partySizes[-1] = partySize
            gameModes[-1] = gameMode
            episodeMarkers[-1] = i
            reg[-1] = to_add_reg
            conne[-1] = to_add_conne
            if undo_time:
                # remove the most recent 'left' placeholder (reverse, remove first, reverse back)
                actualEndRoundLines = actualEndRoundLines[::-1]
                actualEndRoundLines.remove('left')
                actualEndRoundLines = actualEndRoundLines[::-1]
                undo_time = False
            if inARound:
                # show ended while a round was still running: no real end time exists
                actualEndRoundLines.append('left')
                inARound = False
                undo_time = True
            lookUser = True
            finished = True
            continue
        
        # first completed-episode marker since the last login attempt: record a new show
        partySizes.append(partySize)
        gameModes.append(gameMode)
        episodeMarkers.append(i)
        reg.append(to_add_reg)
        conne.append(to_add_conne)
        lookUser = True
        partySize = 'na'
        finished = True
        if inARound:
            # show ended while a round was still running: no real end time exists
            actualEndRoundLines.append('left')
            inARound = False
            undo_time = True

# append last # achieving obj
numLines.append(prevNumLine.split('=')[-1])

# if no episodes found, end
if len(episodeMarkers) == 0:
    print('no episodes found') # change
        
# **********************************************************        
# get time user spent in each round ************************        
# (time round starts until they either finish or are eliminated) (just qualify I think)
# **********************************************************
# gets round times: user's time in round and total round time
roundTimes, actualRoundTimes = getRoundTimes(startRoundLines, userEndRoundLines, actualEndRoundLines)

# gets start times for each show
startTimes = getStartTimes(reg, conne)


# **********************************************************
# for each show/episode ************************************
# **********************************************************
roundIdx = 0       # index into the per-round lists, shared across all shows
showsSaved = 0
showsSkipped = 0
saved_a_show = False

# getExtraRoundInfoLines indexes possLines[0], so a log without any map lines
# (e.g. no episodes found) must not be passed to it
rnds = getExtraRoundInfoLines(possLines) if possLines else []
    
for showIdx, (j, user) in enumerate(zip(episodeMarkers, usernames)):
    # provisionally claim the next show id; it is released again below when
    # the show turns out to be saved already
    this_show = total_shows
    total_shows += 1

    # get lines for this show and split them into show data and rounds
    final_lines = getShowLines(lines, j)
    showData, rounds = roundSplit(final_lines)

    show_start = startTimes[showIdx]
    show_end = final_lines[0].split(': ==')[0]  # end time

    # set show data (key order is the column order of a newly created csv)
    show_dict = {
        'Show ID': this_show, # id
        'Start Time': show_start,
        'Season': getSeason(show_start),
        'Time Taken': getTimeTaken(show_start, show_end), # approximate time taken
        'Game Mode': gameModes[showIdx],
        'Final': False,
        'Rounds': len(rounds), # num rounds
        'Username': user,
        'Party Size': partySizes[showIdx],
        'addID': show_end,
    }

    # add other show data ("key: value" lines)
    for entry in showData:
        fields = entry.split(':')
        show_dict[fields[0]] = fields[1].strip()

    # ********************************************
    # get data for each round in show ************
    # ********************************************
    rounds_list = []
    # for each round in the show/episode
    for round_ in rounds: # for list in 2D list
        header = round_[0]
        round_dict = {'Show ID': this_show,
                      'Round Num': header.split(' ')[1].strip(),
                      'Map': header.split('|')[1].strip(),
                      'Time Spent': roundTimes[roundIdx],
                      'Round Length': actualRoundTimes[roundIdx]}

        # add rest of data from list
        for entry in round_[1:]:
            fields = entry.split(':')
            round_dict[fields[0]] = fields[1].strip()

        # add extra information from the matching map line (key=value tokens)
        for token in rnds[roundIdx].split():
            value = token.split('=')[-1]
            if 'currentParticipantCount' in token:
                round_dict['Participants'] = value
            if 'qualificationPercentage' in token:
                round_dict['Qualification Percent'] = value.replace(',', '')
            if 'isFinalRound' in token and value.replace(',', '') == 'True':
                show_dict['Final'] = True
        round_dict['Actual Num Qual'] = numLines[roundIdx]

        roundIdx += 1
        rounds_list.append(round_dict)

    # ********************************************
    # save ***************************************
    # ********************************************
    # save show_dict to one table
    # save each dict in rounds_list to another table
    if saveData(show_dict, rounds_list):
        showsSaved += 1
        saved_a_show = True
    else:
        # already stored: give the show id back
        showsSkipped += 1
        total_shows -= 1


summary = 'csvs saved successfully with {} new shows while skipping {} shows that were already saved'.format(showsSaved, showsSkipped)
print(summary)

# persist the show counter for the next run
with open('totalshows.txt', 'w') as f:
    f.write(str(total_shows))

# save processed lines
if not saved_a_show:
    print('no new shows, not a new session')
else:
    archive_path = os.path.join('data', 'archive', 'session{}.txt'.format(session_num))
    with open(archive_path, 'w') as f:
        f.write("\n".join(lines))

    # the next run belongs to a new session
    session_num += 1
    with open(os.path.join('data', 'session.txt'), 'w') as f:
        f.write(str(session_num))

first time...creating csvs
csvs saved successfully with 4 new shows while skipping 0 shows that were already saved


In [9]:
# reload the saved shows table from disk and display it
shows_csv = os.path.join('data', 'shows.csv')
shows_df = pd.read_csv(shows_csv)
shows_df

Unnamed: 0,Show ID,Start Time,Season,Time Taken,Game Mode,Final,Rounds,Username,Party Size,addID,Kudos,Fame,Crowns
0,1013,2021-03-12 21:40:41.047,3,05:48.623000,event_only_floor_fall_1203_to_1403_2021,True,3,Infallible Laughing Penguin,1,03:46:29.670,180,0,0
1,1014,2021-03-12 21:47:14.869000,3,06:10.807000,event_only_floor_fall_1203_to_1403_2021,True,3,Infallible Laughing Penguin,1,03:53:25.676,180,0,0
2,1015,2021-03-12 21:54:18.776000,3,03:07.787000,event_only_floor_fall_1203_to_1403_2021,False,2,Infallible Laughing Penguin,1,03:57:26.563,85,0,0
3,1016,2021-03-12 21:58:15.464000,3,06:18.807000,event_only_floor_fall_1203_to_1403_2021,True,3,Infallible Laughing Penguin,1,04:04:34.271,915,0,1


In [10]:
# reload the saved rounds table from disk and display it
rounds_csv = os.path.join('data', 'rounds.csv')
rounds_df = pd.read_csv(rounds_csv)
rounds_df

Unnamed: 0,Show ID,Round Num,Map,Time Spent,Round Length,Qualified,Position,Kudos,Fame,Bonus Tier,Bonus Kudos,Bonus Fame,BadgeId,Participants,Qualification Percent,Actual Num Qual
0,1013,0,round_floor_fall_event_only_01,0:00:41.165000,0:00:41.174000,True,14,30,0,0.0,35,0,gold,20,75,15
1,1013,1,round_floor_fall_event_only_02,0:01:11.691000,0:01:11.697000,True,4,20,0,0.0,35,0,gold,15,66,9
2,1013,2,round_floor_fall_event_only_final,0:02:01.269000,uncertain,False,6,60,0,,0,0,,9,0,0
3,1014,0,round_floor_fall_event_only_01,0:00:32.671000,0:00:32.675000,True,5,30,0,0.0,35,0,gold,20,75,15
4,1014,1,round_floor_fall_event_only_02,0:01:06.305000,0:01:06.307000,True,8,20,0,0.0,35,0,gold,15,66,9
5,1014,2,round_floor_fall_event_only_final,0:01:51.086000,uncertain,False,5,60,0,,0,0,,9,0,0
6,1015,0,round_floor_fall_event_only_01,0:00:45.173000,0:00:45.176000,True,12,30,0,0.0,35,0,gold,18,75,13
7,1015,1,round_floor_fall_event_only_02,0:01:05.436000,uncertain,False,10,20,0,,0,0,,13,66,0
8,1016,0,round_floor_fall_event_only_01,0:00:45.169000,0:00:45.176000,True,1,30,0,0.0,35,0,gold,20,75,15
9,1016,1,round_floor_fall_event_only_02,0:01:09.235000,0:01:09.239000,True,3,20,0,0.0,35,0,gold,15,66,9
