# Let's process the GEM-STEP logs and reconstruct the game state for Photosynthesis

In [2]:
# Imports
import pandas as pd
import enum
import pathlib
import os
from tqdm import tqdm
from dataclasses import dataclass, field
from typing import List
import json

# Constants
# All paths are derived from the current working directory: its parent is
# treated as the repository root, and the data set lives under <root>/data.
CWD = pathlib.Path.cwd()
GIT_ROOT = CWD.parent
DATA_DIR = GIT_ROOT.joinpath("data", "PhotosynthesisFall2022")

In [6]:
# Perform log pre-processing (Step #1) (combining and converting .txt to .csv)

# A line holding any of these whitespace-separated tokens is dropped entirely.
_LINE_DROP_TOKENS = ('joined', 'CLIENT_LOG', 'services', '---')

# For each entry (in order), the first token that contains it as a substring
# is removed. 'null' appears twice, so up to two tokens containing it are
# removed per line.
_TOKEN_SUBSTRING_RULES = (
    'bpid', 'Molecule', 'UADDR', 'agentId', 'targetId',
    'b2b', 'binb', 'null', 'c2c', 'null', 'c2b',
)

# Every token exactly equal to one of these is removed.
_TOKEN_EXACT_RULES = frozenset(('id', 'x', 'y', 'pz', 'null'))

# Keys kept from each actor of a NET:DISPLAY_LIST payload.
_ACTOR_KEYS = ('id', 'skin', 'x', 'y')


def _parse_log_line(line):
    """Turn one raw log line into ``(datetime, event_type, event_data)``.

    Returns ``None`` when the line is filtered out, is too short to hold the
    fields of its event, or does not match any known event.

    Raises:
        json.JSONDecodeError: If the last token of a NET:DISPLAY_LIST line is
            not valid JSON.
    """
    elements = line.split()

    # Lines to completely remove if one of the tokens is found
    if any(token in elements for token in _LINE_DROP_TOKENS):
        return None

    # NET:DISPLAY_LIST: the last token is a JSON list of actors
    if 'NET:DISPLAY_LIST' in elements:
        actors = json.loads(elements[-1])

        # Keep only the actors that are students ('pz' in their id), and
        # only the fields needed downstream
        students = [
            {key: value for key, value in actor.items() if key in _ACTOR_KEYS}
            for actor in actors
            if 'pz' in actor['id']
        ]
        if not students:
            return None
        elements[-1] = json.dumps(students)

    # Remove tokens that don't provide information (substring match,
    # one token per rule)
    for rule in _TOKEN_SUBSTRING_RULES:
        for index, token in enumerate(elements):
            if rule in token:
                del elements[index]
                break

    # Remove leftover label tokens (exact match, all occurrences)
    elements = [token for token in elements if token not in _TOKEN_EXACT_RULES]

    # Blank or truncated lines cannot be indexed below; skip them instead of
    # raising IndexError
    if len(elements) < 2:
        return None

    timestamp, kind = elements[0], elements[1]
    if kind == 'NET:DISPLAY_LIST':
        if len(elements) < 3:
            return None
        return timestamp, 'game_update', elements[2]
    if kind == 'Touched':
        if len(elements) < 4:
            return None
        event_data = {'src': elements[2], 'dst': elements[3]}
        return timestamp, 'touch', json.dumps(event_data)
    if 'pz' in kind:
        if len(elements) < 4:
            return None
        event_data = {'id': kind, 'x': elements[2], 'y': elements[3]}
        return timestamp, 'position', json.dumps(event_data)
    return None


def log_preprocessing(dir):
    """Combine the ``.txt`` logs found in ``dir`` into ``dir / 'game_logs.csv'``.

    Each log line is reduced to a ``datetime`` / ``event_type`` /
    ``event_data`` row (event types: ``game_update``, ``touch``,
    ``position``). Rows from all files are concatenated, sorted by timestamp
    and written as CSV. Nothing is done when the output file already exists,
    and nothing is written when no event could be extracted.

    Args:
        dir: ``pathlib.Path`` of the directory holding the ``.txt`` logs.
            (The name shadows the ``dir`` builtin; it is kept so existing
            callers keep working.)

    Raises:
        json.JSONDecodeError: If a NET:DISPLAY_LIST payload is not valid JSON.
    """
    output_file = dir / 'game_logs.csv'
    if output_file.exists():
        return

    # Files are visited in sorted order so the result does not depend on the
    # order in which the file system lists them
    txt_files = sorted(file for file in dir.iterdir() if file.suffix == '.txt')
    dfs = []
    for file in txt_files:

        # NOTE: pd.read_csv(file, sep='\t', header=None) fails on these files,
        # hence the manual line-by-line parsing
        with open(file, 'r') as f:
            lines = f.readlines()

        # Don't keep any line up to (and including) the first '---' line.
        # A file without any '---' line is processed in full rather than
        # being silently discarded.
        marker_index = next(
            (index for index, line in enumerate(lines) if '---' in line.split()),
            None,
        )
        selected_lines = lines if marker_index is None else lines[marker_index + 1:]

        columns = {'datetime': [], 'event_type': [], 'event_data': []}
        for line in tqdm(selected_lines, total=len(selected_lines)):
            event = _parse_log_line(line)
            if event is None:
                continue
            timestamp, event_type, event_data = event
            columns['datetime'].append(timestamp)
            columns['event_type'].append(event_type)
            columns['event_data'].append(event_data)

        # Convert (files that produced no event are left out)
        if columns['datetime']:
            dfs.append(pd.DataFrame(columns))

    # pd.concat raises ValueError on an empty list; with no event there is
    # nothing to save
    if not dfs:
        return

    # Concatenate all dataframes into one
    df = pd.concat(dfs, ignore_index=True)

    # Sort by timestamp; a stable sort keeps events that share a timestamp in
    # the order they were logged
    df['datetime'] = pd.to_datetime(df['datetime'], format='%H:%M:%S:%f')
    df = df.sort_values(by='datetime', kind='mergesort')

    # Save the dataframe
    df.to_csv(output_file, index=False)

# Perform the routine on every directory found under <DATA_DIR>/logs.
# iterdir() also yields plain files (e.g. a stray .DS_Store), which
# log_preprocessing cannot iterate, so those are skipped.
for session_dir in (DATA_DIR / 'logs').iterdir():
    if session_dir.is_dir():
        log_preprocessing(session_dir)

100%|██████████| 485994/485994 [00:02<00:00, 180691.64it/s]
100%|██████████| 1223479/1223479 [00:12<00:00, 101948.11it/s]
100%|██████████| 1413/1413 [00:00<00:00, 90953.83it/s]
100%|██████████| 3134/3134 [00:00<00:00, 27167.35it/s]
100%|██████████| 617431/617431 [00:03<00:00, 198589.63it/s]


In [None]:
# Game state reconstruction routines
@dataclass
class Participant:
    """A single participant of the game, as held in the game state."""
    state: enum.Enum  # current state of the participant, as an Enum member
    position: tuple  # participant position; presumably (x, y) — TODO confirm

@dataclass
class EnvironmentState:
    """Environment-level part of the game state."""
    sun_state: bool = False  # sun flag, False by default; presumably True means the sun is active — TODO confirm

@dataclass
class GameState:
    """Aggregate game state: a list of participants plus the environment state."""
    # default_factory gives every GameState its own list / EnvironmentState
    # instead of sharing one mutable default across instances
    participants: List[Participant] = field(default_factory=list)
    environment: EnvironmentState = field(default_factory=EnvironmentState)

def game_state_reconstruction(csv_file: pathlib.Path):
    """Load a game log CSV as the starting point of game state reconstruction.

    NOTE(review): work in progress. The CSV is read into a DataFrame, but
    nothing is built from it yet and the function implicitly returns None.

    Args:
        csv_file: Path of the CSV file to read (presumably a ``game_logs.csv``
            produced by the pre-processing step; confirm against the caller).
    """
    df = pd.read_csv(csv_file)

# Perform game state reconstruction