# Let's process the GEM-STEP logs and reconstruct the game state for Photosynthesis

In [7]:
# Imports
import pathlib
import os
from dataclasses import dataclass, field
from typing import Dict, Literal, Optional, Tuple
import json

from dataclasses_json import DataClassJsonMixin
import pandas as pd
from tqdm import tqdm

# Constants 
CWD = pathlib.Path(os.path.abspath(""))
GIT_ROOT = CWD.parent
DATA_DIR = GIT_ROOT / "data" / 'SSMVSpring23'

In [9]:
# Perform log pre-processing (Step #1) (combining and converting .txt to .csv)
def log_preprocessing(dir):

    output_file = dir / 'game_logs.csv'
    # if output_file.exists():
    #     return

    # Combine all .txt files into one .csv file
    txt_files = [file for file in dir.iterdir() if file.suffix == '.txt']
    dfs = []
    for file in txt_files:
        
        # Won't work - error loading file
        # df = pd.read_csv(file, sep='\t', header=None)
        # dfs.append(df)

        with open(file, 'r') as f:
            lines = f.readlines()

            pass_config_line = False
            columns = {'datetime': [], 'event_type': [], 'event_data': []}
            selected_lines = lines # used to select only a subset of lines (debugging)
            for line in tqdm(selected_lines, total=len(selected_lines)):
                elements = line.split()
                
                # Don't keep any line before the presence of '---'
                if not pass_config_line and '---' not in elements:
                    pass_config_line = True
                    continue

                # If NET:DISPLAY_LIST, process the JSON format
                if 'NET:DISPLAY_LIST' in elements:
                    json_element = json.loads(elements[-1])

                    # Filter to only get the actors that are students
                    results = []
                    for actor in json_element:
                        if 'pz' in actor['id']:
                            compressed = {k:v for k,v in actor.items() if k in ['id', 'skin', 'x', 'y']}
                            results.append(compressed)

                    # If empty, skip the line
                    if not results:
                        continue
                    
                    # Update the element
                    elements[-1] = json.dumps(results)

                # Elements to remove (don't provide information)
                for rule in ['bpid', 'Molecule', 'UADDR', 'agentId', 'targetId', 'b2b', 'binb', 'null', 'c2c', 'null', 'c2b']:
                    to_be_removed = []
                    for element in elements:
                        if rule in element:
                            to_be_removed.append(element)
                            break
                    
                    for element in to_be_removed:
                        elements.remove(element)

                # Elements to remove
                to_be_removed = []
                for element in elements:
                    if element in ['id', 'x', 'y', 'pz', 'null']:
                        to_be_removed.append(element)

                for element in to_be_removed:
                    elements.remove(element)

                # Identify the events and save it accordingly
                if elements[1] == 'NET:DISPLAY_LIST':
                    columns['datetime'].append(elements[0])
                    columns['event_type'].append('game_update')
                    columns['event_data'].append(elements[2])
                elif elements[1] == 'Touched':
                    columns['datetime'].append(elements[0])
                    columns['event_type'].append('touch')
                    event_data = {'src': elements[2], 'dst': elements[3]}
                    columns['event_data'].append(json.dumps(event_data))
                elif 'pz' in elements[1]:
                    columns['datetime'].append(elements[0])
                    columns['event_type'].append('position')
                    event_data = {'id': elements[1], 'x': elements[2], 'y': elements[3]}
                    columns['event_data'].append(json.dumps(event_data))

                # for i, element in enumerate(elements):
                #     if i not in columns:
                #         columns[i] = []
                #     columns[i].append(element)

                # Make sure to add empty elements to columns that don't have them
                # for i in range(len(elements), len(columns)):
                #     columns[i].append('')

            # Convert
            df = pd.DataFrame(columns)
            if len(df) > 0:
                dfs.append(df)
    
    # Concatenate all dataframes into one
    df = pd.concat(dfs)

    # Sort by timestamp
    df['datetime'] = pd.to_datetime(df['datetime'], format='%H:%M:%S:0%f')
    df = df.sort_values(by='datetime')

    # Save the dataframe
    df.to_csv(output_file, index=False)

# Perform the routine
# log_preprocessing(DATA_DIR / 'logs' / 'VU_GEM-STEP_NB_2022_Fall_AH_GroupB_Day11_221109_ComputerLogs')
for dir in (DATA_DIR / 'logs').iterdir():
    if dir.is_dir():
        log_preprocessing(dir)

100%|██████████| 2276323/2276323 [00:25<00:00, 88154.20it/s]
 99%|█████████▉| 625970/630194 [00:06<00:00, 91627.43it/s]


JSONDecodeError: Extra data: line 1 column 4 (char 3)

In [None]:
# Game state reconstruction routines
@dataclass
class Participant(DataClassJsonMixin):
    id: str
    position: Tuple[float, float]
    state: Literal['scientist'] = 'null'

@dataclass
class EnvironmentState(DataClassJsonMixin):
    sun_state: Optional[bool] = None

@dataclass
class GameState(DataClassJsonMixin):
    participants: Dict[str, Participant] = field(default_factory=dict)
    environment: EnvironmentState = field(default_factory=EnvironmentState)

def game_state_reconstruction(csv_file: pathlib.Path, period_ms: int):
    df = pd.read_csv(csv_file)
    df['datetime'] = pd.to_datetime(df['datetime'])

    # Output file
    output_file = csv_file.parent / 'game_state.csv'
    # if output_file.exists():
    #     return

    # Map from texture to state
    texture_to_state = {
        'PS_oxygen.png': 'O2',
        'PS_water.png': 'H2O',
        'PS_carbon_dioxide.png': 'CO2',
        'PS_sugar.png': 'Sugar',
        'PS_waterthinking.png': 'Thinking_H2O'
    }

    # Create a game state object
    game_state = GameState()
    prior_datetime = None
    game_data = {'datetime': [], 'state': []}

    for i, row in tqdm(df.iterrows(), total=len(df)):

        # Handle the different kinds of events
        if row['event_type'] == 'game_update':
            data = json.loads(row['event_data'])

            for actor in data:
                if 'skin' not in actor:
                    continue

                if actor['id'] not in game_state.participants:
                    state = texture_to_state[actor['skin']]
                    p = Participant(id=actor['id'], state=state, position=(0, 0))
                    game_state.participants[actor['id']] = p
                else:
                    p = game_state.participants[actor['id']]
                    p.state = texture_to_state[actor['skin']]

        elif row['event_type'] == 'touch':
            # data = json.loads(row['event_data'])
            ...
        elif row['event_type'] == 'position':
            data = json.loads(row['event_data'])
            
            if data['id'] not in game_state.participants:
                p = Participant(id=data['id'], position=(data['x'], data['y']))
                game_state.participants[data['id']] = p
            else:
                p = game_state.participants[data['id']]
                p.position = (data['x'], data['y'])
        else:
            raise ValueError(f'Unknown event type: {row["event_type"]}')
        
        # Save the game state
        if prior_datetime is None or (row['datetime'] - prior_datetime).total_seconds() >= period_ms:
            game_data['datetime'].append(row['datetime'])
            game_data['state'].append(game_state.to_json())
            prior_datetime = row['datetime']
        
    # Save the game state
    game_data = pd.DataFrame(game_data)
    game_data.to_csv(output_file, index=False)

# Perform game state reconstruction
for dir in (DATA_DIR / 'logs').iterdir():
    game_state_reconstruction(dir / 'game_logs.csv', period_ms=0.1)
# game_state_reconstruction(DATA_DIR / 'logs' / 'VU_GEM-STEP_NB_2022_Fall_AH_GroupB_Day11_221109_ComputerLogs' / 'game_logs.csv', period_ms=0.1)

100%|██████████| 1708702/1708702 [04:11<00:00, 6802.65it/s]
100%|██████████| 2118739/2118739 [05:05<00:00, 6946.58it/s]
100%|██████████| 727986/727986 [01:48<00:00, 6686.23it/s]
