<a href="https://colab.research.google.com/github/gitHubAndyLee2020/CrafterGPT/blob/main/CrafterGPT_SFT_Data_Engineering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### CrafterGPT SFT Data Engineering

### Install SmartPlay and Dependencies

In [None]:
!pip install -q condacolab
import condacolab
condacolab.install()

In [None]:
import condacolab
condacolab.check()

In [None]:
!git clone https://github.com/microsoft/SmartPlay.git

In [None]:
!conda env update -n base -f ./SmartPlay/environment.yml

In [None]:
!pip install minedojo

In [None]:
!cd SmartPlay && pip install -e .

In [None]:
!pip install gym crafter datasets

In [None]:
!pip install cffi==1.16.0 # Solves cffi package version conflict that occurs

### Upload Crafter Human Dataset

In [None]:
from google.colab import files
uploaded = files.upload()

In [None]:
!unzip crafter_human_dataset.zip
!mv dataset crafter_human_dataset

In [None]:
import numpy as np
import os

In [None]:
# Folder that holds the unzipped human-play recordings (one .npz file per episode).
directory = 'crafter_human_dataset'
# os.listdir() returns entries in arbitrary, platform-dependent order; sorting keeps
# the order of the generated training rows reproducible between runs.
npz_files = sorted(file for file in os.listdir(directory) if file.endswith('.npz'))

In [None]:
# Snake-cased action names, indexed by the integer action id stored in each
# recording's "action" array (e.g. action_names[data["action"][step_num]]).
# The order must stay aligned with `action_list`, which is indexed by the same ids.
action_names = [
  "noop",
  "move_left",
  "move_right",
  "move_up",
  "move_down",
  "do",
  "sleep",
  "place_stone",
  "place_table",
  "place_furnace",
  "place_plant",
  "make_wood_pickaxe",
  "make_stone_pickaxe",
  "make_iron_pickaxe",
  "make_wood_sword",
  "make_stone_sword",
  "make_iron_sword",
]

In [None]:
# Human-readable action labels, indexed by the same integer action id as
# `action_names` (left/right/up/down are rendered as West/East/North/South).
# These labels are what appears in the prompt's "Action List" and in the answer.
action_list = [
  'Noop',
  'Move West',
  'Move East',
  'Move North',
  'Move South',
  'Do',
  'Sleep',
  'Place Stone',
  'Place Table',
  'Place Furnace',
  'Place Plant',
  'Make Wood Pickaxe',
  'Make Stone Pickaxe',
  'Make Iron Pickaxe',
  'Make Wood Sword',
  'Make Stone Sword',
  'Make Iron Sword',
]

In [None]:
def compose_prompt_and_answer(current_observation, past_actions, action_list, inventory, threats, goal, action_options, action_choice):
    """Render one supervised fine-tuning sample as a single string.

    The sample is ``<s>[INST] {prompt} [/INST] {answer} </s>`` where the prompt
    holds the fixed environment description plus the observation, the action
    history and the full action list, and the answer holds the status,
    resources, threats, goal, applicable action options and the chosen action.

    Args:
        current_observation: Text description of the current frame.
        past_actions: Labels of the most recent actions, oldest first.
        action_list: Every action label, rendered as the prompt's "Action List".
        inventory: Mapping with the four status keys (health, food, drink,
            energy) and one count per item key.
        threats: Threat sentences; an empty list renders "* None observed.".
        goal: Goal sentence placed under "Goal:".
        action_options: Pre-formatted option lines, one per applicable action.
        action_choice: Label of the action emitted as the final choice.

    Returns:
        The complete training text for this step.
    """
    # Item key -> display label; dict order is the order items are listed in.
    item_labels = {
        'sapling': 'Sapling',
        'wood': 'Wood',
        'stone': 'Stone',
        'coal': 'Coal',
        'iron': 'Iron',
        'diamond': 'Diamond',
        'wood_pickaxe': 'Wood Pickaxe',
        'stone_pickaxe': 'Stone Pickaxe',
        'iron_pickaxe': 'Iron Pickaxe',
        'wood_sword': 'Wood Sword',
        'stone_sword': 'Stone Sword',
        'iron_sword': 'Iron Sword',
    }

    # Only items the player actually holds are listed.
    held_items = [
        f"* {label}: {inventory[item]}"
        for item, label in item_labels.items()
        if inventory[item] > 0
    ]
    inventory_desc = "\n".join(held_items) if held_items else "* None in inventory."

    # Bullet lists are rendered up front so the templates below stay plain text.
    action_history_desc = '- ' + '\n- '.join(past_actions)
    action_list_desc = '- ' + '\n- '.join(action_list)
    threats_desc = ('* ' + '\n* '.join(threats)) if len(threats) > 0 else '* None observed.'
    action_options_desc = "\n".join(action_options)

    # The sub-bullets of the environment description are indented with a tab
    # character; it is spelled as an explicit \t escape so it cannot be lost
    # to editor whitespace conversion.
    # NOTE(review): "Collect Sampling" below looks like a typo for "Collect
    # Sapling"; it is kept verbatim so the generated text does not change -
    # confirm before fixing.
    prompt = f"""
Environment Description:

1. You are playing a 2D survival game.
2. Your goal is to maximize rewards and stay alive.
3. You receive 1 point of reward for the first time you achieve the following achievements:
\t- Collect Wood: No requirements.
\t- Place Table: Requires Collect Wood.
\t- Eat Cow: No requirements.
\t- Collect Sampling: No requirements.
\t- Collect Drink: No requirements.
\t- Make Wood Pickaxe: Requires Place Table.
\t- Make Wood Sword: Requires Place Table.
\t- Place Plant: Requires Collect Sampling.
\t- Defeat Zombie: No requirements.
\t- Collect Stone: Requires Make Wood Pickaxe.
\t- Place Stone: Requires Collect Stone.
\t- Eat Plant: Requires Place Plant.
\t- Defeat Skeleton: No requirements.
\t- Make Stone Pickaxe: Requires Collect Stone.
\t- Make Stone Sword: Requires Collect Stone.
\t- Wake Up: No requirements.
\t- Place Furnace: Requires Collect Stone.
\t- Collect Coal: Requires Make Wood Pickaxe.
\t- Collect Iron: Requires Make Stone Pickaxe.
\t- Make Iron Pickaxe: Requires Place Furnace, Collect Coal, and Collect Iron.
\t- Make Iron Sword: Requires Place Furnace, Collect Coal, and Collect Iron.
\t- Collect Diamond: Requires Make Iron Pickaxe.
4. You will die if the health status reaches 0.
5. You will start to lose health if one of the following conditions is true:
\t- Zombie or skeleton is attacking you.
\t- Food status is 0.
\t- Drink status is 0.
\t- You fall into lava.
6. To resolve each situation, you can do the following:
\t- If zombie or skeleton is attacking you, either run away or attack it. You must be one step away from zombie or skeleton to attack it.
\t- If food status is 0, attack cow. You must be one step away from cow to attack it.
\t- If drink status is 0, drink water. You must be one step away from water to drink it.
7. Within the environment, you can take one of the following actions:
\t- Noop: Take no action, always applicable.
\t- Move West: Requires no object or grass west of you.
\t- Move East: Requires no object or grass east of you.
\t- Move North: Requires no object or grass north of you.
\t- Move South: Requires no object or grass south of you.
\t- Do: Requires facing creature or object; (1) collects resource if necessary tools exist or (2) attacks creature
\t- Sleep: Requires below maximum energy status; automatically taken if energy status reaches 0.
\t- Place Stone: Requires stone in inventory.
\t- Place Table: Requires 2 wood in inventory.
\t- Place Furnace: Requires 4 stones in inventory.
\t- Place Plant: Requires sapling in inventory.
\t- Make Wood Pickaxe: Requires you to be 1 step away from table; requires wood in inventory.
\t- Make Stone Pickaxe: Requires you to be 1 step away from table; requires wood, stone in inventory.
\t- Make Iron Pickaxe: Requires you to be 1 step away from table and furnace; requires wood, coal, iron in inventory.
\t- Make Wood Sword: Requires you to be 1 step away from table; requires wood in inventory.
\t- Make Stone Sword: Requires you to be 1 step away from table; requires wood, stone in inventory.
\t- Make Iron Sword: Requires you to be 1 step away from table and furnace; requires wood, coal, iron in inventory.
8. To choose which action to take, consider the following:
\t- Take action that will resolve immediate threats.
\t- Take action that will collect useful resources.
\t- Take action that will achieve new achievements.

Current Observation:

{current_observation}

Action History:

{action_history_desc}

Instruction:

- Given the Environment Description, Current Observation, and Action History above, please describe your current status, the resources that you currently have, and potential threats that you currently face.
- Then, please describe the No.1 goal that you must work towards.
- Then, please list every action that you can take from the Action List below. Please exclude actions that you lack the resources or tools to take.
- Finally, please choose one action that aligns with your goal the most. Please output your final choice surrounded by quotation marks, such as \"Action Name\".

Action List:

{action_list_desc}
"""

    answer = f"""
Current Status:

* Health: {inventory["health"]}/9
* Food: {inventory["food"]}/9
* Drink: {inventory["drink"]}/9
* Energy: {inventory["energy"]}/9

Resources:

{inventory_desc}

Threats:

{threats_desc}

Goal:
{goal}

Action Options:

{action_options_desc}

Action Choice:
\"{action_choice}\"
"""

    return f"<s>[INST] {prompt} [/INST] {answer} </s>"

In [None]:
def get_threats(observed_objects):
  """List hostile creatures found in the parsed scene.

  Args:
    observed_objects: Mapping of direction -> None or a dict with 'name' and
      'steps' keys.

  Returns:
    One sentence per zombie or skeleton entry, in the mapping's iteration
    order; an empty list when none is present.
  """
  hostile_labels = {'zombie': 'Zombie', 'skeleton': 'Skeleton'}
  return [
    f"{hostile_labels[seen['name']]} {seen['steps']} step to the {where}."
    for where, seen in observed_objects.items()
    if (seen is not None) and (seen['name'] in hostile_labels)
  ]

In [None]:
def get_move_direction_goal(main_direction, sub_direction1, sub_direction2, observed_objects):
  """Build the goal sentence that explains a move in `main_direction`.

  Objects seen in the main direction and in the two adjacent diagonal
  directions are named as what the move approaches; when all three are
  empty the goal is exploration.

  Args:
    main_direction: Direction being moved in (e.g. 'west').
    sub_direction1: First neighbouring direction (e.g. 'south-west').
    sub_direction2: Second neighbouring direction (e.g. 'north-west').
    observed_objects: Mapping of direction -> None or a dict with 'name' and
      'steps' keys.

  Returns:
    The goal sentence.
  """
  prefix = f"My No.1 goal is to move {main_direction} to "

  sightings = []
  for heading in (main_direction, sub_direction1, sub_direction2):
    seen = observed_objects[heading]
    if seen is not None:
      sightings.append(f"{seen['name']} {seen['steps']} steps to the {heading}")

  if not sightings:
    return prefix + 'explore more.'
  return prefix + "approach " + ", ".join(sightings) + "."

def get_do_goal(inventory, observed_objects):
  """Build the goal sentence that explains a "Do" action.

  The goal depends on what the player is facing. The 'front' entry of the
  parsed scene is a dict ({'name': ..., 'steps': 1}) or None, so the object
  name has to be taken out of that dict before it is compared; comparing the
  dict itself against a string never matches and would always yield the
  generic fallback goal. A plain string 'front' value is still accepted.

  Args:
    inventory: Mapping of item/status name -> count; only 'sapling' is read,
      and only when the player faces grass.
    observed_objects: Mapping of direction -> None or object description;
      only 'front' is read.

  Returns:
    The goal sentence.
  """
  front = observed_objects['front']
  front_name = front['name'] if isinstance(front, dict) else front

  goals_by_target = {
    "water": "My No.1 goal is to drink water to refill my drink status.",
    "stone": "My No.1 goal is to mine stone for collecting resources.",
    "tree": "My No.1 goal is to cut down tree for collecting resources.",
    "coal": "My No.1 goal is to mine coal for collecting resources.",
    "iron": "My No.1 goal is to mine iron for collecting resources.",
    "diamond": "My No.1 goal is to mine diamond for collecting resources.",
    "zombie": "My No.1 goal is to defeat the zombie that is attacking me.",
    "skeleton": "My No.1 goal is to defeat the skeleton that is attacking me.",
    "cow": "My No.1 goal is to hunt the cow to refill my food status.",
    "plant": "My No.1 goal is to harvest the plant to refill my food status.",
  }

  if front_name in goals_by_target:
    return goals_by_target[front_name]
  if front_name == "grass" and inventory['sapling'] > 0:
    return "My No.1 goal is to plant the sapling to grow food, harvest, and refill my food status in the future."
  return "My No.1 goal is to take action."

def get_goal(action, inventory, observed_objects):
  """Return the goal sentence that justifies `action`.

  Only the goal for the requested action is computed: scene-dependent goals
  (the four moves and "Do") are built on demand instead of building all of
  them on every call and discarding all but one.

  Args:
    action: Human-readable action label (an entry of the action list).
    inventory: Mapping of item/status name -> count; forwarded to
      get_do_goal for the "Do" action.
    observed_objects: Mapping of direction -> None or object description;
      used by the move and "Do" goals.

  Returns:
    The goal sentence.

  Raises:
    KeyError: If `action` is not a known action label.
  """
  # Move action -> (main direction, the two neighbouring diagonal directions).
  move_directions = {
    'Move West': ('west', 'south-west', 'north-west'),
    'Move East': ('east', 'south-east', 'north-east'),
    'Move North': ('north', 'north-west', 'north-east'),
    'Move South': ('south', 'south-west', 'south-east'),
  }
  if action in move_directions:
    main_direction, sub_direction1, sub_direction2 = move_directions[action]
    return get_move_direction_goal(main_direction, sub_direction1, sub_direction2, observed_objects)

  if action == 'Do':
    return get_do_goal(inventory, observed_objects)

  static_goals = {
    'Noop': 'There is no clear No.1 goal currently.',
    'Sleep': 'My No.1 goal is to sleep and refill my sleep status',
    'Place Stone': 'My No.1 goal is to place stone and create a shelter to protect myself from zombies and skeletons.',
    'Place Table': 'My No.1 goal is to place table to craft useful tools.',
    'Place Furnace': 'My No.1 goal is to place furnace to craft iron tools.',
    'Place Plant': 'My No.1 goal is to place plant to harvest food later and refill my food status.',
    'Make Wood Pickaxe': 'My No.1 goal is to make wood pickaxe to be able to mine stone.',
    'Make Stone Pickaxe': 'My No.1 goal is to make stone pickaxe to be able to mine iron.',
    'Make Iron Pickaxe': 'My No.1 goal is to make iron pickaxe to be able to mine diamond.',
    'Make Wood Sword': 'My No.1 goal is to make wood sword to protect myself from zombies and skeletons.',
    'Make Stone Sword': 'My No.1 goal is to make stone sword to protect myself from zombies and skeletons.',
    'Make Iron Sword': 'My No.1 goal is to make iron sword to protect myself from zombies and skeletons.',
  }

  return static_goals[action]

In [None]:
def get_do_action_option(inventory, observed_objects):
  """Explain why "Do" is applicable right now, or return None if it is not.

  Args:
    inventory: Mapping of item/status name -> count; 'drink', 'food' and the
      three pickaxe counts are read depending on what is faced.
    observed_objects: Mapping of direction -> None or a dict with 'name' and
      'steps' keys; only 'front' is read.

  Returns:
    A sentence describing the faced object and the condition that makes "Do"
    useful, or None when nothing is faced or the condition is not met.
  """
  facing = observed_objects['front']
  if facing is None:
    return None

  target = facing['name']

  if target == "water":
    if inventory['drink'] < 9:
      return "I have water in front of me, and my drink status is below maximum."
  elif target == "stone":
    # Any pickaxe is accepted for stone.
    if inventory['wood_pickaxe'] > 0 or inventory['stone_pickaxe'] > 0 or inventory['iron_pickaxe'] > 0:
      return "I have stone in front of me, and I have pickaxe."
  elif target == "tree":
    return "I have tree in front of me."
  elif target == "coal":
    # Any pickaxe is accepted for coal.
    if inventory['wood_pickaxe'] > 0 or inventory['stone_pickaxe'] > 0 or inventory['iron_pickaxe'] > 0:
      return "I have coal in front of me, and I have pickaxe."
  elif target == "iron":
    # Iron needs a stone or iron pickaxe; the better one is reported.
    if inventory['stone_pickaxe'] > 0 or inventory['iron_pickaxe'] > 0:
      best_pickaxe = 'iron' if inventory['iron_pickaxe'] > 0 else 'stone'
      return f"I have iron in front of me, and I have {best_pickaxe} pickaxe."
  elif target == "diamond":
    if inventory['iron_pickaxe'] > 0:
      return "I have diamond in front of me, and I have iron pickaxe."
  elif target == "zombie":
    return "There is zombie in front of me that is attacking me."
  elif target == "skeleton":
    return "There is skeleton in front of me that is attacking me."
  elif target == "cow":
    if inventory['food'] < 9:
      return "I have cow in front of me, and my food status is below maximum."
  elif target == "plant":
    if inventory['food'] < 9:
      return "I have plant in front of me that I can harvest, and my food status is below maximum."

  return None

def is_object_nearby(observed_object, target_object_name):
  """Tell whether an object with the given name is exactly one step away.

  The scene passed in as an argument is the one inspected. Reading a
  module-level `observed_objects` instead would only work while a global of
  that name happens to exist, and would silently use the wrong scene.

  Args:
    observed_object: Mapping of direction -> None or a dict with 'name' and
      'steps' keys.
    target_object_name: Object name to look for (e.g. 'table').

  Returns:
    True if any direction holds the target object at 1 step, else False.
  """
  return any(
    (seen is not None) and (seen['name'] == target_object_name) and (seen['steps'] == 1)
    for seen in observed_object.values()
  )

def get_action_options(inventory, observed_objects):
  """List the actions that are currently applicable, each with a reason.

  Starts from a reason string for every action and removes the actions whose
  preconditions are not met by the inventory or the parsed scene.

  Args:
    inventory: Mapping of item/status name -> count.
    observed_objects: Mapping of direction -> None or a dict with 'name' and
      'steps' keys, including the 'front', 'west', 'east', 'north' and
      'south' entries.

  Returns:
    Lines of the form "<index>. <action>: <reason>", numbered from 0 in the
    order the actions are declared below.
  """
  all_action_options = {
      "Noop": "Take no action.",
      "Move West": "No object to my immediate west.",
      "Move East": "No object to my immediate east.",
      "Move North": "No object to my immediate north.",
      "Move South": "No object to my immediate south.",
      "Do": get_do_action_option(inventory, observed_objects),
      "Sleep": "My energy status is below maximum.",
      "Place Stone": f"I current have {inventory['stone']} stones in my inventory.",
      "Place Furnace": f"I current have {inventory['stone']} stones in my inventory.",
      "Place Table": f"I current have {inventory['wood']} wood in my inventory.",
      "Place Plant": f"I current have {inventory['sapling']} sapling in my inventory.",
      "Make Wood Pickaxe": f"I current have {inventory['wood']} wood in my inventory, and I am next to a table.",
      "Make Stone Pickaxe": f"I current have {inventory['wood']} wood and {inventory['stone']} stones in my inventory, and I am next to a table.",
      "Make Iron Pickaxe": f"I current have {inventory['wood']} wood, {inventory['iron']} iron, and {inventory['coal']} coals in my inventory, and I am next to a table and a furnace.",
      "Make Wood Sword": f"I current have {inventory['wood']} wood in my inventory, and I am next to a table.",
      "Make Stone Sword": f"I current have {inventory['wood']} wood and {inventory['stone']} stones in my inventory, and I am next to a table.",
      "Make Iron Sword": f"I current have {inventory['wood']} wood, {inventory['iron']} iron, and {inventory['coal']} coals in my inventory, and I am next to a table and a furnace.",
  }

  # Terrain the player can step onto / build on.
  walkable = ['grass', 'sand', 'path']

  # A move is blocked when something non-walkable sits one step away in that direction.
  for direction, move_action in (('west', 'Move West'), ('east', 'Move East'), ('north', 'Move North'), ('south', 'Move South')):
    neighbour = observed_objects[direction]
    if (neighbour is not None) and (neighbour['steps'] == 1) and (neighbour['name'] not in walkable):
      del all_action_options[move_action]

  if all_action_options['Do'] is None:
    del all_action_options['Do']
  if inventory['energy'] == 9:
    del all_action_options['Sleep']

  front = observed_objects['front']

  # Placement actions need both the material and a suitable tile in front.
  if inventory['stone'] == 0 or ((front is not None) and (front['name'] not in ['grass', 'sand', 'path', 'water', 'lava'])):
    del all_action_options['Place Stone']
  if inventory['stone'] < 4 or ((front is not None) and (front['name'] not in walkable)):
    del all_action_options['Place Furnace']
  if inventory['wood'] < 2 or ((front is not None) and (front['name'] not in walkable)):
    del all_action_options['Place Table']
  # A sapling can only go on grass, so the option is dropped when the faced
  # tile is anything else (dropping it when grass IS in front would remove
  # the option in exactly the situations where it applies).
  if (inventory['sapling'] == 0) or ((front is not None) and (front['name'] != 'grass')):
    del all_action_options['Place Plant']

  # Crafting needs a table (and a furnace for iron tools) one step away.
  near_table = is_object_nearby(observed_objects, 'table')
  near_furnace = is_object_nearby(observed_objects, 'furnace')

  lacks_wood_recipe = (inventory['wood'] == 0) or (not near_table)
  lacks_stone_recipe = (inventory['wood'] == 0) or (inventory['stone'] == 0) or (not near_table)
  lacks_iron_recipe = (inventory['wood'] == 0) or (inventory['coal'] == 0) or (inventory['iron'] == 0) or (not near_table) or (not near_furnace)

  if lacks_wood_recipe:
    del all_action_options['Make Wood Pickaxe']
  if lacks_stone_recipe:
    del all_action_options['Make Stone Pickaxe']
  if lacks_iron_recipe:
    del all_action_options['Make Iron Pickaxe']
  if lacks_wood_recipe:
    del all_action_options['Make Wood Sword']
  if lacks_stone_recipe:
    del all_action_options['Make Stone Sword']
  if lacks_iron_recipe:
    del all_action_options['Make Iron Sword']

  return [f"{i}. {key}: {value}" for i, (key, value) in enumerate(all_action_options.items())]

In [None]:
def parse_scene(input_string):
    """Turn a textual frame description into a direction -> object mapping.

    Lines following the first "You see:" line are read until a blank line;
    for each one the second token is the object name, the third the step
    count and the last the direction. Entries named "arrow" are ignored.
    The first line starting with "You face" sets the 'front' entry, always
    at 1 step.

    Args:
        input_string: The frame description text.

    Returns:
        Dict keyed by 'front' and the eight compass directions, each value
        being None or {'name': str, 'steps': int}.
    """
    text_lines = input_string.split('\n')

    # Every known direction starts out empty.
    scene = dict.fromkeys((
        "front",
        "west",
        "east",
        "south",
        "north",
        "south-west",
        "south-east",
        "north-west",
        "north-east",
    ))

    inside_listing = False
    for raw_line in text_lines:
        if raw_line.startswith("You see:"):
            inside_listing = True
        elif inside_listing:
            if not raw_line.strip():
                # A blank line terminates the "You see:" section.
                break
            tokens = raw_line.split()
            object_name = tokens[1]
            if object_name != "arrow":
                scene[tokens[-1]] = {"name": object_name, "steps": int(tokens[2])}

    # The faced object overrides whatever the listing put under 'front'.
    facing_line = next((entry for entry in text_lines if entry.startswith("You face")), None)
    if facing_line is not None:
        scene["front"] = {"name": facing_line.split()[2], "steps": 1}

    return scene

### Data Engineering

In [None]:
%cd ./SmartPlay/src
import smartplay
%cd ../..

In [None]:
%%time

import os
from tqdm import tqdm

training_dataset = []

valid_count = 0
out_of_map_error_count = 0

# Inventory fields; each one is stored in a recording as an "inventory_<field>" array.
inventory_fields = (
    'health', 'food', 'drink', 'energy',
    'sapling', 'wood', 'stone', 'coal', 'iron', 'diamond',
    'wood_pickaxe', 'stone_pickaxe', 'iron_pickaxe',
    'wood_sword', 'stone_sword', 'iron_sword',
)

for file in tqdm(npz_files):
    file_path = os.path.join(directory, file)

    # Read every needed array once per episode: indexing the object returned
    # by np.load() for an .npz archive loads that array from the file again on
    # each access, which would otherwise happen many times per step. The
    # context manager closes the archive even if reading fails.
    with np.load(file_path) as data:
        num_steps = len(data["image"])
        actions = data["action"]
        player_positions = data["player_pos"]
        done_flags = data["done"]
        semantic_maps = data["semantic"]
        inventory_columns = {field: data[f"inventory_{field}"] for field in inventory_fields}

    sleeping = False
    previous_player_pos = player_positions[0]
    player_facing = (0, 1)
    actions_history = []

    # The last step has no following action to use as a label, so it is skipped.
    for step_num in range(num_steps - 1):
        last_action_name = action_names[actions[step_num]]  # snake-cased action name
        last_action = action_list[actions[step_num]]

        # Handle sleeping state
        if last_action_name == "sleep" and inventory_columns['energy'][step_num] < 9:
            sleeping = True
        elif sleeping and last_action_name != "noop":
            sleeping = False

        # Handle player_facing state: the facing direction is taken from the last position change
        current_player_pos = player_positions[step_num]
        if previous_player_pos[0] != current_player_pos[0] or previous_player_pos[1] != current_player_pos[1]:
            player_facing = (current_player_pos[0] - previous_player_pos[0], current_player_pos[1] - previous_player_pos[1])

        info = {
            "action": last_action_name,
            "sleeping": sleeping,
            "dead": done_flags[step_num],
            "inventory": {field: column[step_num] for field, column in inventory_columns.items()},
            "player_pos": current_player_pos,
            "player_facing": player_facing,
            "view": np.array([9, 9]),
            "semantic": semantic_maps[step_num],
        }

        # The label for this step is the action recorded for the next step.
        action_choice = action_list[actions[step_num + 1]]

        actions_history.append(last_action)

        current_observation = smartplay.crafter.crafter_env.describe_frame(info, last_action)

        if current_observation == "Error, you are out of the map.":
            out_of_map_error_count += 1

        else:
            observed_objects = parse_scene(current_observation)

            threats = get_threats(observed_objects)

            # The goal must explain the action the answer ends with; deriving
            # it from the previously taken action makes the "Goal" and the
            # "Action Choice" sections of the same answer disagree.
            goal = get_goal(action_choice, info['inventory'], observed_objects)

            action_options = get_action_options(info['inventory'], observed_objects)

            prompt_and_answer = compose_prompt_and_answer(current_observation, actions_history[-10:], action_list, info['inventory'], threats, goal, action_options, action_choice)

            training_dataset.append({ 'text': prompt_and_answer })

            valid_count += 1

        previous_player_pos = current_player_pos

print(f"Valid Data Point: {valid_count}")
print(f"Out-Of-Map Error Data Point: {out_of_map_error_count}")

In [None]:
import pandas as pd

# One row per training sample; the rows are the {'text': ...} dicts collected above,
# so the frame has a single 'text' column.
training_dataset_df = pd.DataFrame(training_dataset)
# Written without the index column so the CSV holds only the 'text' column.
training_dataset_df.to_csv('training_dataset.csv', index=False)

In [None]:
from huggingface_hub import notebook_login

notebook_login()

In [None]:
from datasets import load_dataset

# Reload the exported CSV as a `datasets` dataset (train split) ...
uploadable_training_dataset = load_dataset('csv', data_files='training_dataset.csv', split='train')
# ... and publish it to the Hugging Face Hub repository named below.
uploadable_training_dataset.push_to_hub("techandy42/CrafterGPT-Training-Dataset")