# COSC-523 P2 Group 13: 20 Questions
### Jared Bell, Imran Dewjee, Yair Elovic, Shiloh Parker

## Loading Model

In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Load model and tokenizer
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-4k-instruct",
    device_map="auto", #Mark MPS for M series Mac's, cuda for NVIDIA gpus
    dtype="auto",
    trust_remote_code=False,
    temperature=0
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")

# Create a pipeline
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    return_full_text=False,
    max_length=4000,
    max_new_tokens=15,
    do_sample=False,
)


  from .autonotebook import tqdm as notebook_tqdm
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Loading checkpoint shards: 100%|██████████| 2/2 [00:05<00:00,  2.78s/it]
Some parameters are on the meta device because they were offloaded to the disk.
Device set to use mps


## Generating Decision Tree Questions from Properties

In [2]:
with open('noun_universe.txt', 'r') as f:
    nouns = [line.rstrip() for line in f]

with open('property_universe.txt', 'r') as f:
    properties = [line.rstrip() for line in f]
    
property_questions = {}

for prop in properties:
    prompt = [
        {"role": "system", "content": "You are building a series of questions to ask in a game of 20 questions where are user will provide Yes/No answers. You will not actually be asked these questions, but based on rankings of nouns, a binary decision tree will be created using your provided questions to determine the best guess. You will base the generated question from a single provided property. Provide only a single yes or no question to ask the user for the provided property. Provide no additional context or words."
        },
        {"role": "user", "content": f"Create a question to help sort a list of things based on yes or no answers. The question should be based on this property: {prop}"},
    ]
    question = pipe(prompt)[0]["generated_text"]
    print(question)
    property_questions[prop] = question

print(len(property_questions))


 Is the item easily accessible to people with disabilities?
 Is the item accessible globally?
 Is the item known for its high level of precision?
 Is the item known for its precision and correctness?
 "Is the item associated with high physical activity?"
 Is the item known for its aggressive behavior or characteristics?
 Is the item known for its agility?
 "Does the item shed a significant amount of hair or fur?"
 "Is the annual precipitation above 1000mm?"
 Is the item considered visually appealing?
 Is the item adapted for living in water?
 Is the item fragrant?
 Is the item artificially created?
 Is it capable of attacking?
 "Is the content suitable for a general audience?"
 Is it considered beautiful?
 Is the item considered beautiful?
 Is it big?
 Is the item rich in biodiversity?
 Is the object larger than a basketball?
 Is the item breakable?
 Is the item bright?
 Is the item designed to blend in with its surroundings?
 "Can the item hold a significant amount of content?"
 "Is t

In [3]:
import json

with open('property_questions.txt', 'w') as f:
    f.write(json.dumps(property_questions, indent=4))


## Property Generation

## Property Parsing

In [3]:
import json

file = open("all_properties_question_scored.json", "r")
data = file.read()
json_data = json.loads(data)

formatted_data = {}

for prop, scores in json_data.items():
    formatted_data[prop] = {}
    for score in scores:
        noun = score[0]
        value = score[1]
        formatted_data[prop][noun] = value
        
property_table = {}
threshold = 5
for prop, noun_scores in formatted_data.items():
    property_table[prop] = []
    for noun, score in noun_scores.items():
        if score>=threshold:
            property_table[prop].append(noun)
            
json_str = json.dumps(property_table)

with open("property_table.json", "w") as f:
    f.write(json_str)


## Example Property Comparison

In [None]:
import copy

property_json = open("property_table.json", "r").read()

property_table = json.loads(property_json)

# Using sets for easier comparison of noun lists
set_table = {k: set(v) for k, v in property_table.items()}

remainder = copy.deepcopy(set_table)
# No response to question
subtraction = set_table["accessibility"]
rem_table = {k: v-subtraction for k, v in remainder.items() if len(v-subtraction)>0}
remainder = rem_table

print(len(rem_table))

# Yes response to question
intersection = set_table["accuracy"]
rem_table = {k: v & intersection for k, v in remainder.items() if len(v & intersection)>0}
remainder = rem_table

print(len(rem_table))


227
227
