In [None]:
!pip install -U "huggingface_hub[cli]"



In [None]:
# Interactive login to the Hugging Face Hub; prompts for an access token (input is hidden).
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) y
Token is valid (permission: write

In [None]:
import pandas as pd
import requests
import random
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from collections import Counter
import numpy as np

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import os

model_id = "meta-llama/Llama-3.2-1B-Instruct"

# SECURITY: never hard-code an access token in a notebook. The token that used to be
# committed here must be considered leaked and should be revoked on huggingface.co.
# Read it from the environment instead; when HF_TOKEN is unset this is None and the
# credentials cached by `huggingface-cli login` are used.
access_token = os.environ.get("HF_TOKEN")

# `token=` replaces the deprecated `use_auth_token=` argument (required for gated models).
tokenizer = AutoTokenizer.from_pretrained(model_id, token=access_token)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto",
    token=access_token,
)

In [1]:
# PROMPTING FUNCTION
def llm_output(words):
  """Ask the loaded model to solve one Connections puzzle and return the decoded text.

  Args:
    words: the puzzle's words; they are inserted into the prompt via ``str(words)``.

  Returns:
    The decoded string for the first generated sequence. It is also printed,
    prefixed with "Response:".

  Relies on the module-level ``tokenizer`` and ``model`` objects. Sampling is enabled
  (``do_sample=True``, ``temperature=0.7``), so repeated calls can differ.
  """
  # The trailing backslashes are line continuations inside the string literal, so the
  # prompt is a single line that keeps the leading spaces of every continued line.
  prompt = "You are playing the NY Times Connections game. I will give you 16 words, and your task is to group them into 4 sets of exactly 4 words each, based on a common theme. \
          Find groups of four items that share something in common. \
          Category Examples:\
          FISH: Bass, Flounder, Salmon, Trout\
          FIRE ___: Ant, Drill, Island, Opal\
          Categories will always be more specific than ‘5-LETTER-WORDS’, ‘NAMES’, or ‘VERBS.’\
          Example: \
          **WORDS**: [BUCKS, HAIL, JAZZ, SHIFT, LEVEL, MOM, SNOW, RACECAR, SLEET, TAB, KAYAK, RETURN, OPTION, NETS, RAIN, HEAT]  \
          **SOLUTION**: [['HAIL', 'RAIN', 'SLEET', 'SNOW'], ['BUCKS', 'HEAT', 'JAZZ', 'NETS'], ['OPTION', 'RETURN', 'SHIFT', 'TAB'], ['KAYAK', 'LEVEL', 'MOM', 'RACECAR']]  \
          **EXPLANATION**: \
          WET WEATHER: 'HAIL', 'RAIN', 'SLEET', 'SNOW'\
          NBA TEAMS: 'BUCKS', 'HEAT', 'JAZZ', 'NETS'\
          KEYBOARD KEYS: 'OPTION', 'RETURN', 'SHIFT', 'TAB'\
          PALINDROMES:  'KAYAK', 'LEVEL', 'MOM', 'RACECAR'\
          \
          Categories share commonalities: \
          • There are 4 categories of 4 words each\
          • Every word will be in only 1 category \
          • One word will never be in two categories\
          • There may be a red herrings (words that seems to belong together but actually are in separate categories)\
          • Category may contain compound words with a common prefix or suffix word\
          • A few other common categories include word and letter patterns, pop culture clues (such as music and movie titles) and fill-in-the-blank phrases You will be given a new example (Example 4) with today’s list of words. Give your final answer following the structure below\
          [['word1', 'word2', 'word3', 'word4'],['word5', 'word6', 'word7', 'word8'],['word9', 'word10', 'word11', 'word12'],['word13', 'word14', 'word15', 'word16']]\
          Do NOT include **EXPLANATIONS** or anything except the **SOLUTION** in the above format. Only give one **SOLUTION**\
          Remember that the same word cannot be repeated across multiple categories, and you need to output 4 categories with 4 distinct words each. Also do not make up words not in the list. This is the most important rule. Please obey\
          Now, group the following words correctly:'\
          **WORDS**:" + str(words) + "\\**SOLUTION**: "
  # "\\*" above yields the same backslash + asterisk the original "\*" produced, without
  # relying on an invalid escape sequence (a SyntaxWarning on recent Python versions).

  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

  # NOTE(review): max_length bounds prompt + generated tokens together; if answers get
  # cut off, max_new_tokens may be the intended knob - confirm before changing.
  output_ids = model.generate(**inputs, max_length=1000, temperature=0.7, do_sample=True)

  response = tokenizer.decode(output_ids[0], skip_special_tokens=True)

  print("\nResponse:", response)
  return response

In [None]:
#GET DATA FROM DATASET
url = 'https://raw.githubusercontent.com/Eyefyre/NYT-Connections-Answers/main/connections.json'

# A timeout prevents the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)

# Fail loudly here: continuing would either raise a NameError on `df` below or,
# worse, silently reuse a stale `df` left in the kernel by an earlier run.
if response.status_code != 200:
    raise RuntimeError(f"Failed to retrieve data: {response.status_code}")

try:
    json_data = response.json()
except requests.exceptions.JSONDecodeError as e:
    raise ValueError(f"JSON decoding failed: {e}") from e

df = pd.DataFrame(json_data)

# Keep only the first 650 rows of the downloaded data.
df = df[:650]

In [None]:
# EVALUATION FUNCTION
def evaluation_scores(actual_clusters, predicted_clusters):
  """Score one predicted grouping against the true grouping.

  Args:
    actual_clusters: sequence of true groups (each a sequence of words).
    predicted_clusters: sequence of predicted groups (each a sequence of words).

  Returns:
    A ``(succ_rate, purity)`` tuple:
      * succ_rate - number of (actual, predicted) pairs whose Jaccard similarity is
        exactly 1.0, i.e. groups reproduced exactly.
      * purity - for every predicted group take its largest overlap with any true
        group, sum those overlaps and divide by the number of predicted words.
    ``(0, 0.0)`` is returned when either input is empty, so callers can always
    unpack two values.
  """
  if len(actual_clusters) == 0 or len(predicted_clusters) == 0:
    return 0, 0.0

  actual_sets = [set(cluster) for cluster in actual_clusters]
  predicted_sets = [set(cluster) for cluster in predicted_clusters]

  # Evaluating jaccard matching between each pair of sets. Iterate over the real
  # number of groups instead of assuming exactly four on each side.
  score_matrix = np.zeros((len(actual_sets), len(predicted_sets)))
  for actual_idx, actual_group in enumerate(actual_sets):
    for predicted_idx, predicted_group in enumerate(predicted_sets):
      union = actual_group | predicted_group
      if union:  # two empty groups share nothing; leave the score at 0
        score_matrix[actual_idx, predicted_idx] = len(actual_group & predicted_group) / len(union)

  # Success Rate
  succ_rate = np.sum(score_matrix == 1.0)

  total_words = sum(len(cluster) for cluster in predicted_clusters)  # 16 for a full 4x4 prediction
  if total_words == 0:
    return succ_rate, 0.0

  # Take the largest overlap with any true group for each predicted group
  max_overlaps = sum(
      max(len(predicted_group & actual_group) for actual_group in actual_sets)
      for predicted_group in predicted_sets
  )
  return succ_rate , max_overlaps / total_words

def evaluation(actual_clusters_set, predicted_clusters_set):
  """Average the per-puzzle scores of ``evaluation_scores`` over a set of puzzles.

  Args:
    actual_clusters_set: iterable of true groupings, one per puzzle.
    predicted_clusters_set: iterable of predicted groupings, in the same order.

  Returns:
    ``(mean_success_rate, mean_purity)``; ``(0.0, 0.0)`` when there are no puzzles.

  Each actual/predicted pair and the final sample count are printed. The two inputs
  are paired with ``zip``, so extra items in the longer one are ignored.
  """
  succ_rate = 0
  purity = 0
  num_samples = 0
  for actual_clusters, predicted_clusters in zip(actual_clusters_set, predicted_clusters_set):
    num_samples += 1
    print("ACTUAL CLUSTER:",actual_clusters, " \nPREDICTED CLUSTER: ", predicted_clusters, "\n")
    val1, val2 = evaluation_scores(actual_clusters, predicted_clusters)
    succ_rate += val1
    purity += val2
  print(num_samples)
  if num_samples == 0:
    # Nothing to average; avoid dividing by zero.
    return 0.0, 0.0
  return succ_rate/num_samples, purity/num_samples

In [None]:
# FORMING TRAIN AND TEST SPLIT DATA
# Removal of emoji data
def _has_only_ascii_clusters(day_sample):
  """Return True when every cluster of the puzzle has exactly four ASCII members."""
  return all(
      sum(member.isascii() for member in cluster['members']) == 4
      for cluster in day_sample
  )


def _trim_to_whole_puzzles(rows, labels):
  """Drop trailing group rows so the row count is a multiple of 4 (one puzzle = 4 groups)."""
  usable_rows = len(rows) - len(rows) % 4
  return np.array(rows[:usable_rows]), np.array(labels[:usable_rows])


def _shuffled_and_grouped(x):
  """Return (puzzles as shuffled 16-word rows, puzzles as 4x4 ground-truth grids) for group rows x."""
  n_puzzles = x.shape[0] // 4
  x_m1 = x.reshape(n_puzzles, 4*4).copy()
  x_actual_m1 = x.reshape(n_puzzles, 4, 4).copy()
  for puzzle_words in x_m1:
    # Each row is a 1-D view, so shuffling it reorders the words of that puzzle in place.
    random.shuffle(puzzle_words)
  return x_m1, x_actual_m1


data_mod = []
data_labels_mod = []
for day_sample in df['answers']:
  # Skip the whole puzzle as soon as one of its clusters contains a non-ASCII member.
  if not _has_only_ascii_clusters(day_sample):
    continue
  for cluster in day_sample:
    data_mod.append(cluster['members'])
    data_labels_mod.append(cluster['group'])

data_mod = np.array(data_mod)
data_labels_mod = np.array(data_labels_mod)
# One row per puzzle: 16 words / 4 group names.
data = data_mod.reshape(data_mod.shape[0] // 4, 4*4)
data_labels = data_labels_mod.reshape(data_labels_mod.shape[0] // 4, 4)

train_data, temp_data, train_labels, temp_labels = train_test_split(data, data_labels, test_size=0.2, random_state=101)
# Then split (valid + test) into valid and test
valid_data, test_data, valid_labels, test_labels = train_test_split(temp_data, temp_labels, test_size=0.5, random_state=101)

# Back to one row per group (4 words) and one label per group.
train_data = train_data.reshape(-1,4)
train_labels = train_labels.reshape(-1,1)

valid_data = valid_data.reshape(-1,4)
valid_labels = valid_labels.reshape(-1,1)

test_data = test_data.reshape(-1,4)
test_labels = test_labels.reshape(-1,1)

# Seed the word shuffling as well, so the shuffled puzzles are reproducible across
# kernel restarts (the train/valid/test split above is already seeded).
random.seed(101)

train_x, train_label = _trim_to_whole_puzzles(train_data, train_labels)
train_x_m1, train_x_actual_m1 = _shuffled_and_grouped(train_x)
print(train_x_m1[0])
print(train_x_m1.shape)

valid_x, valid_label = _trim_to_whole_puzzles(valid_data, valid_labels)
valid_x_m1, valid_x_actual_m1 = _shuffled_and_grouped(valid_x)
print(valid_x_m1[0])
print(valid_x_m1.shape)

test_x, test_label = _trim_to_whole_puzzles(test_data, test_labels)
test_x_m1, test_x_actual_m1 = _shuffled_and_grouped(test_x)
print(test_x_m1[0])
print(test_x_actual_m1[0])
print(test_x_m1.shape)

# All three splits stacked together (train, then valid, then test).
tot_x = np.concatenate((train_x, valid_x, test_x), axis=0)
tot_label = np.concatenate((train_label, valid_label, test_label), axis=0)
tot_x_m1, tot_x_actual_m1 = _shuffled_and_grouped(tot_x)
print(tot_x_m1[0])
print(tot_x_m1.shape)

['GUILT' 'EGO' 'MUST' 'GUIDE' 'EXPRESSION' 'POWER' 'TOKEN' 'MOLD' 'SYMBOL'
 'MIGHT' 'MODEL' 'HEAD' 'GESTURE' 'BLUEPRINT' 'WILL' 'CAN']
(516, 16)
['MOON' 'MONTY' 'VIPER' 'TEDDY' 'MAMBA' 'ABSTRACT' 'RUNDOWN' 'DIGEST'
 'CIRCLE' 'BRIEF' 'PYTHON' 'HOUSE' 'GARTER' 'THONG' 'SLIP' 'BOA']
(65, 16)
['PROGRESS' 'ABSURDITY' 'GARBAGE' 'FEET' 'SIX' 'NONSENSE' 'PUSH' 'FOLLY'
 'FAKE' 'ADVANCE' 'CHEESE' 'CUT' 'MADNESS' 'MARCH' 'FREEZE' 'SKUNK']
[['ABSURDITY' 'FOLLY' 'MADNESS' 'NONSENSE']
 ['ADVANCE' 'MARCH' 'PROGRESS' 'PUSH']
 ['CHEESE' 'FEET' 'GARBAGE' 'SKUNK']
 ['CUT' 'FAKE' 'FREEZE' 'SIX']]
(65, 16)
['MIGHT' 'WILL' 'SYMBOL' 'MODEL' 'CAN' 'MOLD' 'TOKEN' 'POWER' 'BLUEPRINT'
 'EXPRESSION' 'GUIDE' 'GUILT' 'HEAD' 'GESTURE' 'EGO' 'MUST']
(646, 16)


In [None]:
# Module-level accumulators for LLM responses; both start out empty.
LLM_RESPONSE = list()
llm_responses_text_list = list()

In [None]:
# TO EXTRACT RESPONSES SAVED IN TEXT FILE
import re

def extract_responses(file_path):
    """Read a UTF-8 text file and return every "Response:" section it contains.

    A section starts after the literal ``Response:`` and runs up to (not including)
    the next line that begins with ``Response:``, or to the end of the file.
    Surrounding whitespace is stripped from each section.
    """
    section_re = re.compile(r"Response:\s*(.*?)(?=\nResponse:|\Z)", re.DOTALL)

    with open(file_path, "r", encoding="utf-8") as handle:
        contents = handle.read()

    sections = []
    for found in section_re.finditer(contents):
        sections.append(found.group(1).strip())
    return sections

# Load the previously saved responses into the module-level `responses` list.
file_path = "RESPONSES (1).txt"  # Replace with your actual file path
responses = extract_responses(file_path)
# Optional inspection of everything that was parsed (left disabled to keep the output short):
# print(responses)
# for i, response in enumerate(responses, 1):
#     print(f"Response {i}:\n{response}\n{'-'*80}")
# Display the second parsed response as the cell output (requires at least two responses).
responses[1]

"You are playing the NY Times Connections game. I will give you 16 words, and your task is to group them into 4 sets of exactly 4 words each, based on a common theme.           Find groups of four items that share something in common.           Category Examples:          FISH: Bass, Flounder, Salmon, Trout          FIRE ___: Ant, Drill, Island, Opal          Categories will always be more specific than ‘5-LETTER-WORDS’, ‘NAMES’, or ‘VERBS.’          Example:           **WORDS**: [BUCKS, HAIL, JAZZ, SHIFT, LEVEL, MOM, SNOW, RACECAR, SLEET, TAB, KAYAK, RETURN, OPTION, NETS, RAIN, HEAT]            **SOLUTION**: [['HAIL', 'RAIN', 'SLEET', 'SNOW'], ['BUCKS', 'HEAT', 'JAZZ', 'NETS'], ['OPTION', 'RETURN', 'SHIFT', 'TAB'], ['KAYAK', 'LEVEL', 'MOM', 'RACECAR']]            **EXPLANATION**:           WET WEATHER: 'HAIL', 'RAIN', 'SLEET', 'SNOW'          NBA TEAMS: 'BUCKS', 'HEAT', 'JAZZ', 'NETS'          KEYBOARD KEYS: 'OPTION', 'RETURN', 'SHIFT', 'TAB'          PALINDROMES:  'KAYAK', 'LEVEL', '

In [None]:
# TO EXTRACT BEST POSSIBLE SOLUTION FROM THE LLM RESPONSE. THIS IS IN CASE LLM PROVIDES MULTIPLE RESPONSES
import re
import ast

def extract_best_solution(response_text):
    """Return the first well-formed 4x4 solution the model produced, or None.

    Every ``**SOLUTION**: [[...]]`` block in ``response_text`` is collected. The first
    block is always skipped: the responses handled in this notebook begin with the
    prompt, whose worked example is itself a ``**SOLUTION**`` block. Of the remaining
    blocks, the first one that parses as a list of four lists/tuples of four items
    each is returned.

    Args:
        response_text: full decoded response (prompt followed by the model's answer).

    Returns:
        The solution as a list of four 4-item groups, or None when no block after the
        first is a valid 4x4 grouping.
    """
    # Regular expression to find **SOLUTION** blocks containing a 4x4 matrix
    solution_pattern = r"\*\*SOLUTION\*\*:\s*(\[\[.*?\]\])"

    # Find all matches
    matches = re.findall(solution_pattern, response_text, re.DOTALL)

    for match in matches[1:]:
        try:
            # Convert string to Python list
            solution = ast.literal_eval(match)
        except (SyntaxError, ValueError, TypeError):
            continue  # Skip blocks that are not valid Python literals

        # Ensure it's a 4x4 matrix. Checking the group type first keeps non-sized
        # entries (e.g. a bare number) from raising in len(), and keeps 4-character
        # strings from being mistaken for groups of four words.
        if (
            isinstance(solution, list)
            and len(solution) == 4
            and all(isinstance(group, (list, tuple)) and len(group) == 4 for group in solution)
        ):
            return solution  # Stop at the first valid 4x4 matrix

    return None

# # Example usage
# for resp in responses:
#   # response_text = responses[0]  # Replace with actual LLM response text
#   best_solution = extract_best_solution(resp)
#   # print(type(best_solution[0]))
#   if best_solution:
#       pass
#       print("Best Solution Found:")
#       for group in best_solution:
#           print(group)
#   else:
#       print("No valid solution found.")

In [None]:
import ast
import re
# CONVERTS THE SOLUTION GIVEN BY LLM FROM STRING TO A 2D ARRAY
# def convert_to_2d_array(input_string):
#     # Add double quotes around each word using regex
#     formatted_string = re.sub(r'(\w+)', r'"\1"', input_string)

#     # Ensure proper list syntax
#     formatted_string = formatted_string.replace("'","")
#     formatted_string = formatted_string.replace('[', '[ ').replace(']', ' ]')

#     try:
#         # Safely evaluate the formatted string as a list of lists
#         output_list = ast.literal_eval(formatted_string)

#         # Convert to a numpy array
#         np_array = np.array(output_list)
#         return np_array

#     except Exception as e:
#         print(f"Error while processing the input string: {e}")
#         return None

# Obtains one response (fresh from the LLM, or from the saved `responses` list) and
# turns it into a 4x4 array holding the suggested solution.
def llm_model(words, file, count, flag):
  """Return the predicted 4x4 grouping for one puzzle.

  Args:
    words: the puzzle words, forwarded to ``llm_output`` when ``flag`` is truthy.
    file: writable object; the raw response text is written to it.
    count: index into the module-level ``responses`` list, used when ``flag`` is falsy.
    flag: truthy to query the model, falsy to reuse ``responses[count]``.

  Returns:
    A (4, 4) numpy array. When no valid solution is found, or any exception is
    raised on the way (it is printed, not re-raised), an all-empty-string (4, 4)
    array is returned instead.
  """
  try:
    output = llm_output(words) if flag else responses[count]
    LLM_RESPONSE.append(output)
    file.write(output)

    grid = np.array(extract_best_solution(output))
    if grid.shape == (4,4):
      return grid
    return np.zeros((4,4), dtype=str)
  except Exception as e:
    print(f"An error occurred: {e}")
    return np.zeros((4,4), dtype=str)

# def llm_model(words, file, count, flag):
#   try:
#     if flag:
#       output = llm_output(words)
#     else:
#       output = responses[count]
#     LLM_RESPONSE.append(output)
#     file.write(output)
#     # print("gurfjrhjb",output)
#     # output = "You are playing the NY Times Connections game. I will give you 16 words, and your task is to group them into 4 sets of exactly 4 words each, based on a common theme.           Find groups of four items that share something in common.           Category Examples:          FISH: Bass, Flounder, Salmon, Trout          FIRE ___: Ant, Drill, Island, Opal          Categories will always be more specific than ‘5-LETTER-WORDS’, ‘NAMES’, or ‘VERBS.’          Example:           Words: [BUCKS, HAIL, JAZZ, SHIFT, LEVEL, MOM, SNOW, RACECAR, SLEET, TAB, KAYAK, RETURN, OPTION, NETS, RAIN, HEAT]            Solution: [['HAIL', 'RAIN', 'SLEET', 'SNOW'], ['BUCKS', 'HEAT', 'JAZZ', 'NETS'], ['OPTION', 'RETURN', 'SHIFT', 'TAB'], ['KAYAK', 'LEVEL', 'MOM', 'RACECAR']]            Explanation:           WET WEATHER: 'HAIL', 'RAIN', 'SLEET', 'SNOW'          NBA TEAMS: 'BUCKS', 'HEAT', 'JAZZ', 'NETS'          KEYBOARD KEYS: 'OPTION', 'RETURN', 'SHIFT', 'TAB'          PALINDROMES:  'KAYAK', 'LEVEL', 'MOM', 'RACECAR'                    Categories share commonalities:           • There are 4 categories of 4 words each          • Every word will be in only 1 category           • One word will never be in two categories          • There may be a red herrings (words that seems to belong together but actually are in separate categories)          • Category may contain compound words with a common prefix or suffix word          • A few other common categories include word and letter patterns, pop culture clues (such as music and movie titles) and fill-in-the-blank phrases You will be given a new example (Example 4) with today’s list of words. 
#     # Give your final answer following the structure below          **SOLUTION**: [['word1', 'word2', 'word3', 'word4'],['word5', 'word6', 'word7', 'word8'],['word9', 'word10', 'word11', 'word12'],['word13', 'word14', 'word15', 'word16']]          Do NOT include explanations for your groupings          Remember that the same word cannot be repeated across multiple categories, and you need to output 4 categories with 4 distinct words each. Also do not make up words not in the list. This is the most important rule. Please obey          Now, group the following words correctly:            **Words:**['MARCH' 'FOLLY' 'NONSENSE' 'CHEESE' 'SIX' 'GARBAGE' 'PROGRESS' 'MADNESS' 'FEET' 'PUSH' 'ADVANCE' 'FREEZE' 'ABSURDITY' 'CUT' 'SKUNK' 'FAKE']\**SOLUTION**:  [['MARCH', 'FOLLY', 'NONSENSE', 'SIX'], ['CHEESE', 'GARBAGE', 'FEET', 'PROGRESS'], ['MADNESS', 'ADVANCE', 'FREEZE', 'SKUNK'], ['ABSURDITY', 'PUSH', 'CUT', 'FAKE']]"
#     # print("kjrfuerjb", type(output))
#     solution_output = output.split("**SOLUTION**:")[-1]
#     solution_output = solution_output.split("]]")[0]
#     solution_output += "]]"
#     print("\nEXTRACTED OUTPUT",solution_output)
#     modified_out = convert_to_2d_array(solution_output)
#     #np.array([x + ['0']*(4-len(x)) for x in ast.literal_eval((output).replace('\n','').replace(' ',''))])
#     # print("kguegwdjn",modified_out)
#     # print("kfheuhiwn", modified_out.shape)
#     if modified_out.shape != (4,4):
#         modified_out = np.zeros((4,4), dtype=str)
#   except Exception as e:
#     # Handle any exception
#     print(f"An error occurred: {e}")
#     modified_out = np.zeros((4,4), dtype=str)
#   return modified_out

In [None]:
import time
# CALLS LLM ONE BY ONE FOR EACH PUZZLE IN CURRENT SET AND STORES ALL RESPONSES IN AN ARRAY. CLUSTERS_M1 WILL BE A 3D ARRAY
def llm_model_eval(train_x, missed_samples_file_path = 'sample_data/missed.txt', use_llm = False, response_log_path = 'llm_response.txt'):
  """Run ``llm_model`` on every puzzle and collect the predicted 4x4 groupings.

  Args:
    train_x: iterable of puzzles (each the 16 shuffled words of one puzzle).
    missed_samples_file_path: file that receives, one per line, the puzzles for which
      no usable solution was obtained.
    use_llm: forwarded to ``llm_model`` as its ``flag``; False (the default, and the
      previous hard-coded behaviour) reuses the saved responses instead of querying
      the model.
    response_log_path: file that receives the raw response texts (overwritten).

  Returns:
    List with one ``llm_model`` result per puzzle, in input order.
  """
  import os

  # data processing m1
  clusters_m1 = []
  errored_m1 = []
  # `with` guarantees the log is closed even if a puzzle raises midway.
  with open(response_log_path, 'w', encoding='utf-8') as file:
    for cnt, x in enumerate(tqdm(train_x)):
      out = llm_model(x, file, cnt, use_llm)
      clusters_m1.append(out)
      # llm_model signals "no usable solution" with an all-empty-string grid;
      # record those inputs so the missed-samples file is actually filled.
      if np.all(out == ''):
        errored_m1.append(x)

  # print missed inputs
  missed_dir = os.path.dirname(missed_samples_file_path)
  if missed_dir:
    # The default location only pre-exists on some hosts; create it when absent.
    os.makedirs(missed_dir, exist_ok=True)
  with open(missed_samples_file_path, 'w', encoding='utf-8') as f:
    for x in errored_m1:
      f.write(str(x) + '\n')
  print(f'Number of errored samples: {len(errored_m1)}')
  return clusters_m1

In [None]:
def llm_pipeline(x_data, actual_data, numpy_save_file = 'sample_data/train_x_predicted_m1.npy'):
  """Predict groupings for ``x_data``, score them against ``actual_data`` and save them.

  Prints the success rate, the purity and the number of missed samples, and stores
  the predictions with ``np.save`` at ``numpy_save_file``.

  Returns:
    ``(succ, purity, missed_cnt)`` where ``missed_cnt`` lists the indices of the
    predictions that are entirely empty strings.
  """
  clusters_m1 = llm_model_eval(x_data)

  succ, purity = evaluation(actual_data, clusters_m1)
  print(f'Success Rate: {succ}')
  print(f'Purity: {purity}')

  np.save(numpy_save_file, clusters_m1)

  # A prediction counts as missed when all 16 cells equal the empty string.
  missed_cnt = [
      idx
      for idx, predicted in enumerate(clusters_m1)
      if np.sum(predicted == np.zeros((4,4), dtype=str)) == 16
  ]
  print(f'Count of Missed samples: {len(missed_cnt)}')
  return succ, purity, missed_cnt

In [None]:
# Only the test split is evaluated here; the train / valid / all-data runs are kept
# below (disabled) for reference.
# print('llm: train')
# train_succ, train_jacc, train_missing = llm_pipeline(train_x_m1, train_x_actual_m1,'sample_data/train_x_predicted_m1.npy')
# print('llm: valid')
# valid_succ, valid_jacc, valid_missing  = llm_pipeline(valid_x_m1, valid_x_actual_m1,'sample_data/valid_x_predicted_m1.npy')
print('llm: test')
# Scores the test puzzles and saves the predictions to the given .npy file.
test_succ, test_purity, test_missing = llm_pipeline(test_x_m1, test_x_actual_m1,'sample_data/test_x_predicted_m1.npy')
# print('llm: All')
# tot_succ, tot_jacc, tot_missing = llm_pipeline(tot_x_m1, tot_x_actual_m1,'sample_data/tot_x_predicted_m1.npy')

llm: test


100%|██████████| 65/65 [00:00<00:00, 8046.21it/s]

Number of errored samples: 0
ACTUAL CLUSTER: [['ABSURDITY' 'FOLLY' 'MADNESS' 'NONSENSE']
 ['ADVANCE' 'MARCH' 'PROGRESS' 'PUSH']
 ['CHEESE' 'FEET' 'GARBAGE' 'SKUNK']
 ['CUT' 'FAKE' 'FREEZE' 'SIX']]  
PREDICTED CLUSTER:  [['FAKE' 'PROGRESS' 'FEET' 'MADNESS']
 ['ABSURDITY' 'SIX' 'CUT' 'SKUNK']
 ['ADVANCE' 'GARBAGE' 'MARCH' 'PUSH']
 ['CHEESE' 'FOLLY' 'NONSENSE' 'FREEZE']] 

ACTUAL CLUSTER: [['BUNK' 'CROCK' 'HOGWASH' 'HORSEFEATHERS']
 ['BATON' 'HAMMER' 'HURDLE' 'POLE']
 ['GOATEE' 'HORNS' 'PITCHFORK' 'TAIL']
 ['BEND' 'BOWLINE' 'HITCH' 'SHEEPSHANK']]  
PREDICTED CLUSTER:  [['POLE' 'HORSEFEATHERS' 'GOATEE' 'HURDLE']
 ['POLE' 'BATON' 'HORNS' 'HITCH']
 ['POLE' 'BOWLINE' 'CROCK' 'HAMMER']
 ['POLE' 'HORSEFEATHERS' 'SHEEPSHANK' 'BEND']] 

ACTUAL CLUSTER: [['BRIGHT' 'CLEVER' 'QUICK' 'SHARP']
 ['ALASKA' 'FRONTIER' 'SOUTHWEST' 'SPIRIT']
 ['COWBOY' 'DRIFTER' 'OUTLAW' 'SHERIFF']
 ['LASSO' 'MARS' 'ROGERS' 'SMART']]  
PREDICTED CLUSTER:  [['SOUTHWEST' 'BRIGHT' 'ALASKA' 'MARS']
 ['SHERIFF' 'DRIFTER' 'QUICK


