# Wordle Solver

In [1]:
# import functions
from wordle_helpers import *

In [2]:
# help(main)

In [11]:
# Clue inputs for the current puzzle (positions look 1-based: 5 is the last
# letter of a five-letter word — confirm against wordle_helpers.main)
green_letters = 'E'
green_positions = [5]
yellow_letters = 'RAG'
yellow_positions = [2, 3, 1]
bad_letters = 'CNUIDS'

# Ask the helper for the list of words still possible given these clues
words = main(
    green_letters,
    green_positions,
    yellow_letters,
    yellow_positions,
    bad_letters,
)
print(words)

['agree', 'barge', 'large']


In [12]:
# Tabulate letters across the current candidate words (the output below has
# Letters/Count columns)
letters_df = repetitive_letters(wordle_list=words)
# Render as plain text without the index column
print(letters_df.to_string(index=False))

Letters  Count
      E      4
      A      3
      G      3
      R      3
      B      1
      L      1


#### Choosing next word

In [10]:
# Clue inputs for choosing the next guess: only yellow_letters is populated;
# green letters, all positions, and excluded letters are left empty
green_letters = ''
green_positions = []
yellow_letters = 'GISDU'
yellow_positions = []
bad_letters = ''

# Same helper call as for the puzzle clues, but with an explicit
# wordle_list_path (presumably selects a different word list — confirm in
# wordle_helpers)
words = main(
    green_letters,
    green_positions,
    yellow_letters,
    yellow_positions,
    bad_letters,
    wordle_list_path="wordle-possible-words",
)
print(words)

['guids']


In [6]:
# help(score)

In [7]:
# Load the ranking table from wordle_ranking.csv via the project helper
df = load_data(file_name='wordle_ranking.csv')
# Bare expression so the notebook displays the loaded table
df

Unnamed: 0,Names,Games_Won
0,Murilo,217
1,Barbara,89
2,Draw,221


In [8]:
# Update the score through the helper (called with no arguments here; how it
# decides what to change is defined in wordle_helpers) and display the result
score_df = score()
score_df

Unnamed: 0,Names,Games_Won
0,Murilo,217
1,Barbara,89
2,Draw,222


In [9]:
# Print the updated score table as plain text, omitting the index column
print(score_df.to_string(index=False))

  Names  Games_Won
 Murilo        217
Barbara         89
   Draw        222


In [10]:
# help(reset_score)

#### Set custom score

In [11]:
# help(set_score)

In [12]:
# # Set custom score
# df = set_score(m_score=196, b_score=79, draw_score=204)
# df

#### Reset score

In [13]:
## Reset score
# reset_score()

Given sentences in a source language s and a target language t, together with a source-to-target token alignment, implement a function that can quickly look up which target token(s) are aligned to a given source token.

Provide a function that can do the same for a target token, i.e. return which source token(s) are aligned to a given target token. An example use case may be to analyze large corpora of aligned text to build multi-lingual dictionaries, etc. (Note for interviewer: the idea behind that comment is to encourage them to realize that a single word may occur multiple times throughout the input text and is not string-unique.)

Use your implementation to print the aligned token(s) for "barking" (s2t), "is" (s2t), and "Katzen" (t2s).

Example of inputs

s = "the small dog is barking at all the cats"
t = "alle Katzen bellt der kleine Hund an"
a = "0-3 1-4 2-5 4-2 4-6 5-0 6-0 7-1 8-1"

In [14]:
def get_aligned_tokens(s, t, a, token, direction='s2t'):
    """Return the tokens aligned to ``token`` in the other sentence.

    Args:
        s: Source sentence; tokens are separated by whitespace.
        t: Target sentence; tokens are separated by whitespace.
        a: Whitespace-separated alignment pairs of the form
            ``"<source_index>-<target_index>"`` (0-based token indices),
            e.g. ``"0-3 1-4"``.
        token: Token text to look up. Matching is by string, so when a word
            occurs several times in the sentence the alignments of every
            occurrence are returned together.
        direction: ``'s2t'`` to look up a source token and return target
            tokens, ``'t2s'`` to look up a target token and return source
            tokens.

    Returns:
        A list of aligned tokens in the order their pairs appear in ``a``
        (repeats are kept). Empty if ``token`` is absent or unaligned.

    Raises:
        ValueError: If ``direction`` is neither ``'s2t'`` nor ``'t2s'``, or
            if an alignment pair is not two integers joined by ``-``.
        IndexError: If an alignment index is outside its sentence.
    """
    # Reject unknown directions explicitly; previously anything other than
    # 's2t' (including typos such as 'S2T') was silently treated as 't2s'.
    if direction not in ('s2t', 't2s'):
        raise ValueError(
            f"direction must be 's2t' or 't2s', got {direction!r}"
        )

    s_tokens = s.split()
    t_tokens = t.split()

    aligned = []
    for pair in a.split():
        s_idx, t_idx = map(int, pair.split('-'))
        source_token = s_tokens[s_idx]
        target_token = t_tokens[t_idx]
        # Pick which side is the lookup key and which side is reported
        if direction == 's2t':
            key, value = source_token, target_token
        else:
            key, value = target_token, source_token
        # Only the queried token matters, so collect matches directly
        # instead of building a dictionary for every token
        if key == token:
            aligned.append(value)

    return aligned

# Example sentences and alignment taken from the task statement
s = "the small dog is barking at all the cats"
t = "alle Katzen bellt der kleine Hund an"
a = "0-3 1-4 2-5 4-2 4-6 5-0 6-0 7-1 8-1"

# Look up the three tokens the task asks for
aligned_to_barking_s2t = get_aligned_tokens(s, t, a, token="barking", direction="s2t")
aligned_to_is_s2t = get_aligned_tokens(s, t, a, token="is", direction="s2t")
aligned_to_Katzen_t2s = get_aligned_tokens(s, t, a, token="Katzen", direction="t2s")

# Bare tuple expression so the notebook displays all three results
(aligned_to_barking_s2t, aligned_to_is_s2t, aligned_to_Katzen_t2s)

(['bellt', 'an'], [], ['the', 'cats'])

In [21]:
import re
import numpy as np
from collections import defaultdict

class SimpleNaiveBayesClassifier:
    """Word-count Naive Bayes text classifier with add-one smoothing.

    Attributes set by ``train``:
        class_probabilities: ``{label: fraction of training documents}``.
        word_probabilities: ``{label: {word: smoothed P(word | label)}}``
            with an entry for every vocabulary word under every label.
        vocabulary: set of all tokens seen during training.
    """

    def __init__(self):
        self.class_probabilities = {}
        self.word_probabilities = {}
        self.vocabulary = set()

    def preprocess(self, text):
        """Lowercase ``text`` and return its word tokens as a list."""
        return re.findall(r'\b\w+\b', text.lower())

    def train(self, documents, labels):
        """Fit the model on parallel sequences of documents and labels.

        Any previously learned state is discarded, so calling ``train``
        again replaces the model rather than mixing old and new statistics.

        Args:
            documents: Iterable of document strings.
            labels: Iterable of class labels, one per document.

        Raises:
            ValueError: If ``documents`` and ``labels`` differ in length.
        """
        documents = list(documents)
        labels = list(labels)
        if len(documents) != len(labels):
            # zip() would silently drop the surplus items while the priors
            # below would still be divided by len(documents)
            raise ValueError(
                f"documents and labels must have the same length "
                f"({len(documents)} != {len(labels)})"
            )

        # Start from a clean slate so a re-train leaves no stale vocabulary
        # or labels behind
        self.class_probabilities = {}
        self.word_probabilities = {}
        self.vocabulary = set()

        # Count the frequency of each word in each class
        word_counts = defaultdict(lambda: defaultdict(int))
        class_counts = defaultdict(int)
        for document, label in zip(documents, labels):
            class_counts[label] += 1
            for word in self.preprocess(document):
                word_counts[label][word] += 1
                self.vocabulary.add(word)

        total_documents = len(documents)
        vocabulary_size = len(self.vocabulary)

        # Iterate over class_counts (not word_counts) so that a label whose
        # documents contain no tokens still gets word probabilities;
        # otherwise predict() would raise KeyError for that label.
        for label, document_count in class_counts.items():
            self.class_probabilities[label] = document_count / total_documents

            label_word_counts = word_counts[label]
            total_words_in_class = sum(label_word_counts.values())
            # Add-one smoothing: unseen words get a small non-zero probability
            self.word_probabilities[label] = {
                word: (label_word_counts.get(word, 0) + 1)
                / (total_words_in_class + vocabulary_size)
                for word in self.vocabulary
            }

    def predict(self, document):
        """Return the label with the highest log-probability score.

        Words that were not seen during training are ignored. Ties go to the
        label that appeared first in the training data.

        Raises:
            ValueError: If the classifier has not been trained.
        """
        if not self.class_probabilities:
            raise ValueError("classifier has not been trained; call train() first")

        # Work in log space to avoid underflow from multiplying many
        # small probabilities
        class_scores = {
            label: np.log(prob) for label, prob in self.class_probabilities.items()
        }

        for word in self.preprocess(document):
            if word not in self.vocabulary:
                continue
            for label in class_scores:
                # train() stores every vocabulary word under every label
                class_scores[label] += np.log(self.word_probabilities[label][word])

        return max(class_scores, key=class_scores.get)

# Tiny sentiment corpus: two positive and two negative sentences
documents = [
    "I love this movie",
    "This movie is very good",
    "I hate this movie",
    "This movie is terrible",
]
labels = ["positive", "positive", "negative", "negative"]

# Fit a fresh classifier on the corpus
classifier = SimpleNaiveBayesClassifier()
classifier.train(documents=documents, labels=labels)

# Classify an unseen sentence; the bare expression displays the label
test_document = "This movie is good"
predicted_class = classifier.predict(document=test_document)
predicted_class

'positive'