In [1]:
import sys
sys.executable

'/usr/bin/python3'

In [2]:
import time

In [3]:
module_names = [name for name in sys.modules.keys() if 'fasttext' in name.lower()]
print(module_names)

[]


In [4]:
import fastText

In [5]:
mock_data = [
    {
        "input_text": "Rising tide lifts all boats.",
        "small_model_accuracy": 91,
        "large_model_accuracy": 92
    },
    {
        "input_text": "United there is little we cannot do in a host of cooperative ventures. Divided there is little we can do—for we dare not meet a powerful challenge at odds and split asunder.",
        "small_model_accuracy": 76,
        "large_model_accuracy": 89
    },
    {
        "input_text": "The quick brown fox jumps over the lazy dog.",
        "small_model_accuracy": 95,
        "large_model_accuracy": 96
    },
    {
        "input_text": "To be or not to be, that is the question.",
        "small_model_accuracy": 88,
        "large_model_accuracy": 90
    },
    {
        "input_text": "In the beginning, God created the heavens and the earth.",
        "small_model_accuracy": 89,
        "large_model_accuracy": 91
    },
    {
        "input_text": "This above all: to thine own self be true, and it must follow, as the night the day, thou canst not then be false to any man.",
        "small_model_accuracy": 72,
        "large_model_accuracy": 85
    },
    {
        "input_text": "All animals are equal, but some animals are more equal than others.",
        "small_model_accuracy": 86,
        "large_model_accuracy": 88
    },
    {
        "input_text": "The only thing we have to fear is fear itself.",
        "small_model_accuracy": 90,
        "large_model_accuracy": 92
    },
    {
        "input_text": "I think, therefore I am.",
        "small_model_accuracy": 92,
        "large_model_accuracy": 93
    },
    {
        "input_text": "It was the best of times, it was the worst of times, it was the age of wisdom, it was the age of foolishness...",
        "small_model_accuracy": 78,
        "large_model_accuracy": 88
    },
    {
        "input_text": "Four score and seven years ago our fathers brought forth on this continent, a new nation, conceived in Liberty, and dedicated to the proposition that all men are created equal.",
        "small_model_accuracy": 82,
        "large_model_accuracy": 90
    },
    {
        "input_text": "Ask not what your country can do for you—ask what you can do for your country.",
        "small_model_accuracy": 87,
        "large_model_accuracy": 89
    },
    {
        "input_text": "That's one small step for man, one giant leap for mankind.",
        "small_model_accuracy": 93,
        "large_model_accuracy": 94
    },
    {
        "input_text": "I have a dream that one day this nation will rise up and live out the true meaning of its creed: 'We hold these truths to be self-evident, that all men are created equal.'",
        "small_model_accuracy": 80,
        "large_model_accuracy": 89
    },
    {
        "input_text": "Give me liberty, or give me death!",
        "small_model_accuracy": 88,
        "large_model_accuracy": 90
    }
]

In [6]:
with open("fasttext_train.txt", "w") as f:
    for entry in mock_data:
        # for fasttext to recognize labels we have to alter the labels to have __label__
        # however, only one column is accepted as the labels, so despite small_model_accuracy also having __label__, large_model_accuracy is the only label column
        f.write(f"__label__{entry['small_model_accuracy']} {entry['input_text']}\n")
        f.write(f"__label__{entry['large_model_accuracy']} {entry['input_text']}\n")

In [7]:
with open("fasttext_train.txt", "r") as f:
  for entry in mock_data:
    print(entry)

{'input_text': 'Rising tide lifts all boats.', 'small_model_accuracy': 91, 'large_model_accuracy': 92}
{'input_text': 'United there is little we cannot do in a host of cooperative ventures. Divided there is little we can do—for we dare not meet a powerful challenge at odds and split asunder.', 'small_model_accuracy': 76, 'large_model_accuracy': 89}
{'input_text': 'The quick brown fox jumps over the lazy dog.', 'small_model_accuracy': 95, 'large_model_accuracy': 96}
{'input_text': 'To be or not to be, that is the question.', 'small_model_accuracy': 88, 'large_model_accuracy': 90}
{'input_text': 'In the beginning, God created the heavens and the earth.', 'small_model_accuracy': 89, 'large_model_accuracy': 91}
{'input_text': 'This above all: to thine own self be true, and it must follow, as the night the day, thou canst not then be false to any man.', 'small_model_accuracy': 72, 'large_model_accuracy': 85}
{'input_text': 'All animals are equal, but some animals are more equal than others.

In [8]:
import fastText

model = fastText.train_supervised(input="fasttext_train.txt", epoch=25, lr=1.0, wordNgrams=2)

model.save_model("fasttext_accuracy_predictor.bin")

Read 0M words
Number of words:  159
Number of labels: 17
start training...
Progress: 100.0% words/sec/thread:  827925 lr:  0.000000 loss:  2.420599 ETA:   0h 0m


In [9]:
text = "An apple a day, keeps the doctor away."
predicted_label = model.predict(text)[0][0]
predicted_accuracy = int(predicted_label.replace('__label__', ''))

print(f"Predicted Accuracy: {predicted_accuracy}%")

Predicted Accuracy: 91%


In [10]:
single_sample = [
    {
        "input_text": "Knowledge is power, and with great power comes great responsibility.",
        "small_model_accuracy": 85,
        "large_model_accuracy": 90
    }
]

In [18]:
def stepSGD(sample):

  with open("fasttext_train.txt", "w") as f:
    for entry in single_sample:
        f.write(f"__label__{entry['small_model_accuracy']} {entry['input_text']}\n")
        f.write(f"__label__{entry['large_model_accuracy']} {entry['input_text']}\n")
      
  model = fastText.train_supervised(input="fasttext_train.txt", epoch=1, lr=1.0, wordNgrams=2, inputModel="fasttext_accuracy_predictor.bin")
  #model.save_model("fasttext_accuracy_predictor.bin")

In [19]:
start_time = time.time()

stepSGD(single_sample)

end_time = time.time()
elapsed_time = end_time - start_time
print(f"stepSGD execution time: {elapsed_time:.4f} seconds")

Read 0M words
Number of words:  10
Number of labels: 2


stepSGD execution time: 2.2166 seconds


start training...
Progress: 100.0% words/sec/thread:   32454 lr:  0.000000 loss:  0.625789 ETA:   0h 0m0m


In [12]:
def predict(text):
  predicted_label = model.predict(text)[0][0]
  predicted_accuracy = int(predicted_label.replace('__label__', ''))

  print(f"Predicted Accuracy: {predicted_accuracy}%")

In [13]:
predict(text)

Predicted Accuracy: 90%
