In [1]:
import nltk
from nltk.corpus import reuters
from nltk.tokenize import word_tokenize
from nltk.util import ngrams
from collections import defaultdict
import pickle

In [2]:
# Download NLTK data
nltk.download('reuters')
nltk.download('punkt')

[nltk_data] Downloading package reuters to /root/nltk_data...
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [3]:

# Function to train the n-gram language model
def train_ngram_model(corpus, n):
    model = defaultdict(lambda: defaultdict(int))
    for sentence in corpus:
        tokens = ['<s>'] * (n - 1) + word_tokenize(sentence.lower()) + ['</s>']
        for i in range(len(tokens) - n + 1):
            context = tuple(tokens[i:i + n - 1])
            next_word = tokens[i + n - 1]
            model[context][next_word] += 1
    return model

In [4]:
# Function to predict the next words given a context
def predict_next_words(model, context):
    if context in model:
        return list(model[context].keys())
    else:
        return []


In [5]:
# Function to autocomplete a sentence
def autocomplete_sentence(model, n, sentence, max_words=5):
    tokens = word_tokenize(sentence.lower())
    context = tokens[-(n - 1):]
    for _ in range(max_words):
        next_words = predict_next_words(model, tuple(context))
        if not next_words or '</s>' in next_words:
            break
        next_word = next_words[0]  # Select the first word as the prediction
        sentence += ' ' + next_word
        context = context[1:] + [next_word]
    return sentence


In [6]:
# Load Reuters corpus for training
reuters_corpus = reuters.sents()

In [7]:
# Train a trigram model
n = 5
reuters_sentences = [' '.join(sentence) for sentence in reuters_corpus]
model = train_ngram_model(reuters_sentences, n)


In [8]:
# Test the autocomplete feature
input_sentence = "The picture is very"
# completed_sentence = autocomplete_sentence(model, n, input_sentence)
# print("Autocompleted Sentence:", completed_sentence)



In [10]:

# Get suggestions for the next word
context = input_sentence.split()[-(n - 1):]
suggestions = predict_next_words(model, tuple(context))
#print("Suggested words:", suggestions)

In [None]:
!pip install dill

In [36]:
import dill as pickle

# File path to save the pickled model
model_file_path = "ngram_model.pkl"

# Pickle the model
with open(model_file_path, 'wb') as f:
    pickle.dump(model, f)

print("Model pickled successfully.")

Model pickled successfully.


In [11]:

# Provide option to select a word from suggestions and autocomplete the sentence
if suggestions:
    print("Select a word from suggestions:")
    for i, word in enumerate(suggestions):
        print(f"{i+1}. {word}")

    # Ensure the user's input is valid
    while True:
        choice = input("Enter the number corresponding to your choice: ")
        if choice.isdigit() and 1 <= int(choice) <= len(suggestions):
            choice = int(choice)
            break
        else:
            print("Invalid input. Please enter a number corresponding to your choice.")

    selected_word = suggestions[choice - 1]
    completed_sentence =input_sentence+ " " + selected_word  # Append the selected word to the completed sentence

    # Autocomplete the sentence based on the updated completed sentence
    answer_sentence = autocomplete_sentence(model, n, completed_sentence)
    print("Autocompleted Sentence with selected word:", answer_sentence)
else:
    print("No suggestions available.")



Select a word from suggestions:
1. important
2. helpful
3. management
4. healthy
5. difficult
6. much
7. fair
8. low
9. bad
10. vulnerable
11. light
12. strong
13. close
14. interesting
15. optimistic
16. sensitive
17. unstable
18. far
19. pleased
20. strained
Enter the number corresponding to your choice: 14
Autocompleted Sentence with selected word: The picture is very interesting new approach of vice president


In [21]:
# Load the pickled model
model_file_path = "ngram_model.pkl"
with open(model_file_path, 'rb') as f:
    model = pickle.load(f)

In [22]:
# Test the unpickled model
input_sentence = "The picture is very fair"
completed_sentence = autocomplete_sentence(model, n, input_sentence)
print("Autocompleted Sentence:", completed_sentence)

# Get suggestions for the next word
context = input_sentence.split()[-(n - 1):]
suggestions = predict_next_words(model, tuple(context))
print("Suggested words:", suggestions)

# Provide option to select a word from suggestions and autocomplete the sentence
if suggestions:
    print("Select a word from suggestions:")
    for i, word in enumerate(suggestions):
        print(f"{i+1}. {word}")

    # Ensure the user's input is valid
    while True:
        choice = input("Enter the number corresponding to your choice: ")
        if choice.isdigit() and 1 <= int(choice) <= len(suggestions):
            choice = int(choice)
            break
        else:
            print("Invalid input. Please enter a number corresponding to your choice.")

    selected_word = suggestions[choice - 1]
    completed_sentence = input_sentence + " " + selected_word  # Append the selected word to the completed sentence

    # Autocomplete the sentence based on the updated completed sentence
    answer_sentence = autocomplete_sentence(model, n, completed_sentence)
    print("Autocompleted Sentence with selected word:", answer_sentence)
else:
    print("No suggestions available.")

Autocompleted Sentence: The picture is very fair and open its farm products
Suggested words: ['and']
Select a word from suggestions:
1. and
Enter the number corresponding to your choice: hello my
Invalid input. Please enter a number corresponding to your choice.
Enter the number corresponding to your choice: 1
Autocompleted Sentence with selected word: The picture is very fair and open its farm products market


[nltk_data] Downloading package reuters to /root/nltk_data...
[nltk_data]   Package reuters is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


PicklingError: Can't pickle <function <lambda> at 0x7dbe58aac430>: attribute lookup <lambda> on __main__ failed