In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense
from tensorflow.keras.layers import Embedding

# Sample legal text
legal_text = """In the heart of the bustling city, amidst the cacophony of car horns and chatter of pedestrians, lies a hidden sanctuary—a quaint little café with a charm all its own. The aroma of freshly brewed coffee mingles with the sweet scent of pastries, welcoming patrons with open arms. Inside, the atmosphere is cozy, with soft lighting and comfortable seating inviting visitors to linger awhile. The walls are adorned with local artwork, adding a touch of character to the space. As patrons sip their drinks and nibble on delicious treats, they engage in lively conversations or simply lose themselves in the pages of a book. Time seems to stand still within these walls, offering respite from the chaos outside. The café becomes a refuge—a place where friends gather to catch up, where lovers steal moments of intimacy, where artists find inspiration, and where weary souls find solace. Here, in this little corner of the world, life's worries fade away, replaced by a sense of warmth and contentment that only a truly special place can provide."""

# Preprocess the text
tokenizer = Tokenizer()
tokenizer.fit_on_texts([legal_text])
sequence = tokenizer.texts_to_sequences([legal_text])
padded_sequence = pad_sequences(sequence, maxlen=100, padding='post')

# Build the LSTM model
latent_dim = 256

encoder_inputs = Input(shape=(None,))
encoder_embedding = Embedding(input_dim=len(tokenizer.word_index)+1, output_dim=latent_dim)(encoder_inputs)
encoder_lstm = LSTM(latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_embedding)
encoder_states = [state_h, state_c]

decoder_inputs = Input(shape=(None,))
decoder_embedding = Embedding(input_dim=len(tokenizer.word_index)+1, output_dim=latent_dim)(decoder_inputs)
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=encoder_states)
decoder_dense = Dense(len(tokenizer.word_index)+1, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')

# Train the model (you need a dataset with input-output pairs)
# model.fit([encoder_input_data, decoder_input_data], decoder_target_data, batch_size=64, epochs=50, validation_split=0.2)

# Inference: Generate summary
encoder_model = Model(encoder_inputs, encoder_states)

decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

decoder_outputs, state_h, state_c = decoder_lstm(decoder_embedding, initial_state=decoder_states_inputs)
decoder_states = [state_h, state_c]

decoder_outputs = decoder_dense(decoder_outputs)

decoder_model = Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states)

# Function to decode sequence with word count limit and return word count
def decode_sequence_with_word_count(input_seq, max_words=100):
    states_value = encoder_model.predict(input_seq)
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = 0  # Start token index
    stop_condition = False
    decoded_sentence = ''
    word_count = 0  # Initialize word count
    while not stop_condition:
        output_tokens, h, c = decoder_model.predict([target_seq] + states_value)
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_word = tokenizer.index_word.get(sampled_token_index, '')
        if sampled_word != '':
            decoded_sentence += sampled_word + ' '
            word_count += 1  # Increment word count
        if sampled_word == '<end>' or word_count >= max_words:
            stop_condition = True
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = sampled_token_index
        states_value = [h, c]
    return decoded_sentence.strip(), word_count  # Return decoded sentence and word count

# Example usage
input_seq = padded_sequence
decoded_summary, word_count = decode_sequence_with_word_count(input_seq, max_words=100)
print('Generated summary:', decoded_summary)
print('Word count:', word_count)




Generated summary: seems welcoming seems place place place place their their lies lies lies lies lies from lies lies lies lies heart from lies lies lies intimacy heart intimacy intimacy scent own own own own comfortable comfortable comfortable comfortable adorned adorned freshly truly adorned still lose life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's life's
Word count: 100


In [None]:
input= """In the heart of the bustling city, amidst the cacophony of car horns and chatter of pedestrians, lies a hidden sanctuary—a quaint little café with a charm all its own. The aroma of freshly brewed coffee mingles with the sweet scent of pastries, welcoming patrons with open arms. Inside, the atmosphere is cozy, with soft lighting and comfortable seating inviting visitors to linger awhile. The walls are adorned with local artwork, adding a touch of character to the space. As patrons sip their drinks and nibble on delicious treats, they engage in lively conversations or simply lose themselves in the pages of a book. Time seems to stand still within these walls, offering respite from the chaos outside. The café becomes a refuge—a place where friends gather to catch up, where lovers steal moments of intimacy, where artists find inspiration, and where weary souls find solace. Here, in this little corner of the world, life's worries fade away, replaced by a sense of warmth and contentment that only a truly special place can provide."""egal_text = """In the heart of the bustling city, amidst the cacophony of car horns and chatter of pedestrians, lies a hidden sanctuary—a quaint little café with a charm all its own. The aroma of freshly brewed coffee mingles with the sweet scent of pastries, welcoming patrons with open arms. Inside, the atmosphere is cozy, with soft lighting and comfortable seating inviting visitors to linger awhile. The walls are adorned with local artwork, adding a touch of character to the space. As patrons sip their drinks and nibble on delicious treats, they engage in lively conversations or simply lose themselves in the pages of a book. Time seems to stand still within these walls, offering respite from the chaos outside. The café becomes a refuge—a place where friends gather to catch up, where lovers steal moments of intimacy, where artists find inspiration, and where weary souls find solace. Here, in this little corner of the world, life's worries fade away, replaced by a sense of warmth and contentment that only a truly special place can provide."""egal_text = """In the heart of the bustling city, amidst the cacophony of car horns and chatter of pedestrians, lies a hidden sanctuary—a quaint little café with a charm all its own. The aroma of freshly brewed coffee mingles with the sweet scent of pastries, welcoming patrons with open arms. Inside, the atmosphere is cozy, with soft lighting and comfortable seating inviting visitors to linger awhile. The walls are adorned with local artwork, adding a touch of character to the space. As patrons sip their drinks and nibble on delicious treats, they engage in lively conversations or simply lose themselves in the pages of a book. Time seems to stand still within these walls, offering respite from the chaos outside. The café becomes a refuge—a place where friends gather to catch up, where lovers steal moments of intimacy, where artists find inspiration, and where weary souls find solace. Here, in this little corner of the world, life's worries fade away, replaced by a sense of warmth and contentment that only a truly special place can provide."""

In [1]:

from transformers import AutoTokenizer, BartForConditionalGeneration

model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")
tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")


inputs = tokenizer([input], max_length=1024, return_tensors="pt",truncation=True)

# Generate Summary
summary_ids = model.generate(inputs["input_ids"], num_beams=2, min_length=500, max_length=1024)
tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

ValueError: text input must be of type `str` (single example), `List[str]` (batch or single pretokenized example) or `List[List[str]]` (batch of pretokenized examples).