In [1]:
import shutil
import os

In [2]:
def test_model(tokenizer, model, input_text, max_length=128):
    """
    Generates a prediction from the trained model.
    :param tokenizer: Loaded tokenizer.
    :param model: Loaded model.
    :param input_text: Input text to summarize or query.
    :param max_length: Maximum length of the generated output.
    :return: Generated summary or answer.
    """
    inputs = tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True)
    outputs = model.generate(inputs, max_length=max_length, num_beams=4, early_stopping=True)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [3]:
def unarchive_model(archive_path, extract_dir):
    """
    Extracts a model archive into a directory.
    :param archive_path: Path to the .tar.gz archive.
    :param extract_dir: Directory to extract the model files.
    """
    shutil.unpack_archive(archive_path, extract_dir)
    print(f"Model unarchived to {extract_dir}")

# Example usage
unarchive_model("final_model_archive.tar.gz", "extracted_model")

# Reload the model
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

def load_model_from_extracted(extracted_dir):
    tokenizer = AutoTokenizer.from_pretrained(extracted_dir)
    model = AutoModelForSeq2SeqLM.from_pretrained(extracted_dir)
    return tokenizer, model

# Example usage
tokenizer, model = load_model_from_extracted("extracted_model")

# Test the model
input_text = """Log  processing  has  become  a  critical  component  of  the  data \
pipeline for consumer internet companies. We introduce Kafka, a \
distributed messaging system that we developed for collecting and \
delivering high volumes of log data with low latency. Our system \
incorporates  ideas  from  existing  log  aggregators  and  messaging \ 
systems,  and  is  suitable  for  both  offline  and  online  message \
consumption.  We  made  quite  a  few  unconventional  yet  practical \
design choices in Kafka to make our system efficient and scalable. \
Our experimental results show that Kafka has superior \
performance  when  compared  to  two  popular  messaging  systems. \ 
We  have  been  using  Kafka  in  production  for  some  time  and  it  is \ 
processing hundreds of gigabytes of new data each day"""
output = test_model(tokenizer, model, input_text)
print(f"Model Output: {output}")

  input_text = """Log  processing  has  become  a  critical  component  of  the  data \


Model unarchived to extracted_model


  from .autonotebook import tqdm as notebook_tqdm


Model Output: .  We have been using Kafka in production for some time and it is  processing hundreds of gigabytes of new data each day.
