In [1]:
import fitz  # PyMuPDF
from googletrans import Translator
import time

# Step 1: Extract text from PDF
def extract_text_from_pdf(file_path):
    text = ""
    with fitz.open(file_path) as pdf:
        for page in pdf:
            text += page.get_text()
    return text

# Step 2: Split text into manageable chunks
def split_text(text, max_length=4500):
    chunks = []
    while len(text) > max_length:
        split_index = text.rfind("\n", 0, max_length)
        if split_index == -1:
            split_index = max_length
        chunks.append(text[:split_index])
        text = text[split_index:]
    chunks.append(text)
    return chunks

# Step 3: Translate text safely
def translate_chinese_to_english(text):
    translator = Translator()
    chunks = split_text(text)
    translations = []

    for chunk in chunks:
        try:
            translated = translator.translate(chunk, src='zh-cn', dest='en')
            translations.append(translated.text)
            time.sleep(1)  # Add delay to avoid request blocking
        except Exception as e:
            print("Error translating chunk:", e)
            translations.append("[Translation failed for this part]")
    
    return "\n\n".join(translations)

# Example usage
pdf_text = extract_text_from_pdf("./otapatent.pdf")
translated_text = translate_chinese_to_english(pdf_text)

# Output
print("------ Translated Text ------")
print(translated_text)


------ Translated Text ------
(19) State Intellectual Property Administration of the People's Republic of China
(12) Invention Patent Application
(10) Application announcement number
(43) Application announcement date
(21) Application No. 202111204659.5
(22) Application date 2021.10.15
(71) Applicant Sichuan Qiruike Technology Co., Ltd.
Address 610000 Chengdu City, Sichuan Province China (Sichuan)
From the Pilot Trade Zone, Chengdu Hi-tech Zone
33rd Floor, Building 1, No. 199
(72) Inventor Wang Qianhui Deng Xiaohong Hu Tao Liu Mingming
(74) Patent agency Tiance Trademark Patent in Chengdu, Sichuan Province
Firm (Limited Partnership) 51213
Agent Zhao Yipeng
(51)Int.Cl.
G06F 8/65(2018.01)
G06F 8/71(2018.01)

(54) Invention Name
An OTA upgrade system and method for edge equipment
(57) Abstract
The present invention discloses an OTA upgrade system for edge devices
and methods, including unified virtual device management module and distributed consumption
Information bus module, upgrade ser

In [24]:
import re
from collections import Counter

def basic_summarizer(text, num_sentences=5):
    # Clean and split into sentences
    sentences = re.split(r'(?<=[.!?])\s+', text.strip())
    words = re.findall(r'\w+', text.lower())

    # Basic stopwords
    stopwords = set([
        "the", "is", "in", "and", "to", "a", "of", "that", "on", "for", "with",
        "as", "this", "by", "an", "be", "are", "was", "it", "from", "at", "or"
    ])

    # Score words
    word_freq = Counter(word for word in words if word not in stopwords)

    # Score sentences by sum of important words
    sentence_scores = {}
    for sentence in sentences:
        sentence_words = re.findall(r'\w+', sentence.lower())
        score = sum(word_freq[word] for word in sentence_words if word in word_freq)
        sentence_scores[sentence] = score

    # Select top sentences
    top_sentences = sorted(sentence_scores, key=sentence_scores.get, reverse=True)[:num_sentences]
    return "\n".join(top_sentences)

# Example usage
summary = basic_summarizer(translated_text, num_sentences=5)
print("------ Summary ------")
print(summary)


------ Summary ------
G06F 8/65(2018.01)
G06F 8/71(2018.01)

(54) Invention Name
An OTA upgrade system and method for edge equipment
(57) Abstract
The present invention discloses an OTA upgrade system for edge devices
and methods, including unified virtual device management module and distributed consumption
Information bus module, upgrade service module; the unified virtual device
The management module is used to transfer product information of each edge device, version
Report information to the distributed message bus module to receive distributed transmission
Messages sent by the information bus module are batch upgraded to edge devices;
The distributed message bus module is mainly used for unified virtual device pipe
The communication between the management module and the upgrade service module is responsible for the message
Receive, filter process and distribute; the upgrade service module is used for
Receive product and version information of edge devices and access regularly
The

In [33]:
!pip install safetensors "tokenizers>=0.21,<0.22" "huggingface-hub>=0.30,<1.0" --user




Collecting safetensors


[notice] A new release of pip is available: 23.2.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip



  Obtaining dependency information for safetensors from https://files.pythonhosted.org/packages/69/e2/b011c38e5394c4c18fb5500778a55ec43ad6106126e74723ffaee246f56e/safetensors-0.5.3-cp38-abi3-win_amd64.whl.metadata
  Downloading safetensors-0.5.3-cp38-abi3-win_amd64.whl.metadata (3.9 kB)
Collecting tokenizers<0.22,>=0.21
  Obtaining dependency information for tokenizers<0.22,>=0.21 from https://files.pythonhosted.org/packages/13/c3/cc2755ee10be859c4338c962a35b9a663788c0c0b50c0bdd8078fb6870cf/tokenizers-0.21.2-cp39-abi3-win_amd64.whl.metadata
  Downloading tokenizers-0.21.2-cp39-abi3-win_amd64.whl.metadata (6.9 kB)
Collecting huggingface-hub<1.0,>=0.30
  Obtaining dependency information for huggingface-hub<1.0,>=0.30 from https://files.pythonhosted.org/packages/44/f4/5f3f22e762ad1965f01122b42dae5bf0e009286e2dba601ce1d0dba72424/huggingface_hub-0.33.2-py3-none-any.whl.metadata
  Downloading huggingface_hub-0.33.2-py3-none-any.whl.metadata (14 kB)
Downloading safetensors-0.5.3-cp38-abi3-wi

In [36]:
pip install "huggingface_hub==0.30.0" --force-reinstall --user


Note: you may need to restart the kernel to use updated packages.Collecting huggingface_hub==0.30.0
  Obtaining dependency information for huggingface_hub==0.30.0 from https://files.pythonhosted.org/packages/05/54/ab179a3cbc6cb285b34dfb1686176db4c3e7d9634b23cabf5aff0d00c776/huggingface_hub-0.30.0-py3-none-any.whl.metadata
  Downloading huggingface_hub-0.30.0-py3-none-any.whl.metadata (13 kB)
Collecting filelock (from huggingface_hub==0.30.0)
  Obtaining dependency information for filelock from https://files.pythonhosted.org/packages/4d/36/2a115987e2d8c300a974597416d9de88f2444426de9571f4b59b2cca3acc/filelock-3.18.0-py3-none-any.whl.metadata
  Using cached filelock-3.18.0-py3-none-any.whl.metadata (2.9 kB)
Collecting fsspec>=2023.5.0 (from huggingface_hub==0.30.0)
  Obtaining dependency information for fsspec>=2023.5.0 from https://files.pythonhosted.org/packages/bb/61/78c7b3851add1481b048b5fdc29067397a1784e2910592bc81bb3f608635/fsspec-2025.5.1-py3-none-any.whl.metadata
  Using cached fs

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
httpx 0.13.3 requires idna==2.*, but you have idna 3.10 which is incompatible.
lightning 2.5.1.post0 requires packaging<25.0,>=20.0, but you have packaging 25.0 which is incompatible.
openvino-dev 2024.6.0 requires networkx<=3.1.0, but you have networkx 3.5 which is incompatible.

[notice] A new release of pip is available: 23.2.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
!pip install --upgrade "huggingface_hub>=0.30,<1.0"


Collecting huggingface_hub<1.0,>=0.30
  Obtaining dependency information for huggingface_hub<1.0,>=0.30 from https://files.pythonhosted.org/packages/44/f4/5f3f22e762ad1965f01122b42dae5bf0e009286e2dba601ce1d0dba72424/huggingface_hub-0.33.2-py3-none-any.whl.metadata
  Using cached huggingface_hub-0.33.2-py3-none-any.whl.metadata (14 kB)
Using cached huggingface_hub-0.33.2-py3-none-any.whl (515 kB)
Installing collected packages: huggingface_hub
  Attempting uninstall: huggingface_hub
    Found existing installation: huggingface-hub 0.30.0
    Uninstalling huggingface-hub-0.30.0:
      Successfully uninstalled huggingface-hub-0.30.0
Successfully installed huggingface_hub-0.23.1



[notice] A new release of pip is available: 23.2.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [5]:
pip install --upgrade "huggingface_hub>=0.30,<1.0"


Collecting huggingface_hub<1.0,>=0.30
  Obtaining dependency information for huggingface_hub<1.0,>=0.30 from https://files.pythonhosted.org/packages/44/f4/5f3f22e762ad1965f01122b42dae5bf0e009286e2dba601ce1d0dba72424/huggingface_hub-0.33.2-py3-none-any.whl.metadata
  Using cached huggingface_hub-0.33.2-py3-none-any.whl.metadata (14 kB)
Using cached huggingface_hub-0.33.2-py3-none-any.whl (515 kB)
Installing collected packages: huggingface_hub
  Attempting uninstall: huggingface_hub
    Found existing installation: huggingface-hub 0.23.1
    Uninstalling huggingface-hub-0.23.1:
      Successfully uninstalled huggingface-hub-0.23.1
Successfully installed huggingface_hub-0.33.2
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.2.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
pip install tf-keras


Collecting tf-kerasNote: you may need to restart the kernel to use updated packages.

  Obtaining dependency information for tf-keras from https://files.pythonhosted.org/packages/45/6b/d245122d108a94df5969ee7408ad343af1627730e91478e01ef098976bfa/tf_keras-2.19.0-py3-none-any.whl.metadata
  Downloading tf_keras-2.19.0-py3-none-any.whl.metadata (1.8 kB)
Downloading tf_keras-2.19.0-py3-none-any.whl (1.7 MB)
   ---------------------------------------- 0.0/1.7 MB ? eta -:--:--
   ---- ----------------------------------- 0.2/1.7 MB 6.3 MB/s eta 0:00:01
   ----------- ---------------------------- 0.5/1.7 MB 6.0 MB/s eta 0:00:01
   -------------------- ------------------- 0.9/1.7 MB 7.0 MB/s eta 0:00:01
   ------------------------------- -------- 1.3/1.7 MB 7.7 MB/s eta 0:00:01
   ---------------------------------------  1.7/1.7 MB 8.4 MB/s eta 0:00:01
   ---------------------------------------- 1.7/1.7 MB 7.3 MB/s eta 0:00:00
Installing collected packages: tf-keras
Successfully installed tf-ke


[notice] A new release of pip is available: 23.2.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
from transformers import pipeline

summarizer = pipeline(
    "summarization",
    model="sshleifer/distilbart-cnn-12-6",
    device=-1  # CPU
)

text = """
Speaker recognition systems often require long utterances, but in edge devices like Raspberry Pi,
we work with short voice clips. To improve speaker verification performance on such short segments,
we apply signal enhancement and dynamic thresholding techniques optimized for small-footprint inference.
"""

summary = summarizer(text, max_length=80, min_length=30, do_sample=False)
print("Summary:", summary[0]['summary_text'])


  from .autonotebook import tqdm as notebook_tqdm





Device set to use cpu
Your max_length is set to 80, but your input_length is only 59. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=29)


Summary:  Raspberry Pi uses signal enhancement and dynamic thresholding techniques optimized for small-footprint inference . We apply signal enhancement to improve speaker verification performance on such short segments .


In [4]:
from transformers import pipeline

# Load summarization pipeline
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Limit input to 1024 tokens, so chunk if needed
def summarize_with_transformers(text):
    max_len = 1024
    chunks = [text[i:i+1000] for i in range(0, len(text), 1000)]
    summary = []
    for chunk in chunks:
        result = summarizer(chunk, max_length=130, min_length=30, do_sample=False)
        summary.append(result[0]['summary_text'])
    return "\n\n".join(summary)

# Usage
summary = summarize_with_transformers(translated_text)
print("------ Summary ------")
print(summary)

Device set to use cpu
Your max_length is set to 130, but your input_length is only 66. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=33)


------ Summary ------
An OTA upgrade system and method for edge equipment. Inventor Wang Qianhui Deng Xiaohong Hu Tao Liu Mingming. Patent agency Tiance Trademark Patent in Chengdu.

The management module is used to transfer product information of each edge device, version information to the distributed message bus module to receive distributed transmission. Messages sent by the information bus module are batch upgraded to edge devices. The distribution module is mainly used for unified virtual device pipe.

The unified virtual device management module is used to report product information and version information of each edge device. The distributed message bus module is mainly used for communication between the unified virtualDevice management module and the upgrade service module.

The backup management module includes multiple types and multiple devices, including but not limited to Bluetooth devices, ZigBee devices, spherical lights, and gateways. The OTA upgrade system of an edge 