Skip to content

docx.opc.exceptions.PackageNotFoundError #758

@abodsakah

Description

@abodsakah

I am trying to read a header from a word document using python-docx and watchdog. What I am doing is, whenever a new file is created or modified the script reads the file and get the contents in the header, but I am getting an
docx.opc.exceptions.PackageNotFoundError: Package not found at 'Test6.docx'
error and I tried everything including opening it as a stream but nothing has worked, and yes the document is populated. For reference, this is my code.
main.py

import time
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
import os
from docx import Document


class Watcher:

    def __init__(self):
        self.observer = Observer()

    def run(self):
        event_handler = Handler()
        self.observer.schedule(event_handler,path= r'../../../', recursive=True)
        self.observer.start()
        try:
            while True:
                time.sleep(5)
        except:
            self.observer.stop()
            print ("Error")

        self.observer.join()

    class Handler(FileSystemEventHandler):

    @staticmethod
    def on_any_event(event):
        if event.is_directory:
            return None

        elif event.event_type == 'created':
            # Take any action here when a file is first created.
            path = event.src_path
            extenstion = '.docx'
            base = os.path.basename(path)

            if extenstion in path:
                print ("Received created event - %s." % event.src_path)
                print(base)
                doc = Document(base)
                print(doc)
                
                doc = Document(base)
                section = doc.sections[0]
                header = section.header
                print(header.paragraphs[0].text)

        elif event.event_type == 'modified':
            # Taken any action here when a file is modified.
            path = event.src_path
            extenstion = '.doc'
            base = os.path.basename(path)
            if extenstion in base:
                print ("Received modified event - %s." % event.src_path)
                print(base)

                doc = Document(base)
                section = doc.sections[0]
                header = section.header
                print(header.paragraphs[0].text)


if __name__ == '__main__':
    w = Watcher()
    w.run()

Tried to change the extension from doc to docx and that worked but is there anyway to open docx because thats what i am finding.

another thing. When opening the ".doc" file and trying to read the header all i am getting is

<docx.document.Document object at 0x03195488> <docx.section._Header object at 0x0319C088>

and what i am trying to do is to extract the text from the header

Full error list:

Exception in thread Thread-1:
Traceback (most recent call last):
  File "C:\Program Files (x86)\Python38-32\lib\threading.py", line 932, in _bootstrap_inner
    self.run()
  File "C:\Program Files (x86)\Python38-32\lib\site-packages\watchdog\observers\api.py", line 199, in run
    self.dispatch_events(self.event_queue, self.timeout)
  File "C:\Program Files (x86)\Python38-32\lib\site-packages\watchdog\observers\api.py", line 368, in dispatch_events
    handler.dispatch(event)
  File "C:\Program Files (x86)\Python38-32\lib\site-packages\watchdog\events.py", line 322, in dispatch
    self.on_any_event(event)
  File "c:/Users/abdsak11/OneDrive - Lärande/Dokument/GitHub/word-automation/main.py", line 65, in on_any_event
    doc = Document(base)
  File "C:\Program Files (x86)\Python38-32\lib\site-packages\docx\api.py", line 25, in Document
    document_part = Package.open(docx).main_document_part
  File "C:\Program Files (x86)\Python38-32\lib\site-packages\docx\opc\package.py", line 128, in open
    pkg_reader = PackageReader.from_file(pkg_file)
  File "C:\Program Files (x86)\Python38-32\lib\site-packages\docx\opc\pkgreader.py", line 32, in from_file
    phys_reader = PhysPkgReader(pkg_file)
  File "C:\Program Files (x86)\Python38-32\lib\site-packages\docx\opc\phys_pkg.py", line 32, in __new__
    raise PackageNotFoundError(
docx.opc.exceptions.PackageNotFoundError: Package not found at 'test 1.doc'

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions