In [1]:
import socket
from fpdf import FPDF
from docx import Document
import xml.etree.ElementTree as ET

In [2]:
# Address the server socket binds to (the IPv4 loopback address).
HOST = "127.0.0.1"
# TCP port the server socket binds to and listens on.
PORT = 34512
# Maximum number of bytes requested from the client in one recv() call.
BUFFER_SIZE = 65536

def create_xml_from_table(table_data, xml_filename):
    """Write the rows of a table to an XML annotation file.

    Every row becomes one ``<object>`` element under the ``<annotation>``
    root, shaped as::

        <object>
            <name>row[0]</name>
            <size>
                <width>row[1]</width>
                <height>row[2]</height>
            </size>
        </object>

    Args:
        table_data: Iterable of rows. Each row is a sequence whose first
            three items are the object name, its width and its height.
            Items that are not strings are converted with ``str()``; an
            item that is missing (short row) or ``None`` produces an empty
            element instead of raising.
        xml_filename: Path of the XML file to write (overwritten if it
            already exists).
    """
    def cell_text(row, index):
        # ElementTree can only serialise str (or None) as element text, so
        # numbers are converted and absent cells are left empty.
        if index >= len(row) or row[index] is None:
            return None
        return str(row[index])

    root = ET.Element("annotation")

    for row in table_data:
        object_elem = ET.SubElement(root, "object")
        ET.SubElement(object_elem, "name").text = cell_text(row, 0)

        size_elem = ET.SubElement(object_elem, "size")
        ET.SubElement(size_elem, "width").text = cell_text(row, 1)
        ET.SubElement(size_elem, "height").text = cell_text(row, 2)

    ET.ElementTree(root).write(xml_filename)

def extract_table_data(docx_filename):
    """Return the cell texts of the first table in a DOCX file.

    Args:
        docx_filename: Path of the DOCX file to open.

    Returns:
        A list with one entry per table row; each entry is the list of
        ``cell.text`` values of that row, in the order ``row.cells``
        yields them.

    Only ``doc.tables[0]` is read, so the document is expected to contain
    at least one table; the index lookup fails otherwise.
    """
    first_table = Document(docx_filename).tables[0]
    return [[cell.text for cell in row.cells] for row in first_table.rows]

def extract_images_from_docx(docx_filename):
    """Save every image embedded in a DOCX file into the current directory.

    The relationships of the main document part are scanned; each one whose
    relationship type contains ``"image"`` and that points at an embedded
    part is written to ``image<N><ext>``, where ``N`` counts from 1 in the
    order the images are found and ``<ext>`` is the extension of the part
    name inside the package (``.png`` when the part name has none).

    Args:
        docx_filename: Path of the DOCX file to open.

    Returns:
        The list of file names written, in extraction order.
    """
    doc = Document(docx_filename)
    image_names = []

    for rel in doc.part.rels.values():
        if "image" not in rel.reltype:
            continue
        # Linked (external) pictures have no embedded part: python-docx
        # raises ValueError when target_part is read on such a relationship,
        # and there are no bytes to save anyway.
        if rel.is_external:
            continue

        image_part = rel.target_part

        # Name the file after the real image format instead of always
        # claiming PNG, so consumers that go by extension are not misled.
        _, dot, suffix = str(image_part.partname).rpartition(".")
        extension = f".{suffix}" if dot and suffix and "/" not in suffix else ".png"

        image_name = f"image{len(image_names) + 1}{extension}"
        with open(image_name, "wb") as img_file:
            img_file.write(image_part.blob)
        image_names.append(image_name)

    return image_names

def read_docx_content(docx_filename):
    """Return the text of the non-blank paragraphs of a DOCX file.

    Iterates over ``doc.paragraphs`` only. Paragraphs whose text is empty
    or whitespace-only are skipped; every kept paragraph contributes its
    text followed by a newline.

    Args:
        docx_filename: Path of the DOCX file to open.

    Returns:
        A single string with the concatenated paragraph texts (empty when
        no paragraph has visible text).
    """
    paragraphs = Document(docx_filename).paragraphs
    return "".join(
        f"{paragraph.text}\n"
        for paragraph in paragraphs
        if paragraph.text.strip()
    )

def create_pdf_from_docx(docx_filename, image_filenames, pdf_filename):
    """Render the text of a DOCX file, followed by a list of images, to PDF.

    The paragraph text returned by ``read_docx_content`` is written first in
    12 pt Helvetica. The images are then stacked vertically below the text,
    each one 80 units high (width scaled automatically) with a 10-unit gap
    above it; a new page is started whenever the next image would not fit
    above the bottom margin.

    Args:
        docx_filename: Path of the DOCX file whose text is rendered.
        image_filenames: Iterable of image file paths to place after the
            text, in order.
        pdf_filename: Path of the PDF file to write.
    """
    image_height = 80  # rendered height of every image, in FPDF user units
    image_gap = 10     # vertical space left above every image

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Helvetica", size=12)

    content = read_docx_content(docx_filename)

    # The built-in Helvetica font only covers latin-1; any other character
    # makes FPDF raise. Replace unsupported characters with "?" so the
    # document is still produced (lossy, but better than no PDF at all).
    printable = content.encode("latin-1", "replace").decode("latin-1")
    pdf.multi_cell(0, 10, printable)

    for image in image_filenames:
        top = pdf.get_y() + image_gap
        # An image placed with an explicit y never triggers FPDF's automatic
        # page break, so start a new page ourselves when it would overflow.
        if top + image_height > pdf.h - pdf.b_margin:
            pdf.add_page()
            top = pdf.get_y() + image_gap

        pdf.image(image, x=10, y=top, w=0, h=image_height)
        # With an explicit y the cursor is not advanced by image(); move it
        # below the picture so the next one does not overlap this one.
        pdf.set_y(top + image_height)

    pdf.output(pdf_filename)

In [3]:
if __name__ == "__main__":
    # One-shot server: accept a single client, receive a DOCX file from it,
    # then convert the file to a PDF (text + images) and an XML (first table).
    docx_filename = "server_docx.docx"
    pdf_filename = "output.pdf"
    xml_filename = "output.xml"

    # Seconds to wait for further data once the transfer has started; guards
    # against a client that keeps the connection open after sending.
    recv_idle_timeout = 2.0

    # The with-statements close both sockets on exit, so no explicit close()
    # is needed afterwards.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as tcp_sock:
        tcp_sock.bind((HOST, PORT))
        tcp_sock.listen()
        conn, addr = tcp_sock.accept()

        with conn:
            print("Info client conectat (IP Client: ", addr[0], ", Port client: ", addr[1], ")")

            # TCP is a byte stream: one recv() returns at most BUFFER_SIZE
            # bytes and often fewer, so a single call would truncate the
            # file. Keep reading until the client closes its side (recv
            # returns b"") or stays silent for recv_idle_timeout seconds.
            chunks = []
            chunk = conn.recv(BUFFER_SIZE)  # first read blocks, as before
            conn.settimeout(recv_idle_timeout)
            while chunk:
                chunks.append(chunk)
                try:
                    chunk = conn.recv(BUFFER_SIZE)
                except socket.timeout:
                    break
            data = b"".join(chunks)

            if data:
                print("Fisier citit cu succes!")
                with open(docx_filename, "wb") as file:
                    file.write(data)

                image_names = extract_images_from_docx(docx_filename)
                # Create and write the PDF file.
                create_pdf_from_docx(docx_filename, image_names, pdf_filename)

                print("Fisierul PDF a fost creat cu succes.")

                # Extract the data of the table in the DOCX file.
                table_data = extract_table_data(docx_filename)

                # Create the XML file from the extracted data.
                create_xml_from_table(table_data, xml_filename)
                print("Fisierul XML a fost creat cu succes.")

Info client conectat (IP Client:  127.0.0.1 , Port client:  5312 )
Fisier citit cu succes!
Fisierul PDF a fost creat cu succes.
Fisierul XML a fost creat cu succes.
