In [1]:
import subprocess
import os
import sys
import shutil
import argparse
import xlrd
from openpyxl import Workbook
import markdown2
from weasyprint import HTML
import stat
import time
import pandas as pd
import soundfile as sf

In [2]:
def transform_file_path(file_path):
    """Map a path under ``preservation`` to its mirror directory under ``access/nearline``.

    The first path component that is exactly ``preservation`` is replaced by
    the two components ``access`` and ``nearline`` (joined with the platform
    separator), and the final component of ``file_path`` (normally the file
    name) is dropped, so the result is the directory that should hold the
    derived file.

    Parameters
    ----------
    file_path : str
        Path that is split on ``os.sep``; it must contain a component named
        exactly ``preservation``.

    Returns
    -------
    str
        The mirrored directory path. The value is also printed.

    Raises
    ------
    ValueError
        If no component of ``file_path`` is exactly ``preservation``.
    """
    components = file_path.split(os.sep)

    try:
        preservation_index = components.index('preservation')
    except ValueError:
        # Same exception type as list.index, but with a message that names the path.
        raise ValueError(
            f"'preservation' is not a directory component of {file_path!r}"
        ) from None

    # Join with the platform separator rather than a literal backslash, so the
    # result is a real two-level path on every OS (unchanged on Windows).
    components[preservation_index] = os.path.join('access', 'nearline')
    new_file_path = os.sep.join(components[:-1])  # drop the trailing file name
    print(new_file_path)
    return new_file_path

def conversion_log_modify(name, input, output, format):
    """Append one conversion record to the module-level ``conversion_log`` frame.

    Parameters
    ----------
    name : str
        Stored in the ``File Name`` column.
    input : str
        Path of the source file; it is ``os.stat``-ed and stored in
        ``Original Path``.
    output : str
        Stored in the ``New Path`` column.
    format : str
        Label stored in the ``File Conversion`` column.

    Notes
    -----
    The ``Original File Created`` value is derived from ``st_mtime`` (the
    last-modification time of ``input``), not from a creation timestamp.
    ``conversion_log`` is rebound to a new DataFrame on every call.
    """
    global conversion_log

    source_stat = os.stat(input)
    print(source_stat)
    print(name)

    # time.ctime renders the modification time in the local timezone.
    source_stamp = time.ctime(source_stat.st_mtime)
    print(f"The file was created on: {source_stamp}")
    print(f"Original Format: {format}")

    logged_at = time.strftime("%a %b %d %H:%M:%S %Y", time.localtime())

    record = {
        'File Name': name,
        'Original Path': input,
        'New Path': output,
        'File Conversion': format,
        'Original File Created': source_stamp,
        'New File Created': logged_at,
    }
    single_row = pd.DataFrame([record])
    conversion_log = pd.concat([conversion_log, single_row], ignore_index=True)

In [3]:
def _mirror_into_access(directory):
    """Return ``directory`` with its first ``preservation`` component swapped for ``access/nearline``."""
    parts = directory.split(os.sep)
    if 'preservation' in parts:
        # Replace only the exact component, never a substring of another name.
        parts[parts.index('preservation')] = os.path.join('access', 'nearline')
    return os.sep.join(parts)


def convert_files(folder_path):
    """Recursively convert every WordPerfect file under ``folder_path`` to PDF.

    Every sub-folder is mirrored under ``access/nearline`` and then walked
    recursively. Each file whose name ends in ``.wpd`` (any letter case) is
    passed to ``convert_wpd_to_pdf``; the PDF is written to the directory
    returned by ``transform_file_path``. A row is added through
    ``conversion_log_modify`` only when the PDF exists after the conversion
    call, so failed conversions are not logged as successes.

    Parameters
    ----------
    folder_path : str
        Directory to walk. If it does not exist a message is printed and the
        function returns without doing anything.

    Returns
    -------
    None

    Notes
    -----
    Errors raised while handling a single file are printed and that file is
    skipped; the walk continues with the next entry.
    """
    if not os.path.exists(folder_path):
        print(f"Folder {folder_path} does not exist.")
        return

    for content in os.listdir(folder_path):
        content_path = os.path.join(folder_path, content)

        if os.path.isdir(content_path):
            print(f"Entering subfolder: {content_path}")
            os.makedirs(_mirror_into_access(content_path), mode=0o775, exist_ok=True)
            convert_files(content_path)
            continue

        print("Found file:", content_path)
        if not content.lower().endswith('.wpd'):
            continue

        try:
            # Strip only the trailing extension, not every ".wpd" in the name.
            output_file_name = f"{content[:-len('.wpd')]}.pdf"
            # transform_file_path drops the last component, so passing the
            # source file path yields the mirrored output directory.
            output_dir = transform_file_path(content_path)
            # Files directly inside the root folder have no mirrored directory
            # yet, so make sure the destination exists before converting.
            os.makedirs(output_dir, mode=0o775, exist_ok=True)
            output_file_pdf = os.path.join(output_dir, output_file_name)
            print(output_dir)
            convert_wpd_to_pdf(content_path, output_file_pdf)
            # The converter reports failures by printing, so confirm the PDF
            # is really on disk before recording the conversion.
            if os.path.isfile(output_file_pdf):
                conversion_log_modify(content, content_path, output_file_pdf, 'WordPerfect to PDF')
            else:
                print("Could not convert!")
        except Exception as E:
            print(E)
            print("Could not convert!")
            
                

In [4]:
def convert_wpd_to_pdf(input_path, output_path,
                       unoconv_script=r"C:\Users\pal10\Desktop\tkapp\unoconv"):
    """Convert one document to PDF by running the unoconv script with ``python``.

    Parameters
    ----------
    input_path : str
        Path of the source document handed to unoconv.
    output_path : str
        Destination path; it must end in ``.pdf`` (case-insensitive).
    unoconv_script : str, optional
        Location of the unoconv script to execute. The default is the
        location this notebook was originally written against.

    Returns
    -------
    bool
        ``True`` when unoconv exited with status 0, ``False`` when the output
        name was rejected or the subprocess failed. Failures are printed, not
        raised, so a batch run can carry on with the next file.
    """
    try:
        if not output_path.lower().endswith(".pdf"):
            raise ValueError("Output file must have a .pdf extension.")

        # check=True turns a non-zero unoconv exit status into CalledProcessError.
        subprocess.run(
            ["python", unoconv_script, "-f", "pdf", "-o", output_path, input_path],
            check=True,
        )
    except Exception as e:
        print(f"Conversion failed: {e}")
        return False

    print(f"Conversion successful: {input_path} -> {output_path}")
    return True

In [5]:
# Root folder of the preservation tree that the notebook walks.
# NOTE(review): hard-coded to one user's machine; adjust before running elsewhere.
path1 = r"C:\Users\pal10\Desktop\testlotus\preservation"

In [6]:
# Start each run from an empty log; conversion_log_modify appends to it.
conversion_log = pd.DataFrame(
    columns=[
        'File Name',
        'Original Path',
        'New Path',
        'File Conversion',
        'Original File Created',
        'New File Created',
    ]
)

# Create the folder obtained by swapping "preservation" for
# "ObjectDocumentation" in path1 (no error if it already exists).
path2 = path1.replace('preservation', 'ObjectDocumentation')
os.makedirs(path2, mode=0o775, exist_ok=True)

In [7]:
# Walk path1 recursively; convert_files prints each sub-folder and file it visits.
convert_files(path1)

Entering subfolder: C:\Users\pal10\Desktop\testlotus\preservation\30112119503834
Entering subfolder: C:\Users\pal10\Desktop\testlotus\preservation\30112119503834\ObjectDocumentation
Found file: C:\Users\pal10\Desktop\testlotus\preservation\30112119503834\ObjectDocumentation\diskTimestampInfo.txt.txt
Entering subfolder: C:\Users\pal10\Desktop\testlotus\preservation\30112119503834\ObjectDocumentation\ObjectJPG
Found file: C:\Users\pal10\Desktop\testlotus\preservation\30112119503834\ObjectDocumentation\ObjectJPG\30112119503834.JPG
Entering subfolder: C:\Users\pal10\Desktop\testlotus\preservation\30112119503834\OriginalFiles
Entering subfolder: C:\Users\pal10\Desktop\testlotus\preservation\30112119503834\OriginalFiles\CBAK
Entering subfolder: C:\Users\pal10\Desktop\testlotus\preservation\30112119503834\OriginalFiles\CBAK\BATFILES
Found file: C:\Users\pal10\Desktop\testlotus\preservation\30112119503834\OriginalFiles\CBAK\BATFILES\AUTOEXEC.BAT
Found file: C:\Users\pal10\Desktop\testlotus\pre

In [8]:
# Show the rows accumulated in conversion_log during the run above.
print(conversion_log)

      File Name                                      Original Path  \
0   ALB_SPE.wpd  C:\Users\pal10\Desktop\testlotus\preservation\...   
1  AWARD_PR.wpd  C:\Users\pal10\Desktop\testlotus\preservation\...   
2    MRL_PR.wpd  C:\Users\pal10\Desktop\testlotus\preservation\...   
3     SPOON.wpd  C:\Users\pal10\Desktop\testlotus\preservation\...   
4    TOM_PR.wpd  C:\Users\pal10\Desktop\testlotus\preservation\...   

                                            New Path     File Conversion  \
0  C:\Users\pal10\Desktop\testlotus\access\nearli...  WordPerfect to PDF   
1  C:\Users\pal10\Desktop\testlotus\access\nearli...  WordPerfect to PDF   
2  C:\Users\pal10\Desktop\testlotus\access\nearli...  WordPerfect to PDF   
3  C:\Users\pal10\Desktop\testlotus\access\nearli...  WordPerfect to PDF   
4  C:\Users\pal10\Desktop\testlotus\access\nearli...  WordPerfect to PDF   

      Original File Created          New File Created  
0  Thu Jul  7 09:50:14 1994  Wed May 15 09:55:35 2024  
1  Thu Jul