In [1]:
import subprocess
import os
import sys
import shutil
import argparse
import xlrd
from openpyxl import Workbook
import markdown2
from weasyprint import HTML
import stat
import time
import pandas as pd
import soundfile as sf

In [2]:
def transform_file_path(file_path):
    """Map a path under ``preservation`` to its directory under ``access/nearline``.

    The first path component that is exactly ``preservation`` is replaced by
    the nested ``access``/``nearline`` folders, and the final component
    (normally the file name) is dropped. The result is therefore the
    directory a derived file belongs in, not a file path.

    Args:
        file_path: Path whose components are separated by ``os.sep`` and
            which contains a component named ``preservation``.

    Returns:
        The mirrored directory path as a string. The path is also printed.

    Raises:
        ValueError: If no component of ``file_path`` is ``preservation``.
    """
    components = file_path.split(os.sep)

    try:
        preservation_index = components.index('preservation')
    except ValueError:
        raise ValueError(
            f"'preservation' is not a component of path: {file_path!r}"
        ) from None

    # Join with the platform separator instead of a hard-coded backslash:
    # on Windows this is still "access\nearline", elsewhere it becomes two
    # real directory levels rather than one folder with a backslash in its name.
    components[preservation_index] = os.path.join("access", "nearline")

    # Drop the last component so only the containing directory remains.
    new_file_path = os.sep.join(components[:-1])
    print(new_file_path)
    return new_file_path

def conversion_log_modify(name, input, output, format):
    """Append one row describing a finished conversion to ``conversion_log``.

    Args:
        name: File name stored in the 'File Name' column.
        input: Path of the source file; it is passed to ``os.stat`` and
            stored in the 'Original Path' column.
        output: Path of the converted file, stored in the 'New Path' column.
        format: Description of the conversion, stored in the
            'File Conversion' column.

    Side effects:
        Rebinds the module-level ``conversion_log`` DataFrame to a new frame
        with the extra row, and prints the stat result, the name, the
        source timestamp and the conversion description.
    """
    global conversion_log

    source_stat = os.stat(input)
    print(source_stat)
    print(name)

    # The value logged and printed as the "created" time is st_mtime
    # (last modification), not st_ctime.
    original_stamp = time.ctime(source_stat.st_mtime)
    print(f"The file was created on: {original_stamp}")
    print(f"Original Format: {format}")

    # Timestamp of this log entry, in local time.
    logged_at = time.strftime("%a %b %d %H:%M:%S %Y", time.localtime(time.time()))

    entry = {
        'File Name': name,
        'Original Path': input,
        'New Path': output,
        'File Conversion': format,
        'Original File Created': original_stamp,
        'New File Created': logged_at,
    }
    conversion_log = pd.concat(
        [conversion_log, pd.DataFrame([entry])],
        ignore_index=True,
    )

In [3]:
def convert_files(folder_path):
    """Recursively convert every WAV file under ``folder_path`` to MP3.

    Each subfolder is mirrored under ``access/nearline`` and then walked.
    Each file whose extension is ``.wav`` (any letter case) is converted with
    ``convert_wav_to_mp3`` into the directory returned by
    ``transform_file_path``, and the conversion is recorded with
    ``conversion_log_modify``.

    Args:
        folder_path: Directory to walk. If it does not exist, a message is
            printed and the function returns without doing anything.

    Returns:
        None. A failure on one file is printed and does not stop the walk.
    """
    if not os.path.exists(folder_path):
        print(f"Folder {folder_path} does not exist.")
        return

    for content in os.listdir(folder_path):
        content_path = os.path.join(folder_path, content)

        if os.path.isdir(content_path):
            print(f"Entering subfolder: {content_path}")
            # Mirror the subfolder up front so the access tree keeps the same
            # shape as the preservation tree, even for folders with no WAVs.
            create = content_path.replace("preservation", "access/nearline")
            os.makedirs(create, mode=0o775, exist_ok=True)
            convert_files(content_path)
            continue

        print("Found file:", content_path)

        # splitext removes only the real extension; lower() accepts any
        # letter case (.wav, .WAV, .Wav, ...).
        stem, extension = os.path.splitext(content)
        if extension.lower() != '.wav':
            continue

        try:
            output_file_name = f'{stem}.mp3'
            # transform_file_path drops the last path component, so passing
            # the source file path yields the target directory.
            output_dir = transform_file_path(content_path)
            # Create the exact directory being written to. The mirror step
            # above does not cover files sitting directly in the root folder.
            os.makedirs(output_dir, mode=0o775, exist_ok=True)
            output_file_mp3 = os.path.join(output_dir, output_file_name)
            convert_wav_to_mp3(content_path, output_file_mp3)
            conversion_log_modify(content, content_path, output_file_mp3, 'WAV to MP3')
        except Exception as E:
            # Best effort: report the failure and continue with the next file.
            print(E)
            print("Could not convert!")

In [4]:
def convert_wav_to_mp3(input_file, output_file, target_sr=44100):
    """Read an audio file and write it as MP3, resampling first if needed.

    Args:
        input_file: Path of the audio file to read with ``soundfile``.
        output_file: Path of the MP3 file to write.
        target_sr: Sample rate, in Hz, of the written file. Input at any
            other rate is resampled to this rate before writing.
    """
    data, samplerate = sf.read(input_file)

    if samplerate != target_sr:
        # Imported here because the notebook's import cell does not import
        # SciPy; without this the resampling branch raised NameError.
        from scipy.signal import resample_poly

        # Resample along the frame axis (axis 0) so multi-channel data keeps
        # its channel layout.
        data = resample_poly(data, target_sr, samplerate, axis=0)
        samplerate = target_sr

    sf.write(output_file, data, samplerate, format='mp3')

In [5]:
# Root of the preservation tree that convert_files walks. This is an absolute
# path on one specific workstation; change it before running elsewhere.
path1 = r"C:\Users\pal10\Desktop\testlotus\preservation"

In [6]:
# Start with an empty log; conversion_log_modify appends one row per
# converted file using exactly these column names.
conversion_log = pd.DataFrame(
    columns=[
        'File Name',
        'Original Path',
        'New Path',
        'File Conversion',
        'Original File Created',
        'New File Created',
    ]
)

# Folder named 'ObjectDocumentation' in place of 'preservation'; created if missing.
path2 = path1.replace('preservation', 'ObjectDocumentation')
os.makedirs(path2, mode=0o775, exist_ok=True)

In [7]:
# Walk path1 recursively and convert each WAV file found; progress and
# per-file details are printed as the walk proceeds.
convert_files(path1)

Entering subfolder: C:\Users\pal10\Desktop\testlotus\preservation\30111WAV
Entering subfolder: C:\Users\pal10\Desktop\testlotus\preservation\30111WAV\Bill Hall Interview
Entering subfolder: C:\Users\pal10\Desktop\testlotus\preservation\30111WAV\Bill Hall Interview\.ipynb_checkpoints
Entering subfolder: C:\Users\pal10\Desktop\testlotus\preservation\30111WAV\Bill Hall Interview\OriginalFiles
Found file: C:\Users\pal10\Desktop\testlotus\preservation\30111WAV\Bill Hall Interview\OriginalFiles\ZOOM0005.WAV
C:\Users\pal10\Desktop\testlotus\access\nearline\30111WAV\Bill Hall Interview\OriginalFiles
os.stat_result(st_mode=33206, st_ino=14636698789098029, st_dev=16016024109827288881, st_nlink=1, st_uid=0, st_gid=0, st_size=558069248, st_atime=1715786960, st_mtime=1294549628, st_ctime=1715786905)
ZOOM0005.WAV
The file was created on: Sat Jan  8 23:07:08 2011
Original Format: WAV to MP3
Found file: C:\Users\pal10\Desktop\testlotus\preservation\30111WAV\Bill Hall Interview\OriginalFiles\ZOOM0006.W

In [8]:
# Show the accumulated conversion log (one row per logged conversion).
print(conversion_log)

      File Name                                      Original Path  \
0  ZOOM0005.WAV  C:\Users\pal10\Desktop\testlotus\preservation\...   
1  ZOOM0006.WAV  C:\Users\pal10\Desktop\testlotus\preservation\...   
2  ZOOM0003.WAV  C:\Users\pal10\Desktop\testlotus\preservation\...   
3  ZOOM0004.WAV  C:\Users\pal10\Desktop\testlotus\preservation\...   
4  ZOOM0005.WAV  C:\Users\pal10\Desktop\testlotus\preservation\...   

                                            New Path File Conversion  \
0  C:\Users\pal10\Desktop\testlotus\access\nearli...      WAV to MP3   
1  C:\Users\pal10\Desktop\testlotus\access\nearli...      WAV to MP3   
2  C:\Users\pal10\Desktop\testlotus\access\nearli...      WAV to MP3   
3  C:\Users\pal10\Desktop\testlotus\access\nearli...      WAV to MP3   
4  C:\Users\pal10\Desktop\testlotus\access\nearli...      WAV to MP3   

      Original File Created          New File Created  
0  Sat Jan  8 23:07:08 2011  Wed May 15 10:30:16 2024  
1  Sat Jan  8 23:53:12 2011  Wed M