In [20]:
from datetime import datetime
import csv
import json
import os

In [21]:
from functools import wraps
from time import time

def timing(f):
    '''
    Decorator that reports how long every call of the wrapped function took.

    Keyword arguments:
      f : callable to wrap

    Returns
      wrap : wrapper that calls f, prints the function name, the positional
             and keyword arguments and the elapsed seconds, and then hands
             back the result of f unchanged
    '''

    report = 'func:%r args:[%r, %r] took: %2.4f sec'

    @wraps(f)
    def wrap(*args, **kw):
        started = time()
        outcome = f(*args, **kw)
        elapsed = time() - started
        print(report % (f.__name__, args, kw, elapsed))
        return outcome

    return wrap

In [22]:
@timing
def get_path():
    '''
    Let user enter the (folder) path where the *.ipynb files can be found.

    An empty entry selects the current working directory. An entry that does
    not exist on disk is rejected and the user is asked again, up to five
    extra times after the first prompt.

    Returns
      initial_path : the entered path exactly as typed when it exists,
                     the current working directory for an empty entry,
                     or '' when every attempt was an invalid path
    '''

    max_retries = 5

    print('Please enter path to folder where *.ipynb files can be found that need translation')

    for attempt in range(max_retries + 1):
        initial_path = input()

        # input() never returns None, so the former `== None` test could not
        # trigger (and it called an undefined pwd()). An empty entry is the
        # real "use the default" signal.
        if not initial_path.strip():
            return os.getcwd()

        if os.path.exists(initial_path):
            return initial_path

        if attempt < max_retries:
            print('Invalid path provided. Please try again')
            print('Please enter path to folder where *.ipynb files can be found.')

    print('To many attempts and invalid paths provided. Stopped.')
    return ''

In [23]:
@timing
def get_files(initial_path=''):
    '''
    Use an initial file or folder path to create a list of notebook file paths.

    Only files whose extension is exactly '.ipynb' are kept. Sub-folders are
    not searched.

    Keyword arguments:
      initial_path : path to a single *.ipynb file or to a folder (default: '')

    Returns
      filepaths : list of path names; for a folder the names are sorted and
                  prefixed with the real path of that folder, for a single
                  file the path is returned as given. Empty list (after
                  printing a message) when nothing usable is found.
    '''

    if os.path.isfile(initial_path):
        candidates = [initial_path]
    elif os.path.isdir(initial_path):
        base_dir = os.path.realpath(initial_path)
        # os.path.join picks the separator of the running platform; the
        # former hard-coded '\\' produced unusable paths outside Windows.
        candidates = [os.path.join(base_dir, name)
                      for name in sorted(os.listdir(initial_path))]
    else:
        candidates = []

    # Compare the real extension (case-sensitive, as before) instead of
    # searching for '.ipynb' anywhere in the name, so 'x.ipynb.bak' and
    # folders are left out.
    filepaths = [candidate for candidate in candidates
                 if os.path.splitext(candidate)[1] == '.ipynb'
                 and os.path.isfile(candidate)]

    if not filepaths:
        print('No valid files found in {}.\nStopped.\n\n'
              .format(str(initial_path)))

    return filepaths

In [24]:
@timing
def create_json_data(filename):
    '''
    Read a JSON file (such as an *.ipynb notebook) into a Python object.

    Keyword arguments:
      filename : path of the file to read

    Returns
      json_data : the parsed JSON content
    '''

    # Notebook files are UTF-8 encoded JSON. Without an explicit encoding
    # open() uses the platform default, which can fail on or garble
    # non-ASCII characters (for example on Windows).
    with open(filename, 'r', encoding='utf-8') as file:
        json_data = json.load(file)

    return json_data

In [25]:
@timing
def save_json_data(json_data, filename):
    '''
    Write a JSON object to a file, indented with two spaces.

    Keyword arguments:
      json_data : JSON serialisable object
      filename : path of the file to (over)write

    Returns
      Nothing (None), after printing a confirmation line
    '''

    confirmation = 'Saved json_data to file: {}'

    with open(filename, 'w') as target:
        json.dump(json_data, target, indent=2)

    print(confirmation.format(filename))
    return None

In [43]:
@timing
def process_json_data(json_data, search_words=None):
    '''
    Find and replace search_word values in the code cells of a notebook JSON object.

    Only cells whose 'cell_type' is 'code' are touched. Every key of
    search_words is replaced by its value in each source line, in dict order;
    matching is case-sensitive (exact text). The object is updated in place.

    Keyword arguments:
      json_data : json object with a 'cells' list
      search_words : dict where key is the text to find in the source lines.
                      Values in this dict contain the 'to replace with' values.
                      Empty keys are ignored. (default: no replacements)

    Example:
      In order to translate Dutch 'onderdelen' to English 'parts'

      search_words = {'onderdelen':'parts'}

    Returns
      json_data : the same json object with updated values
    '''

    # An empty key would insert the replacement between every character.
    replacements = [(old, new)
                    for old, new in (search_words or {}).items()
                    if old]

    def translate(text):
        for old, new in replacements:
            text = text.replace(old, new)
        return text

    for cell in json_data['cells']:

        if cell['cell_type'] != 'code':
            continue

        source = cell['source']

        if isinstance(source, str):
            # 'source' may be stored as one string instead of a list of lines.
            cell['source'] = translate(source)
        else:
            # Slice assignment keeps the original list object; rebuilding by
            # position avoids the former list.index() lookup per line.
            source[:] = [translate(line) for line in source]

    return json_data

In [27]:
def read_csv_to_dict(filename=''):
    '''
    Read CSV file whose values have the form 'key|value' and create dict object.

    Every value (column) of every row is split on its first pipe character.
    Empty values are ignored; values without a pipe or without a key are
    reported and skipped instead of aborting the whole read.

    Keyword arguments:
      filename : filename including extension (default: '')

    Returns
      dict_out : dictionary from CSV file content
    '''

    dict_out = {}

    # newline='' is what the csv module asks for when reading files.
    with open(filename, mode='r', newline='') as infile:
        for row in csv.reader(infile):
            for val in row:
                if not val.strip():
                    continue

                # partition() splits on the first pipe only, so a value that
                # itself contains a pipe no longer breaks the unpacking.
                key, separator, value = val.partition('|')

                if not separator or not key:
                    print('Skipped CSV value without key|value format: {}'.format(val))
                    continue

                dict_out[key] = value

    return dict_out

In [28]:
@timing
def check_filename(filename, file_extention=''):
    '''
    Check a filename for several minimum requirements and make sure it ends
    with the requested file extension.

    Keyword arguments:
      filename : filename excluding extension; converted to str when needed.
                 Empty or None gives 'saved_file<datetime>'.
      file_extention : requested file extension, with or without the leading
                       period character '.'. An empty extension, or one
                       shorter than three characters after the period,
                       falls back to '.csv'. (default: '')

    Return filename (string)
    '''

    filename = '' if filename is None else str(filename)

    if not filename:
        filename = 'saved_file' + datetime.now().strftime("%d-%b-%Y_%H-%M").upper()

    file_extention = str(file_extention)

    if not file_extention.startswith('.'):
        file_extention = '.' + file_extention
    if len(file_extention) < 4:
        file_extention = '.csv'

    # Test the end of the name: a substring search treated 'csvdata' as
    # already having the 'csv' extension, and an empty extension matched
    # every name so the '.csv' default was never applied.
    if not filename.endswith(file_extention):
        filename = filename + file_extention

    return filename

In [29]:
@timing
def save_list_to_csv(input_list=None, filename=''):
    '''
    Save a list object to a CSV file, one quoted value per row.

    Keyword arguments:
      input_list : list of values to write in .CSV file (default: empty list)
      filename : filename including extension (default: saved_file<datetime>.csv)

    Returns
      Nothing
    '''

    if input_list is None:
        input_list = []

    filename = check_filename(filename=filename, file_extention='csv')

    with open(filename, 'w', newline='') as myfile:
        wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
        # writerow() expects a sequence of fields. A bare string is itself a
        # sequence of characters, so it was written as one column per
        # character; wrap each value in a one-element list instead.
        wr.writerows([str(val)] for val in input_list)

    print('Saved list with < {} > values to: {}'.format(
                                                    len(input_list),
                                                    filename)
         )
    

In [30]:
@timing
def save_dict_to_json(input_dict={}, filename=''):
    '''
    Save a dict object to a JSON file.

    Keyword arguments:
      input_dict : dict to write to the file (default: empty dict)
      filename : filename; passed through check_filename() with the
                 'json' extension before the file is opened

    Returns
      Nothing
    '''

    target = check_filename(filename=filename, file_extention='json')

    with open(target, 'w') as fp:
        json.dump(input_dict, fp)

    summary = 'Saved list with < {} > key values to: {}'
    print(summary.format(len(input_dict), target))

In [31]:
@timing
def generate_word_mapping(input_text='', inputfile=''):
    '''
    Build the translation dict (original word -> translated word).

    When inputfile is '' the unique words of input_text are saved to a CSV
    file, the user is asked to add the translations to that file and to
    enter the path of the updated file. In both cases the dict is read from
    the CSV file with read_csv_to_dict().

    Keyword arguments:
      input_text : text whose whitespace separated words need translation (default: '')
      inputfile : path to a CSV file that already holds the translations (default: '')

    Returns
      translation_dict : dict read from the CSV file
    '''

    # Ideas noted for later, not implemented here:
    #  - check that all characters are UTF-8 encoded and report line and
    #    example of any non UTF-8 character so the user can correct it
    #  - filter code words and reserved python words out of input_text, to
    #    keep only variables, headers and other values that need translation
    #  - translate the word list via an API service, e.g.
    #    https://translate.yandex.com/?lang=nl-en&text=onderdeel
    #  - save translation_dict to a json file with save_dict_to_json()

    if inputfile == '':
        # dict.fromkeys() drops duplicate words and keeps first-seen order.
        unique_words = list(dict.fromkeys(input_text.split()))

        save_list_to_csv(unique_words)

        # The user translates the saved words by hand.
        print('Please add the translated values for the word_list created,\n'
              'separated by pipe character ( | ) in the created CSV file.\n')
        print('Please provide path to the updated CSV file.')
        inputfile = input()

    return read_csv_to_dict(filename=inputfile)

In [32]:
def process_files(filepaths=None, search_words=None):
    '''
    Translate every notebook in filepaths and save it next to the original.

    Each file is loaded, its code cells are updated with search_words and
    the result is written to '<name>_translated<extension>'. A file that
    cannot be read, parsed or written is reported and skipped.

    Keyword arguments:
      filepaths : iterable of notebook file paths (default: no files)
      search_words : dict of 'find' -> 'replace with' values (default: empty dict)

    Returns
      processed_result : json object of the last file that was processed
                         successfully, or None when there was none
    '''

    if search_words is None:
        search_words = {}

    # Start with a defined result: an empty filepaths list, or a failure on
    # the first file, used to end in an unbound local variable error.
    processed_result = None

    for path in filepaths or []:

        # Put the suffix in front of the extension of the file name only; a
        # text replace of '.ipynb' also rewrote folder names and left the
        # name unchanged (overwriting the input) when it did not match.
        root, extension = os.path.splitext(str(path))
        target = root + '_translated' + extension

        try:
            json_data = create_json_data(path)
            json_data = process_json_data(json_data, search_words)
            save_json_data(json_data, target)

        except (OSError, json.JSONDecodeError):
            print('Unable to process file: {}\n'.format(path))
            continue

        processed_result = json_data

    return processed_result

In [42]:
# CSV file with the translations; main() passes it to generate_word_mapping().
inputfile='translation_input_text.csv'


@timing
def main():
    '''
    Run the translation script from start to finish.

    Asks the user for the notebook location, collects the notebook files,
    reads the word mapping from the module level inputfile and processes
    the files with it. Start and finish time are printed.
    '''

    clock_format = "%H:%M:%S"

    print("Start translation script @" + datetime.now().strftime(clock_format).upper())

    filepaths = get_files(get_path())

    translation_dict = generate_word_mapping(input_text='', inputfile=inputfile)

    process_files(filepaths=filepaths, search_words=translation_dict)

    print("Finished translation script @" + datetime.now().strftime(clock_format).upper())
    
# Start the translation only when this code is executed directly, not on import.
if __name__ == "__main__":
    main()

Start translation script @16:06:03
Please enter path to folder where *.ipynb files can be found that need translation
C:\Users\Maarten\Documents\GitHub\demo-databases\create_dataframes.ipynb
func:'get_path' args:[(), {}] took: 4.0653 sec
func:'get_files' args:[('C:\\Users\\Maarten\\Documents\\GitHub\\demo-databases\\create_dataframes.ipynb',), {}] took: 0.0000 sec
func:'generate_word_mapping' args:[(), {'input_text': '', 'inputfile': 'translation_input_text.csv'}] took: 0.0010 sec
Saved json_data to file: C:\Users\Maarten\Documents\GitHub\demo-databases\create_dataframes_translated.ipynb
Finished translation script @16:06:07
func:'main' args:[(), {}] took: 4.0713 sec


In [None]:
# # Select the .ipynb notebook cells you need for a single .py script.
# #   Once the cells are selected, use SHIFT + M to merge these INPUT cells into a single input cell.
# #   Then apply the cell magic %%writefile [-a] filename as the first line of that cell.

# %%writefile [-a] 'translate_demo-database_scripts_NLD_2_ENG.py'