In [1]:
# installation
!pip install openpyxl

# import statements, install these libraries using pip install 'library_name'
import pickle as pkl
from os import listdir
from os.path import isdir, isfile

import numpy as np
import pandas as pd

# combine script file with its title
def get_script(path, dic=None):
    """(str, dict) -> dict
    Maps every test case title found under the given path to its script file

    Walks ``path`` recursively. A directory whose name starts with 'TC' is
    treated as a test case title and is mapped to the path of the script
    file inside it; any other directory is searched for nested test case
    directories. Plain files lying next to the directories are ignored.

    Params:
        path (str) path where the scripts are; a trailing '/' is added
                   when it is missing
        dic (dict) dict mapping test case title to script file path.
                   When omitted, a new empty dict is created for this
                   call; when given, it is updated in place

    Returns:
        dic (dict)
        dict mapping each test case title (the directory name) to the
        path of the script file inside that directory
    """
    # a `{}` default is created once and shared by every call, so entries
    # from an earlier call would leak into later ones
    if dic is None:
        dic = {}
    # paths are built by concatenation, so the separator must be present
    if path and not path.endswith('/'):
        path += '/'
    for entry in listdir(path):
        entry_path = path + entry + '/'
        # only directories can hold scripts or nested test case folders
        if not isdir(entry_path):
            continue
        # if directory has a test case title
        if entry.startswith('TC'):
            # align test case title with the script file inside it; sorting
            # makes the choice deterministic when there are several files
            # (the last one in sorted order is kept)
            for file in sorted(listdir(entry_path)):
                dic[entry] = entry_path + file
        # if directory is not a test case title, look for nested folders
        else:
            get_script(entry_path, dic)
    return dic

# read Katalon's manual test cases file and convert it into dataframe
test_cases_df = pd.read_excel('Katalon Capstone - Manual Test Cases.xlsx')

# remove the unnamed columns and the first two rows, then reset the index;
# a new dataframe is built (no inplace=True) so re-running the cell always
# starts again from the freshly read file
unnamed_columns = [column for column in test_cases_df.columns if 'Unnamed: ' in str(column)]
test_cases_df = (
    test_cases_df
    .drop(columns=unnamed_columns)
    .iloc[2:]
    .reset_index(drop=True)
)

# manually edit the test case title of the row at position 131;
# a single .iloc[row, column] assignment writes into the dataframe itself,
# whereas chained indexing (.iloc[131]['Title'] = ...) assigns into a
# temporary row object and can leave the dataframe unchanged
test_cases_df.iloc[131, test_cases_df.columns.get_loc('Title')] = 'TC132_Verify Question is timed out_P1'

# get the path_dict dictionary from get_script function
path_dict = get_script('ubc-script-generation/ontest-main/Automation/Scripts/Main Test Cases/')

scripts = []
# go through the title of every row in the dataframe
for title in test_cases_df['Title']:
    # if a script exists for this test case title
    if title in path_dict:
        # use the test case title as the key to the path_dict
        with open(path_dict[title]) as f:
            # read scripts, add to the list
            scripts.append(f.read())
    # no script for this title (this includes null titles)
    else:
        scripts.append(np.nan)
# create new column in the dataframe that aligns scripts with test case
test_cases_df['test_scripts'] = scripts



In [4]:
def _text_after_colon(line):
    """Returns the text following the first ':' of a header line, or '' when
    the line has no ':' or only whitespace after it."""
    _, _, rest = line.partition(':')
    return rest if rest.strip() != '' else ''


def _append_line(section_text, line):
    """Appends a line (trailing whitespace removed) to a section string,
    separating it from the existing text with a newline."""
    if section_text != '':
        section_text += '\n'
    return section_text + line.rstrip()


def segregate(case):
    """ (str) -> str, str, str
    Segregates a manual test case string into three sub-strings

    The test case is read line by line. A line containing 'pre-condition'
    opens the pre-condition section, a line starting with 'steps' opens the
    steps section and a line starting with 'expected result' opens the
    expected results section (all case-insensitive). Text following the
    first ':' of such a header line is kept as the start of that section.
    Blank lines and lines before the first header are ignored.

    Params:
        case (str) a manual test case string

    Returns:
        pre_condition_str, steps_str, expected_result_str (str, str, str)
        Three sub-strings corresponding to the pre-condition, steps and
        expected results section of a manual test case; a section that is
        not present is returned as ''
    """
    pre_condition = False
    steps = False
    expected_result = False

    pre_condition_str = ''
    steps_str = ''
    expected_result_str = ''

    # split the test case string into separate lines
    for line in case.splitlines():
        lowered = line.strip().lower()
        # if line empty
        if lowered == '':
            continue
        # if its a pre-condition header
        if 'pre-condition' in lowered:
            # keep whatever follows the first ':' on the header line; text
            # with further colons (URLs, times) is kept whole
            pre_condition_str += _text_after_colon(line)
            pre_condition = True
            continue
        # if its a steps header: the pre-condition section is over
        if lowered.startswith('steps'):
            pre_condition = False
            steps_str += _text_after_colon(line)
            steps = True
            continue
        # if its an expected result header: it closes the steps section and
        # also the pre-condition section, so a test case without a 'steps'
        # header does not leak expected results into the pre-conditions
        if lowered.startswith('expected result'):
            pre_condition = False
            steps = False
            expected_result_str += _text_after_colon(line)
            expected_result = True
            continue
        # an ordinary line is added to every section that is currently open
        if pre_condition:
            pre_condition_str = _append_line(pre_condition_str, line)
        if steps:
            steps_str = _append_line(steps_str, line)
        if expected_result:
            expected_result_str = _append_line(expected_result_str, line)

    return pre_condition_str, steps_str, expected_result_str

def process_test_cases(case_list):
    """ (list) -> list, list, list
    Splits every manual test case of a list into its three sections

    Params:
        case_list (list) a list of the manual test cases; null entries
                         are floats (NaN)

    Returns:
        pre_condition_list, steps_list, expected_result_list (list, list, list)
        Three lists, aligned with case_list, holding the pre-condition,
        steps and expected results of each manual test case. A null test
        case gives NaN in all three lists; an empty pre-condition or
        expected result section is stored as NaN, while the steps section
        is stored as returned by segregate
    """
    pre_condition_list = []
    steps_list = []
    expected_result_list = []
    for case in case_list:
        if isinstance(case, float):
            # null manual test case: nothing to segregate
            sections = (np.nan, np.nan, np.nan)
        else:
            pre_text, steps_text, expected_text = segregate(case)
            sections = (
                # empty sections are recorded as null
                np.nan if pre_text == '' else pre_text,
                steps_text,
                np.nan if expected_text == '' else expected_text,
            )
        pre_condition_list.append(sections[0])
        steps_list.append(sections[1])
        expected_result_list.append(sections[2])
    return pre_condition_list, steps_list, expected_result_list

# store the three segregated sections of every manual test case as new dataframe columns
(
    test_cases_df['pre_conditions'],
    test_cases_df['steps'],
    test_cases_df['expected_results'],
) = process_test_cases(test_cases_df['Full manual test cases'].tolist())


In [1068]:
import re

def remove_numbering(string):
    """ (str) -> str
    Strips the numerical numbering from the beginning of a string

    The numbering is a number, optionally followed by '.<number>' and an
    optional trailing '.', e.g. '3', '3.' or '3.1.'. Numbers appearing
    later in the string are left untouched.

    Params:
        string (str) a string

    Returns:
        (str)
        The string with the numerical numbering at its beginning removed
        and surrounding whitespace stripped
    """
    # '^\s*' anchors the match to the start of the string, so a step that
    # merely mentions a number (e.g. 'Click on 3 items') is not altered
    return re.sub(r'^\s*\d+(\.\d+)?\.?', '', string, count=1).strip()

def process(symbol, string):
    """ (str, str) -> str
    Strips the provided symbol from the beginning of a string

    Leading and trailing whitespace is ignored when looking for the
    symbol. A string that does not begin with the symbol is returned
    unchanged apart from the whitespace stripping.

    Params:
        symbol (str) symbol to be stripped
        string (str) a string
    Returns:
        (str)
        A string with the symbol from its beginning removed and the
        surrounding whitespace stripped
    """
    stripped = string.strip()
    # only a leading symbol is removed; occurrences inside the text stay
    if stripped.startswith(symbol):
        stripped = stripped[len(symbol):]
    return stripped.strip()

In [7]:
def get_lists(column):
    """ (list) -> list
    Converts string blocks inside the list to nested lists of strings

    Each multi-line block is read line by line, using the first
    non-whitespace character(s) of a line as its marker:
        '-'          a top-level item, added to the list of the block
        '+' or '++'  a sub-item, added to the nested list that follows the
                     current top-level item, or to the current '*' group
                     when one is open
        '*'          starts a new group nested inside the current nested
                     list
    Lines with any other beginning are ignored.

    Params:
        column (list) a list of string blocks; null entries are floats (NaN)
    Returns:
        (list)
        One entry per block: NaN for a null block, the block itself for a
        block with exactly one line, otherwise a list containing nested
        lists of strings
    """
    lists = []
    # takes a string block from column list
    for str_block in column:
        # if the string block is null
        if isinstance(str_block, float):
            lists.append(np.nan)
            continue
        block_lines = str_block.splitlines()
        # if there's only one line in the string block, add it as it is
        if len(block_lines) == 1:
            lists.append(str_block)
            continue

        a_list = []
        a_nested_list = []
        another_nested_list = []
        # True while sub-items belong to a '*' group
        double_nested = False
        for a_line in block_lines:
            marker_text = a_line.strip()
            if marker_text.startswith('-'):
                # close a pending '*' group first, so that it stays with the
                # item it belongs to instead of being dropped or carried
                # over to a later item
                if len(another_nested_list) > 0:
                    a_nested_list.append(another_nested_list.copy())
                    another_nested_list.clear()
                double_nested = False
                # close the nested list of the previous top-level item
                if len(a_nested_list) > 0:
                    a_list.append(a_nested_list.copy())
                    a_nested_list.clear()
                a_list.append(process('-', a_line))
            elif marker_text.startswith('+'):
                symbol = '++' if marker_text.startswith('++') else '+'
                target = another_nested_list if double_nested else a_nested_list
                target.append(process(symbol, a_line))
            elif marker_text.startswith('*'):
                double_nested = True
                # close the previous '*' group before opening a new one
                if len(another_nested_list) > 0:
                    a_nested_list.append(another_nested_list.copy())
                    another_nested_list.clear()
                another_nested_list.append(process('*', a_line))
        # close whatever is still open at the end of the block
        if len(another_nested_list) > 0:
            a_nested_list.append(another_nested_list.copy())
        if len(a_nested_list) > 0:
            a_list.append(a_nested_list.copy())
        # add a_list to the main list
        lists.append(a_list)
    return lists

# turn the pre-condition and expected-result text blocks into nested lists
pre_conditions_list, expected_results_list = (
    get_lists(test_cases_df['pre_conditions'].tolist()),
    get_lists(test_cases_df['expected_results'].tolist()),
)

In [1103]:
def process_steps(steps_str_list):
    """(list) -> (list)
    Turns every steps string of a list into a list of individual steps

    Params:
        steps_str_list (list) a list of steps in string format, one step
                              per line; null entries are floats (NaN)

    Returns:
        steps_list (list) one entry per input element: NaN for a null
        element, otherwise the list of its lines with the numbering
        removed by remove_numbering
    """
    return [
        # null elements stay null; every other element is split on line breaks
        np.nan if isinstance(steps_str, float)
        else [remove_numbering(step) for step in steps_str.splitlines()]
        for steps_str in steps_str_list
    ]

In [None]:
# process steps: one list of steps (numbering removed) per test case,
# NaN where the test case has no steps string
steps_list = process_steps(test_cases_df['steps'].tolist())

In [10]:
# save the processed steps as a pickle file (path is relative to the
# working directory of the notebook)
# NOTE(review): the same steps_list is also written to steps.pkl in the
# next cell -- confirm that both files are needed
with open('../../pickle_files/pickle_files_for_parsing/steps_updated.pkl', 'wb') as handle:
    pkl.dump(steps_list, handle)

In [11]:
# save the processed steps, pre-conditions and expected results, one pickle
# file per list (paths are relative to the working directory of the notebook)
with open('../../pickle_files/pickle_files_for_parsing/steps.pkl', 'wb') as handle:
    pkl.dump(steps_list, handle)
with open('../../pickle_files/pickle_files_for_parsing/pre_conditions.pkl', 'wb') as handle:
    pkl.dump(pre_conditions_list, handle)
with open('../../pickle_files/pickle_files_for_parsing/expected_results.pkl', 'wb') as handle:
    pkl.dump(expected_results_list, handle)

In [1067]:
# save the script text aligned with every test case (NaN where no script
# was found) as a pickle file in the current working directory
# NOTE(review): the other pickle files are written under
# ../../pickle_files/pickle_files_for_parsing/ -- confirm this location is intended
script_list = test_cases_df['test_scripts'].tolist()
with open('script_list.pkl', 'wb') as handle:
    pkl.dump(script_list, handle)