### Note from the author
This is my recruitment task for the Junior Data Engineer position at Cloudfide.
The function presented below performs the task exactly as required, which means that it understands only the '+', '-' and '*' operations. However, it is possible to use more than two columns and to mix different operators, e.g. col_a + col_b - col_c.
I haven't used any packages other than pandas (which was already imported in the code from the task description), because the instructions did not clearly state whether other packages are accepted. I want to point this out because I am conscious that some parts would be more concise with, e.g., regex. Those are just minor things, however; the code should be clear and relatively simple, and I hope that it meets your expectations.
Kind regards.

In [8]:
import pandas as pd

In [9]:
# Binary operators a role may contain; anything else makes the role invalid.
_ROLE_OPERATORS = ("+", "-", "*")


def _is_valid_label(name: str) -> bool:
    """Return True when *name* consists only of letters and underscores."""
    # Removing underscores first means a name made only of underscores
    # (or an empty name) is rejected, because "".isalpha() is False.
    return name.replace("_", "").isalpha()


def _tokenize_role(role: str) -> list:
    """Split *role* into a flat list of column-name and operator tokens."""
    spaced = role
    for operator in _ROLE_OPERATORS:
        # Padding every operator with spaces lets a plain split() separate
        # "a+b" and " a  +  b " into the same three tokens.
        spaced = spaced.replace(operator, f" {operator} ")
    return spaced.split()


def _apply_sign(total, sign: str, term):
    """Add *term* to (or subtract it from) the running *total*."""
    if total is None:
        # First term of the expression: nothing to combine with yet.
        return term
    return total + term if sign == "+" else total - term


def _evaluate_role(df: pd.DataFrame, operands: list, operators: list):
    """Compute the role with ``*`` binding tighter than ``+`` and ``-``."""
    total = None
    sign = "+"
    product = df[operands[0]]
    for operator, name in zip(operators, operands[1:]):
        if operator == "*":
            # Keep extending the current product until a +/- closes it.
            product = product * df[name]
            continue
        total = _apply_sign(total, sign, product)
        sign = operator
        product = df[name]
    return _apply_sign(total, sign, product)


def add_virtual_column(df: pd.DataFrame, role: str, new_column: str) -> pd.DataFrame:
    """Return a copy of *df* extended with a column computed from *role*.

    *role* is an arithmetic expression over existing columns of *df* that
    may use only the binary operators ``+``, ``-`` and ``*``. Whitespace
    around names and operators is ignored. ``*`` is applied before ``+``
    and ``-``, which are applied left to right.

    The expression is evaluated with plain pandas arithmetic instead of
    ``DataFrame.eval`` so that only the three supported operators can ever
    run (``eval`` would also accept e.g. ``**`` or unary minus) and so that
    valid column names which happen to be Python keywords still work.

    Args:
        df: Source data; it is not modified.
        role: Expression such as ``"col_a + col_b * col_c"``.
        new_column: Name of the column to add; letters and underscores only.

    Returns:
        A new DataFrame containing all columns of *df* plus *new_column*,
        or an empty DataFrame when an argument has the wrong type, a name
        is invalid, a column is missing, the expression is malformed, or
        the arithmetic itself fails.
    """
    # Validate the argument types.
    if not isinstance(df, pd.DataFrame):
        return pd.DataFrame()
    if not isinstance(role, str) or not isinstance(new_column, str):
        return pd.DataFrame()

    # Validate the name of the column to be created.
    if not _is_valid_label(new_column):
        return pd.DataFrame()

    # A well-formed role alternates operand, operator, operand, ... and
    # therefore always has an odd number of tokens. This rejects empty
    # roles, "a b", "a +", "-a" and doubled operators such as "a ** b".
    tokens = _tokenize_role(role)
    if len(tokens) % 2 == 0:
        return pd.DataFrame()
    operands, operators = tokens[0::2], tokens[1::2]

    if not all(operator in _ROLE_OPERATORS for operator in operators):
        return pd.DataFrame()
    # Every operand must be an existing column with a well-formed name.
    if not all(name in df.columns for name in operands):
        return pd.DataFrame()
    if not all(_is_valid_label(name) for name in operands):
        return pd.DataFrame()

    # Everything is valid -> compute the new column on a copy.
    new_df = df.copy()
    try:
        new_df[new_column] = _evaluate_role(new_df, operands, operators)
    except Exception:
        # Deliberately broad: the contract is "empty DataFrame on any
        # failure", and pandas arithmetic can raise several error types
        # for incompatible column contents.
        return pd.DataFrame()
    return new_df

### Testing function

In [10]:
def test_sum_of_two_columns():
    """Check that a '+' role without spaces adds two columns row by row."""
    input_columns = ["label_one", "label_two"]
    source = pd.DataFrame([[1, 1], [1, 1]], columns=input_columns)
    df_expected = pd.DataFrame(
        [[1, 1, 2], [1, 1, 2]],
        columns=[*input_columns, "label_three"],
    )

    df_result = add_virtual_column(source, "label_one+label_two", "label_three")

    assert df_result.equals(df_expected), f"The function should sum the columns: label_one and label_two.\n\nResult:\n\n{df_result}\n\nExpected:\n\n{df_expected}"


def test_multiplication_of_two_columns():
    """Check that a '*' role multiplies two columns row by row."""
    input_columns = ["label_one", "label_two"]
    source = pd.DataFrame([[1, 1], [1, 1]], columns=input_columns)
    df_expected = pd.DataFrame(
        [[1, 1, 1], [1, 1, 1]],
        columns=[*input_columns, "label_three"],
    )

    df_result = add_virtual_column(source, "label_one * label_two", "label_three")

    assert df_result.equals(df_expected), f"The function should multiply the columns: label_one and label_two.\n\nResult:\n\n{df_result}\n\nExpected:\n\n{df_expected}"


def test_subtraction_of_two_columns():
    """Check that a '-' role subtracts the second column from the first."""
    input_columns = ["label_one", "label_two"]
    source = pd.DataFrame([[1, 1], [1, 1]], columns=input_columns)
    df_expected = pd.DataFrame(
        [[1, 1, 0], [1, 1, 0]],
        columns=[*input_columns, "label_three"],
    )

    df_result = add_virtual_column(source, "label_one - label_two", "label_three")

    assert df_result.equals(df_expected), f"The function should subtract the columns: label_one and label_two.\n\nResult:\n\n{df_result}\n\nExpected:\n\n{df_expected}"


def test_empty_result_when_invalid_labels():
    """Check that a new column name containing a digit gives an empty frame."""
    rows = [[1, 2], [1, 2], [1, 2]]
    source = pd.DataFrame(rows, columns=["label_one", "label_two"])

    # "label3" is the invalid part: the role itself is well formed.
    df_result = add_virtual_column(source, "label_one + label_two", "label3")

    assert df_result.empty, f"Should return an empty df when the \"new_column\" is invalid.\n\nResult:\n\n{df_result}\n\nExpected:\n\nEmpty df"


def test_empty_result_when_invalid_rules():
    """Check that roles with a bad character or an unknown column are rejected."""
    source = pd.DataFrame(
        [[1, 1], [1, 1]],
        columns=["label_one", "label_two"],
    )

    # Case 1: '&' is not an allowed character in a role.
    df_result = add_virtual_column(source, "label&one + label_two", "label_three")
    assert df_result.empty, f"Should return an empty df when the role have invalid character: '&'.\n\nResult:\n\n{df_result}\n\nExpected:\n\nEmpty df"

    # Case 2: 'label_five' is well formed but absent from the frame.
    df_result = add_virtual_column(source, "label_five + label_two", "label_three")
    assert df_result.empty, f"Should return an empty df when the role have a column which isn't in the df: 'label_five'.\n\nResult:\n\n{df_result}\n\nExpected:\n\nEmpty df"


def test_when_extra_spaces_in_rules():
    """Check that leading, trailing and inner spaces in a role are tolerated."""
    input_columns = ["label_one", "label_two"]
    source = pd.DataFrame([[1, 1], [1, 1]], columns=input_columns)
    df_expected = pd.DataFrame(
        [[1, 1, 2], [1, 1, 2]],
        columns=[*input_columns, "label_three"],
    )

    # Trailing space only.
    df_result = add_virtual_column(source, "label_one + label_two ", "label_three")
    assert df_result.equals(df_expected), f"Should work when the role have spaces between the operation and the column.\n\nResult:\n\n{df_result}\n\nExpected:\n\n{df_expected}"

    # Leading and trailing spaces.
    df_result = add_virtual_column(source, "  label_one + label_two ", "label_three")
    assert df_result.equals(df_expected), f"Should work when the role have extra spaces in the start/end.\n\nResult:\n\n{df_result}\n\nExpected:\n\n{df_expected}"

In [11]:
# Run the addition test; a failed check raises AssertionError.
test_sum_of_two_columns()

In [12]:
# Run the multiplication test; a failed check raises AssertionError.
test_multiplication_of_two_columns()

In [13]:
# Run the subtraction test; a failed check raises AssertionError.
test_subtraction_of_two_columns()

In [14]:
# Run the invalid new-column-name test; a failed check raises AssertionError.
test_empty_result_when_invalid_labels()

In [15]:
# Run the invalid-role tests; a failed check raises AssertionError.
test_empty_result_when_invalid_rules()

In [16]:
# Run the extra-whitespace tests; a failed check raises AssertionError.
test_when_extra_spaces_in_rules()