In [None]:
import pandas as pd

def make_bidirectional_map_one_to_many(df):
    """Recursively rework a two-column dataframe into a one-to-many map.

    Each row is treated as a pair of values. Rows whose two values are
    equal and repeated rows are dropped. The dataframe is considered
    finished when no value is repeated in the second column and no
    value of the second column also occurs in the first column;
    otherwise the pairs are remapped and the function calls itself on
    the result.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe with exactly two columns. It is not modified; all work
        is done on a copy.

    Returns
    -------
    pandas.DataFrame
        A new dataframe carrying the column labels of the input.

    Notes
    -----
    Termination is not guaranteed for every input: the row swap at the
    end of each pass exists because the remapping alone can loop
    indefinitely, and it has not been shown to cover every case. Python
    raises ``RecursionError`` if the recursion limit is exceeded.
    """

    input_columns = list(df.columns)

    # Work on a private copy so that relabelling the columns below does
    # not leak into the caller's dataframe
    df = df.copy()
    df.columns = [0, 1]

    # Remove identities and duplicate rows
    df = df.loc[df[0] != df[1], :].drop_duplicates()

    # Check if the input dataframe is already a one-to-many map
    if not df[1].duplicated().any() and not df[1].isin(df[0]).any():
        df.columns = input_columns
        return df

    # Stack the values so we can find duplicate values regardless of the
    # column. The stacked index is (row label, column label)
    dummy = df.stack()
    dummy = dummy.loc[dummy.duplicated(keep=False)]

    ##############################################################
    # Locate two different types of duplicate values in the second
    # column
    ##############################################################

    # Get first instance in the second column of values duplicated
    # anywhere in the dataframe. "dummy" refers to duplicate values in
    # the stacked dataframe
    first_instance_of_any_dplct_in_c1 = (
        dummy.loc[(slice(None), 1)].drop_duplicates()
    )

    # Get values from the second column which are duplicated anywhere
    # in the dataframe but which occurred first in the second column.
    # "dummy" refers to duplicate values in the stacked dataframe
    frst_instnc_of_dplct_found_in_c1_frst = dummy.drop_duplicates()
    if 1 in frst_instnc_of_dplct_found_in_c1_frst.index.get_level_values(1):
        frst_instnc_of_dplct_found_in_c1_frst = (
            frst_instnc_of_dplct_found_in_c1_frst.loc[(slice(None), 1)]
        )

    else:
        # No duplicated value was seen first in the second column
        frst_instnc_of_dplct_found_in_c1_frst = pd.Series([], dtype=int)

    # If values duplicated anywhere in the dataframe exist in the
    # second column, map the duplicates to the first-column
    # partner of the first instance of the value in the second column
    if df[1].duplicated().any():

        ##############################################################
        # Make maps between the different kinds of first second-column
        # instances and their first-column partners
        ##############################################################

        # Map from the first instance in the second column of values
        # duplicated anywhere in the dataframe to their first-column
        # partners.
        dummy = df.loc[first_instance_of_any_dplct_in_c1.index]
        map_to_c0_from_frst_instnc_of_any_dplct_in_c1 = pd.Series(
            dummy[0].array,
            index=dummy[1].array
        )

        # Map from duplicate values that occurred first in the second
        # column to the first-column partners of their first instance
        map_to_c0_from_frst_instnc_of_dplct_found_in_c1_frst = df.loc[
            frst_instnc_of_dplct_found_in_c1_frst.index,
            :
        ]
        map_to_c0_from_frst_instnc_of_dplct_found_in_c1_frst = pd.Series(
            map_to_c0_from_frst_instnc_of_dplct_found_in_c1_frst[0].array,
            index=map_to_c0_from_frst_instnc_of_dplct_found_in_c1_frst[1].array
        )

        ################
        # Apply the maps
        ################

        # Get rows with values in the first column that appeared
        # first in the second column
        c0_value_was_found_in_c1_first = df.loc[
            df[0].isin(first_instance_of_any_dplct_in_c1),
            :
        ]

        # Map those values to the first-column partners of their first
        # instances in the second column
        mapped = c0_value_was_found_in_c1_first[0].map(
            map_to_c0_from_frst_instnc_of_any_dplct_in_c1
        )
        df.loc[c0_value_was_found_in_c1_first.index, 0] = mapped.array

        # Get all duplicate values in the second column which appeared first
        # in the second column
        c1_value_was_found_in_c1_first = df.loc[
            df[1].isin(frst_instnc_of_dplct_found_in_c1_frst),
            :
        ]

        # Drop the first instances to isolate only the duplicates
        c1_value_was_found_in_c1_first = c1_value_was_found_in_c1_first.drop(
            frst_instnc_of_dplct_found_in_c1_frst.index
        )

        # If there are duplicate values in the second column which
        # appeared first in the second column, map those values to the
        # first-column partners of their first instances and then swap
        # values in those rows so the first instance's first-column
        # partner is in the first column
        if not c1_value_was_found_in_c1_first.empty:
            mapped = c1_value_was_found_in_c1_first[1].map(
                map_to_c0_from_frst_instnc_of_dplct_found_in_c1_frst
            )
            df.loc[c1_value_was_found_in_c1_first.index, :] = list(zip(
                mapped.array,
                df.loc[c1_value_was_found_in_c1_first.index, 0]
            ))

        # Remove any identities or duplicate rows created above
        df = df.loc[df[0] != df[1], :].drop_duplicates()

    # Rearrange the dataframe so that values which appear in both
    # columns appear first in the second column
    dummy = df[1].isin(df[0])
    df = pd.concat([df.loc[dummy], df.loc[~dummy]])

    dummy = df[0].isin(df[1])
    df = pd.concat([df.loc[~dummy], df.loc[dummy]])

    # In some cases the above algorithm will loop indefinitely if there
    # are values that occur in both columns but there are no repeated
    # values in the second column. Avoid this by finding rows with
    # values in the first column that are present in the second column
    # and, if any exist, swapping values in the first row found.
    dummy = df[0].isin(df[1])

    if dummy.any():
        to_swap = df.loc[dummy].index[0]
        df.loc[to_swap, :] = df.loc[to_swap, [1, 0]].to_numpy()

    # Hand the original labels back so the next pass (and ultimately
    # the caller) sees them
    df.columns = input_columns

    # Remap the modified dataframe
    return make_bidirectional_map_one_to_many(df)

# Demo input with one pair per row, labelled 'tail' and 'head'
df = pd.DataFrame(
    {
        'tail': [1, 4, 2, 4, 6, 5, 7, 8],
        'head': [2, 1, 3, 3, 8, 4, 6, 7],
    }
)
make_bidirectional_map_one_to_many(df)

In [15]:
import pandas as pd


def recursive_universalizer(df):
    """Experimental recursive rewrite of a two-column edge list.

    Column 0 is treated as the tail and column 1 as the head of an
    edge. Rows whose two values are equal and repeated rows are dropped.
    The dataframe is returned once no head value is repeated and no
    head value also occurs as a tail; otherwise the edges are remapped
    and the function calls itself on the result.

    Progress messages (and, in one branch, the intermediate dataframe)
    are printed on every pass.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe with exactly two columns. It is not modified; all work
        is done on a copy.

    Returns
    -------
    pandas.DataFrame
        A new dataframe carrying the column labels of the input.

    Notes
    -----
    NOTE(review): termination is not guaranteed for every input. An
    earlier draft swapped the first row whose tail also appears among
    the heads to break such loops; this version relies on the
    "no duplicate heads" branch instead. Python raises
    ``RecursionError`` if the recursion limit is exceeded.
    """

    input_columns = list(df.columns)

    # Work on a private copy so that relabelling the columns below does
    # not leak into the caller's dataframe
    df = df.copy()
    df.columns = [0, 1]

    # Remove identities and duplicate rows
    df = df.loc[df[0] != df[1], :].drop_duplicates()

    # Check if the input dataframe is already a one-to-many map
    if not df[1].duplicated().any() and not df[1].isin(df[0]).any():
        df.columns = input_columns
        return df

    # Rearrange the dataframe so that values which appear in both
    # columns appear first in the second column
    dummy = df[1].isin(df[0])
    df = pd.concat([df.loc[dummy], df.loc[~dummy]])

    dummy = df[0].isin(df[1])
    df = pd.concat([df.loc[~dummy], df.loc[dummy]])

    # Stack the values so we can find duplicate values regardless of the
    # column. The stacked index is (row label, column label)
    dummy = df.stack()
    dummy = dummy.loc[dummy.duplicated(keep=False)]

    # Isolate multiply connected nodes at the head of at least one
    # edge
    possible_leaves = dummy.loc[(slice(None), 1)].drop_duplicates()

    # Isolate multiply connected nodes that appear first at the head
    # of an edge
    possible_roots = dummy.drop_duplicates()
    if 1 in possible_roots.index.get_level_values(1):
        possible_roots = possible_roots.loc[(slice(None), 1)]
    else:
        # No multiply connected node was seen first at a head
        possible_roots = pd.Series([], dtype=int)

    # Map from the first instance in the second column of values
    # duplicated anywhere in the dataframe to their first-column
    # partners, i.e. from the head of the first edge pointing to a
    # multiply connected node to the tail of that edge
    leaves_to_branches = df.loc[possible_leaves.index]
    leaves_to_branches = pd.Series(
        leaves_to_branches[0].array,
        index=leaves_to_branches[1].array
    )

    # Get rows with values in the first column that appear at least
    # once in the second column. These are edges whose tails are at
    # multiply connected nodes attached to at least one head
    possible_leaves = df.loc[df[0].isin(possible_leaves), :]

    # Map those values to the first-column partners of their first
    # instances in the second column
    df.loc[possible_leaves.index, 0] = (
        possible_leaves[0].map(leaves_to_branches).array
    )

    # If there are shared leaves
    if df[1].duplicated().any():
        print('heads duplicated')

        # Map from duplicate values that occurred first in the second
        # column to the first-column partners of their first instance
        branches_to_roots = df.loc[possible_roots.index]
        branches_to_roots = pd.Series(
            branches_to_roots[0].array,
            index=branches_to_roots[1].array
        )

        # Get all duplicate values in the second column which appeared
        # first in the second column and drop the first instances to
        # isolate only the duplicates
        shared_leaves = df.loc[df[1].isin(possible_roots), :].drop(
            possible_roots.index
        )

        # If there are duplicate values in the second column which
        # appeared first in the second column, map those values to the
        # first-column partners of their first instances and then swap
        # values in those rows so the first instance's first-column
        # partner is in the first column
        if not shared_leaves.empty:
            df.loc[shared_leaves.index, :] = list(zip(
                shared_leaves[1].map(branches_to_roots).array,
                df.loc[shared_leaves.index, 0]
            ))

        # Remove any identities or duplicate rows created above
        df = df.loc[df[0] != df[1], :].drop_duplicates()

    else:
        print('no duplicate heads')
        print(df)
        dummy = df.stack()

        # Keep the first occurrence of every value across both columns
        first_occurrences = dummy.drop_duplicates()

        # Keep only entries whose row label was already seen, i.e. rows
        # in which both values are first occurrences; those rows get
        # their two values swapped
        first_occurrences = first_occurrences.loc[
            first_occurrences.index.get_level_values(0).duplicated()
        ]
        to_swap = first_occurrences.index.get_level_values(0)
        print(to_swap)
        df.loc[to_swap, :] = df.loc[to_swap, [1, 0]].to_numpy()

    # Hand the original labels back so the next pass (and ultimately
    # the caller) sees them instead of the working labels 0 and 1
    df.columns = input_columns

    # Remap the modified dataframe
    print('loop')
    return recursive_universalizer(df)


# Example edge list: column 0 holds tails, column 1 holds heads
df = pd.DataFrame(
    {
        0: [1, 4, 2, 4, 6, 5, 7, 8, 10, 11, 12],
        1: [2, 1, 3, 3, 8, 4, 6, 7, 11, 12, 10],
    }
)

# Alternative inputs kept for experimentation
#df = pd.DataFrame({0: [1, 2, 3, 4, 5, 6, 7], 1: [4, 1, 2, 3, 7, 5, 6]})
#df = pd.DataFrame({0: [5,9,1,3,3,8,6,4,9,1], 1: [8,0,5,1,1,9,8,6,0,0]})

# The last alternative above, shown with its columns labelled a and b:
#    a  b
# 0  5  8
# 1  9  0
# 2  1  5
# 3  3  1
# 4  3  1
# 5  8  9
# 6  6  8
# 7  4  6
# 8  9  0
# 9  1  0
recursive_universalizer(df)

heads duplicated
loop
no duplicate heads
     0   1
5    1   4
1    1   1
4    6   8
6    7   6
7    8   7
8   10  11
9   11  12
10  12  10
3    5   5
0    5   2
2    5   3
Int64Index([5, 4, 8], dtype='int64')
loop
heads duplicated
loop


Unnamed: 0,0,1
7,8,7
9,11,12
5,4,1
4,8,6
8,11,10
0,5,2
2,5,3


In [None]:
import time
import pandas as pd

# How many randomized trials the driver loop performs (kept as a float;
# it is converted with int() where it is used)
number_of_runs = 1000.0

def run_it():
    """Run ``recursive_universalizer`` once on a random two-column frame.

    Two columns, 'a' and 'b', are drawn with replacement from the
    integers ``0 .. pop_size - 1``. The seeds are taken from the current
    time: the whole seconds seed column 'a' and the digits after the
    decimal point seed column 'b'. The sampled frame is printed before
    it is processed.

    Raises
    ------
    RuntimeError
        If ``recursive_universalizer`` raises any ``Exception``. The
        original error is attached as ``__cause__`` and the message
        carries both seeds so the failing input can be regenerated.
    """
    pop_size = 1e1
    sample_size = 1e1

    # Split the timestamp's text form into its whole-second and
    # fractional digits and use each part as an integer seed
    t = str(time.time()).split('.')
    s1 = int(t[0])
    s2 = int(t[1])

    pop = pd.Series(pd.RangeIndex(stop=int(pop_size)))
    sampled = pd.DataFrame({
        'a': pop.sample(int(sample_size), replace=True, random_state=s1).array,
        'b': pop.sample(int(sample_size), replace=True, random_state=s2).array
    })
    print(sampled)

    try:
        #make_bidirectional_map_one_to_many(sampled)
        recursive_universalizer(sampled)

    # Catch Exception rather than everything so KeyboardInterrupt and
    # SystemExit still stop the run, and chain the cause so the
    # underlying traceback is not lost
    except Exception as err:
        raise RuntimeError(
            f'recursive_universalizer failed for seeds {s1} and {s2}'
        ) from err

# Repeat the randomized trial; the short pause between runs is
# presumably there so the time-derived seeds differ from run to run
for i in range(1, 1 + int(number_of_runs)):
    time.sleep(0.005)
    run_it()