1. Look at the locations and work out which are upstream and which are downstream of one another.
2. Write directionality rules based on `loc_code`.

e.g. (sequence of loc_codes - direction of each consecutive move; us = upstream, ds = downstream):
1. [41, 42, 43] - [us, us]
2. [22, 21, 22] - [ds, us]

In [1]:
import pandas as pd

In [2]:
# loc_code mixes numeric-looking and alphanumeric codes (e.g. 21 and 7A), and
# later cells index each code as a string. Pin the column to str so that dtype
# inference can never hand back integers for the numeric-looking codes.
dwell_loc = pd.read_csv(
    "data/loc_code_detection_patterns.csv",
    dtype={"loc_code": str},
)

In [5]:
# Keep only the columns needed for direction analysis. Take an explicit copy:
# a column is added to `direction` further down, and writing into a bare
# column selection of dwell_loc triggers pandas' SettingWithCopyWarning.
direction = dwell_loc[['tag_id', 'loc_code']].copy()
direction

Unnamed: 0,tag_id,loc_code
0,989.001007,7A
1,989.001033,21
2,989.001007,22
3,989.001007,232
4,989.001007,231
...,...,...
36754,989.001043,44
36755,989.001043,45
36756,989.001043,44
36757,989.001043,45


In [8]:
# Bucket every loc_code by its first character; codes that do not start with a
# digit are pooled under 'others'. `.assign` returns a new frame instead of
# writing into what may be a slice of dwell_loc, so no SettingWithCopyWarning
# is raised here.
direction = direction.assign(
    category=direction['loc_code'].apply(
        lambda code: code[0] if code[0].isdigit() else 'others'
    )
)

# Sorted, de-duplicated loc_codes observed in each bucket.
categorized_lists = (
    direction.groupby('category')['loc_code']
    .apply(lambda codes: sorted(set(codes)))
    .to_dict()
)

for category, loc_codes in categorized_lists.items():
    print(f"Category '{category}': {loc_codes}")

Category '1': ['11', '12', '13']
Category '2': ['201', '202', '21', '22', '231', '232', '233']
Category '3': ['301', '302', '3A', '3B', '3M']
Category '4': ['401', '402', '41', '42', '44', '45']
Category '5': ['501', '51', '5A', '5B']
Category '6': ['60', '61', '62', '6A', '6B', '6C', '6D', '6F']
Category '7': ['701', '702', '71', '7A', '7B', '7C', '7D']
Category '8': ['81', '82', '83', '84']
Category '9': ['91', '92', '921', '922', '9A', '9B', '9C', '9D', '9F']
Category 'others': ['A1', 'A2', 'AA', 'AB', 'C1', 'C2']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  direction['category'] = direction['loc_code'].apply(lambda x: x[0] if x[0].isdigit() else 'others')


In [9]:
def _loc_codes_as_record(tag_rows):
    """Wrap one tag's loc_code values, in row order, in a single-key dict."""
    return {"loc_code": tag_rows["loc_code"].tolist()}


# One row per tag_id; the dict returned for each group ends up in the
# unnamed column 0 after reset_index().
rows_by_tag = direction.groupby("tag_id")
sequence_data = rows_by_tag.apply(_loc_codes_as_record).reset_index()

print(sequence_data)

          tag_id                                                  0
0     989.001006                               {'loc_code': ['21']}
1     989.001007       {'loc_code': ['60', '6A', '6B', '6A', '60']}
2     989.001007                 {'loc_code': ['22', '202', '201']}
3     989.001007       {'loc_code': ['6A', '6B', '6A', '6B', '6A']}
4     989.001007                   {'loc_code': ['60', '6A', '6B']}
...          ...                                                ...
6781  989.002028                               {'loc_code': ['91']}
6782  989.002028            {'loc_code': ['91', '922', '9A', '9B']}
6783  989.002028     {'loc_code': ['91', '921', '922', '9A', '9B']}
6784  989.002028                               {'loc_code': ['91']}
6785  989.002028  {'loc_code': ['91', '922', '921', '922', '9B',...

[6786 rows x 2 columns]


In [10]:
# Unpack the per-tag dict records (column 0) into a plain list-valued
# `loc_code` column, then discard the dict column.
sequence_df = pd.DataFrame(sequence_data)
loc_code_lists = sequence_df[0].apply(lambda record: record["loc_code"])
sequence_df["loc_code"] = loc_code_lists
sequence_df = sequence_df.drop(columns=[0])

sequence_df

Unnamed: 0,tag_id,loc_code
0,989.001006,[21]
1,989.001007,"[60, 6A, 6B, 6A, 60]"
2,989.001007,"[22, 202, 201]"
3,989.001007,"[6A, 6B, 6A, 6B, 6A]"
4,989.001007,"[60, 6A, 6B]"
...,...,...
6781,989.002028,[91]
6782,989.002028,"[91, 922, 9A, 9B]"
6783,989.002028,"[91, 921, 922, 9A, 9B]"
6784,989.002028,[91]


In [20]:
# Ordering table used by get_movement_direction: a move from a key to any code
# in its list is labelled 'us'; the reverse move is labelled 'ds'; every other
# pair is labelled 'unknown'.
#
# NOTE(review): several codes that occur in the data have no entry here and
# never appear in a value list ('41', '3A', '3B', '3M', '7C', '7D', '9C',
# '9D'), so every move touching them resolves to 'unknown' - confirm whether
# that is intended.
# NOTE(review): '21' and '22' do not list each other, and '9A' does not list
# '9F'; moves between those pairs are also 'unknown' - confirm.
loc_order = {
    # 1x
    '11': ['12', '13'],
    '12': ['13'],
    '13': [],
    # 2x
    '21': ['201', '202', '231', '232', '233'],
    '22': ['201', '202', '231', '232', '233'],
    '201': ['202', '231', '232', '233'],
    '202': ['231', '232', '233'],
    '231': ['232', '233'],
    '232': ['233'],
    '233': [],
    # 3x
    '301': ['302'],
    '302': [],
    # 4x
    '401': ['402', '42', '44', '45'],
    '402': ['42', '44', '45'],
    '42': ['44', '45'],
    '44': ['45'],
    '45': [],
    # 5x
    '501': ['5A', '5B', '51'],
    '51': [],
    '5A': ['5B', '51'],
    '5B': ['51'],
    # 6x
    '60': ['62', '6A', '6B', '6F'],
    '61': ['62', '6A', '6B', '6F'],
    '62': ['6A', '6B', '6F'],
    '6A': ['6B', '6F'],
    '6B': ['6F'],
    '6C': [],
    '6D': [],
    '6F': [],
    # 7x
    '701': ['702', '71', '7A', '7B'],
    '702': ['71', '7A', '7B'],
    '71': [],
    '7A': ['7B'],
    '7B': [],
    # 8x
    '81': ['82', '83', '84'],
    '82': ['83', '84'],
    '83': ['84'],
    '84': [],
    # 9x
    '91': ['92', '921', '922', '9A', '9B', '9F'],
    '92': ['921', '922', '9A', '9B', '9F'],
    '921': ['922', '9A', '9B', '9F'],
    '922': ['9A', '9B', '9F'],
    '9A': ['9B'],
    '9B': ['9F'],
    '9F': [],
    # Letter-prefixed codes
    'A1': ['A2', 'AA', 'AB', 'C1', 'C2'],
    'A2': ['AA', 'AB', 'C1', 'C2'],
    'AA': ['AB', 'C2'],
    'AB': [],
    'C1': ['C2', 'AB'],
    'C2': [],
}


In [None]:
def get_movement_direction(loc_sequence, loc_order):
    """Label every consecutive pair of locations in a sequence.

    Args:
        loc_sequence: Ordered location codes; each one is converted with
            ``str()`` before lookup.
        loc_order: Mapping from a location code to the codes that count as
            an upstream step when moved to from that code.

    Returns:
        A list with one label per consecutive pair (so one element shorter
        than ``loc_sequence``, and empty for sequences of length 0 or 1):
        ``'us'`` when the second code is listed under the first,
        ``'ds'`` when the first code is listed under the second,
        ``'unknown'`` otherwise.
    """
    def _label_step(origin, destination):
        # Upstream is checked first, matching the priority of the ordering table.
        if destination in loc_order.get(origin, ()):
            return 'us'
        if origin in loc_order.get(destination, ()):
            return 'ds'
        return 'unknown'

    codes = [str(code) for code in loc_sequence]
    return [_label_step(here, there) for here, there in zip(codes, codes[1:])]


# Label every step of each tag's location sequence; loc_order is forwarded to
# get_movement_direction as its second positional argument.
sequence_df['movement_direction'] = sequence_df['loc_code'].apply(
    get_movement_direction, args=(loc_order,)
)

sequence_df.loc[:, ['tag_id', 'loc_code', 'movement_direction']]


Unnamed: 0,tag_id,loc_code,movement_direction
0,989.001006,[21],[]
1,989.001007,"[60, 6A, 6B, 6A, 60]","[us, us, ds, ds]"
2,989.001007,"[22, 202, 201]","[us, ds]"
3,989.001007,"[6A, 6B, 6A, 6B, 6A]","[us, ds, us, ds]"
4,989.001007,"[60, 6A, 6B]","[us, us]"
...,...,...,...
6781,989.002028,[91],[]
6782,989.002028,"[91, 922, 9A, 9B]","[us, us, us]"
6783,989.002028,"[91, 921, 922, 9A, 9B]","[us, us, us, us]"
6784,989.002028,[91],[]
