1. Review the detection locations and work out which ones are upstream/downstream of each other.
2. Write directionality rules based on `loc_code`.

e.g. (loc_code sequence - direction of each consecutive step; us = upstream, ds = downstream):
1. [41, 42, 43] - [us, us]
2. [22, 21, 22] - [ds, us]

In [1]:
import pandas as pd

In [2]:
# Load the detection-pattern table.
# `loc_code` is pinned to str because every later cell treats it as text
# (comparison with '3M', replace '60' -> '61', x[0].isdigit()); without an explicit
# dtype pandas infers the column type, so numeric-looking codes could come back as ints.
# NOTE(review): tag_id is still inferred and is displayed as a float (e.g. 989.001007);
# if it is an identifier it may be safer to read it as str as well — confirm before changing.
dwell_loc = pd.read_csv(
    "data/loc_code_detection_patterns.csv",
    dtype={"loc_code": str},
)

In [3]:
# Keep only the tag and location columns, drop every '3M' detection, and fold
# '60' into '61' (the two codes are treated as the same location).
not_3m = dwell_loc['loc_code'].ne('3M')
direction = dwell_loc.loc[not_3m, ['tag_id', 'loc_code']].replace('60', '61')
direction

Unnamed: 0,tag_id,loc_code
0,989.001007,7A
1,989.001033,21
2,989.001007,22
3,989.001007,232
4,989.001007,231
...,...,...
37242,989.002028,202
37243,989.002028,201
37244,989.001034,401
37245,989.001034,402


In [4]:
def _category_of(code):
    """Return the first character of `code` when it is a digit, otherwise 'others'."""
    lead = code[0]
    return lead if lead.isdigit() else 'others'


# Tag every detection with its category (adds a column to `direction`).
direction['category'] = direction['loc_code'].apply(_category_of)

# category -> sorted list of the distinct loc_codes seen in that category
categorized_lists = {
    category: sorted(set(codes))
    for category, codes in direction.groupby('category')['loc_code']
}

for category, loc_codes in categorized_lists.items():
    print(f"Category '{category}': {loc_codes}")

Category '1': ['11', '12', '13']
Category '2': ['201', '202', '21', '22', '231', '232', '233']
Category '3': ['301', '302', '3A', '3B']
Category '4': ['401', '402', '41', '42', '44', '45']
Category '5': ['501', '51', '5A', '5B']
Category '6': ['61', '62', '6A', '6B', '6C', '6D', '6F']
Category '7': ['701', '702', '71', '7A', '7B', '7C', '7D']
Category '8': ['81', '82', '83', '84']
Category '9': ['91', '92', '921', '922', '9A', '9B', '9C', '9D', '9F']
Category 'others': ['A1', 'A2', 'AA', 'AB', 'C1', 'C2', '_C']


In [5]:
def _pack_loc_codes(tag_rows):
    """Wrap one group's loc_code values, in row order, in a single-key dict."""
    return {"loc_code": tag_rows["loc_code"].tolist()}


# One row per tag_id; the packed dict ends up in a column named 0 after reset_index().
# NOTE(review): the saved output of this cell repeats the same tag_id on several rows,
# which a groupby on tag_id alone cannot produce — the output looks stale; re-run to confirm.
grouped_by_tag = direction.groupby("tag_id")
sequence_data = grouped_by_tag.apply(_pack_loc_codes).reset_index()

print(sequence_data)

          tag_id                                                  0
0     989.001007       {'loc_code': ['61', '6A', '6B', '6A', '61']}
1     989.001007                 {'loc_code': ['22', '202', '201']}
2     989.001007       {'loc_code': ['6A', '6B', '6A', '6B', '6A']}
3     989.001007                   {'loc_code': ['61', '6A', '6B']}
4     989.001007                       {'loc_code': ['202', '201']}
...          ...                                                ...
6996  989.002028                               {'loc_code': ['91']}
6997  989.002028            {'loc_code': ['91', '922', '9A', '9B']}
6998  989.002028     {'loc_code': ['91', '921', '922', '9A', '9B']}
6999  989.002028                               {'loc_code': ['91']}
7000  989.002028  {'loc_code': ['91', '922', '921', '922', '9B',...

[7001 rows x 2 columns]


In [6]:
# Unpack the dict stored in column 0 into a plain `loc_code` list column,
# then discard the dict column.
sequence_df = pd.DataFrame(sequence_data)
packed_codes = sequence_df[0]
sequence_df["loc_code"] = packed_codes.apply(lambda packed: packed["loc_code"])
sequence_df = sequence_df.drop(columns=[0])

sequence_df

Unnamed: 0,tag_id,loc_code
0,989.001007,"[61, 6A, 6B, 6A, 61]"
1,989.001007,"[22, 202, 201]"
2,989.001007,"[6A, 6B, 6A, 6B, 6A]"
3,989.001007,"[61, 6A, 6B]"
4,989.001007,"[202, 201]"
...,...,...
6996,989.002028,[91]
6997,989.002028,"[91, 922, 9A, 9B]"
6998,989.002028,"[91, 921, 922, 9A, 9B]"
6999,989.002028,[91]


In [7]:
# Ordering table: loc_order[a] lists the codes b for which a step a -> b is
# labelled 'us' by get_movement_direction (and b -> a is labelled 'ds').
# NOTE(review): codes that appear in the category listing but have no key here:
# '41', '3A', '3B', '7C', '7D', '9C', '9D', '_C' — steps between two such codes come out
# as 'unknown'. Confirm whether they should be added.
loc_order = {
    # 1x
    "11": ["12", "13"],
    "12": ["13"],
    "13": [],
    # 2x
    "21": ["201", "202", "231", "232", "233"],
    "22": ["201", "202", "231", "232", "233"],
    "201": ["202", "231", "232", "233"],
    "202": ["231", "232", "233"],
    "231": ["232", "233"],
    "232": ["233"],
    "233": [],
    # 3x
    "301": ["302"],
    "302": [],
    # 4x
    "401": ["402", "42", "44", "45"],
    "402": ["42", "44", "45"],
    "42": ["44", "45"],
    "44": ["45"],
    "45": [],
    # 5x
    "501": ["5A", "5B", "51"],
    "51": [],
    "5A": ["5B", "51"],
    "5B": ["51"],
    # 6x — '60' is rewritten to '61' when `direction` is built, so the '60' key
    # is not expected to be hit by that data.
    "60": ["62", "6A", "6B", "6F"],
    "61": ["62", "6A", "6B", "6F"],
    "62": ["6A", "6B", "6F"],
    "6A": ["6B", "6F"],
    "6B": ["6F"],
    "6C": [],
    "6D": [],
    "6F": [],
    # 7x
    "701": ["702", "71", "7A", "7B"],
    "702": ["71", "7A", "7B"],
    "71": [],
    "7A": ["7B"],
    "7B": [],
    # 8x
    "81": ["82", "83", "84"],
    "82": ["83", "84"],
    "83": ["84"],
    "84": [],
    # 9x — '9C' and '9D' only appear as targets of '91'; they have no key of their own.
    "91": ["92", "921", "922", "9A", "9B", "9F", "9C", "9D"],
    "92": ["921", "922", "9A", "9B", "9F"],
    "921": ["922", "9A", "9B", "9F"],
    "922": ["9A", "9B", "9F"],
    "9A": ["9B"],
    "9B": ["9F"],
    "9F": [],
    # lettered codes
    "A1": ["A2", "AA", "AB", "C1", "C2"],
    "A2": ["AA", "AB", "C1", "C2"],
    "AA": ["AB", "C2"],
    "AB": [],
    "C1": ["C2", "AB"],
    "C2": [],
}


In [8]:
# Spot-check a single entry of the ordering table: the codes listed under '91'.
loc_order['91']

['92', '921', '922', '9A', '9B', '9F', '9C', '9D']

In [9]:
def get_movement_direction(loc_sequence, loc_order):
    """Label each consecutive pair of location codes in a sequence.

    Parameters
    ----------
    loc_sequence : sequence
        Location codes in detection order; each item is converted with ``str``.
    loc_order : dict
        Maps a code to the list of codes that count as the 'us' side of it.

    Returns
    -------
    list of str
        One label per adjacent pair (``len(loc_sequence) - 1`` items, empty for
        sequences shorter than two):
        'us' when the second code is listed under the first,
        'ds' when the first code is listed under the second,
        'unknown' otherwise — this includes a code followed by itself and
        codes that are missing from ``loc_order``.
    """
    codes = [str(code) for code in loc_sequence]

    def classify(current, following):
        # The 'us' check comes first, so it wins if both directions were listed.
        if following in loc_order.get(current, ()):
            return 'us'
        if current in loc_order.get(following, ()):
            return 'ds'
        return 'unknown'

    return [classify(current, following) for current, following in zip(codes, codes[1:])]


# Label every step of each row's loc_code sequence and keep the labels next to the codes.
sequence_df['movement_direction'] = [
    get_movement_direction(codes, loc_order) for codes in sequence_df['loc_code']
]

sequence_df[['tag_id', 'loc_code', 'movement_direction']]


Unnamed: 0,tag_id,loc_code,movement_direction
0,989.001007,"[61, 6A, 6B, 6A, 61]","[us, us, ds, ds]"
1,989.001007,"[22, 202, 201]","[us, ds]"
2,989.001007,"[6A, 6B, 6A, 6B, 6A]","[us, ds, us, ds]"
3,989.001007,"[61, 6A, 6B]","[us, us]"
4,989.001007,"[202, 201]",[ds]
...,...,...,...
6996,989.002028,[91],[]
6997,989.002028,"[91, 922, 9A, 9B]","[us, us, us]"
6998,989.002028,"[91, 921, 922, 9A, 9B]","[us, us, us, us]"
6999,989.002028,[91],[]


In [11]:
# Rows whose sequence contains at least one step labelled 'unknown'.
has_unknown_step = sequence_df["movement_direction"].apply(lambda steps: "unknown" in steps)
sequence_df[has_unknown_step]

Unnamed: 0,tag_id,loc_code,movement_direction
94,989.001034,"[61, 61, 61]","[unknown, unknown]"
131,989.001034,"[7B, 7A, 71]","[ds, unknown]"
192,989.001034,"[7B, 7A, 7B, 7A, 7B, 71]","[ds, us, ds, us, unknown]"
208,989.001034,"[9D, 9A, 9B]","[unknown, us]"
212,989.001034,"[9C, 9D, 91]","[unknown, ds]"
...,...,...,...
6687,989.001046,"[9D, 9C, 91, 922, 921, 922, 9A, 9B, 9A]","[unknown, ds, us, ds, us, us, us, ds]"
6689,989.002027,"[61, 61, 61]","[unknown, unknown]"
6711,989.002028,"[6A, 501]",[unknown]
6912,989.002028,"[922, 921, 9D, 9C, 9D, 9C, 9D, 9C, 9D, 9C, 9D,...","[ds, unknown, unknown, unknown, unknown, unkno..."
