In [5]:
import pandas as pd  
from scipy.spatial.distance import cdist 
import os 
import numpy as np

class TerritoryProcessor:
    """Load per-frame territory positions from a CSV file and clean up their IDs.

    The pipeline (see ``process_and_save_updated_dataframe``) merges IDs that
    appear to be relabelled copies of a known territory, drops IDs that occur
    in too few frames, splits a track into a new ID when it jumps further than
    a threshold distance between consecutive rows, and writes the result to a
    new CSV file.

    Attributes:
        file_path: Path of the CSV file the data was loaded from; also used to
            derive the output file name.
        df: The working DataFrame with columns ``old``, ``frame``, ``pos_x``,
            ``pos_y`` and ``territory_id``.
        reference_ratios_df: Neighbour profiles of the known territories; set
            by ``process_initial_frame`` and extended by
            ``find_unique_mislabeled_ids``.
    """

    # Column layout of one neighbour profile (one row of reference_ratios_df).
    _RATIO_COLUMNS = ['territory_id', 'closest_id_1', 'closest_id_2', 'distance_ratio']

    def __init__(self, file_path):
        """Read the CSV at ``file_path`` and normalise its columns.

        The first line of the file is skipped and the columns are named
        explicitly. Both coordinates are multiplied by 3 and ``territory_id``
        is cast to ``int``.
        """
        # Remember the source path so the output name does not depend on a global.
        self.file_path = file_path
        self.df = pd.read_csv(
            file_path,
            skiprows=[0],
            header=None,
            names=['old', 'frame', 'pos_x', 'pos_y', 'territory_id'],
        )
        # NOTE(review): the factor 3 is presumably a unit/pixel scale - confirm.
        self.df['pos_x'] = self.df['pos_x'] * 3
        self.df['pos_y'] = self.df['pos_y'] * 3
        self.df['territory_id'] = self.df['territory_id'].astype(int)

    def calculate_distance_ratios(self, territory_df, distances_matrix):
        """Build a neighbour profile for every row of ``territory_df``.

        Args:
            territory_df: Rows of a single frame; must contain ``territory_id``.
            distances_matrix: Square pairwise-distance matrix whose row/column
                order matches the row order of ``territory_df``.

        Returns:
            A list with one dict per row holding ``territory_id``, the IDs of
            the two nearest other rows (``closest_id_1``, ``closest_id_2``) and
            ``distance_ratio`` (nearer distance divided by farther distance).
            The list is empty when fewer than three rows are given, because
            two neighbours cannot be determined then.
        """
        # tolist() yields native Python scalars, so integer IDs stay integers
        # (row-wise iteration over a mixed-dtype frame would upcast them to float).
        ids = territory_df['territory_id'].tolist()
        if len(ids) < 3:
            return []

        distances_matrix = np.asarray(distances_matrix)
        ratios = []
        for position, territory_id in enumerate(ids):
            row_distances = distances_matrix[position, :]
            # Rank 0 is expected to be the row itself (distance 0); ranks 1-2
            # are the two nearest neighbours.
            first, second = row_distances.argsort()[1:3]
            larger_distance = max(row_distances[first], row_distances[second])
            smaller_distance = min(row_distances[first], row_distances[second])
            # Coincident points give 0/0; report NaN explicitly so the profile
            # never matches anything instead of emitting a runtime warning.
            if larger_distance > 0:
                distance_ratio = float(smaller_distance / larger_distance)
            else:
                distance_ratio = float('nan')
            ratios.append({
                'territory_id': territory_id,
                'closest_id_1': ids[first],
                'closest_id_2': ids[second],
                'distance_ratio': distance_ratio,
            })
        return ratios

    def process_initial_frame(self):
        """(Re)build ``reference_ratios_df`` from the reference frame.

        The reference frame is the one with the highest ``frame`` value in
        ``self.df``. With fewer than three rows in that frame the reference
        table is empty but still has the expected columns.
        """
        reference_frame = self.df[self.df['frame'] == self.df['frame'].max()]
        if len(reference_frame) < 3:
            profiles = []
        else:
            coords = reference_frame[['pos_x', 'pos_y']]
            distances = cdist(coords, coords, 'euclidean')
            profiles = self.calculate_distance_ratios(reference_frame, distances)
        self.reference_ratios_df = pd.DataFrame(profiles, columns=self._RATIO_COLUMNS)

    @staticmethod
    def _profiles_match(reference_profile, ratio_profile, ratio_tolerance):
        """Return True when both profiles share neighbours and a similar ratio."""
        return (
            reference_profile['closest_id_1'] == ratio_profile['closest_id_1']
            and reference_profile['closest_id_2'] == ratio_profile['closest_id_2']
            and abs(reference_profile['distance_ratio'] - ratio_profile['distance_ratio']) < ratio_tolerance
        )

    def find_unique_mislabeled_ids(self, ratio_tolerance=0.05):
        """Find IDs that look like relabelled copies of a reference territory.

        Requires ``process_initial_frame`` to have been called first. Every
        frame is scanned in order of first appearance. For each ID that is not
        yet in the reference table, the first reference profile with the same
        two nearest neighbours and a distance ratio within ``ratio_tolerance``
        decides the outcome:

        * match whose reference ID is also present in the frame: the ID is
          left alone (neither recorded nor added to the reference table);
        * match whose reference ID is absent from the frame: the pair
          ``(reference_id, id)`` is recorded and the profile is added to the
          reference table;
        * no match: the profile is added to the reference table.

        Frames with fewer than three rows are skipped because no neighbour
        profile can be built for them.

        Args:
            ratio_tolerance: Maximum absolute difference between two distance
                ratios for them to count as similar.

        Returns:
            A set of ``(original_id, mislabeled_id)`` tuples.
        """
        # Work on plain records plus an ID set instead of growing a DataFrame
        # inside the loops; the table is written back once at the end.
        reference_profiles = self.reference_ratios_df.to_dict('records')
        known_ids = {profile['territory_id'] for profile in reference_profiles}
        unique_mislabeled_id_tuples = set()

        for _, frame_data in self.df.groupby('frame', sort=False):
            if len(frame_data) < 3:
                continue
            coords = frame_data[['pos_x', 'pos_y']]
            distances_matrix = cdist(coords, coords, 'euclidean')
            ids_in_frame = set(frame_data['territory_id'].tolist())

            for ratio_profile in self.calculate_distance_ratios(frame_data, distances_matrix):
                territory_id = ratio_profile['territory_id']
                if territory_id in known_ids:
                    continue

                # Only the first matching reference profile is considered.
                match = next(
                    (
                        reference_profile
                        for reference_profile in reference_profiles
                        if self._profiles_match(reference_profile, ratio_profile, ratio_tolerance)
                    ),
                    None,
                )
                if match is not None:
                    if match['territory_id'] in ids_in_frame:
                        # The look-alike reference territory is itself visible
                        # in this frame, so this ID is not treated as new.
                        continue
                    unique_mislabeled_id_tuples.add((match['territory_id'], territory_id))

                reference_profiles.append(ratio_profile)
                known_ids.add(territory_id)

        self.reference_ratios_df = pd.DataFrame(reference_profiles, columns=self._RATIO_COLUMNS)
        return unique_mislabeled_id_tuples

    def update_territory_ids(self, unique_mislabeled_id_tuples):
        """Replace each mislabeled ID in ``self.df`` with its original ID.

        Args:
            unique_mislabeled_id_tuples: Iterable of
                ``(original_id, mislabeled_id)`` pairs. Replacements are
                applied one after another, so a later pair sees the result of
                earlier ones.
        """
        for original_id, mislabeled_id in unique_mislabeled_id_tuples:
            self.df['territory_id'] = self.df['territory_id'].replace(mislabeled_id, original_id)

    def remove_low_count_ids(self, threshold):
        """Drop all rows whose territory ID occurs too rarely.

        Args:
            threshold: Fraction of the number of distinct frames; an ID is
                removed when its row count is below
                ``threshold * number_of_frames``.
        """
        minimum_count = threshold * self.df['frame'].nunique()
        counts = self.df['territory_id'].value_counts()
        ids_to_keep = counts.index[counts >= minimum_count]
        # One vectorised filter instead of refiltering the frame once per ID.
        self.df = self.df[self.df['territory_id'].isin(ids_to_keep)]

    def check_and_correct_sudden_movement(self, subset_df, threshold_distance=200):
        """Split a track into new IDs wherever it jumps too far between rows.

        Rows of ``subset_df`` are walked in their given order. Whenever the
        Euclidean distance between a row and the previous one exceeds
        ``threshold_distance``, a fresh ID (current maximum ID in ``self.df``
        plus one) is created and a message is printed. From the first jump on,
        every row is written to ``self.df`` with the most recently created ID.

        Args:
            subset_df: Rows of ``self.df`` belonging to one territory ID; its
                index labels must exist in ``self.df``.
            threshold_distance: Jump distance above which a new ID starts.
        """
        if len(subset_df) < 2:
            return  # no consecutive pair to compare

        # Only consecutive-row distances are needed, so compute them directly
        # rather than building the full pairwise matrix.
        coords = subset_df[['pos_x', 'pos_y']].to_numpy(dtype=float)
        step_distances = np.linalg.norm(np.diff(coords, axis=0), axis=1)

        row_labels = subset_df.index
        new_territory_id = None  # stays None until the first jump is seen
        for position, distance in enumerate(step_distances, start=1):
            label = row_labels[position]
            if distance > threshold_distance:
                old_territory_id = subset_df.at[label, 'territory_id']
                new_territory_id = self.df['territory_id'].max() + 1
                print(f"Correction started: Frame {subset_df.at[label, 'frame']}, "
                      f"Original Territory ID: {old_territory_id}, New Territory ID: {new_territory_id}")

            if new_territory_id is not None:
                self.df.at[label, 'territory_id'] = new_territory_id

    def process_and_save_updated_dataframe(self, output_folder, threshold=0.3, second_threshold=0.5):
        """Run the full clean-up pipeline and save the result as CSV.

        Steps: build the reference profiles, merge mislabeled IDs, drop rare
        IDs (``threshold``), split tracks at sudden movements, then repeat the
        merge and drop rare IDs again (``second_threshold``) to clean up IDs
        introduced by the splitting step.

        Args:
            output_folder: Directory for the output file; created if missing.
            threshold: Minimum frame fraction for the first rare-ID removal.
            second_threshold: Minimum frame fraction for the second removal.

        Returns:
            The updated DataFrame (``self.df``). The file is written to
            ``<output_folder>/<input name>_updated<input extension>``.
        """
        self.process_initial_frame()
        self.update_territory_ids(self.find_unique_mislabeled_ids())
        self.remove_low_count_ids(threshold)
        # unique() is evaluated once, so IDs created by the splitting step are
        # not themselves revisited in this loop.
        for unique_id in self.df['territory_id'].unique():
            subset_df = self.df[self.df['territory_id'] == unique_id]
            self.check_and_correct_sudden_movement(subset_df)

        # Second round of correction to clean the detection errors caused by the algorithm
        self.process_initial_frame()
        self.update_territory_ids(self.find_unique_mislabeled_ids())
        self.remove_low_count_ids(second_threshold)

        # Derive the output name from the path this instance was loaded from.
        file_name, file_extension = os.path.splitext(os.path.basename(self.file_path))
        new_file_name = f'{file_name}_updated{file_extension}'
        if output_folder:
            os.makedirs(output_folder, exist_ok=True)
        new_file_path = os.path.join(output_folder, new_file_name)
        self.df.to_csv(new_file_path, index=False)
        print(f'DataFrame saved to {new_file_path}')

        return self.df

# Usage example: runs only when this file is executed directly (or as a
# notebook cell), not when it is imported, so importing has no file I/O.
if __name__ == "__main__":
    file_path = 'D:/MELA/Territory/20230311_SM_Lek1_P2D4_DJI_0857.csv'  # Input CSV file
    output_folder = 'D:/MELA/Territory/'  # Folder that receives the updated CSV
    processor = TerritoryProcessor(file_path)  # Load and normalise the input data
    # Run the clean-up pipeline and write '<input name>_updated.csv' to output_folder.
    updated_df = processor.process_and_save_updated_dataframe(output_folder, threshold=0.5)

    # 'updated_df' can now be used for further analysis.
    # Example: print the updated DataFrame (pandas abbreviates long frames).
    print(updated_df)


Correction started: Frame 191.0, Original Territory ID: 224, New Territory ID: 225
Correction started: Frame 192.0, Original Territory ID: 224, New Territory ID: 226
Correction started: Frame 3038.0, Original Territory ID: 224, New Territory ID: 227
Correction started: Frame 3041.0, Original Territory ID: 224, New Territory ID: 228
Correction started: Frame 1006.0, Original Territory ID: 14, New Territory ID: 229
Correction started: Frame 1006.0, Original Territory ID: 14, New Territory ID: 230
Correction started: Frame 1007.0, Original Territory ID: 14, New Territory ID: 231
Correction started: Frame 1008.0, Original Territory ID: 14, New Territory ID: 232
Correction started: Frame 1008.0, Original Territory ID: 14, New Territory ID: 233
Correction started: Frame 1009.0, Original Territory ID: 14, New Territory ID: 234
Correction started: Frame 1009.0, Original Territory ID: 14, New Territory ID: 235
Correction started: Frame 1010.0, Original Territory ID: 14, New Territory ID: 236
Co