In [2]:
import csv
from datetime import datetime
from collections import defaultdict

def parse_timestamp(timestamp_str):
    """Parse a ``YYYY-MM-DDTHH:MM:SS.ffffffZ`` string into a ``datetime``.

    The trailing ``Z`` is matched as a literal character, so the returned
    object carries no tzinfo.

    Args:
        timestamp_str: Text to parse; the fractional-seconds part is required.

    Returns:
        The parsed ``datetime``, or ``None`` when the text does not match the
        expected layout (a message is printed in that case).
    """
    layout = '%Y-%m-%dT%H:%M:%S.%fZ'
    parsed = None
    try:
        parsed = datetime.strptime(timestamp_str, layout)
    except ValueError as err:
        # Report the malformed value and fall through with parsed = None.
        print(f"Error parsing timestamp {timestamp_str}: {err}")
    return parsed

def parse_entry(entry):
    """Convert one raw CSV row into a typed record.

    Columns are read positionally: timestamp, obj_id, x, y, unique_id and an
    optional sixth column, sensor_id.

    Args:
        entry: Sequence of strings for one row.

    Returns:
        ``[timestamp, obj_id, x, y, unique_id, sensor_id]`` with ``timestamp``
        a ``datetime``, ``x``/``y`` floats and ``sensor_id`` ``None`` when the
        row has no sixth column. Returns ``None`` (after printing a message)
        when the row is too short, its timestamp cannot be parsed, or a
        coordinate is not numeric.
    """
    try:
        timestamp = parse_timestamp(entry[0])
        if timestamp is None:
            # parse_timestamp has already reported the problem. A record
            # without a timestamp cannot be ordered, so drop the row here.
            return None

        obj_id = entry[1]
        x = float(entry[2])
        y = float(entry[3])
        unique_id = entry[4] if entry[4] != 'temp' else '0'  # Handle 'temp' case
        sensor_id = entry[5] if len(entry) > 5 else None  # Handle optional sensor_id

        return [timestamp, obj_id, x, y, unique_id, sensor_id]
    except (ValueError, IndexError) as e:
        # IndexError covers blank or truncated rows (csv.reader yields [] for
        # an empty line); ValueError covers non-numeric coordinates.
        print(f"Error parsing entry {entry}: {e}")
        return None

def cluster_data(data):
    """Group parsed entries into clusters that share a ``unique_id``.

    The first entry seen for a ``unique_id`` becomes the cluster's head record
    ``[timestamp, obj_id, x, y, unique_id, sensor_id, []]``. Every later entry
    with the same ``unique_id`` is appended to that cluster as
    ``[x, y, sensor_id]`` and pulls the head timestamp to the midpoint between
    its current value and the new entry's timestamp (a running midpoint, not
    an arithmetic mean over all entries).

    Args:
        data: Iterable of ``[timestamp, obj_id, x, y, unique_id, sensor_id]``
            records; ``None`` items are skipped.

    Returns:
        A ``(clusters, unique_id_clusters)`` tuple. ``clusters`` lists the
        clusters in order of first appearance. ``unique_id_clusters`` maps each
        ``unique_id`` to a list holding one reference to its cluster per entry
        processed for that id.
    """
    clusters = []
    unique_id_clusters = defaultdict(list)
    # Only the head record of a cluster carries the unique_id, so a direct
    # unique_id -> cluster index replaces scanning every point of every
    # cluster (which also failed on the shorter member points).
    cluster_by_unique_id = {}

    for entry in data:
        if entry is None:
            continue

        timestamp, obj_id, x, y, unique_id, sensor_id = entry
        found_cluster = cluster_by_unique_id.get(unique_id)

        if found_cluster is not None:
            # Add the current point to the existing cluster
            found_cluster.append([x, y, sensor_id])
            # Update the timestamp for the cluster
            head = found_cluster[0]
            try:
                head[0] = (head[0] + timestamp) / 2
            except TypeError:
                # datetime values cannot be added to each other; reach the
                # same midpoint through the timedelta between them.
                head[0] = head[0] + (timestamp - head[0]) / 2
            # Update the unique_id_clusters dictionary
            unique_id_clusters[unique_id].append(found_cluster)
        else:
            # Create a new cluster
            new_cluster = [[timestamp, obj_id, x, y, unique_id, sensor_id, []]]
            clusters.append(new_cluster)
            cluster_by_unique_id[unique_id] = new_cluster
            # Update the unique_id_clusters dictionary
            unique_id_clusters[unique_id].append(new_cluster)

    return clusters, unique_id_clusters

def fuse_clusters(clusters):
    """Collapse each cluster into one output row.

    Args:
        clusters: List of clusters. Each cluster is a non-empty list whose
            first item is the head record (timestamp at index 0, unique_id at
            index 4) followed by zero or more member points.

    Returns:
        A list of ``[f_timestamp, f_id, member_points, f_u_id]`` rows, one per
        cluster, in input order. ``member_points`` excludes the head record.
    """
    fused_data = []

    for cluster in clusters:
        head = cluster[0]
        f_timestamp = head[0]
        # f_id is derived from obj_id, x, y and unique_id of the head record.
        # NOTE: hash() of str values is salted per interpreter process unless
        # PYTHONHASHSEED is fixed, so when the ids are strings this value is
        # not stable across runs.
        f_id = hash(tuple(head[1:5]))
        f_u_id = head[4]  # Use unique_id

        # Member points of six or more fields keep the original projection
        # (indices 2-4 plus index 5). Shorter points, such as the
        # [x, y, sensor_id] triples appended during clustering, are copied
        # unchanged instead of raising IndexError on point[5].
        member_points = [
            point[2:5] + [point[5]] if len(point) > 5 else list(point)
            for point in cluster[1:]
        ]

        fused_data.append([f_timestamp, f_id, member_points, f_u_id])

    return fused_data

def save_to_csv(data, output_file):
    """Append the rows in *data* to the CSV file at *output_file*.

    The file is opened in append mode, so rows already present are kept and
    the new rows are written after them.

    Args:
        data: Iterable of rows; each row is an iterable of field values.
        output_file: Path of the CSV file to append to.
    """
    with open(output_file, 'a', newline='') as sink:
        emit_row = csv.writer(sink).writerow
        for record in data:
            emit_row(record)

def main(input_file, output_file):
    """Run the pipeline: read, parse, sort, cluster, fuse and save.

    Args:
        input_file: Path of the CSV file to read. Its first row is treated as
            a header and skipped.
        output_file: Path of the CSV file that receives the fused rows.

    Problems are reported on stdout instead of being raised: a missing input
    file, an input without any usable row, or any other exception raised
    while processing.
    """
    try:
        # newline='' is what the csv module asks for so that it can handle
        # line endings (including newlines inside quoted fields) itself.
        with open(input_file, 'r', newline='') as csvfile:
            csv_reader = csv.reader(csvfile)
            # Skip the header row. The default keeps a completely empty file
            # from raising StopIteration, so it reaches the "no valid
            # entries" message below instead of the generic handler.
            next(csv_reader, None)
            data = [parse_entry(row) for row in csv_reader]

        # Filter out entries that could not be parsed, and entries without a
        # timestamp, which cannot be ordered by the sort below.
        data = [
            entry for entry in data
            if entry is not None and entry[0] is not None
        ]

        print(f"Total valid entries in the CSV file: {len(data)}")

        if not data:
            print("No valid entries found. Exiting.")
            return

        # Sort the data based on timestamp
        data.sort(key=lambda record: record[0])

        # Cluster the data (the per-unique_id index is not needed here)
        clusters, _ = cluster_data(data)

        # Fuse the clusters
        fused_data = fuse_clusters(clusters)

        # Save the fused data to a new CSV file
        save_to_csv(fused_data, output_file)
        print("Processing complete. Fused data saved to:", output_file)
    except FileNotFoundError:
        print("File not found. Please provide a valid input file path.")
    except Exception as e:
        # Top-level boundary: report any other failure instead of crashing.
        print(f"An error occurred: {e}")

# Example usage: executed whenever this code runs, with a hard-coded input
# path and an output file name given relative to the current directory.
main('/content/test_Data_1.csv', 'output_data_file.csv')


Total valid entries in the CSV file: 4526
Processing complete. Fused data saved to: output_data_file.csv
