In [1]:
#!/usr/bin/env python
# coding: utf-8

import os
import sys
import platform
import logging
import argparse
import trino
import io
import boto3
from itertools import islice
from datetime import datetime, date, timedelta
import pendulum
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
import math



# Lift pandas' display limits so wide frames and long cell values are shown
# in full. Note that max_rows=None makes a bare `df` print every row.
for _display_option in ('display.max_columns', 'display.max_rows', 'display.max_colwidth'):
    pd.set_option(_display_option, None)

In [2]:
# Relative path: the CSV must sit in the notebook's working directory.
capture_csv = 'c2c_can_20Nov_2310_5.csv'
df = pd.read_csv(capture_csv)

# Preview the first five rows.
df.head(5)

Unnamed: 0,id,timestamp,timestamp.1,sequence,byte1,byte8,byte6,byte3,can_id,byte4,byte2,byte5,byte7
0,6,1763660418985,1763660418985,11208,255,255,255,111,419343920,111,255,255,255
1,6,1763660418985,1763660418985,11209,255,125,157,255,418384139,114,255,125,126
2,6,1763660418985,1763660418985,11210,255,255,255,111,419343920,111,255,255,255
3,6,1763660418985,1763660418985,11211,0,125,255,0,419373295,0,125,125,255
4,6,1763660418985,1763660418985,11212,253,255,255,100,217056000,255,0,255,255


In [3]:
# Show the dtype pandas inferred for every column of the loaded frame.
df.dtypes

id             int64
timestamp      int64
timestamp.1    int64
sequence       int64
byte1          int64
byte8          int64
byte6          int64
byte3          int64
can_id         int64
byte4          int64
byte2          int64
byte5          int64
byte7          int64
dtype: object

In [4]:
# Distinct can_id values present in the frame, in ascending order.
sorted(df["can_id"].unique())


[np.int64(1487),
 np.int64(150892043),
 np.int64(201326603),
 np.int64(202058448),
 np.int64(215101223),
 np.int64(217050891),
 np.int64(217056000),
 np.int64(217448864),
 np.int64(217448871),
 np.int64(217449121),
 np.int64(217449127),
 np.int64(217449378),
 np.int64(217449383),
 np.int64(217449893),
 np.int64(217450150),
 np.int64(217450921),
 np.int64(217995840),
 np.int64(407902195),
 np.int64(408885235),
 np.int64(408950771),
 np.int64(409016307),
 np.int64(409081843),
 np.int64(409147379),
 np.int64(409212915),
 np.int64(409278451),
 np.int64(409343987),
 np.int64(409409523),
 np.int64(409475059),
 np.int64(409540595),
 np.int64(409606131),
 np.int64(409671667),
 np.int64(409737203),
 np.int64(409802739),
 np.int64(410982387),
 np.int64(411113459),
 np.int64(411244531),
 np.int64(411375603),
 np.int64(411441139),
 np.int64(411506675),
 np.int64(411572211),
 np.int64(411703283),
 np.int64(411768819),
 np.int64(411834355),
 np.int64(411899891),
 np.int64(411965427),
 np.int64(41216

In [5]:
# CAN IDs of interest. Every value lies between 2**31 and 2**32, i.e. bit 31
# is set, so none of them can match df.can_id until that top bit is removed.
can_id_list = [
    2556434419,
    2556499955,
    2556565491,
    2556631027,
    2556696563,
    2556762099,
    2556827635,
    2556893171,
    2556958707,
    2557024243,
    2557482995,
    2557548531,
    2557614067,
    2557679603,
]


In [6]:
def drop_msb_and_convert(numbers_list):
    """
    Clear the most significant set bit of every integer in a list.

    For each value, the highest 1-bit of its binary representation is removed
    and the remaining bits are read back as a non-negative integer, e.g.
    165 (0b10100101) -> 37 (0b0100101) and 42 (0b101010) -> 10 (0b01010).
    The values 0 and 1 have no bits left afterwards and both map to 0.

    Args:
        numbers_list (list): Iterable of non-negative integers. Any object
            implementing ``__index__`` (e.g. ``numpy.int64``) is accepted.

    Returns:
        list: A new list of plain Python ints, in the same order as the input.

    Raises:
        ValueError: If a value is negative. A negative number has no
            well-defined most significant bit; the earlier string-slicing
            implementation stripped the '-' sign instead and silently
            returned the absolute value.
        TypeError: If a value is not integer-like (e.g. a float or a string).
    """
    # Local import so the cell does not depend on the notebook's import cell.
    import operator

    translated_numbers = []
    for number in numbers_list:
        # Normalise integer-likes (numpy scalars, bool) to a Python int so
        # that bit_length() is available and the result type is uniform.
        value = operator.index(number)
        if value < 0:
            raise ValueError(
                f"cannot drop the most significant bit of a negative number: {number!r}"
            )

        if value == 0:
            # No bit is set, so there is nothing to clear.
            translated_numbers.append(0)
            continue

        # bit_length() - 1 is the position of the highest set bit; XOR with
        # that single-bit mask clears it and leaves the lower bits untouched.
        highest_bit_mask = 1 << (value.bit_length() - 1)
        translated_numbers.append(value ^ highest_bit_mask)

    return translated_numbers


# Translate the CAN IDs of interest with drop_msb_and_convert (defined above)
result_list = drop_msb_and_convert(can_id_list)

print(f"Original list: {can_id_list}")
print(f"Translated list: {result_list}")

# Illustration with small inputs (not the data printed by this cell):
# drop_msb_and_convert([165, 42, 7, 255])
# returns [37, 10, 3, 127]   (165 = 0b10100101 -> 0b0100101 = 37)


Original list: [2556434419, 2556499955, 2556565491, 2556631027, 2556696563, 2556762099, 2556827635, 2556893171, 2556958707, 2557024243, 2557482995, 2557548531, 2557614067, 2557679603]
Translated list: [408950771, 409016307, 409081843, 409147379, 409212915, 409278451, 409343987, 409409523, 409475059, 409540595, 409999347, 410064883, 410130419, 410195955]


In [7]:
# number = can_id_list[0]  # Example: first CAN ID in the list (a plain Python int)
# binary_str = bin(number)

# # Drop the '0b' prefix and the first bit simultaneously
# dropped_msb_str = binary_str[3:]

# # Convert the resulting string back to an integer (base 2)
# new_int_value = int(dropped_msb_str, 2)

# print(f"Original number: {number} (binary: {binary_str})")
# print(f"New number after dropping MSB: {new_int_value} (binary: {dropped_msb_str})")

# # Output:
# # Original number: 165 (binary: 0b10100101)
# # New number after dropping MSB: 37 (binary: 0100101)


In [8]:
# index_can: 1-based position of the row's can_id within result_list.
# Series.map leaves NaN for rows whose can_id is not one of the translated IDs.
position_by_can_id = {
    translated: position
    for position, translated in enumerate(result_list, start=1)
}
df["index_can"] = df["can_id"].map(position_by_can_id)

# old_can_id: the original (untranslated) ID that each translated ID came from.
# result_list and can_id_list are parallel, so zip pairs them element-wise.
mapping = dict(zip(result_list, can_id_list))
df["old_can_id"] = df["can_id"].map(mapping)

In [9]:
# Cast both mapped columns to pandas' nullable integer dtype so that matched
# rows show whole numbers while unmatched rows stay missing (<NA>).
for _mapped_col in ("old_can_id", "index_can"):
    df[_mapped_col] = df[_mapped_col].astype("Int64")

In [None]:
# Column labels in a reading-friendly order (byte1..byte8 ascending).
# NOTE(review): despite the name these are column names, and nothing in the
# visible cells uses the list yet. Confirm whether it is still needed.
rows = [
    'id',
    'timestamp',
    'sequence',
    'can_id',
    'old_can_id',
    'byte1',
    'byte2',
    'byte3',
    'byte4',
    'byte5',
    'byte6',
    'byte7',
    'byte8',
]

In [10]:
# Keep only the rows whose can_id is one of the translated IDs, then order
# them by timestamp and, within a timestamp, by sequence number.
is_target_id = df["can_id"].isin(result_list)
df[is_target_id].sort_values(by=["timestamp", "sequence"])

Unnamed: 0,id,timestamp,timestamp.1,sequence,byte1,byte8,byte6,byte3,can_id,byte4,byte2,byte5,byte7,index_can,old_can_id
31267,6,1763660121446,1763660121446,30662,68,67,68,68,409016307,68,67,68,68,2,2556499955
31161,6,1763660122805,1763660122805,31781,69,69,68,69,409475059,69,69,68,69,9,2556958707
31046,6,1763660125565,1763660125565,34023,68,69,69,69,409409523,68,68,68,68,8,2556893171
30859,6,1763660125966,1763660125966,34342,71,71,71,71,409540595,71,71,71,70,10,2557024243
30819,6,1763660127625,1763660127625,35629,68,68,68,67,409081843,68,67,68,68,3,2556565491
30653,6,1763660127885,1763660127885,35808,67,69,68,67,409147379,68,68,69,68,4,2556631027
30098,6,1763660134985,1763660134985,41627,71,71,71,71,409540595,71,71,71,70,10,2557024243
28981,6,1763660143766,1763660143766,48804,68,69,69,69,409409523,68,68,68,68,8,2556893171
28918,6,1763660143926,1763660143926,48980,69,69,68,69,409475059,69,69,68,69,9,2556958707
28706,6,1763660145705,1763660145705,50513,68,68,68,67,409081843,68,67,68,68,3,2556565491
