## Analyze Doppler data from mmWave radar

#### Setup

In [1]:
import sys, os
sys.path.append("../")
from helper.gdrive_downloader import download_with_rclone

import polars as pl
import pandas as pd
from glob import glob

#### Download data from Google Drive

In [2]:
# Google Drive folder URL used as the source of the (currently disabled) download below.
remote_path = "https://drive.google.com/drive/folders/1fI7C13G-UNubbeyqzopRXs2d2cwGM0F5"
# Local data folder, relative to this notebook; the export step below is run on it.
local_path = "../data"

# The download call is disabled; uncomment it to fetch the data.
# NOTE(review): presumably copies the contents of remote_path into local_path
# using rclone — confirm against helper.gdrive_downloader.
# download_with_rclone(remote_path, local_path)

#### Analyze Doppler data

1. Load the data as dataframes
2. Identify duplicates, missing values, and anomalies
3. Question: What is the distribution of `rangeIdx`? It shows how far away the objects in the dataset are and reveals any bias in distance.
    - Same for `dopplerIdx`
4. Distribution of number of objects in the scenes: plot in bins of 5
5. Distribution of `peakVal` in the dataset
6. What is the position of objects in the scene?
7. Distribution and bias in orientation of objects in the dataset
8. Distribution and bias in activity and activity class of objects in the dataset

In [None]:
# Export the data to parquet
def export_to_parquet(data_folder: str):
    """
    Convert every pickled pandas dataframe in ``data_folder`` to a parquet file.

    Each ``<name>.pkl`` file directly inside ``data_folder`` is read with
    pandas, converted to a polars dataframe and written next to the source
    file as ``<name>.parquet``. Only the final ``.pkl`` extension is
    stripped, so ``a.b.pkl`` becomes ``a.b.parquet``.

    Args:
        data_folder: Path of the folder that holds the ``*.pkl`` files.

    Returns:
        None. Progress and errors are reported on stdout.

    Raises:
        ValueError: If ``data_folder`` does not exist.

    Notes:
        A failure while converting one file is printed and does not stop the
        remaining files from being exported. The completion message is only
        printed when every file was exported successfully.
    """
    if not os.path.exists(data_folder):
        raise ValueError(f"Data folder {data_folder} does not exist")

    # Sorted so that files are processed in a reproducible order.
    pickle_paths = sorted(glob(os.path.join(data_folder, "*.pkl")))
    if not pickle_paths:
        print(f"[WARN ⚠️] No pickle files found in {data_folder}")
        return

    failed_paths = []
    for pickle_path in pickle_paths:
        # basename/splitext is separator-agnostic and keeps dots that are part
        # of the file name (only the trailing extension is removed).
        df_name = os.path.splitext(os.path.basename(pickle_path))[0]

        try:
            # SECURITY: unpickling can execute arbitrary code; only run this on
            # pickle files from a trusted source.
            pandas_df = pd.read_pickle(pickle_path)

            if 'doppz' in pandas_df.columns:
                # Cells exposing .tolist() (presumably numpy arrays — TODO confirm)
                # are turned into plain Python lists before the polars conversion.
                pandas_df['doppz'] = pandas_df['doppz'].apply(
                    lambda x: x.tolist() if hasattr(x, "tolist") else x
                )

            pl_dataframe = pl.from_pandas(pandas_df)
            pl_dataframe.write_parquet(os.path.join(data_folder, f"{df_name}.parquet"))

        except Exception as e:
            # Best-effort export: report the failing file and carry on.
            failed_paths.append(pickle_path)
            print(f"[ERROR ❌] Error exporting {pickle_path} as parquet: {e}")

    if not failed_paths:
        print("[INFO ✅] Export to parquet completed!")

# Run the pickle -> parquet export on the local data folder.
export_to_parquet(local_path)

[INFO] Export to parquet completed!


In [None]:
# Load the exported parquet data as a polars dataframe.
# The path is built from `local_path` so the load always reads from the same
# folder that the export step was run on, instead of a duplicated literal.
micro_df = pl.read_parquet(os.path.join(local_path, "micro_df.parquet"))

micro_df.head()

datetime,rangeIdx,dopplerIdx,numDetectedObj,range,peakVal,x_coord,y_coord,doppz,activity,activity_class
str,list[i64],list[i64],f64,list[f64],list[i64],list[f64],list[f64],list[list[i64]],i64,str
"""2023-03-06 22:58:59""","[1, 10, … 63]","[0, 0, … 0]",6.0,"[0.12932, 1.248754, … 7.875]","[6142, 3312, … 1959]","[-0.117188, 0.234375, … 0.0]","[0.0546875, 1.2265625, … 7.875]","[[18095, 16591, … 17600], [18842, 18620, … 17509], … [18550, 18433, … 18326]]",16,"""micro"""
"""2023-03-06 22:59:0""","[1, 10, … 63]","[0, 0, … 0]",6.0,"[0.12932, 1.249047, … 7.878131]","[6143, 3391, … 1962]","[-0.117188, 0.2734375, … -7.140625]","[0.0546875, 1.21875, … 3.328125]","[[19575, 19419, … 18241], [18494, 18615, … 17190], … [19129, 19013, … 17823]]",16,"""micro"""
"""2023-03-06 22:59:0""","[1, 42, … 63]","[0, 0, … 0]",7.0,"[0.12932, 5.252098, … 7.878131]","[6140, 1215, … 1960]","[-0.117188, -0.65625, … -7.140625]","[0.0546875, 5.2109375, … 3.328125]","[[15937, 17874, … 16997], [17924, 17291, … 16826], … [18101, 18402, … 17850]]",16,"""micro"""
"""2023-03-06 22:59:1""","[1, 42, … 63]","[0, 0, … 0]",7.0,"[0.12932, 5.252098, … 7.878131]","[6145, 1136, … 1961]","[-0.117188, -0.65625, … -7.140625]","[0.0546875, 5.2109375, … 3.328125]","[[18342, 17252, … 17881], [18869, 17568, … 18350], … [18495, 18155, … 18521]]",16,"""micro"""
"""2023-03-06 22:59:1""","[1, 42, … 63]","[0, 0, … 0]",9.0,"[0.12932, 5.252098, … 7.878131]","[6140, 1149, … 1958]","[-0.117188, -0.65625, … -7.140625]","[0.0546875, 5.2109375, … 3.328125]","[[18114, 19102, … 18196], [18289, 19046, … 18045], … [18701, 19002, … 17740]]",16,"""micro"""


In [8]:
# Print a compact overview: row and column counts, then dtype and first values of each column.
micro_df.glimpse()

Rows: 25256
Columns: 11
$ datetime                   <str> '2023-03-06 22:58:59', '2023-03-06 22:59:0', '2023-03-06 22:59:0', '2023-03-06 22:59:1', '2023-03-06 22:59:1', '2023-03-06 22:59:2', '2023-03-06 22:59:2', '2023-03-06 22:59:3', '2023-03-06 22:59:3', '2023-03-06 22:59:4'
$ rangeIdx             <list[i64]> [1, 10, 42, 63, 1, 63], [1, 10, 42, 63, 1, 63], [1, 42, 63, 11, 11, 1, 63], [1, 42, 63, 10, 10, 1, 63], [1, 42, 63, 10, 8, 11, 10, 1, 63], [1, 10, 42, 63, 10, 1, 63], [1, 42, 63, 11, 1, 63], [1, 10, 42, 63, 10, 1, 63], [1, 42, 63, 10, 1, 63], [1, 10, 42, 63, 11, 1, 63]
$ dopplerIdx           <list[i64]> [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 1, -5, 0, 0], [0, 0, 0, 1, -4, 0, 0], [0, 0, 0, 1, -7, -5, -2, 0, 0], [0, 0, 0, 0, -1, 0, 0], [0, 0, 0, -1, 0, 0], [0, 0, 0, 0, 3, 0, 0], [0, 0, 0, 2, 0, 0], [0, 0, 0, 0, -4, 0, 0]
$ numDetectedObj             <f64> 6.0, 6.0, 7.0, 7.0, 9.0, 7.0, 6.0, 7.0, 6.0, 7.0
$ range                <list[f64]> [0.129319885603491, 1.248754262