## Smart Surveillance System.

---------------------------
### Discovering UPAR Dataset.
### Source: https://github.com/speckean/upar_dataset/tree/main/UPAR

In [3]:
# Load the EasyDict
import pandas as pd
import pickle

with open("dataset_all.pkl", "rb") as f:
    upar_data = pickle.load(f)

# Check the top-level keys
print(type(upar_data))
print(upar_data.keys())


<class 'easydict.EasyDict'>
dict_keys(['description', 'reorder', 'root', 'image_name', 'label', 'attr_name', 'dataset_ids', 'partition', 'weight_train', 'weight_val', 'weight_trainval', 'weight_test'])


In [6]:
print("image_name:", len(upar_data["image_name"]))
print("partition:", len(upar_data["partition"]))
print("label:", len(upar_data["label"]))


image_name: 224737
partition: 8
label: 224737


In [7]:
print(type(upar_data["partition"]))
print(upar_data["partition"].keys())
for key in upar_data["partition"]:
    print(f"{key}: {len(upar_data['partition'][key])} samples")


<class 'easydict.EasyDict'>
dict_keys(['train', 'val', 'test', 'trainval', 'dataset_train', 'dataset_val', 'dataset_test', 'dataset_trainval'])
train: 9 samples
val: 9 samples
test: 9 samples
trainval: 9 samples
dataset_train: 0 samples
dataset_val: 0 samples
dataset_test: 0 samples
dataset_trainval: 0 samples


In [9]:
import numpy as np
import pandas as pd

# Load main data
image_names = upar_data["image_name"]
label_matrix = np.array(upar_data["label"])
attr_names = upar_data["attr_name"]

# Convert label vectors to readable dicts
attribute_dicts = [
    {attr_names[i]: val for i, val in enumerate(attr_vector)}
    for attr_vector in label_matrix
]

# Build DataFrame
upar_df = pd.DataFrame({
    "image_name": image_names,
    "attributes": attribute_dicts
})

# Preview
upar_df.head()


Unnamed: 0,image_name,attributes
0,Market1501/bounding_box_train/0002_c1s1_000451...,"{'Age-Young': 0, 'Age-Adult': 1, 'Age-Old': 0,..."
1,Market1501/bounding_box_train/0002_c1s1_000551...,"{'Age-Young': 0, 'Age-Adult': 1, 'Age-Old': 0,..."
2,Market1501/bounding_box_train/0002_c1s1_000776...,"{'Age-Young': 0, 'Age-Adult': 1, 'Age-Old': 0,..."
3,Market1501/bounding_box_train/0002_c1s1_000801...,"{'Age-Young': 0, 'Age-Adult': 1, 'Age-Old': 0,..."
4,Market1501/bounding_box_train/0002_c1s1_069056...,"{'Age-Young': 0, 'Age-Adult': 1, 'Age-Old': 0,..."


In [10]:
# Show all unique attribute names in UPAR
for attr in attr_names:
    print(attr)


Age-Young
Age-Adult
Age-Old
Gender-Female
Hair-Length-Short
Hair-Length-Long
Hair-Length-Bald
UpperBody-Length-Short
UpperBody-Color-Black
UpperBody-Color-Blue
UpperBody-Color-Brown
UpperBody-Color-Green
UpperBody-Color-Grey
UpperBody-Color-Orange
UpperBody-Color-Pink
UpperBody-Color-Purple
UpperBody-Color-Red
UpperBody-Color-White
UpperBody-Color-Yellow
UpperBody-Color-Other
LowerBody-Length-Short
LowerBody-Color-Black
LowerBody-Color-Blue
LowerBody-Color-Brown
LowerBody-Color-Green
LowerBody-Color-Grey
LowerBody-Color-Orange
LowerBody-Color-Pink
LowerBody-Color-Purple
LowerBody-Color-Red
LowerBody-Color-White
LowerBody-Color-Yellow
LowerBody-Color-Other
LowerBody-Type-Trousers&Shorts
LowerBody-Type-Skirt&Dress
Accessory-Backpack
Accessory-Bag
Accessory-Glasses-Normal
Accessory-Glasses-Sun
Accessory-Hat


###  UPAR Features

#### **Person Traits**
- `Age-Young`, `Age-Adult`, `Age-Old`
- `Gender-Female`
- `Hair-Length-Short`, `Hair-Length-Long`, `Hair-Length-Bald`

#### **Upper Body**
- Length: `UpperBody-Length-Short`
- Color: `Black`, `Blue`, `Brown`, `Green`, `Grey`, `Orange`, `Pink`, `Purple`, `Red`, `White`, `Yellow`, `Other`

#### **Lower Body**
- Length: `LowerBody-Length-Short`
- Color: same as upper body
- Type: `Trousers&Shorts`, `Skirt&Dress`

#### **Accessories**
- `Backpack`, `Bag`, `Glasses-Normal`, `Glasses-Sun`, `Hat`


---------------------------
### Discovering CelebV-HQ Dataset.
### Source: https://github.com/CelebV-HQ/CelebV-HQ/tree/main

In [11]:
import json
import requests

# Load CelebV-HQ attribute file
url = "https://raw.githubusercontent.com/CelebV-HQ/CelebV-HQ/refs/heads/main/celebvhq_info.json"
response = requests.get(url)
celebv_data = json.loads(response.text)

# Show top-level structure
print(type(celebv_data))
print("Total entries:", len(celebv_data))
list(celebv_data.keys())[:3]  # Show sample keys (usually video filenames)


<class 'dict'>
Total entries: 2


['meta_info', 'clips']

In [12]:
# Look inside the 'clips' section
clips = celebv_data["clips"]
print("Total clips:", len(clips))

# Show sample clip key and structure
first_key = list(clips.keys())[0]
print("Sample clip ID:", first_key)
print(json.dumps(clips[first_key], indent=2))


Total clips: 35666
Sample clip ID: M2Ohb0FAaJU_1
{
  "ytb_id": "M2Ohb0FAaJU",
  "duration": {
    "start_sec": 81.62,
    "end_sec": 86.17
  },
  "bbox": {
    "top": 0.0,
    "bottom": 0.8815,
    "left": 0.1964,
    "right": 0.6922
  },
  "attributes": {
    "appearance": [
      0,
      0,
      1,
      0,
      0,
      1,
      1,
      0,
      0,
      0,
      0,
      0,
      0,
      1,
      0,
      0,
      1,
      0,
      1,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      1,
      0,
      0,
      0,
      0,
      0,
      1,
      0,
      1,
      0,
      0,
      0
    ],
    "action": [
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      1,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      1,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      1,
      1,
      0,
      0,
      0,
      0
    ],
    "emotion": {
   

In [14]:
import json

# Check the keys and content of meta_info
meta = celebv_data["meta_info"]
print("Keys in meta_info:", meta.keys())

# Print the whole structure if small, or just the first part
print(json.dumps(meta, indent=2)[:1500])


Keys in meta_info: dict_keys(['appearance_mapping', 'action_mapping'])
{
  "appearance_mapping": [
    "blurry",
    "male",
    "young",
    "chubby",
    "pale_skin",
    "rosy_cheeks",
    "oval_face",
    "receding_hairline",
    "bald",
    "bangs",
    "black_hair",
    "blonde_hair",
    "gray_hair",
    "brown_hair",
    "straight_hair",
    "wavy_hair",
    "long_hair",
    "arched_eyebrows",
    "bushy_eyebrows",
    "bags_under_eyes",
    "eyeglasses",
    "sunglasses",
    "narrow_eyes",
    "big_nose",
    "pointy_nose",
    "high_cheekbones",
    "big_lips",
    "double_chin",
    "no_beard",
    "5_o_clock_shadow",
    "goatee",
    "mustache",
    "sideburns",
    "heavy_makeup",
    "wearing_earrings",
    "wearing_hat",
    "wearing_lipstick",
    "wearing_necklace",
    "wearing_necktie",
    "wearing_mask"
  ],
  "action_mapping": [
    "blow",
    "chew",
    "close_eyes",
    "cough",
    "cry",
    "drink",
    "eat",
    "frown",
    "gaze",
    "glare",
    "he

### CelebV-HQ Features

#### **Appearance** (40 binary attributes)
Examples: `male`, `young`, `bald`, `blonde_hair`, `eyeglasses`, `no_beard`, `wearing_mask`, etc.

#### **Action** (35 binary attributes)
Examples: `smile`, `laugh`, `talk`, `yawn`, `wink`, `drink`, `sneeze`, etc.

#### **Emotion** (single label)
Examples: `neutral`, `happy`, `angry`, etc.

---------------------------
### Discovering FairFace Dataset.
### Source: https://github.com/dchen236/FairFace

In [15]:
import pandas as pd

url = "https://raw.githubusercontent.com/dchen236/FairFace/master/test_outputs.csv"
fairface_df = pd.read_csv(url)

# Inspect structure
fairface_df.info()
fairface_df.head(3)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 9 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   face_name_align     5 non-null      object
 1   race                5 non-null      object
 2   race4               5 non-null      object
 3   gender              5 non-null      object
 4   age                 5 non-null      object
 5   race_scores_fair    5 non-null      object
 6   race_scores_fair_4  5 non-null      object
 7   gender_scores_fair  5 non-null      object
 8   age_scores_fair     5 non-null      object
dtypes: object(9)
memory usage: 492.0+ bytes


Unnamed: 0,face_name_align,race,race4,gender,age,race_scores_fair,race_scores_fair_4,gender_scores_fair,age_scores_fair
0,detected_faces/race_Asian_face0.jpg,East Asian,Asian,Female,30-39,[7.7522168e-04 1.1608704e-05 2.9779517e-03 9.3...,[3.9179469e-03 1.9815963e-05 9.9597895e-01 8.3...,[8.5034102e-05 9.9991494e-01],[5.3760527e-07 2.0359068e-05 1.2785579e-03 9.3...
1,detected_faces/race_Latino_face0.jpg,Latino_Hispanic,Asian,Female,20-29,[0.05775655 0.00232665 0.6921566 0.05506464 0...,[0.10278564 0.01113555 0.85227126 0.03380755],[4.2146090e-05 9.9995786e-01],[3.8011539e-08 8.2747183e-06 1.3017138e-02 7.2...
2,detected_faces/race_White_face0.jpg,White,White,Male,60-69,[9.6510637e-01 3.6202928e-05 1.4138068e-02 6.4...,[9.9643815e-01 1.9284267e-05 2.9810504e-03 5.6...,[0.85520303 0.144797 ],[1.35437411e-04 1.17149175e-04 1.96260633e-04 ...


###  FairFace Features

#### **Age Groups**
- `0-2`, `3-9`, `10-19`, `20-29`, `30-39`, `40-49`, `50-59`, `60-69`, `70+`

#### **Gender**
- `Male`, `Female`

#### **Race / Ethnicity**
- `White`, `Black`, `Latino_Hispanic`, `East Asian`, `Southeast Asian`, `Indian`, `Middle Eastern`  
- Also includes simplified 4-class version: `White`, `Black`, `Asian`, `Indian`

#### **Scores**
- `race_scores_fair`, `gender_scores_fair`, `age_scores_fair`: softmax output for confidence
