## Convert .ndjson to Pandas
Here we take the .ndjson file exported from Labelbox and convert it into a friendlier pandas DataFrame.

In [1]:
!pip install pandas ndjson

Collecting pandas
  Using cached pandas-2.0.3-cp38-cp38-macosx_11_0_arm64.whl.metadata (18 kB)
Collecting ndjson
  Downloading ndjson-0.3.1-py2.py3-none-any.whl.metadata (3.2 kB)
Collecting pytz>=2020.1 (from pandas)
  Using cached pytz-2024.1-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.1 (from pandas)
  Using cached tzdata-2024.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pandas-2.0.3-cp38-cp38-macosx_11_0_arm64.whl (10.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.7/10.7 MB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading ndjson-0.3.1-py2.py3-none-any.whl (5.3 kB)
Using cached pytz-2024.1-py2.py3-none-any.whl (505 kB)
Using cached tzdata-2024.1-py2.py3-none-any.whl (345 kB)
Installing collected packages: pytz, ndjson, tzdata, pandas
Successfully installed ndjson-0.3.1 pandas-2.0.3 pytz-2024.1 tzdata-2024.1

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m

In [2]:
import pandas as pd
import ndjson

# Load .ndjson data (one JSON document per line).
# The encoding is passed explicitly: JSON text is UTF-8, whereas open() would
# otherwise fall back to the platform's locale encoding.
with open('data/testing_detection.ndjson', encoding='utf-8') as f:
    data = ndjson.load(f)

# Flattened view of the raw export; not used by the extraction below, kept so
# the full record structure can still be inspected from this name.
df = pd.json_normalize(data)


def _bounding_box_rows(rows):
    """Flatten exported rows into one record per bounding-box object.

    Walks every project -> label -> annotation object of each row and emits a
    dict holding the image identifiers, the object's name and its box geometry.

    Objects that carry no 'bounding_box' entry (e.g. other annotation kinds,
    if the project has any) are skipped instead of raising KeyError.

    Args:
        rows: iterable of dicts parsed from the .ndjson export. Each dict is
            expected to provide 'data_row' and 'projects' entries shaped as
            accessed below.

    Returns:
        list of dicts with keys image_id, image_url, label, top, left,
        height, width.
    """
    records = []
    for item in rows:
        # Project ids (the dict keys) are not needed, only the payloads.
        for project_data in item['projects'].values():
            for label in project_data['labels']:
                for obj in label['annotations']['objects']:
                    box = obj.get('bounding_box')
                    if box is None:
                        continue
                    records.append({
                        'image_id': item['data_row']['external_id'],
                        'image_url': item['data_row']['row_data'],
                        'label': obj['name'],
                        'top': box['top'],
                        'left': box['left'],
                        'height': box['height'],
                        'width': box['width']
                    })
    return records


# Extract relevant columns
annotations = _bounding_box_rows(data)

# Naming the columns explicitly keeps the schema (and the CSV header) stable
# even when the export contains no bounding boxes at all.
annotations_df = pd.DataFrame(
    annotations,
    columns=['image_id', 'image_url', 'label', 'top', 'left', 'height', 'width'],
)

# Save the DataFrame to a CSV file if needed
annotations_df.to_csv('data/testing_annotations.csv', index=False)

print(annotations_df)


                 image_id                                          image_url  \
0     bathroom_mockup.png  https://storage.labelbox.com/clxkxmd5l0unt07zt...   
1      bedroom_mockup.png  https://storage.labelbox.com/clxkxmd5l0unt07zt...   
2      hallway_mockup.png  https://storage.labelbox.com/clxkxmd5l0unt07zt...   
3      kitchen_mockup.png  https://storage.labelbox.com/clxkxmd5l0unt07zt...   
4   livingroom_mockup.png  https://storage.labelbox.com/clxkxmd5l0unt07zt...   
5       office_mockup.png  https://storage.labelbox.com/clxkxmd5l0unt07zt...   
6              test_1.png  https://storage.labelbox.com/clxkxmd5l0unt07zt...   
7              test_2.png  https://storage.labelbox.com/clxkxmd5l0unt07zt...   
8              test_3.png  https://storage.labelbox.com/clxkxmd5l0unt07zt...   
9              test_4.png  https://storage.labelbox.com/clxkxmd5l0unt07zt...   
10             test_5.png  https://storage.labelbox.com/clxkxmd5l0unt07zt...   
11             test_6.png  https://stora