# Analysing scraped data

In [None]:
import pandas as pd

In [None]:
# Relative path of the scraped metadata table.
file_path = 'csv_files/metadata_v2.csv'

# Parse the whole CSV into a DataFrame; the cells below all work on `df`.
df = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
# Preview the first rows of the DataFrame to check how the CSV was parsed.
df.head()

In [None]:
# Print every distinct raw value of the TECHNIQUE column, one per line.
unique_techniques = df['TECHNIQUE'].unique()
for technique in unique_techniques:
    print(technique)

In [None]:
# Keep only the part of each TECHNIQUE entry before the first comma,
# with surrounding whitespace removed.
technique_parts = df['TECHNIQUE'].str.split(',')
df['TECHNIQUE'] = technique_parts.str.get(0).str.strip()

In [None]:
# Print the distinct TECHNIQUE values again to inspect the column's current state.
unique_techniques = df['TECHNIQUE'].unique()
for technique in unique_techniques:
    print(technique)

In [None]:
# Rows whose TECHNIQUE contains "oil" in any letter case.
# Missing values are replaced by '' first, so they never match.
is_oil = df['TECHNIQUE'].fillna('').str.contains('oil', case=False)
filtered_df = df[is_oil]
filtered_df.head()

In [None]:
# Number of rows matched by the "oil" filter.
len(filtered_df)

In [None]:
# Rows whose TITLE contains "Virgin" in any letter case.
# Missing values are replaced by '' first, so they never match.
has_virgin = df['TITLE'].fillna('').str.contains('Virgin', case=False)
filtered_df_2 = df[has_virgin]
filtered_df_2.head()

In [None]:
# Number of rows matched by the "Virgin" title filter.
len(filtered_df_2)

In [None]:
# Print the TITLE column of the rows matched by the "Virgin" filter.
print(filtered_df_2['TITLE'])

# Analysing COCO data

In [6]:
import json

In [7]:
# COCO person-keypoints annotation file (val2017 split).
coco_json_path = '../extra/coco_json/person_keypoints_val2017.json'

# Name the encoding explicitly so the read does not depend on the platform's
# default text encoding.
with open(coco_json_path, encoding='utf-8') as f:
    data = json.load(f)

In [8]:
# Show the top-level keys of the loaded annotation file.
data.keys()

dict_keys(['info', 'licenses', 'images', 'annotations', 'categories'])

In [9]:
# Number of entries under the 'images' key.
len(data['images'])

5000

In [10]:
# Inspect the first image record to see which fields it carries.
data['images'][0]

{'license': 4,
 'file_name': '000000397133.jpg',
 'coco_url': 'http://images.cocodataset.org/val2017/000000397133.jpg',
 'height': 427,
 'width': 640,
 'date_captured': '2013-11-14 17:02:52',
 'flickr_url': 'http://farm7.staticflickr.com/6116/6255196340_da26cf2c9e_z.jpg',
 'id': 397133}

## Keep only images that have keypoints

In [None]:
from pycocotools.coco import COCO

# Build a pycocotools COCO object from the annotation file; later cells
# query it through coco.anns, loadImgs, getAnnIds and loadAnns.
coco = COCO(annotation_file=coco_json_path)


In [None]:
# Collect the IDs of images that have at least one labelled keypoint.
#
# Testing the "keypoints" list for truthiness does not work: a non-empty list
# is truthy even when every entry is 0, so annotations without any labelled
# keypoint would be kept. "num_keypoints" holds the labelled count; when it is
# absent, count the non-zero visibility flags (every third value) instead.
image_ids = []
seen_image_ids = set()
for ann in coco.anns.values():
    labelled = ann.get("num_keypoints")
    if labelled is None:
        labelled = sum(1 for v in (ann.get("keypoints") or ())[2::3] if v > 0)
    if labelled <= 0:
        continue
    # An image with several annotated people appears in several annotations;
    # record it once, keeping first-seen order.
    if ann["image_id"] not in seen_image_ids:
        seen_image_ids.add(ann["image_id"])
        image_ids.append(ann["image_id"])


In [None]:
# File name of every collected image ID, in the same order as image_ids.
image_filenames = [
    coco.loadImgs(img_id)[0]["file_name"]
    for img_id in image_ids
]

## Download the required data

In [None]:
import os

import requests

# Directory to save the downloaded images
save_directory = '../extra/pics/'
# Create it up front so the download cells do not fail on a missing folder.
os.makedirs(save_directory, exist_ok=True)

In [None]:
# # Iterate over selected image IDs and download the images
# for image_id in image_ids:
#     image_info = coco.loadImgs(image_id)[0]
#     image_url = image_info["coco_url"]
#     image_filename = image_info["file_name"]
#     save_path = save_directory + image_filename
    
#     response = requests.get(image_url, stream=True)
#     response.raise_for_status()
    
#     with open(save_path, "wb") as f:
#         for chunk in response.iter_content(chunk_size=8192):
#             f.write(chunk)

In [None]:
# Download a single image (the second entry of image_ids) into save_directory.
image_info = coco.loadImgs(image_ids[1])[0]
image_url = image_info["coco_url"]
image_filename = image_info["file_name"]
save_path = save_directory + image_filename

# Use the response as a context manager so the streamed connection is released
# even if writing fails, and set a timeout so a stalled server cannot hang the cell.
with requests.get(image_url, stream=True, timeout=30) as response:
    response.raise_for_status()

    with open(save_path, "wb") as f:
        for chunk in response.iter_content(chunk_size=8192):
            f.write(chunk)

In [None]:
# Download the image behind image_ids[1] and store it under a fixed file name.
image_info = coco.loadImgs(image_ids[1])[0]
image_url = image_info["coco_url"]
# NOTE(review): the file name is hard-coded while the URL comes from
# image_ids[1]; if that ID is not image 139 the saved file is mislabelled.
# Confirm which image is intended.
image_filename = '000000000139.jpg'
save_path = save_directory + image_filename

# Use the response as a context manager so the streamed connection is released
# even if writing fails, and set a timeout so a stalled server cannot hang the cell.
with requests.get(image_url, stream=True, timeout=30) as response:
    response.raise_for_status()

    with open(save_path, "wb") as f:
        for chunk in response.iter_content(chunk_size=8192):
            f.write(chunk)

## Resizing images and rescaling their annotations

In [None]:
# orig_image_ids = image_ids

In [None]:
# Restore the full ID list so the slicing cell below can be re-run.
# The only assignment of orig_image_ids is in a commented-out line, so in a
# fresh kernel this cell raised NameError; take the backup here on first use.
try:
    orig_image_ids
except NameError:
    orig_image_ids = list(image_ids)
image_ids = list(orig_image_ids)

In [None]:
# Continue with only the first two image IDs.
image_ids = image_ids[:2]

In [None]:
# Display the current contents of image_ids.
image_ids

In [None]:
import json
import os

import cv2

# Directory to save the resized images
save_directory = '../extra/resized_pics/'

# Directory to save the adjusted keypoints
keypoints_directory = '../extra/resized_keypoints/'

# Create both output folders up front: cv2.imwrite only reports a missing
# folder through its return value, and open() raises FileNotFoundError.
os.makedirs(save_directory, exist_ok=True)
os.makedirs(keypoints_directory, exist_ok=True)

# Desired size, passed to cv2.resize as (width, height)
target_size = (640, 640)

In [None]:
# # Iterate over selected image IDs
# for image_id in image_ids:
#     image_info = coco.loadImgs(image_id)[0]
#     image_path = '../../extra/pics/' + image_info["file_name"]
#     save_path = save_directory + image_info["file_name"]
#     keypoints_path = keypoints_directory + image_info["file_name"] + ".json"
    
#     # Load the image
#     image = cv2.imread(image_path)
    
#     # Resize the image
#     resized_image = cv2.resize(image, target_size)
    
#     # Scale and adjust the keypoints
#     keypoints = coco.loadAnns(coco.getAnnIds(image_id))
#     adjusted_keypoints = []
#     for kp in keypoints:
#         # Scale and adjust x, y coordinates
#         adjusted_x = int(kp["keypoints"][::3] * target_size[0] / image.shape[1])
#         adjusted_y = int(kp["keypoints"][1::3] * target_size[1] / image.shape[0])
#         adjusted_keypoints.extend([adjusted_x, adjusted_y, kp["keypoints"][2::3]])
    
#     # Save the resized image
#     cv2.imwrite(save_path, resized_image)
    
#     # Save the adjusted keypoints
#     with open(keypoints_path, "w") as f:
#         json.dump(adjusted_keypoints, f)

In [None]:
# Resize one image to target_size and rescale its keypoints to match.
image_id = image_ids[0]
image_info = coco.loadImgs(image_id)[0]
image_path = '../extra/pics/' + image_info["file_name"]
save_path = save_directory + image_info["file_name"]
keypoints_path = keypoints_directory + image_info["file_name"] + ".json"

# Load the image. cv2.imread returns None instead of raising when the file is
# missing or unreadable, so check before the value reaches cv2.resize.
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Resize the image
resized_image = cv2.resize(image, target_size)

# image.shape is (height, width, channels); target_size is (width, height).
source_height, source_width = image.shape[:2]
target_width, target_height = target_size

# Scale and adjust the keypoints.
# Each annotation stores its keypoints flat as x1, y1, v1, x2, y2, v2, ...;
# the output is one [x, y, v] triple per keypoint across all annotations.
keypoints = coco.loadAnns(coco.getAnnIds(image_id))
adjusted_keypoints = []
for kp in keypoints:
    flat = kp["keypoints"]
    adjusted_keypoints.extend(
        [int(x * target_width / source_width), int(y * target_height / source_height), v]
        for x, y, v in zip(flat[::3], flat[1::3], flat[2::3])
    )

# Save the resized image; a failed write is only signalled by a False return.
if not cv2.imwrite(save_path, resized_image):
    raise OSError(f"Could not write resized image: {save_path}")

# Save the adjusted keypoints
with open(keypoints_path, "w") as f:
    json.dump(adjusted_keypoints, f)

In [None]:
# File name stored in the current image_info record.
image_info["file_name"]

In [None]:
# NOTE(review): bare string literal; presumably a file name pasted here for
# reference (it is the name used by the plotting cells below) - confirm.
'000000000139.jpg'

In [None]:
import cv2
import matplotlib.pyplot as plt

# Keypoint data: normalised (x, y, visibility) triples for one person.
# The flat label these numbers come from starts with four bounding-box values
# (0.686445, 0.531960, 0.082891, 0.323967). Grouping it in threes from the
# very first value turned the box into "keypoints" (one of them at x = 2.0,
# outside the image) and shifted the roles of the next values, so the box is
# left out here and the triples start at the first real keypoint.
keypoints = [
    [0.667188, 0.399061, 1.000000],
    [0.670312, 0.396714, 2.000000],
    [0.000000, 0.000000, 0.000000],
    [0.678125, 0.394366, 2.000000],
    [0.000000, 0.000000, 0.000000],
    [0.689063, 0.415493, 2.000000],
    [0.696875, 0.415493, 2.000000],
    [0.682813, 0.469484, 2.000000],
    [0.671875, 0.483568, 2.000000],
    [0.671875, 0.516432, 2.000000],
    [0.656250, 0.504695, 2.000000],
    [0.695312, 0.530516, 2.000000],
    [0.706250, 0.523474, 2.000000],
    [0.698438, 0.610329, 2.000000],
    [0.709375, 0.603286, 2.000000],
    [0.710938, 0.680751, 2.000000],
    [0.717187, 0.671362, 2.000000],
]

# Load the image; cv2.imread returns None when the file cannot be read.
image_path = save_directory + '000000000139.jpg'
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Plot the keypoints on the image
for x, y, v in keypoints:
    if v == 0:
        continue  # Skip keypoints with visibility 0 (they sit at 0, 0)

    # Convert the relative coordinates to image coordinates
    img_x = int(x * image.shape[1])
    img_y = int(y * image.shape[0])

    # Draw a circle to represent the keypoint
    cv2.circle(image, (img_x, img_y), 5, (0, 255, 0), -1)

# Show the image with keypoints (OpenCV loads BGR, matplotlib expects RGB).
# The image is drawn once, after the circles have been added.
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.show()


In [None]:
import cv2
import matplotlib.pyplot as plt

# Keypoint data: one flat label made of a class name, four normalised
# bounding-box values, then (x, y, visibility) triples.
keypoints = [
    "class_name",
    0.686445, 0.531960, 0.082891, 0.323967, 0.667188, 0.399061,
    1.000000, 0.670312, 0.396714, 2.000000, 0.000000, 0.000000, 0.000000,
    0.678125, 0.394366, 2.000000, 0.000000, 0.000000, 0.000000,
    0.689063, 0.415493, 2.000000, 0.696875, 0.415493, 2.000000,
    0.682813, 0.469484, 2.000000, 0.671875, 0.483568, 2.000000,
    0.671875, 0.516432, 2.000000, 0.656250, 0.504695, 2.000000,
    0.695312, 0.530516, 2.000000, 0.706250, 0.523474, 2.000000,
    0.698438, 0.610329, 2.000000, 0.709375, 0.603286, 2.000000,
    0.710938, 0.680751, 2.000000, 0.717187, 0.671362, 2.000000
]

# Load the image; cv2.imread returns None when the file cannot be read.
image_path = save_directory + '000000000139.jpg'
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Extract bounding box labels.
# The four values are (x_center, y_center, width, height), not two corners:
# read as corners, the "max" x (0.082891) would lie left of the "min" x
# (0.686445). Convert centre/size to corner coordinates before drawing.
class_name = keypoints[0]
bbox_labels = keypoints[1:5]
x_center, y_center, box_width, box_height = bbox_labels
x_min = x_center - box_width / 2
y_min = y_center - box_height / 2
x_max = x_center + box_width / 2
y_max = y_center + box_height / 2

# Draw the bounding box
image_h, image_w, _ = image.shape
x_min_abs = int(x_min * image_w)
y_min_abs = int(y_min * image_h)
x_max_abs = int(x_max * image_w)
y_max_abs = int(y_max * image_h)
cv2.rectangle(image, (x_min_abs, y_min_abs), (x_max_abs, y_max_abs), (0, 255, 0), 2)

# Plot the keypoints on the image; triples start right after the box values.
for i in range(5, len(keypoints), 3):
    x, y, v = keypoints[i:i+3]

    if v == 0:
        continue  # Skip keypoints with visibility 0

    # Convert the relative coordinates to image coordinates
    img_x = int(x * image_w)
    img_y = int(y * image_h)

    # Draw a circle to represent the keypoint
    cv2.circle(image, (img_x, img_y), 5, (0, 0, 255), -1)

# Show the image with box and keypoints (OpenCV loads BGR, matplotlib expects
# RGB). The image is drawn once, after all overlays have been added.
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.show()


In [None]:
# Check the pixel x-coordinate computed from the box's x_min value.
print(x_min_abs)

In [None]:
# Path built from the current save_directory and the image's file name.
save_directory + image_info["file_name"]

In [None]:
# Image record for the first ID in image_ids.
coco.loadImgs(image_ids[0])[0]

In [None]:
# Rebuild the list of image IDs that have at least one labelled keypoint.
#
# Testing the "keypoints" list for truthiness does not work: a non-empty list
# is truthy even when every entry is 0, so annotations without any labelled
# keypoint would be kept. "num_keypoints" holds the labelled count; when it is
# absent, count the non-zero visibility flags (every third value) instead.
image_ids = []
seen_image_ids = set()
for ann in coco.anns.values():
    labelled = ann.get("num_keypoints")
    if labelled is None:
        labelled = sum(1 for v in (ann.get("keypoints") or ())[2::3] if v > 0)
    if labelled <= 0:
        continue
    # An image with several annotated people appears in several annotations;
    # record it once, keeping first-seen order.
    if ann["image_id"] not in seen_image_ids:
        seen_image_ids.add(ann["image_id"])
        image_ids.append(ann["image_id"])

In [None]:
# Raw 'keypoints' value of annotation 183126.
coco.anns[183126]['keypoints']

In [None]:
# 'bbox' value of the same annotation.
coco.anns[183126]['bbox']

In [None]:
# 'num_keypoints' value of the same annotation.
coco.anns[183126]['num_keypoints']

# Converting COCO annotations to YOLOv8 format

In [None]:
import os
import json

# Path to COCO annotation file
coco_annotation_file = 'path/to/your/coco/annotations.json'

# Output directory for label files
out_dir = 'path/to/output/label/files'
os.makedirs(out_dir, exist_ok=True)

# Load COCO annotations
with open(coco_annotation_file, 'r', encoding='utf-8') as f:
    coco_data = json.load(f)

# Look images up by their 'id' field. An annotation's 'image_id' is an
# identifier, not a position in coco_data['images'], so indexing the list
# with it picks the wrong image or raises IndexError.
images_by_id = {image['id']: image for image in coco_data['images']}

# Build every label line first, grouped by image file name. An image with
# several annotated people has several annotations; reopening its label file
# in 'w' mode for each one would keep only the last person.
label_lines = {}
for annotation in coco_data['annotations']:
    image_id = annotation['image_id']
    image = images_by_id[image_id]
    image_file_name = image['file_name']
    image_width = image['width']
    image_height = image['height']
    keypoints = annotation['keypoints']

    # COCO boxes are [x_min, y_min, width, height] in pixels; the label wants
    # the box centre and size, normalised by the image dimensions.
    bbox = annotation['bbox']
    x_center = (bbox[0] + bbox[2] / 2) / image_width
    y_center = (bbox[1] + bbox[3] / 2) / image_height
    width = bbox[2] / image_width
    height = bbox[3] / image_height

    # Class label (always 0) followed by the bounding box
    fields = ['0', f"{x_center:.6f}", f"{y_center:.6f}", f"{width:.6f}", f"{height:.6f}"]

    # Keypoints are stored flat as x, y, v triples; normalise x and y and
    # keep the visibility flag unchanged.
    for i in range(0, len(keypoints), 3):
        x = keypoints[i] / image_width
        y = keypoints[i+1] / image_height
        v = keypoints[i+2]

        fields.extend([f"{x:.6f}", f"{y:.6f}", f"{v}"])

    label_lines.setdefault(image_file_name, []).append(' '.join(fields))

# Write one label file per image, one line per annotated object.
for image_file_name, lines in label_lines.items():
    label_file_path = os.path.join(out_dir, os.path.splitext(image_file_name)[0] + '.txt')
    with open(label_file_path, 'w') as label_file:
        label_file.write('\n'.join(lines) + '\n')

    print(f"Converted COCO keypoints to label file: {label_file_path}")
