# Importing Libraries

In [1]:
import json
import os

# Organizing the Dataset

In [2]:
# Relative directory that the raw annotation JSON files are loaded from.
folder_path = 'Dataset/SSID_Annotations/'

In [3]:
def load_json(file_path, folder_path):
    """Load and parse a JSON file located inside a folder.

    Args:
        file_path: Name of the JSON file, relative to ``folder_path``.
        folder_path: Directory containing the file. A trailing path
            separator is optional.

    Returns:
        The parsed JSON content (whatever ``json.load`` produces for the file).

    Raises:
        FileNotFoundError: If the resulting path does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # os.path.join inserts a separator only when one is missing, so both
    # 'folder/' and 'folder' resolve to the same file.
    full_path = os.path.join(folder_path, file_path)
    # Read as UTF-8 explicitly instead of relying on the platform default.
    with open(full_path, 'r', encoding='utf-8') as file:
        return json.load(file)

In [4]:
# Read the train / validation / test annotation files from `folder_path`.
train_data, validation_data, test_data = (
    load_json(split_file, folder_path)
    for split_file in ('SSID_Train.json', 'SSID_Validation.json', 'SSID_Test.json')
)

In [5]:
def organize_data(data):
    """Group the flat annotation list by album and then by story.

    Args:
        data: Mapping with an 'annotations' list. Each item is a sequence
            whose element at index 0 is a dict with the keys 'album_id',
            'story_id', 'youtube_image_id', 'storytext' and 'image_order'.

    Returns:
        dict: ``{album_id: {story_id: [entry, ...]}}`` where every entry has
        the keys 'image_id', 'storytext' and 'image_order', and the entries
        of each story are sorted by 'image_order'.
    """
    albums = {}

    for record in data['annotations']:
        fields = record[0]  # only the first element of each annotation is read
        album_id, story_id = fields['album_id'], fields['story_id']

        # setdefault creates the nested containers the first time a key is seen.
        frames = albums.setdefault(album_id, {}).setdefault(story_id, [])
        frames.append({
            'image_id': fields['youtube_image_id'],
            'storytext': fields['storytext'],
            'image_order': fields['image_order'],
        })

    # Put the frames of every story into their image order.
    for stories in albums.values():
        for frames in stories.values():
            frames.sort(key=lambda frame: frame['image_order'])

    return albums

In [6]:
# Regroup each split by album and story.
train_organized_data, validation_organized_data, test_organized_data = map(
    organize_data, (train_data, validation_data, test_data)
)

In [7]:
def display_album(organized_data, album_id):
    """Print all stories of one album, one line per image.

    Args:
        organized_data: Mapping of album id -> story id -> list of entries,
            where each entry has the keys 'image_id' and 'storytext'.
        album_id: Key of the album to print.

    Returns:
        None. The output goes to stdout; a "not found" message is printed
        when ``album_id`` is not a key of ``organized_data``.
    """
    # Guard clause: nothing to show for an unknown album.
    if album_id not in organized_data:
        print(f"Album ID {album_id} not found.")
        return

    print(f"Album ID: {album_id}")
    stories = organized_data[album_id]
    for story_id in stories:
        print(f"\nStory ID: {story_id}")
        for entry in stories[story_id]:
            story_text = entry['storytext']
            image_file = f"{entry['image_id']}.jpg"
            print(f"Image: {image_file} - Story: {story_text}")

In [8]:
# Print every story stored for album 10856 of the training split.
display_album(train_organized_data, 10856)

Album ID: 10856

Story ID: 5887
Image: 2001.jpg - Story: Today I am in my friends house and he is very happy for me.
Image: 2002.jpg - Story: He is telling me about his car.
Image: 2003.jpg - Story: He is telling me about his car's feature.
Image: 2004.jpg - Story: The car color is white and it looks awesome and beautiful.
Image: 2005.jpg - Story: Now he is telling me about all doors and seats of the car.

Story ID: 5889
Image: 2001.jpg - Story: Today I am here to meet my friend.
Image: 2002.jpg - Story: He is a good guy showing me his car.
Image: 2003.jpg - Story: He is telling me about his car's features.
Image: 2004.jpg - Story: He opened the door of car.
Image: 2005.jpg - Story: He is telling me about doors and seats of the car.

Story ID: 5891
Image: 2001.jpg - Story: [Male] is standing in their [location].
Image: 2002.jpg - Story: [Male] steps to the side, revealing a [figure].
Image: 2003.jpg - Story: [Male] holds [figure] so that the [location] is seen up close.
Image: 2004.jpg

In [9]:
# Sanity check: show the container type of the organized training data.
print(type(train_organized_data))

<class 'dict'>


In [10]:
# Store the organized data in a JSON file.
def store_json(data, file_path, folder_path):
    """Serialize ``data`` as JSON into ``folder_path``/``file_path``.

    The destination folder is created when it does not exist yet, so the
    notebook can be re-run from a fresh checkout.

    Args:
        data: JSON-serializable object to write.
        file_path: Name of the output file, relative to ``folder_path``.
        folder_path: Destination directory. A trailing path separator is
            optional.

    Returns:
        None. An existing file at the destination is overwritten.

    Note:
        ``json.dump`` writes dict keys as strings, so int keys are read back
        as ``str`` when the file is loaded again.
    """
    # os.path.join inserts a separator only when one is missing.
    full_path = os.path.join(folder_path, file_path)

    # Create the output folder on demand; skip when the path has no directory
    # component, because os.makedirs('') raises.
    target_dir = os.path.dirname(full_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    # Write as UTF-8 explicitly instead of relying on the platform default.
    with open(full_path, 'w', encoding='utf-8') as file:
        json.dump(data, file)

In [11]:
# Write the organized training split to Dataset/Organized_Annotations/.
store_json(train_organized_data, 'SSID_Train_Organized.json', 'Dataset/Organized_Annotations/')

In [12]:
# Write the organized validation split to Dataset/Organized_Annotations/.
store_json(validation_organized_data, 'SSID_Validation_Organized.json', 'Dataset/Organized_Annotations/')

In [13]:
# Write the organized test split to Dataset/Organized_Annotations/.
store_json(test_organized_data, 'SSID_Test_Organized.json', 'Dataset/Organized_Annotations/')