In [1]:
import os
import sys
import numpy as np
import pandas as pd
import sklearn as sk
from sklearn.model_selection import train_test_split

In [2]:
captions_path=os.path.join("data", "captions.txt")
image_captions = {}
with open(captions_path, "r") as f:
    for line in f:
        image_name, caption = line.strip().split(".jpg,")
        image_path = os.path.join("data", "Images", f"{image_name}.jpg")
        if image_path in image_captions:
            image_captions[image_path].append(caption)
        else:
            image_captions[image_path] = [caption]

In [3]:
image_captions

{'data\\Images\\1000268201_693b08cb0e.jpg': ['A child in a pink dress is climbing up a set of stairs in an entry way .',
  'A girl going into a wooden building .',
  'A little girl climbing into a wooden playhouse .',
  'A little girl climbing the stairs to her playhouse .',
  'A little girl in a pink dress going into a wooden cabin .'],
 'data\\Images\\1001773457_577c3a7d70.jpg': ['A black dog and a spotted dog are fighting',
  'A black dog and a tri-colored dog playing with each other on the road .',
  'A black dog and a white dog with brown spots are staring at each other in the street .',
  'Two dogs of different breeds looking at each other on the road .',
  'Two dogs on pavement moving toward each other .'],
 'data\\Images\\1002674143_1b742ab4b8.jpg': ['A little girl covered in paint sits in front of a painted rainbow with her hands in a bowl .',
  'A little girl is sitting in front of a large painted rainbow .',
  'A small girl in the grass plays with fingerpaints in front of a 

In [4]:
images = list(image_captions.keys())
captions = list(image_captions.values())
train_images, test_images, train_captions, test_captions = train_test_split(images, captions, test_size=0.2, random_state=42)            

In [5]:
train_images

['data\\Images\\3393152604_27bd1037f2.jpg',
 'data\\Images\\3549673305_4dfd44e04a.jpg',
 'data\\Images\\3181328245_7c04ce1691.jpg',
 'data\\Images\\2393410666_b8c20fff61.jpg',
 'data\\Images\\239807547_4923efc821.jpg',
 'data\\Images\\1388373425_3c72b56639.jpg',
 'data\\Images\\3561543598_3c1b572f9b.jpg',
 'data\\Images\\3477369101_8e0c61d8f4.jpg',
 'data\\Images\\3564385317_1bf5094068.jpg',
 'data\\Images\\2766926202_4201bf2bf9.jpg',
 'data\\Images\\3394750987_a32ecc477e.jpg',
 'data\\Images\\441921713_1cafc7d7d2.jpg',
 'data\\Images\\2403376030_903521c371.jpg',
 'data\\Images\\2635905544_dbc65d0622.jpg',
 'data\\Images\\270809922_043e3bef06.jpg',
 'data\\Images\\597543181_6a85ef4c17.jpg',
 'data\\Images\\3482974845_db4f16befa.jpg',
 'data\\Images\\2874728371_ccd6db87f3.jpg',
 'data\\Images\\2913965136_2d00136697.jpg',
 'data\\Images\\241347689_d0b1ac297d.jpg',
 'data\\Images\\2272750492_91e8f67328.jpg',
 'data\\Images\\197504190_fd1fc3d4b7.jpg',
 'data\\Images\\416960865_048fd3f294.j

In [6]:
train_data = dict(zip(train_images, train_captions))

# Convert test set back into dictionary
test_data = dict(zip(test_images, test_captions))

In [7]:
train_data

{'data\\Images\\3393152604_27bd1037f2.jpg': ['A bulldog jumps over a log next to a stream .',
  'A dog is pawing at a fallen log nearby a stream in an area with pinestraw .',
  'Black and white dog grabs tree limb',
  'The black and white dog jumps by a log near a creek .',
  'The dog plays with the large log .'],
 'data\\Images\\3549673305_4dfd44e04a.jpg': ['A couple kissing on the neck on a busy street .',
  'A man embraces a woman on a crowded street .',
  'A man in a leather jacket with the collar popped kisses the neck of a woman in a gray coat wearing a white scarf .',
  'Man kisses girl on neck',
  "Man kissing a woman 's neck on a busy sidewalk ."],
 'data\\Images\\3181328245_7c04ce1691.jpg': ['Two babies are sitting in a play mat .',
  'two babies playing inside of a colorful padded enclosure .',
  'Two children sit inside a play area .',
  'Two toddlers are sitting in a colorful playpen .',
  'Two toddlers are sitting in a plastic playpen with a blue floor .'],
 'data\\Images