Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
sanjayss34 committed Jun 6, 2023
1 parent 59273f6 commit 1b783a9
Show file tree
Hide file tree
Showing 37 changed files with 2,609 additions and 423 deletions.
320 changes: 13 additions & 307 deletions README.md

Large diffs are not rendered by default.

312 changes: 312 additions & 0 deletions README_lavis.md

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions covr_examples50.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions covr_programs_and_questions50b.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions covr_programs_and_questions50b_find.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions gqa_examples50.json

Large diffs are not rendered by default.

19 changes: 19 additions & 0 deletions gqa_preamble_simple.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"""Write Python code to answer the questions about each image."""
# Global constants
# These four values bound the coordinate space used when reasoning about
# object positions: x runs from LEFT to RIGHT and y runs from BOTTOM to TOP.
# Programs compare get_pos() results against them, e.g.
# `pos_x < (LEFT+RIGHT)/2` for "left" and `pos_y < (BOTTOM+TOP)/2` for "bottom".
# NOTE(review): the upper bound of 24 presumably matches the resolution of the
# grid that get_pos() reports positions on -- confirm against utils.get_pos.
# min x coordinate
LEFT = 0
# min y coordinate
BOTTOM = 0
# max x coordinate
RIGHT = 24
# max y coordinate
TOP = 24
# Image is the type named in the API reference below; open_image, query and
# get_pos are the three helpers that reference documents.
from PIL import Image
from utils import open_image, query, get_pos

"""
API Reference:
open_image(path: str) -> Image - opens the image at the path and returns it as an Image object
query(img: Image, question: str) -> str - queries the image returns an answer to the question
get_pos(img: Image, object: str) -> (float, float) - returns the position of the object in the image
"""
1 change: 1 addition & 0 deletions gqa_programs_and_questions_nocomments3b.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"14199179": {"program": "answer = query(img, \"Which type of clothing is not white?\")", "question": "Which type of clothing is not white?"}, "10956934": {"program": "answer = query(img, \"What animal is in front of the tree?\")", "question": "What animal is in front of the tree?"}, "2763807": {"program": "fan_color = query(img, \"What color is the fan?\")\nanswer = query(img, \"What type of appliance is \"+fan_color)", "question": "What type of appliance has the same color as the fan?"}, "10419734": {"program": "girl_pos_x, girl_pos_y = get_pos(img, \"girl\")\nwhite_chair_pos_x, white_chair_pos_y = get_pos(img, \"white chair\")\nif white_chair_pos_x < girl_pos_x:\n answer = \"left\"\nelse:\n answer = \"right\"", "question": "Is the white chair to the right or to the left of the girl that is wearing a shirt?"}, "15594542": {"program": "bag_exists = query(img, \"Is there a bag?\")\nanswer = \"no\"\nif bag_exists == \"yes\":\n garbage_bin_pos_x, garbage_bin_pos_y = get_pos(img, \"garbage bin\")\n bag_pos_x, bag_pos_y = get_pos(img, \"bag\")\n if garbage_bin_pos_x > bag_pos_x:\n answer = \"yes\"", "question": "Is the garbage bin to the right of a bag?"}, "12797924": {"program": "answer = query(img, \"What is the name of the pink article of clothing?\")", "question": "What is the name of the pink article of clothing?"}, "14433986": {"program": "answer = query(img, \"Which kind of appliance is to the left of the bottle?\")", "question": "Which kind of appliance is to the left of the bottle?"}, "12301208": {"program": "answer = query(img, \"What is on the motorbike?\")", "question": "What is on the motorbike?"}, "15802315": {"program": "is_tall = query(img, \"Is the standing giraffe tall?\")\nis_brown = query(img, \"Is the standing giraffe brown?\")\nif is_tall == \"yes\" and is_brown == \"yes\":\n answer = \"yes\"\nelse:\n answer = \"no\"", "question": "Is the giraffe that is standing tall and brown?"}, "943692": {"program": "answer = query(img, \"What animal is 
leaving the water?\")", "question": "What animal is leaving the water?"}, "1063809": {"program": "answer = query(img, \"Who is in front of the bench that the bottle is to the right of?\")", "question": "Who is in front of the bench that the bottle is to the right of?"}, "10830891": {"program": "answer = query(img, \"Do the animals have the same species?\")", "question": "Do the animals have the same species?"}, "1734640": {"program": "answer = query(img, \"What is the candle on?\")", "question": "What is the candle on?"}, "4662590": {"program": "answer = query(img, \"Is the girl to the left of the tennis racket wearing a coat?\")", "question": "Is the girl to the left of the tennis racket wearing a coat?"}, "5244007": {"program": "answer = query(img, \"What is the animal that is inside the bag?\")", "question": "What is the animal that is inside the bag?"}, "4745123": {"program": "skateboard_pos_x, skateboard_pos_y = get_pos(img, \"skateboard\")\nif skateboard_pos_x < (LEFT+RIGHT)/2:\n answer = \"left\"\nelse:\n answer = \"right\"", "question": "On which side of the picture is the skateboard?"}, "15506285": {"program": "answer = query(img, \"Who is holding the camera?\")", "question": "Who is holding the camera?"}, "12927943": {"program": "answer = query(img, \"Do you see any statues near the boy in front of the pole?\")", "question": "Do you see any statues near the boy in front of the pole?"}, "9842755": {"program": "is_silver = query(img, \"Does the bench look silver?\")\nis_metallic = query(img, \"Does the bench look metallic?\")\nif is_silver == \"yes\" and is_metallic == \"yes\":\n answer = \"yes\"\nelse:\n answer = \"no\"", "question": "Does the bench look silver and metallic?"}, "12241876": {"program": "answer = query(img, \"Which kind of furniture is made of wood, the chair or the desk?\")", "question": "Which kind of furniture is made of wood, the chair or the desk?"}, "14623401": {"program": "answer = query(img, \"What is the gray animal leaning on?\")", 
"question": "What is the gray animal leaning on?"}, "11652967": {"program": "answer = query(img, \"What do you think are the flowers in front of?\")", "question": "What do you think are the flowers in front of?"}, "171036825": {"program": "answer = query(img, \"Who is drinking the wine?\")", "question": "Who is drinking the wine?"}, "9211111": {"program": "answer = query(img, \"Which kind of furniture is the girl lying on?\")", "question": "Which kind of furniture is the girl lying on?"}, "1162307": {"program": "answer = query(img, \"Who wears a shirt?\")", "question": "Who wears a shirt?"}, "13534236": {"program": "answer = query(img, \"Which place is it?\")", "question": "Which place is it?"}, "1536201": {"program": "answer = query(img, \"What is on the wall that looks blue?\")", "question": "What is on the wall that looks blue?"}, "12214327": {"program": "answer = query(img, \"What's in front of the mountains?\")", "question": "What's in front of the mountains?"}, "10612003": {"program": "answer = query(img, \"Which kind of animal is it?\")", "question": "Which kind of animal is it?"}, "7660022": {"program": "answer = query(img, \"What color is the dish of the food?\")", "question": "What color is the dish of the food?"}, "11281645": {"program": "answer = query(img, \"Is there a horse in the tall grass?\")", "question": "Is there a horse in the tall grass?"}, "12262825": {"program": "rug_pos_x, rug_pos_y = get_pos(img, \"rug\")\nif rug_pos_x < (LEFT+RIGHT)/2:\n answer = \"left\"\nelse:\n answer = \"right\"", "question": "On which side of the picture is the rug?"}, "7702895": {"program": "answer = query(img, \"What kind of toy is to the right of the baby animal?\")", "question": "What kind of toy is to the right of the baby animal?"}, "19903394": {"program": "answer = query(img, \"Are there kids in the picture?\")", "question": "Are there kids in the picture?"}, "11645335": {"program": "giraffes_exist = query(img, \"Are there any giraffes?\")\nzebras_exist = 
query(img, \"Are there any zebras?\")\nif giraffes_exist == \"yes\" or zebras_exist == \"yes\":\n answer = \"yes\"\nelse:\n answer = \"no\"", "question": "Are there either any giraffes or zebras in this picture?"}, "15981437": {"program": "bird_pos_x, bird_pos_y = get_pos(img, \"large bird\")\nif bird_pos_y < (BOTTOM+TOP)/2:\n answer = \"bottom\"\nelse:\n answer = \"top\"", "question": "In which part of the picture is the large bird, the bottom or the top?"}, "5791382": {"program": "answer = query(img, \"What is the woman that is standing wearing?\")", "question": "What is the woman that is standing wearing?"}, "17718224": {"program": "lettuce_pos_x, lettuce_pos_y = get_pos(img, \"lettuce\")\nsmall_food_pos_x, small_food_pos_y = get_pos(img, \"small food\")\nif lettuce_pos_x < small_food_pos_x:\n answer = \"left\"\nelse:\n answer = \"right\"", "question": "Is the lettuce to the right or to the left of the small food?"}, "17914622": {"program": "answer = query(img, \"Is it an outdoors scene?\")", "question": "Is it an outdoors scene?"}, "5635646": {"program": "answer = query(img, \"What is the plate on?\")", "question": "What is the plate on?"}, "13267130": {"program": "answer = query(img, \"Does the woman wear a hat?\")", "question": "Does the woman wear a hat?"}, "1181747": {"program": "answer = query(img, \"What animal is leaning on the grass?\")", "question": "What animal is leaning on the grass?"}, "11966658": {"program": "answer = query(img, \"What type of furniture is the tissue box on?\")", "question": "What type of furniture is the tissue box on?"}, "14475110": {"program": "answer = query(img, \"Do you see pizzas next to the plate that is on the table?\")", "question": "Do you see pizzas next to the plate that is on the table?"}, "2554156": {"program": "answer = query(img, \"Who is holding the racket?\")", "question": "Who is holding the racket?"}, "17112911": {"program": "answer = query(img, \"Do you see any meat on the cooked food?\")", "question": "Do 
you see any meat on the cooked food?"}, "71014038": {"program": "answer = query(img, \"What is the graffiti on?\")", "question": "What is the graffiti on?"}, "166365": {"program": "answer = query(img, \"Are there any keyboards in front of the device that is to the left of the pens?\")", "question": "Are there any keyboards in front of the device that is to the left of the pens?"}, "1872253": {"program": "answer = query(img, \"What is the vehicle that the man is driving called?\")", "question": "What is the vehicle that the man is driving called?"}, "15843084": {"program": "answer = query(img, \"What is inside the train?\")", "question": "What is inside the train?"}}
30 changes: 30 additions & 0 deletions lavis/configs/datasets/covr/nlvr2_incontext50.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright (c) 2022, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause

datasets:
covr:
# data_dir: ${env.data_dir}/datasets
data_type: images # [images|videos|features]

build_info:
# Do not put a minus sign (-) in front of a split name, or YAML will parse it as a list item
annotations:
train:
url:
- /shared/sanjayss/nlvr2/incontext50.json
storage:
- nlvr2/annotations/incontext50.json
val:
url:
- /shared/sanjayss/nlvr2/incontext50.json
storage:
- nlvr2/annotations/incontext50.json
test:
url:
- /shared/sanjayss/nlvr2/incontext50.json
storage:
- nlvr2/annotations/incontext50.json
images:
storage: nlvr2/images/
30 changes: 30 additions & 0 deletions lavis/configs/datasets/covr/nlvr2_test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright (c) 2022, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause

datasets:
covr:
# data_dir: ${env.data_dir}/datasets
data_type: images # [images|videos|features]

build_info:
# Do not put a minus sign (-) in front of a split name, or YAML will parse it as a list item
annotations:
train:
url:
- /shared/sanjayss/nlvr2/incontext50.json
storage:
- nlvr2/annotations/incontext50.json
val:
url:
- /shared/sanjayss/nlvr2/test1.json
storage:
- nlvr2/annotations/test1.json
test:
url:
- /shared/sanjayss/nlvr2/test1.json
storage:
- nlvr2/annotations/test1.json
images:
storage: nlvr2/images/
30 changes: 30 additions & 0 deletions lavis/configs/datasets/covr/nlvr2_train2000.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright (c) 2022, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause

datasets:
covr:
# data_dir: ${env.data_dir}/datasets
data_type: images # [images|videos|features]

build_info:
# Do not put a minus sign (-) in front of a split name, or YAML will parse it as a list item
annotations:
train:
url:
- /shared/sanjayss/nlvr2/incontext50.json
storage:
- nlvr2/annotations/incontext50.json
val:
url:
- /shared/sanjayss/nlvr2/train_sample2000.json
storage:
- nlvr2/annotations/train_sample2000.json
test:
url:
- /shared/sanjayss/nlvr2/train_sample2000.json
storage:
- nlvr2/annotations/train_sample2000.json
images:
storage: nlvr2/images/
30 changes: 30 additions & 0 deletions lavis/configs/datasets/covr/sample1000.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright (c) 2022, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause

datasets:
covr:
# data_dir: ${env.data_dir}/datasets
data_type: images # [images|videos|features]

build_info:
# Do not put a minus sign (-) in front of a split name, or YAML will parse it as a list item
annotations:
train:
url:
- /shared/sanjayss/covr/train_sample100.json
storage:
- covr/annotations/incontext100.json
val:
url:
- /shared/sanjayss/covr/val_sample1000.json
storage:
- covr/annotations/val_sample1000.json
test:
url:
- /shared/sanjayss/covr/val_sample1000.json
storage:
- covr/annotations/val_sample1000.json
images:
storage: covr/images/
30 changes: 30 additions & 0 deletions lavis/configs/datasets/covr/test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright (c) 2022, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause

datasets:
covr:
# data_dir: ${env.data_dir}/datasets
data_type: images # [images|videos|features]

build_info:
# Do not put a minus sign (-) in front of a split name, or YAML will parse it as a list item
annotations:
train:
url:
- /shared/sanjayss/covr/train_sample100.json
storage:
- covr/annotations/incontext100.json
val:
url:
- /shared/sanjayss/covr/test.json
storage:
- covr/annotations/test.json
testdev:
url:
- /shared/sanjayss/covr/test.json
storage:
- covr/annotations/test.json
images:
storage: covr/images/
30 changes: 30 additions & 0 deletions lavis/configs/datasets/covr/test_paraphrased.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright (c) 2022, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause

datasets:
  covr:
    # data_dir: ${env.data_dir}/datasets
    data_type: images # [images|videos|features]

    build_info:
      # Each split lists a source annotation file (`url`) and a `storage` path.
      # NOTE(review): `storage` is presumably relative to the dataset cache
      # root -- confirm against the dataset builder.
      # Do not put a minus sign (-) in front of a split name, or YAML will
      # parse it as a list item.
      annotations:
        # The 100-example training sample, stored as incontext100.json.
        train:
          url:
            - /shared/sanjayss/covr/train_sample100.json
          storage:
            - covr/annotations/incontext100.json
        # Both evaluation splits read the same paraphrased test file.
        val:
          url:
            - /shared/sanjayss/covr/test_paraphrased.json
          storage:
            - covr/annotations/test_paraphrased.json
        # Key is `testdev`, as in the sibling COVR configs (test.yaml,
        # testdev.yaml, testdev_paraphrased.yaml); it was previously
        # `test_paraphraseddev`, a search-and-replace artifact.
        testdev:
          url:
            - /shared/sanjayss/covr/test_paraphrased.json
          storage:
            - covr/annotations/test_paraphrased.json
      images:
        storage: covr/images/
30 changes: 30 additions & 0 deletions lavis/configs/datasets/covr/testdev.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright (c) 2022, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause

datasets:
covr:
# data_dir: ${env.data_dir}/datasets
data_type: images # [images|videos|features]

build_info:
# Do not put a minus sign (-) in front of a split name, or YAML will parse it as a list item
annotations:
train:
url:
- /shared/sanjayss/covr/train_sample100.json
storage:
- covr/annotations/incontext100.json
val:
url:
- /shared/sanjayss/covr/testdev.json
storage:
- covr/annotations/testdev.json
testdev:
url:
- /shared/sanjayss/covr/testdev.json
storage:
- covr/annotations/testdev.json
images:
storage: covr/images/
30 changes: 30 additions & 0 deletions lavis/configs/datasets/covr/testdev_paraphrased.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright (c) 2022, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause

datasets:
covr:
# data_dir: ${env.data_dir}/datasets
data_type: images # [images|videos|features]

build_info:
# Do not put a minus sign (-) in front of a split name, or YAML will parse it as a list item
annotations:
train:
url:
- /shared/sanjayss/covr/train_sample100.json
storage:
- covr/annotations/incontext100.json
val:
url:
- /shared/sanjayss/covr/testdev_paraphrased.json
storage:
- covr/annotations/testdev_paraphrased.json
testdev:
url:
- /shared/sanjayss/covr/testdev_paraphrased.json
storage:
- covr/annotations/testdev_paraphrased.json
images:
storage: covr/images/
33 changes: 33 additions & 0 deletions lavis/configs/datasets/gqa/sample200.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Copyright (c) 2022, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause

datasets:
gqa:
# data_dir: ${env.data_dir}/datasets
data_type: images # [images|videos|features]

build_info:
# Do not put a minus sign (-) in front of a split name, or YAML will parse it as a list item
annotations:
train:
url:
# - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/gqa/train_balanced_questions.json
- /shared/sanjayss/gqa/train_balanced_questions_shuffled.json
storage:
- gqa/annotations/train_balanced_questions.json
val:
url:
# - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/gqa/val_balanced_questions.json
- /shared/sanjayss/gqa/train_balanced_questions_sample200.json
storage:
- gqa/annotations/train_balanced_questions_sample200.json
test:
url:
# - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/gqa/test_balanced_questions.json
- /shared/sanjayss/gqa/val_balanced_questions_shuffled.json
storage:
- gqa/annotations/val_balanced_questions.json
images:
storage: gqa/images/
33 changes: 33 additions & 0 deletions lavis/configs/datasets/gqa/sample2000.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Copyright (c) 2022, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause

datasets:
gqa:
# data_dir: ${env.data_dir}/datasets
data_type: images # [images|videos|features]

build_info:
# Do not put a minus sign (-) in front of a split name, or YAML will parse it as a list item
annotations:
train:
url:
# - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/gqa/train_balanced_questions.json
- /shared/sanjayss/gqa/train_balanced_questions_shuffled.json
storage:
- gqa/annotations/train_balanced_questions.json
val:
url:
# - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/gqa/val_balanced_questions.json
- /shared/sanjayss/gqa/val_balanced_questions_sample2000.json
storage:
- gqa/annotations/val_balanced_questions_sample2000.json
test:
url:
# - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/gqa/test_balanced_questions.json
- /shared/sanjayss/gqa/val_balanced_questions_shuffled.json
storage:
- gqa/annotations/val_balanced_questions.json
images:
storage: gqa/images/
Loading

0 comments on commit 1b783a9

Please sign in to comment.