In [1]:
!pip install transformers

[0m

In [2]:
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
import numpy as np
from enum import IntEnum
from abc import ABC, abstractmethod
import random
import math

In [3]:
import requests
import torch
from transformers import CLIPProcessor, CLIPModel
from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize

In [4]:
class Action(IntEnum):
    UP = 3
    DOWN = 1
    RIGHT = 0
    LEFT = 2

In [5]:
COLORS = {
    "red": '#FF000000',
    "green": '#00FF00',
    "blue": '#0000FF',
    "purple": '#A020F0',
    "yellow": '#FFFF00',
    "grey": '#808080',
}

# Map of object type to integers
OBJECT_TO_IDX = {
    "empty": 0,
    "agent": 1,
    "object":2,
    "goal": 8,
}

IDX_TO_OBJECT = {v: k for k, v in OBJECT_TO_IDX.items()}
HEX_TO_COLOR = {v: k for k, v in COLORS.items()}

SHAPES_TO_IDX = {
    'triangle' : 0,
    'circle' : 1,
    'square' : 2,
    'rectangle' : 3
}

# Map of agent direction indices to vectors
DIR_TO_VEC = [
    # Right (positive X)
    np.array((1, 0)),
    # Down (positive Y)
    np.array((0, 1)),
    # Left (negative X)
    np.array((-1, 0)),
    # Up (negative Y)
    np.array((0, -1)),
]

In [6]:
class Shape:
    '''
        Attributes
        ----------
        distance_from_cooridinates : int
             How far the center should be from the points
        color : str
            A hex representation of the color that triangle should be
        fill : str
            Whether the object should be filled or not

        Methods
        -------
        init(self, size=int, color=str, fill=Bool, sound=None)
            Initializes the class
        draw(self, world=ImageDraw, x=int, y=int)
            Draws the triangle to the given Image at the x,y coordinates
        get_coordinates(self, x=int, y=int)
            Given a centerpoint, calculates the points of the triangle
    '''

    def __init__(self, size, color, fill=False, initial_loc=(-1, -1)):
        self.distance_from_coordinate = (size / 2)
        self.color = color
        self.fill = fill
        self.row, self.col = initial_loc

    @abstractmethod
    def draw(self, world, x, y):
        pass

    @abstractmethod
    def get_coordinates(self, x, y):
        pass


class Triangle(Shape):
    def __init__(self, size, color, fill=False, initial_loc=(-1, -1)):
        Shape.__init__(self, size, color, fill, initial_loc)
        self.shape = "triangle"

    # Draw a triangle around this point
    def draw(self, world, x, y):
        ImageDraw.ink = self.color
        bottom_left, bottom_right, top = self.get_coordinates(x, y)
        world.polygon([bottom_left, bottom_right, top], self.color, outline="black")

    def get_coordinates(self, x, y):
        x1 = x - self.distance_from_coordinate
        x2 = x + self.distance_from_coordinate
        x3 = x
        y1 = y + self.distance_from_coordinate
        y2 = y + self.distance_from_coordinate
        y3 = y - self.distance_from_coordinate

        return (x1, y1), (x2, y2), (x3, y3)


class Rectangle(Shape):
    def __init__(self, size, color, fill=False, initial_loc=(-1, -1)):
        Shape.__init__(self, size, color, fill, initial_loc)
        self.shape = "rectangle"

    # Draw a triangle around this point
    def draw(self, world, x, y):
        ImageDraw.ink = self.color
        top_left, bottom_right = self.get_coordinates(x, y)

        world.rectangle([top_left, bottom_right], self.color, outline="black")

    def get_coordinates(self, x, y):
        height = self.distance_from_coordinate / 2
        x1, y1 = x - self.distance_from_coordinate, y - height
        x2, y2 = x + self.distance_from_coordinate, y + height
        return (x1, y1), (x2, y2)


class Square(Shape):
    def __init__(self, size, color, fill=False, initial_loc=(-1, -1)):
        Shape.__init__(self, size, color, fill, initial_loc)
        self.shape = "square"

    def draw(self, world, x, y):
        ImageDraw.ink = self.color
        top_left, bottom_right = self.get_coordinates(x, y)
        world.rectangle([top_left, bottom_right], self.color, outline="black")

    def get_coordinates(self, x, y):
        x1, y1 = x - self.distance_from_coordinate, y - self.distance_from_coordinate
        x2, y2 = x + self.distance_from_coordinate, y + self.distance_from_coordinate
        return (x1, y1), (x2, y2)


class Circle(Shape):
    def __init__(self, size, color, fill=False, initial_loc=(-1, -1)):
        Shape.__init__(self, size, color, fill, initial_loc)
        self.shape = "circle"

    def draw(self, world, x, y):
        ImageDraw.ink = self.color
        bottom_left, top_right = self.get_coordinates(x, y)
        world.ellipse([bottom_left, top_right], fill=self.color, outline="black")

    def get_coordinates(self, x, y):
        x1, y1 = x - self.distance_from_coordinate, y - self.distance_from_coordinate
        x2, y2 = x + self.distance_from_coordinate, y + self.distance_from_coordinate
        return (x1, y1), (x2, y2)


class Goal:
    def __init__(self, world=None, starting_loc=(-1, -1), shape=Square, color='#FFFF00', grid_size = 5):
        self.world = world
        self.row, self.col = starting_loc
        self.shape = shape(grid_size / 2, color)

    def __str__(self):
        location = "Location: (" + str(self.row) + "," + str(self.col) + ") "
        obj = self.get_name()
        return location + obj

    def get_name(self):
        return HEX_TO_COLOR[self.shape.color] + " " + self.shape.shape
    
    def get_color(self):
        return HEX_TO_COLOR[self.shape.color]
    
    def get_shape(self):
        return self.shape.shape
    
    def get_location(self):
        return np.asarray([self.row, self.col])

    def update_loc(self, row, col):
        self.row = row
        self.col = col

    def draw(self, world, x, y):
        self.shape.draw(world, x, y)

    def update_world_reference(self, world):
        self.world = world
        

SHAPES = {
    "triangle": Triangle,
    "square": Square,
    "circle": Circle,
    "rectangle": Rectangle
}


class Agent:
    def __init__(self, world=None, starting_loc=(-1, -1), shape=Triangle, color='#FFFF00', grid_size = 5):
        self.world = world
        self.row, self.col = starting_loc
        self.grid_size = grid_size
        self.shape = shape(grid_size / 2, color)

    def __str__(self):
        location = "Location: (" + str(self.row) + "," + str(self.col) + ") "
        obj = self.get_name()
        return location + obj

    def get_name(self):
        return HEX_TO_COLOR[self.shape.color] + " " + self.shape.shape
    
    def get_color(self):
        return HEX_TO_COLOR[self.shape.color]
    
    def get_shape(self):
        return self.shape.shape

    def update_loc(self, row, col):
        self.row = row
        self.col = col

    def draw(self, world, x, y):
        self.shape.draw(world, x, y)

    # 0 - Right, 1 - Down, 2 - Left, 3 - Up
    def move(self, direction):
        x_change, y_change = DIR_TO_VEC[direction]
        new_row, new_col = self.row + x_change, self.col + y_change

        if self.world.space_can_be_visited(new_row, new_col):
            self.update_loc(new_row, new_col)
            self.world.update_agent_pos(new_row, new_col)

    def update_world_reference(self, world):
        self.world = world
    
    def get_location(self):
        return np.asarray([self.row, self.col])

class GridWorld:
    """
            This acts as our 2d world. Will handle logic of keeping objects in place

    """
    def __init__(self, size, background_color='#FFFFFF', agent = None, goal = None, max_obstacles = 0, grid_size=5):
        self.image = None
        self.pencil = None
        self.size = size
        self.grid_size = grid_size
        self.x_offset = int(abs((WIDTH - size * grid_size) / 2))
        self.y_offset = int(abs((HEIGHT - size * grid_size) / 2))

        self.world = np.empty(shape=(size, size), dtype=object)

        if agent is None:
            self.agent = Agent(self)
        else:
            self.agent = agent
            self.agent.update_world_reference(self)

        if goal is None:
            self.goal = Goal(self)
        else:
            self.goal = goal
            self.goal.update_world_reference(self)


        max_obstacles = min(max_obstacles, math.floor((self.size ** 2) / 3))
        self.total_obstacles = random.randint(0, max_obstacles)
        
        self.objective_full_connected = "The " + self.agent.get_name() + " is connected to the " + self.goal.get_name()
        self.objective_full_close = "The " + self.agent.get_name() + " is close to the " + self.goal.get_name()
        
        self.objective_shape_connected = "The " + self.agent.get_shape() + " is connected to the " + self.goal.get_shape()
        self.objective_shape_close = "The " + self.agent.get_shape() + " is close to the " + self.goal.get_shape()
        
        self.objective_color_connected = "The " + self.agent.get_color() + " shape is connected to the " + self.goal.get_color()+" shape"
        self.objective_color_close = "The " + self.agent.get_color() + " shape is close to the " + self.goal.get_color()+" shape"
        
        self.__setup_initial_locations()

        if DEBUG:
            print(self.agent)
            print(self.goal)
            
    def __setup_initial_locations(self):
        # Makes min objects 1, so goal can be placed
        total_objects = self.total_obstacles
        agent_row, agent_col = self.__find_free_space('agent')
        self.agent.update_loc(agent_row, agent_col)

        goal_row, goal_col = self.__find_free_space('goal')
        self.goal.update_loc(goal_row, goal_col)

        while total_objects > 0:
            row, col = self.__find_free_space('object')
            if row == -1 and col == -1:
                break
            else:
                total_objects -= 1
                
    def get_objectives(self):
        return [self.objective_full_connected, self.objective_full_close,self.objective_shape_connected,self.objective_shape_close,self.objective_color_connected,self.objective_color_close ]

    def is_goal_reached(self):
        if(self.agent.row == self.goal.row and self.agent.col == self.goal.col):
            return True
        return False

    def display_image(self):
        self.draw()
        plt.imshow(self.image)

    def save_image(self):
        self.draw()
        uuid_string = uuid.uuid4().hex.upper()[0:6]
        filename = time.strftime("%Y%m%d-%H%M%S")
        self.image.save(DIR + uuid_string + "-" + filename + '.png')
        
    def get_world_image(self):
        self.draw()
        return self.image

    # Draws the individual grids, as well as the objects contained in the underlying array
    def draw(self):
        self.refresh_canvas()
        step_size = self.grid_size

        y_start = self.y_offset
        y_end = self.y_offset + (step_size * self.size)
        x_start = self.x_offset
        x_end = self.x_offset + (step_size * self.size)

        for x in range(y_start, y_end + 1, step_size):
            line = ((x, y_start), (x, y_end))
            self.pencil.line(line, fill=128)

        for y in range(x_start, x_end + 1, step_size):
            line = ((x_start, y), (x_end, y))
            self.pencil.line(line, fill=128)
        self.draw_objects()

        return step_size

    # Draws the objects in the associated array
    def draw_objects(self):
        for col in range(0, self.size):
            for row in range(0, self.size):
                world_object = self.world[row, col]

                if world_object is not None:
                    if isinstance(world_object, Agent):
                        x, y = self.get_coordinates_from_array_loc(row, col)
                        self.agent.draw(self.pencil, x, y)
                    elif isinstance(world_object, Goal):
                        x, y = self.get_coordinates_from_array_loc(row, col)
                        self.goal.draw(self.pencil, x, y)
                    else:
                        x, y = self.get_coordinates_from_array_loc(row, col)
                        world_object.draw(self.pencil, x, y)

    def get_coordinates_from_array_loc(self, row, col):
        centroid = self.grid_size / 2
        x = centroid + self.x_offset + (self.grid_size * row)
        y = centroid + self.y_offset + (self.grid_size * col)
        return x, y

    def __find_free_space(self, object_to_place):
        #print("Placing " + object_to_place)
        count = 0
        row, col = None, None
        while row is None and col is None or self.world[row, col] is not None:
            if count > self.size:
                return -1, -1
            row = random.randint(0, self.world.shape[0] - 1)
            col = random.randint(0, self.world.shape[1] - 1)
            count += 1

        if object_to_place == 'object':
            shape, color = get_random_unique_shape_and_color(self)
            self.world[row, col] = SHAPES[shape](self.grid_size / 2, color)
        elif object_to_place == 'agent':
            self.world[row, col] = self.agent
        elif object_to_place == 'goal':
            self.world[row, col] = self.goal

        return row, col

    def space_can_be_visited(self, row, col):
        if (row < self.size and col < self.size) and (row >= 0 and col >= 0) \
                and (self.world[row, col] is None or isinstance(self.world[row, col], Goal)):
            return True
        else:
            return False
        
    def can_move(self, row, col):
        if (row < self.size and col < self.size) and (row >= 0 and col >= 0) \
                and (self.world[row, col] is None) :
            return True
        else:
            return False

    def update_agent_pos(self, row, col):
        self.agent.update_loc(row,col)
        for row in range(0, len(self.world)):
            for col in range(0, len(self.world[row])):
                if isinstance(self.world[row, col], Agent):
                    self.world[row, col] = None
        self.world[self.agent.row, self.agent.col] = self.agent
    
    def update_goal_pos(self, row, col):
        self.goal.update_loc(row,col)
        for row in range(0, len(self.world)):
            for col in range(0, len(self.world[row])):
                if isinstance(self.world[row, col], Goal):
                    self.world[row, col] = None
        self.world[self.goal.row, self.goal.col] = self.goal

    def refresh_canvas(self):
        if self.pencil is not None:
            del self.image
            del self.pencil
        image = Image.new(mode='RGB', size=(WIDTH, HEIGHT), color=(255, 255, 255, 0))
        self.pencil = ImageDraw.Draw(image)
        self.image = image


def get_random_shape():
    return random.sample(list(SHAPES), 1)[0]


def get_random_color():
    return random.sample(list(COLORS), 1)[0]


def get_random_unique_shape_and_color(world):
    agent_shape = world.agent.shape.shape
    agent_color = world.agent.shape.color
    goal_shape = world.goal.shape.shape
    goal_color = world.goal.shape.color

    random_shape = None
    random_color = None
    # Keep going until you have a unique shape/color that is not an agent or goal
    while (random_shape is None and random_color is None) or \
            (random_shape == agent_shape and random_color == agent_color) or \
            (random_shape == goal_shape and random_color == goal_color):
        random_shape = get_random_shape()
        random_color = get_random_color()

    return random_shape, random_color

In [7]:
COLORS = {
    "red": '#FF000000',
    "green": '#00FF00',
    "blue": '#0000FF',
    "purple": '#A020F0',
    "yellow": '#FFFF00',
    "grey": '#808080',
}

SHAPES = {
    "triangle": Triangle,
    "square": Square,
    "circle": Circle,
    "rectangle": Rectangle
}

# Map of object type to integers
OBJECT_TO_IDX = {
    "empty": 0,
    "agent": 1,
    "object":2,
    "goal": 8,
}

IDX_TO_OBJECT = {v: k for k, v in OBJECT_TO_IDX.items()}
HEX_TO_COLOR = {v: k for k, v in COLORS.items()}

# Map of agent direction indices to vectors
DIR_TO_VEC = [
    # Right (positive X)
    np.array((1, 0)),
    # Down (positive Y)
    np.array((0, 1)),
    # Left (negative X)
    np.array((-1, 0)),
    # Up (negative Y)
    np.array((0, -1)),
]

In [8]:
world2 = GridWorld(5, max_obstacles=0, grid_size = WIDTH//5)
world2.display_image()
world2.is_goal_reached()

NameError: name 'WIDTH' is not defined

In [None]:
world2.objective

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

In [None]:
_model = CLIPModel.from_pretrained('openai/clip-vit-large-patch14').to(device=device)

In [None]:
_processor = CLIPProcessor.from_pretrained('openai/clip-vit-large-patch14')

In [None]:
import pandas

In [None]:
#Sampling agent and goal properties randomly

print(SHAPES)
NUMBER_OF_IMAGES = 250
WIDTH = 1000
HEIGHT = 1000
DEBUG = False
DIR = './'
rows = []
grid = [5,8,10,15]

In [None]:
from tqdm import tqdm

In [None]:
for x in tqdm(range(NUMBER_OF_IMAGES)):
    sample_shapes = random.choices(list(SHAPES.keys()), k=2)
    sample_colors = random.choices(list(COLORS.keys()), k=2)
#     print(sample_shapes)
#     print(sample_colors)
    worlds = []
    for grid_squares in grid :
        grid_size = WIDTH//grid_squares
        agent = Agent(shape = SHAPES[sample_shapes[0]], color=COLORS[sample_colors[0]], grid_size = grid_size)
        goal = Goal(shape = SHAPES[sample_shapes[1]], color=COLORS[sample_colors[1]],grid_size = grid_size)
        tempWorld = GridWorld(grid_squares, max_obstacles=0,agent=agent, goal=goal, grid_size=grid_size)
        worlds.append(tempWorld)
    for world in worlds:
        world.update_agent_pos(worlds[0].agent.row,worlds[0].agent.col)
        world.update_goal_pos(worlds[0].goal.row,worlds[0].goal.col)
        
    
    for world in worlds:
        a_loc = world.agent.get_location()
        g_loc = world.goal.get_location()
        for i in range(len(world.get_objectives())):
            #print("World Objective :", i)
            row = {}
            world.update_agent_pos(a_loc[0], a_loc[1])
            world.update_goal_pos(g_loc[0], g_loc[1])
            row['grid_squares'] = world.size

            row['agent_shape'] = world.agent.get_shape()
            row['agent_color'] = world.agent.get_color()
            row['agent_x_loc'] = a_loc[0]
            row['agent_y_loc'] = a_loc[1]

            row['goal_shape'] = world.goal.get_shape()
            row['goal_color'] = world.goal.get_color()
            row['goal_x_loc'] = g_loc[0]
            row['goal_y_loc'] = g_loc[1]

            prev_loc = np.asarray([-1,-1])
            curr_loc = a_loc
            MAX_STEPS = 2 * world.size
            #print("max_steps",MAX_STEPS)
            #world.display_image()
            current_steps = 0
            #print("Before while", world.is_goal_reached())
            while (not world.is_goal_reached()) and (current_steps < MAX_STEPS):
                torch.cuda.empty_cache()
                images = []
                agent_locations =[]
                curr_loc = world.agent.get_location()
                goalReached = False
                # 0 - Right, 1 - Down, 2 - Left, 3 - Up
                for direction in range(4):
                    x_change, y_change = DIR_TO_VEC[direction]
                    #print("Agent Location before change:", world.agent.get_location())
                    new_row, new_col = world.agent.row + x_change, world.agent.col + y_change
                    #print("Agent Location After change",new_row, new_col)
                    if world.space_can_be_visited(new_row, new_col):
                        #print("Possible to reach")
                        world.update_agent_pos(new_row, new_col)
                        #print("World Reached",world.is_goal_reached() )
                        if world.is_goal_reached():
                            #print("Goal Reached - BREAK")
                            break
                        ##print("Updated Agent Position",world.agent.get_location())
                        images.append(world.get_world_image())
                        agent_locations.append(world.agent.get_location())
                        world.update_agent_pos(curr_loc[0], curr_loc[1])
                #print("Agent Locations",agent_locations)
                #print("Goal Reached or Not :",world.is_goal_reached())
                if not world.is_goal_reached() :
                        inputs = _processor(text=[world.get_objectives()[i]], images=images, return_tensors="pt", padding=True).to(device=device)
                        outputs = _model(**inputs)
                        logits_per_image = outputs.logits_per_image # this is the image-text similarity score
                        probs = logits_per_image.softmax(dim=0) # we can take the softmax to get the label probabilities
                        ##print(probs)
                        probs.detach()
                        idx = torch.argmax(probs)
                        #print("Index ", idx)
                        #print("ArgMax Location: ", agent_locations[idx][0],agent_locations[idx][1])
                        if(agent_locations[idx][0] == prev_loc[0] and agent_locations[idx][1] == prev_loc[1]):
                            current_steps = MAX_STEPS
                            #print(" PREVIOUS == FUTURE  & BREAK")
                            break;
                        else :
                            prev_loc = curr_loc
                            curr_loc[0] = agent_locations[idx][0]
                            curr_loc[1] = agent_locations[idx][1]

                        world.update_agent_pos(agent_locations[idx][0],agent_locations[idx][1])
                        #print(agent_locations[idx][0],agent_locations[idx][1])
                        #plt.figure()
                        #plt.imshow(world.get_world_image())
                        logits_per_image.detach()
                current_steps = current_steps + 1
                #print("Current Steps : ", current_steps)
            row['goal_reached'] = world.is_goal_reached()
            row['steps_taken'] = current_steps
            row['goal_objective'] = i
            rows.append(row)
            #print("End of objective: ", world.is_goal_reached(), current_steps)    

In [None]:
import pandas as pd
df = pd.DataFrame(rows)
df.to_csv("final.csv",index=False)

In [None]:
df.head(50)

In [None]:


worlds[3].display_image()
fig = plt.imshow(worlds[3].get_world_image())
print(worlds[2].agent.get_location())
print(worlds[2].goal.get_location())
fig.set_cmap('hot')
fig.axes.get_xaxis().set_visible(False)
fig.axes.get_yaxis().set_visible(False)
plt.savefig("4.png")

In [None]:
    sample_shapes = random.choices(list(SHAPES.keys()), k=2)
    sample_colors = random.choices(list(COLORS.keys()), k=2)
#     print(sample_shapes)
#     print(sample_colors)
    worlds = []
    for grid_squares in grid :
        grid_size = WIDTH//grid_squares
        agent = Agent(shape = SHAPES[sample_shapes[0]], color=COLORS[sample_colors[0]], grid_size = grid_size)
        goal = Goal(shape = SHAPES[sample_shapes[1]], color=COLORS[sample_colors[1]],grid_size = grid_size)
        tempWorld = GridWorld(grid_squares, max_obstacles=0,agent=agent, goal=goal, grid_size=grid_size)
        worlds.append(tempWorld)
    for world in worlds:
        world.update_agent_pos(worlds[0].agent.row,worlds[0].agent.col)
        world.update_goal_pos(worlds[0].goal.row,worlds[0].goal.col)

In [None]:
worlds[1].display_image()
print(worlds[1].agent.get_location())
print(worlds[1].goal.get_location())
fig.set_cmap('hot')
fig.axes.get_xaxis().set_visible(False)
fig.axes.get_yaxis().set_visible(False)
plt.savefig("1.png")
#plt.imshow(world_8.get_world_image())

In [None]:
worlds[2].display_image()

In [None]:
worlds[3].display_image()
print(worlds[3].size)

In [None]:
worlds[4].display_image()

In [None]:
rows = []
for world in worlds:
    a_loc = world.agent.get_location()
    g_loc = world.goal.get_location()
    for i in range(len(world.get_objectives())):
        #print("World Objective :", i)
        row = {}
        world.update_agent_pos(a_loc[0], a_loc[1])
        world.update_goal_pos(g_loc[0], g_loc[1])
        row['grid_squares'] = world.size
        
        row['agent_shape'] = world.agent.get_shape()
        row['agent_color'] = world.agent.get_color()
        row['agent_x_loc'] = a_loc[0]
        row['agent_y_loc'] = a_loc[1]
        
        row['goal_shape'] = world.goal.get_shape()
        row['goal_color'] = world.goal.get_color()
        row['goal_x_loc'] = g_loc[0]
        row['goal_y_loc'] = g_loc[1]
        
        prev_loc = np.asarray([-1,-1])
        curr_loc = a_loc
        MAX_STEPS = 2 * world.size
        #print("max_steps",MAX_STEPS)
        #world.display_image()
        current_steps = 0
        #print("Before while", world.is_goal_reached())
        while (not world.is_goal_reached()) and (current_steps < MAX_STEPS):
            torch.cuda.empty_cache()
            images = []
            agent_locations =[]
            curr_loc = world.agent.get_location()
            goalReached = False
            # 0 - Right, 1 - Down, 2 - Left, 3 - Up
            for direction in range(4):
                x_change, y_change = DIR_TO_VEC[direction]
                print("Agent Location before change:", world.agent.get_location())
                new_row, new_col = world.agent.row + x_change, world.agent.col + y_change
                #print("Agent Location After change",new_row, new_col)
                if world.space_can_be_visited(new_row, new_col):
                    print("Possible to reach")
                    world.update_agent_pos(new_row, new_col)
                    #print("World Reached",world.is_goal_reached() )
                    if world.is_goal_reached():
                        #print("Goal Reached - BREAK")
                        break
                    ##print("Updated Agent Position",world.agent.get_location())
                    images.append(world.get_world_image())
                    agent_locations.append(world.agent.get_location())
                    world.update_agent_pos(curr_loc[0], curr_loc[1])
            #print("Agent Locations",agent_locations)
            #print("Goal Reached or Not :",world.is_goal_reached())
            if not world.is_goal_reached() :
                    inputs = _processor(text=[world.get_objectives()[i]], images=images, return_tensors="pt", padding=True).to(device=device)
                    outputs = _model(**inputs)
                    logits_per_image = outputs.logits_per_image # this is the image-text similarity score
                    probs = logits_per_image.softmax(dim=0) # we can take the softmax to get the label probabilities
                    ##print(probs)
                    probs.detach()
                    idx = torch.argmax(probs)
                    #print("Index ", idx)
                    #print("ArgMax Location: ", agent_locations[idx][0],agent_locations[idx][1])
                    if(agent_locations[idx][0] == prev_loc[0] and agent_locations[idx][1] == prev_loc[1]):
                        current_steps = MAX_STEPS
                        #print(" PREVIOUS == FUTURE  & BREAK")
                        break;
                    else :
                        prev_loc = curr_loc
                        curr_loc[0] = agent_locations[idx][0]
                        curr_loc[1] = agent_locations[idx][1]
                    
                    world.update_agent_pos(agent_locations[idx][0],agent_locations[idx][1])
                    #print(agent_locations[idx][0],agent_locations[idx][1])
                    #plt.figure()
                    #plt.imshow(world.get_world_image())
                    logits_per_image.detach()
            current_steps = current_steps + 1
            #print("Current Steps : ", current_steps)
        row['goal_reached'] = world.is_goal_reached()
        row['steps_taken'] = current_steps
        row['goal_objective'] = i
        rows.append(row)
        print("End of objective: ", world.is_goal_reached(), current_steps)    

In [None]:
import pandas as pd
df = pd.DataFrame(rows)

In [None]:
df.head(100)

In [None]:
print(agent_locations)

In [None]:
#gitub

In [None]:
for image in images:
    plt.figure()
    plt.imshow(image)

In [None]:
for direction in range(4):
        print(direction)

In [None]:
world.display_image()

In [None]:
print(world.agent.get_location())
print(world.goal.get_location())

In [None]:
world.update_agent_pos(1,1)
print(world.agent.get_location())
world.display_image()

In [None]:
print(world.is_goal_reached())
world.agent.move(Action.LEFT)
world.display_image()

In [None]:
inputs = _processor(text=[world2.objective], images= world2.image, return_tensors="pt", padding=True).to(device=device)
outputs = _model(**inputs)
logits_per_image = outputs.logits_per_image # this is the image-text similarity score
probs = logits_per_image.softmax(dim=0) # we can take the softmax to get the label probabilities
print(probs)

In [None]:
logits_per_image

In [None]:
world2.agent.move(Action.RIGHT)
world2.display_image()
plt.imshow(world2.image)