<a href="https://colab.research.google.com/github/shabal1n/ML_course/blob/main/Mini_project_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import webbrowser

from geopy.geocoders import Nominatim
from geopy import distance
import folium
import random


def get_coordinates(address):
    """Geocode *address* with Nominatim and return its coordinates.

    Args:
        address: Query string passed to ``Nominatim.geocode``.

    Returns:
        A ``(latitude, longitude)`` tuple read from the geocoder result.

    Raises:
        ValueError: If the geocoder returns no result for *address*.
    """
    locator = Nominatim(user_agent='mini_project')
    location = locator.geocode(address)
    if location is None:
        # geocode() yields None when nothing matches; fail with a clear
        # message instead of an AttributeError on ``None.latitude``.
        raise ValueError(f'Could not geocode address: {address!r}')
    return location.latitude, location.longitude


def get_dist(start, end):
    """Return the distance between *start* and *end* in kilometres.

    Both arguments are forwarded unchanged to ``distance.distance``; the
    result's ``km`` attribute is returned.
    """
    separation = distance.distance(start, end)
    return separation.km


def auto_open(path, f_map):
    """Save *f_map* to *path* and open the saved page in the web browser.

    Args:
        path: Destination of the HTML file; it is formatted to a string
            before being used.
        f_map: Object exposing ``save(filename)`` (the notebook passes a
            ``folium.Map``).
    """
    from pathlib import Path

    html_page = f'{path}'
    f_map.save(html_page)
    # webbrowser.open() is documented for URLs; passing a bare (possibly
    # relative) filename is not portable, so hand it an absolute file:// URI.
    page_url = Path(html_page).resolve().as_uri()
    # new=2 asks for a new browser tab where the browser supports it.
    webbrowser.open(page_url, new=2)

In [2]:
class Env:
    """Geocoded environment: a city plus the places to visit.

    Attributes are filled by calling ``get_coordinates`` once for the city
    and once for every place name.
    """

    # Defaults reproduce the values the notebook originally hard-coded.
    DEFAULT_CITY = 'Dubai, UAE'
    DEFAULT_PLACES = ('Burj Khalifa', 'Museum of the Future Dubai', 'Jumeirah the Palm',
                      'Dubai Mall', 'Dubai Miracle Garden', 'Sky Views Dubai', 'Legoland Dubai')

    def __init__(self, city=DEFAULT_CITY, places_to_visit=None):
        """Geocode *city* and every entry of *places_to_visit*.

        Args:
            city: Address used for the map centre.
            places_to_visit: Iterable of place names; ``None`` selects
                ``DEFAULT_PLACES``.
        """
        if places_to_visit is None:
            places_to_visit = self.DEFAULT_PLACES
        self.city = get_coordinates(city)
        # Copy into a list so the attribute keeps its original type and the
        # caller's iterable is never aliased.
        self.places_to_visit = list(places_to_visit)
        # Same order as places_to_visit, so index i refers to the same place.
        self.places_coordinates = [get_coordinates(item) for item in self.places_to_visit]

In [3]:
class Agent:
    """Traveller that moves between the environment's places and records its route."""

    def __init__(self, start: tuple, env: "Env"):
        """Create an agent positioned at *start* inside *env*.

        Args:
            start: Initial position of the agent.
            env: Environment whose ``places_coordinates`` the agent reads.
        """
        self.curr_pos = start       # position the agent currently occupies
        self.score = 0              # changed only through update_score()
        self.visited_places = []    # positions recorded by move_to()/add_visited()
        self.env = env

    def is_finish(self):
        """Return True when exactly all but one of the places are recorded as visited."""
        return len(self.visited_places) == len(self.env.places_coordinates) - 1

    def get_available_places(self):
        """Return the environment's places that are not recorded as visited."""
        return [i for i in self.env.places_coordinates if i not in self.visited_places]

    def get_dist(self, place: tuple):
        """Return the distance in kilometres from the current position to *place*."""
        # The module-level get_dist() already returns the ``.km`` value, so
        # asking the result for ``.km`` again would raise AttributeError.
        return get_dist(self.curr_pos, place)

    def add_visited(self, pos: tuple):
        """Record *pos* as visited without moving the agent."""
        self.visited_places.append(pos)

    def get_all_except_self(self, place: tuple):
        """Return every place of the environment other than *place*."""
        return [x for x in self.env.places_coordinates if x != place]

    def get_all_except_self_and_visited(self, place: tuple):
        """Return every place other than *place* that is not recorded as visited."""
        return [x for x in self.get_all_except_self(place) if x not in self.visited_places]

    def move_to(self, place: tuple):
        """Record the current position as visited, then move to *place*."""
        self.visited_places.append(self.curr_pos)
        self.curr_pos = place

    def update_score(self, reward):
        """Subtract *reward* from the score (a negative reward raises it)."""
        self.score -= reward

In [4]:
class Q:
    """Tabular Q-learning over the places of an environment.

    ``q_table[origin][destination]`` stores the learned value of travelling
    from ``origin`` to ``destination``. Rewards are negative distances, so a
    higher (less negative) value means less travel.
    """

    discount = 0.9  # weight of the bootstrapped next-state value
    alpha = 0.9     # learning rate of the update in q_function()
    # Position every episode starts from (value hard-coded by the notebook).
    start_pos = (25.197030599999998, 55.274221681616936)

    def __init__(self, env: "Env"):
        """Create the agent at ``start_pos`` and a zeroed Q-table for *env*."""
        self.env = env
        self.agent = Agent(self.start_pos, env)
        # Per-instance table: a dict defined on the class would be shared by
        # every Q object.
        self.q_table = {}
        self.fill_q()

    def fill_q(self):
        """Set the Q-value of every origin/destination pair to zero."""
        origins = list(self.env.places_coordinates)
        if self.start_pos not in origins:
            # The start is compared by exact float equality; give it its own
            # row when it is not one of the environment's places so that
            # q_function() never hits a missing key.
            origins.append(self.start_pos)
        for origin in origins:
            self.q_table[origin] = {dest: 0 for dest in self.agent.get_all_except_self(origin)}

    def epsilon(self):
        """Choose the agent's next destination among the unvisited places.

        ``random.randint(0, 10) < 7`` selects a uniformly random destination;
        otherwise the destination closest to the current position is chosen.
        At least one unvisited destination must remain when this is called.
        """
        rand_var = random.randint(0, 10)
        current = self.agent.curr_pos
        destinations = self.agent.get_all_except_self_and_visited(current)
        if rand_var < 7:
            return random.choice(destinations)
        # min() has no distance cap and never falls back to a placeholder
        # coordinate; ties resolve to the first candidate.
        return min(destinations, key=lambda dest: get_dist(current, dest))

    def q_function(self, dest):
        """Apply one Q-learning update for moving from the current position to *dest*.

        Q(s, a) += alpha * (reward + discount * max_a' Q(s', a') - Q(s, a)),
        where the reward is the negative distance of the move and a' ranges
        over the places still unvisited once the agent stands on *dest*.
        """
        current = self.agent.curr_pos
        reward = -get_dist(current, dest)

        # The current position is appended to the visited list only by
        # move_to(), so exclude it here explicitly.
        next_options = [place for place in self.agent.get_all_except_self_and_visited(dest)
                        if place != current]
        # default=0: nothing left to visit after dest, so no future value.
        best_next = max((self.q_table[dest][place] for place in next_options), default=0)

        old_value = self.q_table[current][dest]
        self.q_table[current][dest] = old_value + self.alpha * (
            reward + self.discount * best_next - old_value)

    def learn(self, episodes=100):
        """Run *episodes* tours from ``start_pos`` and return the Q-table.

        The 1-based episode number is printed as each episode begins.
        """
        for episode in range(1, episodes + 1):
            print(episode)
            # Start every episode afresh; otherwise the visited list stays
            # full after the first tour and later episodes do nothing.
            self.agent.curr_pos = self.start_pos
            self.agent.visited_places = []
            while self.agent.get_all_except_self_and_visited(self.agent.curr_pos):
                action = self.epsilon()
                self.q_function(action)
                self.agent.move_to(action)
        return self.q_table

In [None]:
# Geocode the city and its places, then centre the map on the city.
env = Env()
f_map = folium.Map(location=env.city, tiles="OpenStreetMap", zoom_start=12)

# One marker per place, labelled with the place's name.
for coordinates, place_name in zip(env.places_coordinates, env.places_to_visit):
    folium.Marker(coordinates, popup=place_name).add_to(f_map)

# Train 20 learners and overlay, for each, the best move out of every place.
for i in range(0, 20):
    rl = Q(env)
    q_table = rl.learn()
    # For every origin keep the destination with the highest learned value.
    dirToGo = {k: max(v, key=v.get) for k, v in q_table.items()}
    # Iterate over items(): looping over the dict itself would unpack each
    # coordinate key into (lat, lon) and ignore the chosen destinations.
    for origin, destination in dirToGo.items():
        folium.PolyLine([origin, destination], color="red", weight=2.5, opacity=1).add_to(f_map)
auto_open('map.html', f_map)

In [14]:
import IPython
# Read the saved page inside a context manager so the file handle is closed,
# with an explicit encoding instead of the platform default
# (assumes map.html is UTF-8 — TODO confirm against how the map is saved).
with open('map.html', encoding='utf-8') as map_file:
    map_html = map_file.read()
# Last expression of the cell, so the notebook renders the map inline.
IPython.display.HTML(map_html)