# In [6]:
import re
import shutil
import requests
import json
import os
import pymongo
from tabulate import tabulate
from datetime import datetime, timedelta
import pandas as pd
from collections import OrderedDict
from dotenv import load_dotenv
from random import randint
from fuzzywuzzy import fuzz
from math import radians, cos, sin, asin, sqrt

# Load settings from the local "app.env" file so the os.getenv() calls below can see them.
load_dotenv("app.env")

# MongoDB connection URIs and API tokens, read from the environment (None when a variable is unset).
# NOTE(review): "REWARRDS_DB" looks like a misspelling of "REWARDS_DB"; the name is kept because other code may reference it.
REWARRDS_DB = os.getenv("REWARDS_CONNECTION_URI")
STAGING_DB = os.getenv("STAGING_CONNECTION_URI")
DEV_DB = os.getenv("DEV_CONNECTION_URI")
HOTELS_PROD_DB = os.getenv("PROD_CONNECTION_URI")
VEHO_PROD_DB = os.getenv("VEHO_PROD_CONNECTION_URI")
TRIPLAZE_JWT = os.getenv("TRIPLAZE_JWT")
TRIPLAZE_API_JWT = os.getenv("TRIPLAZE_API_JWT")
environment = 'production'

# NOTE(review): a `global` statement at module scope has no effect; these two lines do not create the names.
global client
global veho_client

def close_clients():
    """Close the module-level ``client`` and ``veho_client`` connections, if any.

    Each name is looked up in the module globals, so calling this before the
    clients have been created is a silent no-op instead of a caught-and-printed
    ``NameError``. The two clients are closed independently: a failure while
    closing one is printed (best effort) and does not prevent closing the other.

    Returns:
        None.
    """
    for global_name in ("client", "veho_client"):
        # .get() covers both "never assigned" and "assigned to None".
        connection = globals().get(global_name)
        if connection is None:
            continue
        try:
            connection.close()
        except Exception as e:
            # Closing is best effort; report the problem and keep going.
            print(e)
# Close any client objects that already exist before creating new ones
# (presumably so re-running this code in the same session does not leave old connections open).
close_clients()

# `client` is the connection used by the hotel lookup functions in this file;
# `veho_client` is created here but not used by the code visible in this file.
client = pymongo.MongoClient(HOTELS_PROD_DB)
veho_client = pymongo.MongoClient(VEHO_PROD_DB)

def get_hotelbeds_expedia_hotels_for_city(city):
    """Return the hotels of ``city`` that have both an Expedia and a Hotelbeds code.

    Runs a ``find`` on the ``hotel`` collection of the ``hotels`` database via
    the module-level ``client`` and returns every matching document as a list.
    Only the projected fields are requested.
    """
    criteria = {
        'hotelCodeBySourceProvider.EXPEDIA' : {"$exists" : True},
        'hotelCodeBySourceProvider.HOTELBEDS_API' : {"$exists" : True},
        'cityName' : city
    }
    # Projection: 1 means "include this field".
    wanted_fields = dict.fromkeys(
        (
            'name',
            'hotelCodeBySourceProvider.EXPEDIA',
            'hotelCodeBySourceProvider.HOTELBEDS_API',
            'cityName',
            'planningToolId',
            'status',
        ),
        1,
    )
    hotel_collection = client.get_database('hotels').get_collection('hotel')
    matches = hotel_collection.find(criteria, wanted_fields)
    return list(matches)

def get_rapid_hotel(hotel_code):
    """Look up one document in ``rapid_hotel`` by its ``hotelCode``.

    Uses the module-level ``client`` and requests only the projected fields.
    Returns whatever ``find_one`` yields: the matching document, or ``None``
    when nothing matches.
    """
    lookup = { 'hotelCode' : hotel_code }
    # Projection: 1 means "include this field".
    wanted_fields = dict.fromkeys(
        ('hotelCode', 'name', 'cityName', 'latitude', 'longitude', 'active', 'address'),
        1,
    )
    rapid_collection = client.get_database('hotels').get_collection('rapid_hotel')
    return rapid_collection.find_one(lookup, wanted_fields)

def get_hotel_beds_hotel(hotel_code):
    """Look up one document in ``hb_hotels`` by its ``hotelCode``.

    Uses the module-level ``client`` and requests only the projected fields.
    Returns whatever ``find_one`` yields: the matching document, or ``None``
    when nothing matches.
    """
    lookup = { 'hotelCode' : hotel_code }
    # Projection: 1 means "include this field".
    wanted_fields = dict.fromkeys(
        ('hotelCode', 'name', 'cityName', 'latitude', 'longitude', 'active', 'address'),
        1,
    )
    hotelbeds_collection = client.get_database('hotels').get_collection('hb_hotels')
    return hotelbeds_collection.find_one(lookup, wanted_fields)

def get_hotel_beds_cities(path='resources/hotel-beds/hotel_beds_cities.json'):
    """Load the city list from the Hotelbeds cities JSON file.

    Args:
        path: Location of the JSON file. Defaults to the path the script has
            always read, so existing zero-argument calls behave as before.

    Returns:
        The value stored under the top-level ``"cities"`` key of the file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the document has no ``"cities"`` key.
    """
    # Decode explicitly as UTF-8: the platform default encoding can differ
    # and would garble or reject non-ASCII city names.
    with open(path, encoding='utf-8') as f:
        return json.load(f)['cities']
    
def get_fuzzy_score(a, b):
    """Return fuzzywuzzy's ``token_set_ratio`` similarity score for ``a`` and ``b``."""
    similarity = fuzz.token_set_ratio(a, b)
    return similarity

def get_distance_between_hotelbeds_expedia(hotel_beds_coordinate, expedia_coordinate):
    """Return the great-circle distance between two coordinates, rounded to 4 decimals.

    Each argument is indexed as ``[latitude, longitude]``; the values are
    multiplied by pi/180, i.e. they are treated as degrees. The result is
    scaled by 12742 (approximately Earth's mean diameter in kilometres).
    """
    degrees_to_radians = 0.017453292519943295  # pi / 180
    sphere_diameter = 12742

    lat_a, lon_a = hotel_beds_coordinate[0], hotel_beds_coordinate[1]
    lat_b, lon_b = expedia_coordinate[0], expedia_coordinate[1]

    # Haversine term expressed with cosines: hav(x) = (1 - cos(x)) / 2.
    latitude_part = cos((lat_b - lat_a) * degrees_to_radians) / 2
    longitude_part = (
        cos(lat_a * degrees_to_radians) * cos(lat_b * degrees_to_radians)
        * (1 - cos((lon_b - lon_a) * degrees_to_radians)) / 2
    )
    haversine = 0.5 - latitude_part + longitude_part

    return round(sphere_diameter * asin(sqrt(haversine)), 4)
    
def generate_report(cities, output_path='./resources/reports/hotel_name_conflict_report.csv'):
    """Write a CSV comparing each hotel's name and location across its source records.

    For every city, each hotel returned by
    ``get_hotelbeds_expedia_hotels_for_city`` is looked up in the Expedia
    (``get_rapid_hotel``) and Hotelbeds (``get_hotel_beds_hotel``) collections.
    One row is written per hotel with both names and addresses, the distance
    between the two sets of coordinates, and two fuzzy name-match scores.

    Hotels whose row cannot be built (for example a lookup returned ``None`` or
    a field is missing) are skipped; a message identifying the hotel is printed.

    Args:
        cities: Iterable of city names to include in the report.
        output_path: Destination CSV file. Defaults to the path the script has
            always written to. Missing parent directories are created.

    Returns:
        None. The report is written to ``output_path``.
    """
    header = [
        'City',
        'Planning ID',
        'HotelbedsCode',
        'ExpediaHotelCode',
        'NameInHotelCollection',
        'NameInHotelBeds',
        'NameInExpedia',
        'HotelBeds_Address',
        'Expedia_Address',
        'DistanceBetweenHotels',
        'FuzzyScore-HotelvsHotelBeds',
        'FuzzyScore-HotelBedsvsExpedia',
    ]
    rows = []
    for city in cities:
        for hotel in get_hotelbeds_expedia_hotels_for_city(city=city):
            try:
                provider_codes = hotel['hotelCodeBySourceProvider']
                expedia_code = provider_codes['EXPEDIA']
                hotelbeds_code = provider_codes['HOTELBEDS_API']
                rapid_hotel = get_rapid_hotel(expedia_code)
                hotel_beds_hotel = get_hotel_beds_hotel(hotelbeds_code)
                rows.append([
                    city,
                    hotel['planningToolId'],
                    hotelbeds_code,
                    expedia_code,
                    hotel['name'],
                    hotel_beds_hotel['name'],
                    rapid_hotel['name'],
                    hotel_beds_hotel['address'],
                    rapid_hotel['address'],
                    get_distance_between_hotelbeds_expedia(
                        [hotel_beds_hotel['latitude'], hotel_beds_hotel['longitude']],
                        [rapid_hotel['latitude'], rapid_hotel['longitude']],
                    ),
                    get_fuzzy_score(hotel['name'], hotel_beds_hotel['name']),
                    get_fuzzy_score(hotel_beds_hotel['name'], rapid_hotel['name']),
                ])
            except Exception as e:
                # Best effort: one bad record must not abort the whole report,
                # but say which hotel was dropped so it can be investigated.
                print('Skipping hotel {!r} in {!r}: {!r}'.format(hotel.get('name'), city, e))

    # Create the target directory up front so the collected rows are not lost
    # to a missing folder at the very end of the run.
    report_dir = os.path.dirname(output_path)
    if report_dir:
        os.makedirs(report_dir, exist_ok=True)

    # Pass the header as column names; feeding it in as a data row makes
    # to_csv emit an extra "0,1,2,..." line above the real header.
    pd.DataFrame(rows, columns=header).to_csv(output_path, index=False)

# Runs as soon as this code is executed: build the report for every city in the Hotelbeds cities file.
generate_report(get_hotel_beds_cities())