## IMPORTS

In [26]:
import pandas as pd
import statistics as st
import sqlite3

## FETCH DATA FROM CSV

In [3]:
# Load the Booking and Trivago datasets from their CSV files.
# Additional datasets can be read the same way and then registered in websites_data.
booking, trivago = (
    pd.read_csv(csv_path)
    for csv_path in ('booking_nyc.csv', 'trivago_nyc.csv')
)

## FETCH DATA FROM DATABASE

In [27]:
# Load the Booking and Trivago tables from the SQLite database.
# Additional datasets can be read the same way and then registered in websites_data.
conn = sqlite3.connect('hotel_booking_nyc.db')
try:
    booking = pd.read_sql('SELECT * FROM booking_hotel_table', conn)
    trivago = pd.read_sql('SELECT * FROM trivago_hotel_table', conn)
finally:
    # read_sql has already materialised the rows into DataFrames, so the
    # connection is no longer needed; close it even if a query fails so the
    # database file handle is not leaked.
    conn.close()

## PRE-PROCESSING

In [28]:
# Registry of every website under comparison, keyed by website name.
# To include another dataset, add its DataFrame to this dictionary.
websites_data = dict(booking=booking, trivago=trivago)
# Website names, in registration order.
websites = [*websites_data]

def convert_price(price_str):
    """Convert a displayed price such as ``'$1,234'`` into an integer.

    Surrounding whitespace and thousands separators (commas) are removed, and
    any currency prefix in front of the first digit or sign is discarded.

    Args:
        price_str: Price text, e.g. ``'$1,234'``, ``'US$95'`` or ``'120'``.

    Returns:
        The price as an ``int``.

    Raises:
        ValueError: If the cleaned text is not a whole number (for example an
            empty string or a price with a decimal part).
    """
    cleaned_price = price_str.strip().replace(',', '')

    # Skip the currency prefix by content rather than by position: blindly
    # dropping the first character would turn an unprefixed '120' into 20 and
    # would fail on multi-character prefixes such as 'US$'.
    start = 0
    while start < len(cleaned_price) and cleaned_price[start] not in '0123456789+-':
        start += 1

    return int(cleaned_price[start:])

def count_elements(lst):
    """Count how often each star rating from 1 to 5 occurs in ``lst``.

    Args:
        lst: Iterable of star ratings.

    Returns:
        A tuple of ``(star, count)`` pairs for stars 1 through 5 in ascending
        order; a star that never occurs has a count of 0.
    """
    counts = {star: 0 for star in range(1, 6)}
    for element in lst:
        # Values outside 1-5 (e.g. 0 or NaN for a missing star) belong to no
        # bucket; skip them instead of failing with a KeyError.
        if element in counts:
            counts[element] += 1
    # The dict was built in ascending star order, so items() is already sorted.
    return tuple(counts.items())


## INVENTORY COMPARISON

1. Hotel listing score (Score out of 5)

In [40]:
def hotel_listing(data=None):
    """Score each website by the share of all known hotels it lists (out of 5).

    A website's score is ``5 * (distinct hotels on the website) / (distinct
    hotels across all websites)``. Each score is also printed.

    Args:
        data: Optional mapping of website name to a DataFrame with a
            ``'Hotel Name'`` column. When omitted, the module-level
            ``websites`` / ``websites_data`` are used.

    Returns:
        Dict mapping website name to its listing score. Every score is 0.0
        when no hotel is listed anywhere.
    """
    if data is None:
        data = {website: websites_data[website] for website in websites}

    # Use distinct, non-missing names per website: the denominator is a set of
    # distinct names, so counting repeated rows in the numerator could push a
    # score above 5.
    hotels_by_site = {
        website: set(frame['Hotel Name'].dropna())
        for website, frame in data.items()
    }
    all_hotels = set().union(*hotels_by_site.values())

    listing_score = {}
    for website, hotels in hotels_by_site.items():
        # Guard the division so an empty dataset yields 0.0 rather than a crash.
        listing_score[website] = (len(hotels) * 5) / len(all_hotels) if all_hotels else 0.0

    # Print listing score for each website
    for website, score in listing_score.items():
        print(f"The listing score for {website} is {score}")

    return listing_score


2. Price variability score (Score out of 5)

In [41]:
def compare_price_variability(data=None):
    """Score each website by how widely its prices vary (out of 5).

    The score is the coefficient of variation of the website's prices (sample
    standard deviation divided by mean) multiplied by 5. Each score is also
    printed.

    Args:
        data: Optional mapping of website name to a DataFrame with a
            ``'Price'`` column of price strings accepted by ``convert_price``.
            When omitted, the module-level ``websites`` / ``websites_data``
            are used.

    Returns:
        Dict mapping website name to its price variability score. A website
        with fewer than two prices, or a mean price of zero, scores 0.0.
    """
    if data is None:
        data = {website: websites_data[website] for website in websites}

    price_variability_score = {}
    for website, frame in data.items():
        prices = [convert_price(price) for price in frame['Price'].tolist()]

        # statistics.stdev needs at least two data points, and the ratio is
        # undefined for a zero mean; treat both as "no measurable variability".
        mean_price = st.mean(prices) if prices else 0
        if len(prices) < 2 or mean_price == 0:
            price_variability_score[website] = 0.0
            continue

        coefficient_of_variation = st.stdev(prices) / mean_price
        # The ratio can exceed 1 for very skewed prices; cap it so the score
        # stays on the 0-5 scale that the final "out of 20" total assumes.
        price_variability_score[website] = min(coefficient_of_variation, 1) * 5

    # Print price variability score for each website
    for website, score in price_variability_score.items():
        print(f"The price variability score for {website} is {score}")

    return price_variability_score

3. Star coverage score (Score out of 5)

In [42]:
def star_coverage(data=None):
    """Score each website by how well it covers every star level (out of 5).

    For each star level 1-5, the website's count of distinct
    ``(hotel name, star)`` pairs is divided by the count of such pairs across
    all websites; the five ratios are summed. Each score is also printed.

    Args:
        data: Optional mapping of website name to a DataFrame with
            ``'Hotel Name'`` and ``'Star'`` columns. When omitted, the
            module-level ``websites`` / ``websites_data`` are used.

    Returns:
        Dict mapping website name to its star coverage score. A star level
        with no hotels on any website contributes 0 to every score.
    """
    if data is None:
        data = {website: websites_data[website] for website in websites}

    hotel_star_pairs = {}
    all_hotel_star_pairs = set()
    for website, frame in data.items():
        # Deduplicate per website too: the combined pool is a set, so a
        # repeated row would otherwise push a per-star ratio above 1.
        hotel_star_pairs[website] = set(zip(frame['Hotel Name'], frame['Star']))
        all_hotel_star_pairs |= hotel_star_pairs[website]

    # Count star ratings for the combined pool (all websites)
    overall_counts = count_elements([star for _, star in all_hotel_star_pairs])

    # Calculate coverage scores for each website
    star_coverage_scores = {}
    for website, pairs in hotel_star_pairs.items():
        site_counts = count_elements([star for _, star in pairs])
        star_coverage_scores[website] = sum(
            (
                site_count / overall_count
                for (_, site_count), (_, overall_count) in zip(site_counts, overall_counts)
                # Skip star levels nobody lists to avoid dividing by zero.
                if overall_count
            ),
            0.0,
        )

    # Print star coverage score for each website
    for website, score in star_coverage_scores.items():
        print(f"The star coverage score for {website} is {score}")

    return star_coverage_scores
    


4. Average rating score (Score out of 5)

In [43]:
def average_rating(data=None):
    """Score each website by the mean rating of its rated hotels (out of 5).

    Ratings equal to 0 and missing (NaN) ratings are treated as "not rated"
    and excluded. The mean of the remaining ratings is halved, which maps a
    0-10 scale onto 0-5 (presumably the source ratings are out of 10; verify
    against the datasets). Each score is also printed.

    Args:
        data: Optional mapping of website name to a DataFrame with a
            ``'Rating'`` column of values convertible to ``float``. When
            omitted, the module-level ``websites`` / ``websites_data`` are
            used.

    Returns:
        Dict mapping website name to its average rating score. A website with
        no rated hotel scores 0.0.
    """
    import math  # function-scope import keeps the file's import cell untouched

    if data is None:
        data = {website: websites_data[website] for website in websites}

    rating_score_scores = {}
    for website, frame in data.items():
        ratings = [float(x) for x in frame['Rating']]
        # Drop unrated entries: zeros must not dilute the mean, and a single
        # NaN would otherwise turn the whole sum into NaN.
        rated = [rating for rating in ratings if rating != 0 and not math.isnan(rating)]
        # Guard the division so a website without any rating yields 0.0.
        rating_score_scores[website] = sum(rated) / (2 * len(rated)) if rated else 0.0

    # Print average rating score for each website
    for website, score in rating_score_scores.items():
        print(f"The average rating score for {website} is {score}")

    return rating_score_scores

## FINAL SCORE

In [45]:
# Compute the four component scores; each call also prints its own results.
hotel_listing_score = hotel_listing()
price_variablity_score = compare_price_variability()
star_coverage_score = star_coverage()
average_rating_score = average_rating()

# Add the four component scores together for every website.
final_score = {}
for website in websites:
    final_score[website] = sum(
        component[website]
        for component in (
            hotel_listing_score,
            price_variablity_score,
            star_coverage_score,
            average_rating_score,
        )
    )

The listing score for booking is 3.8984674329501914
The listing score for trivago is 4.262452107279693
The price variability score for booking is 3.216268167056776
The price variability score for trivago is 2.8535144043397405
The star coverage score for booking is 3.5270542276573407
The star coverage score for trivago is 4.119924340683095
The average rating score for booking is 3.974543080939948
The average rating score for trivago is 4.133146067415731


In [47]:
# Report the combined inventory score of each website, to five decimals.
for website in final_score:
    score = final_score[website]
    print(f"The final score regarding superior inventory for {website} is {score:.5f} out of 20")

The final score regarding superior inventory for booking is 14.61633 out of 20
The final score regarding superior inventory for trivago is 15.36904 out of 20
