In [20]:
import requests
import json
import os
import csv
from datetime import datetime
#page size should not be greater than 70
def fetch_hotel_reviews(hotel_id, page, page_size, timeout=30):
    """
    Request one page of review comments for a hotel from Agoda's
    ReviewComments endpoint and return the decoded JSON body.

    Args:
        hotel_id: Hotel identifier, sent as "hotelId".
        page: Page number, sent as "page".
        page_size: Reviews requested per page, sent as "pageSize". The
            notebook author notes this should not be greater than 70.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, `requests` can block indefinitely on a stalled connection.

    Returns:
        The parsed JSON response (the notebook reads its "comments" entry).

    Raises:
        requests.exceptions.Timeout: The server did not answer within `timeout`.
        requests.exceptions.HTTPError: The server answered with a 4xx/5xx status.
        ValueError: The response body is not valid JSON.
    """
    url = "https://www.agoda.com/api/cronos/property/review/ReviewComments"

    payload = json.dumps({
        "hotelId": hotel_id,
        "providerId": 332,
        "demographicId": 0,
        "page": page,
        "pageSize": page_size,
        # NOTE(review): presumably 1 means "most recent first" — confirm; the
        # early stop in filter_reviews only makes sense with that ordering.
        "sorting": 1,
        "providerIds": [332],
        "isReviewPage": True,
        "isCrawlablePage": True,
        # NOTE(review): language id 1 looks like English — confirm.
        "filters": {"language": [1], "room": []},
        "searchFilters": []
    })

    # Session cookies are credentials: prefer supplying one through the
    # AGODA_COOKIE environment variable instead of editing the notebook.
    # The original literal is kept as the fallback so existing runs still work.
    cookie = os.environ.get("AGODA_COOKIE") or 'ASP.NET_SessionId=r01pqkvf3c243snp0rsgwwvb; ...'

    headers = {
        'content-type': 'application/json; charset=UTF-8',
        'cookie': cookie
    }

    response = requests.post(url, headers=headers, data=payload, timeout=timeout)

    # Fail loudly on an HTTP error instead of letting an error payload be
    # mistaken for "no reviews left" by the caller.
    response.raise_for_status()

    return response.json()

# Quick manual check: fetch the first page (page size 70) for hotel 2838309
# and show whatever the response carries under "comments".
reviews = fetch_hotel_reviews(hotel_id=2838309, page=1, page_size=70)
print(reviews.get("comments", []))

[{'isHelpfulComment': False, 'isReviewVoted': False, 'isShowReviewResponse': True, 'isShowReviewResponseTranslateButton': False, 'isShowReviewTranslateButton': False, 'helpfulVotes': 0, 'responseLanguageId': 1, 'unHelpfulVotes': 0, 'hotelReviewId': 829819335, 'providerId': 332, 'rating': 10.0, 'checkInDateMonthAndYear': 'June 2024', 'encryptedReviewData': 'Bsy6X1GdH1zJ83FXXwHDag==', 'formattedRating': '10.0', 'formattedReviewDate': 'July 05, 2024', 'formattedReviewHelpfulText': 'Did you find this review helpful?', 'ratingText': 'Exceptional', 'responderName': 'bai Hotel', 'responseDateText': 'Responded July 06, 2024', 'responseText': 'Dear Kristy, \n\nThank you for your kind words! We are delighted to hear that you had a great experience from checking in to check out. Your satisfaction is our priority. \n\nYour Host in the South, bai Hotel Cebu', 'responseTranslateSource': 'en', 'reviewComments': 'From checking in to Check out! Everything was great!', 'reviewNegatives': '', 'reviewPosi

In [21]:
def filter_reviews(reviews, min_year=2022, max_year=2024):
    """
    Keep only English reviews dated within [min_year, max_year].

    Reviews are scanned in the order given. A review dated before `min_year`
    stops the scan and sets the returned flag so the caller can stop paging.
    A review dated after `max_year` is skipped without stopping; previously
    such a review ended the whole scrape before any in-range review was seen.

    NOTE(review): the early stop presumes reviews arrive newest-first —
    confirm against the "sorting" value sent by the fetcher.

    Args:
        reviews: Iterable of review dicts; reads the 'translateSource' and
            'reviewDate' keys. `None` is treated as an empty list.
        min_year: Earliest year to keep (inclusive). Defaults to 2022.
        max_year: Latest year to keep (inclusive). Defaults to 2024.

    Returns:
        (filtered_reviews, break_detected): the kept review dicts, and True
        when a review older than `min_year` ended the scan.
    """
    filtered_reviews = []
    break_detected = False

    for review in reviews or []:
        # `or ''` also covers an explicit None value, which the default
        # argument of dict.get() would not.
        if not (review.get('translateSource') or '').startswith('en'):
            print("not English review")
            continue

        try:
            review_date = datetime.strptime(review.get('reviewDate'), "%Y-%m-%dT%H:%M:%S%z")
        except (TypeError, ValueError):
            # Missing date (TypeError) or unexpected format (ValueError):
            # skip this review rather than abort the page.
            continue

        if review_date.year > max_year:
            # Newer than the window: skip it, in-range reviews may follow.
            continue

        if review_date.year < min_year:
            print("old reviews left")
            break_detected = True
            break

        filtered_reviews.append(review)

    return filtered_reviews, break_detected

def write_reviews_to_csv(csv_file, reviews):
    """
    Append one CSV row per review to `csv_file` and return the row count.

    Each row is [review id, review content, rating, review date], where the
    content is the comment text followed by the positives and negatives,
    separated by "\\n ". Missing or None text fields are written as empty
    strings rather than the literal text "None".

    Args:
        csv_file: Path of the CSV file; opened in append mode (created if absent).
        reviews: Iterable of review dicts; reads 'hotelReviewId',
            'reviewComments', 'reviewPositives', 'reviewNegatives', 'rating'
            and 'reviewDate'.

    Returns:
        Number of rows appended.
    """
    rows = []
    for review in reviews:
        # `or ''` covers both a missing key and an explicit None value.
        comments = review.get('reviewComments') or ''
        positives = review.get('reviewPositives') or ''
        negatives = review.get('reviewNegatives') or ''

        # Concatenate positives and negatives to comments
        review_content = f"{comments}\n {positives}\n {negatives}"

        rows.append([
            review.get('hotelReviewId'),
            review_content,
            review.get('rating'),
            review.get('reviewDate'),
        ])

    # newline='' lets the csv module control line endings, which keeps the
    # embedded "\n" in the content column intact.
    with open(csv_file, mode='a', newline='', encoding='utf-8') as file:
        csv.writer(file).writerows(rows)

    return len(rows)

def check_and_write_headers(csv_file, headers):
    """
    Make sure `csv_file` starts with a header row.

    The header is written when the file does not exist yet, or when it exists
    but is empty (0 bytes). A file that already has content is left untouched.

    Args:
        csv_file: Path of the CSV file.
        headers: Sequence of column names written as the first row.
    """
    needs_header = not os.path.isfile(csv_file) or os.path.getsize(csv_file) == 0
    if needs_header:
        with open(csv_file, mode='a', newline='', encoding='utf-8') as file:
            csv.writer(file).writerow(headers)

In [22]:
def _scrape_hotel_to_csv(hotel_id, csv_file, page_size):
    """Page through one hotel's reviews, append the kept ones to csv_file, return rows written."""
    total_reviews_written = 0
    page = 1

    while True:
        print(f"Page: {page}")
        # Fetch reviews
        response_data = fetch_hotel_reviews(hotel_id, page, page_size)
        reviews = response_data.get('comments') or []

        if not reviews:
            print('no reviews left')
            break

        # Filter reviews
        filtered_reviews, break_detected = filter_reviews(reviews)

        # Write reviews to CSV and add the count of rows written
        total_reviews_written += write_reviews_to_csv(csv_file, filtered_reviews)

        # The filter signals that it reached reviews older than the window,
        # so later pages are not requested.
        if break_detected:
            break
        page += 1

    return total_reviews_written


def main(hotels=None, output_dir="agoda_hotel_reviews", page_size=70):
    """
    Scrape reviews for each hotel and save them to one CSV file per hotel.

    Files are named "<hotel name, lower-cased, spaces as underscores>_reviews_2022_2024.csv"
    inside `output_dir`. Rows are appended, and the header is only written for
    a new file, so running this again adds the same reviews a second time
    unless the old files are removed first.

    Args:
        hotels: List of {"name": ..., "id": ...} dicts. Defaults to the six
            Cebu hotels listed below.
        output_dir: Directory for the CSV files; created if missing.
        page_size: Reviews requested per page. Defaults to 70.
    """
    # Define the headers for the CSV file
    headers = ["Review ID", "Review Content", "Review Score", "Review Time"]

    if hotels is None:
        # List of hotels with names and IDs
        hotels = [
            {"name": "Waterfront Hotel and Casino", "id": 52109},
            {"name": "Seda Ayala Center Cebu", "id": 5240124},
            {"name": "bai Hotel", "id": 2838309},
            {"name": "Fairfield by Marriott Cebu", "id": 46250538},
            {"name": "Jpark Island Resort and Waterpark", "id": 164680},
            {"name": "Dusit Thani Mactan", "id": 6532356},
        ]

    # Directory to save CSV files
    os.makedirs(output_dir, exist_ok=True)

    for hotel in hotels:
        hotel_name = hotel["name"]
        hotel_id = hotel["id"]
        csv_file = os.path.join(output_dir, f"{hotel_name.replace(' ', '_').lower()}_reviews_2022_2024.csv")

        check_and_write_headers(csv_file, headers)

        print(f"Scraping {hotel_name}")
        total_reviews_written = _scrape_hotel_to_csv(hotel_id, csv_file, page_size)

        print(f"English reviews from 2022 to 2024 for hotel {hotel_name} have been written to {csv_file}")
        print(f"Total number of rows written for hotel {hotel_name}: {total_reviews_written}")

# Entry point: run the full scrape when this code is executed as the main
# module. The captured output after this cell shows the guard passes when the
# cell is run in the notebook, so executing the cell starts the scrape.
if __name__ == "__main__":
    main()

Scraping Waterfront Hotel and Casino
Page: 1
Page: 2
Page: 3
Page: 4
Page: 5
Page: 6
Page: 7
Page: 8
Page: 9
Page: 10
Page: 11
Page: 12
Page: 13
Page: 14
Page: 15
Page: 16
Page: 17
Page: 18
old reviews left
English reviews from 2022 to 2024 for hotel Waterfront Hotel and Casino have been written to agoda_hotel_reviews\waterfront_hotel_and_casino_reviews_2022_2024.csv
Total number of rows written for hotel Waterfront Hotel and Casino: 1195
Scraping Seda Ayala Center Cebu
Page: 1
Page: 2
Page: 3
Page: 4
Page: 5
Page: 6
not English review
Page: 7
Page: 8
Page: 9
Page: 10
Page: 11
Page: 12
Page: 13
Page: 14
old reviews left
English reviews from 2022 to 2024 for hotel Seda Ayala Center Cebu have been written to agoda_hotel_reviews\seda_ayala_center_cebu_reviews_2022_2024.csv
Total number of rows written for hotel Seda Ayala Center Cebu: 923
Scraping bai Hotel
Page: 1
Page: 2
Page: 3
Page: 4
Page: 5
Page: 6
Page: 7
Page: 8
Page: 9
Page: 10
Page: 11
Page: 12
Page: 13
Page: 14
Page: 15
Page: 1