In [14]:
# Import necessary libraries
import json
from datetime import datetime

# Define the number of latest reviews to extract
n_latest_reviews = 3

def extract_latest_n_reviews(user_data, n):
    """
    Extract the latest n reviews for a user based on the 'timestamp' field.

    Parameters:
    - user_data: A dictionary containing user information, including their reviews.
    - n: Number of latest reviews to extract.

    Returns:
    - List of the latest n reviews.
    """
    # Sort reviews by timestamp in descending order
    sorted_reviews = sorted(user_data['reviews'], key=lambda x: x['timestamp'], reverse=True)
    return sorted_reviews

def print_review_text_lengths(data_path):
    """
    Reads the dataset and prints the length of the review text for each user.

    Parameters:
    - data_path: Path to the dataset JSON file.
    """
    # Load the dataset
    with open(data_path, 'r', encoding='utf-8') as f:
        input_set = json.load(f)

    # Ensure that input_set is a list
    if not isinstance(input_set, list):
        raise ValueError("The dataset should be a list of user data.")
    number = 0
    # Iterate over each user
    for user_index, user_data in enumerate(input_set):
        user_id = user_data.get('user_id', f'User_{user_index+1}')

        # Check if the user has reviews
        if not user_data.get('reviews'):
            print(f"User ID: {user_id} has no reviews.")
            continue

        # Extract latest n reviews
        latest_reviews = extract_latest_n_reviews(user_data, n_latest_reviews)

        # Generate review_text
        review_text = "\n".join([
            f"Product: {rev['product_name']}\nReview: {rev['text']}"
            for rev in latest_reviews
        ])

        # Compute the length of review_text
        text_length = len(review_text)

        # Print the user_id and the length of the review_text
        print(f"ID: {user_index+1}User ID: {user_id}, Review Text Length: {text_length}")




In [15]:
# Specify the path to your dataset
dataset_path = 'data/train_val_user_reviews.json'



# Call the function to print review text lengths
print_review_text_lengths(dataset_path)


ID: 1User ID: AFSKPY37N3C43SOI5IEXEK5JSIYA, Review Text Length: 3783
ID: 2User ID: AHV6QCNBJNSGLATP56JAWJ3C4G2A, Review Text Length: 8949
ID: 3User ID: AFJBKPK5W56XWSNPQU2WW66ISWYQ, Review Text Length: 5298
ID: 4User ID: AFXF3EGQTQDXMRLDWFU7UBFQZB7Q, Review Text Length: 26014
ID: 5User ID: AFWVN52MRBWOTIK7UGXBWGOY4HBA, Review Text Length: 6799
ID: 6User ID: AFQQQ5LGNSQUEBGDCYBAZZE5T3DA, Review Text Length: 6729
ID: 7User ID: AGAM2CCKV52HI4YZU7ASZTSXA7YQ, Review Text Length: 23150
ID: 8User ID: AF2BLE54TEMGZ546U763ZHZRXC4A, Review Text Length: 54181
ID: 9User ID: AGZZXSMMS4WRHHJRBUJZI4FZDHKQ, Review Text Length: 3367
ID: 10User ID: AGD25H7BIT2JUXSIOPYCYB23J3ZQ, Review Text Length: 3016
ID: 11User ID: AEXGISIVX7WBUNI7UHHERVB3DF7Q, Review Text Length: 3126
ID: 12User ID: AEZP6Z2C5AVQDZAJECQYZWQRNG3Q, Review Text Length: 60816
ID: 13User ID: AGTW6ZGPUAORQ7X6CNBP6PJW7OTA, Review Text Length: 4973
ID: 14User ID: AHALZ7AKVAVL7QEVBCI55JVLGXOQ, Review Text Length: 4511
ID: 15User ID: AHTLWVDXSM

: 