In [1]:
import json
import os

import pandas as pd
import requests

In [2]:
def _review_to_row(node):
    """Flatten one GraphQL ``Review`` node into a single DataFrame row dict.

    ``creator``, ``shelving``, ``shelf`` and ``taggings`` are treated as
    optional: a missing or null object yields ``None`` (or an empty tag list)
    for the affected columns instead of raising.
    """
    creator = node.get("creator") or {}
    shelving = node.get("shelving") or {}
    shelf = shelving.get("shelf") or {}
    taggings = shelving.get("taggings") or []
    return {
        "Reviewer Name": creator.get("name"),
        "Reviewer Avatar": creator.get("imageUrlSquare"),
        "Comment": node.get("text"),
        "Creation Date": node.get("createdAt"),
        "Rating": node.get("rating"),
        "Number of Likes": node.get("likeCount"),
        "Profile URL": creator.get("webUrl"),
        "Shelf Name": shelf.get("name"),
        # Skip tagging entries that carry no tag object.
        "Tag Names": [
            tagging["tag"].get("name")
            for tagging in taggings
            if tagging and tagging.get("tag")
        ],
    }


def extract_review_data(book_id, num_pages, api_key, page_size=30, timeout=30):
    """Fetch up to ``num_pages`` pages of reviews and return them as a DataFrame.

    Sends the ``getReviews`` GraphQL query to the AppSync endpoint once per
    page, following ``pageInfo.nextPageToken`` until either ``num_pages``
    requests have been made or the server reports no further page.

    Parameters
    ----------
    book_id : str
        Value sent as ``filters.resourceId`` (with ``resourceType`` "WORK").
    num_pages : int
        Maximum number of pages to request. ``0`` performs no request and
        returns an empty frame with the expected columns.
    api_key : str
        Value sent in the ``X-Api-Key`` request header.
    page_size : int, optional
        Value sent as ``pagination.limit`` (default 30, the original value).
    timeout : float or tuple, optional
        Passed to ``requests.post`` so a stalled connection cannot hang the
        kernel indefinitely (default 30 seconds).

    Returns
    -------
    pandas.DataFrame
        One row per review with the columns "Reviewer Name",
        "Reviewer Avatar", "Comment", "Creation Date", "Rating",
        "Number of Likes", "Profile URL", "Shelf Name" and "Tag Names"
        (a list of tag names per review).

    Raises
    ------
    requests.HTTPError
        If the endpoint answers with a 4xx/5xx status.
    RuntimeError
        If the response carries no ``data.getReviews`` object (for example
        when the GraphQL layer reports errors).
    """
    # GraphQL endpoint URL
    url = 'https://kxbwmqov6jgg3daaamb744ycu4.appsync-api.us-east-1.amazonaws.com/graphql'

    # Output column order of the resulting DataFrame
    columns = [
        "Reviewer Name",
        "Reviewer Avatar",
        "Comment",
        "Creation Date",
        "Rating",
        "Number of Likes",
        "Profile URL",
        "Shelf Name",
        "Tag Names",
    ]

    # GraphQL query
    query = """
        query getReviews($filters: BookReviewsFilterInput!, $pagination: PaginationInput) {
          getReviews(filters: $filters, pagination: $pagination) {
            ...BookReviewsFragment
            __typename
          }
        }

        fragment BookReviewsFragment on BookReviewsConnection {
          totalCount
          edges {
            node {
              ...ReviewCardFragment
              __typename
            }
            __typename
          }
          pageInfo {
            prevPageToken
            nextPageToken
            __typename
          }
          __typename
        }

        fragment ReviewCardFragment on Review {
          __typename
          id
          creator {
            ...ReviewerProfileFragment
            __typename
          }
          recommendFor
          updatedAt
          createdAt
          spoilerStatus
          lastRevisionAt
          text
          rating
          shelving {
            shelf {
              name
              webUrl
              __typename
            }
            taggings {
              tag {
                name
                webUrl
                __typename
              }
              __typename
            }
            webUrl
            __typename
          }
          likeCount
          viewerHasLiked
          commentCount
        }

        fragment ReviewerProfileFragment on User {
          id: legacyId
          imageUrlSquare
          isAuthor
          ...SocialUserFragment
          textReviewsCount
          viewerRelationshipStatus {
            isBlockedByViewer
            __typename
          }
          name
          webUrl
          contributor {
            id
            works {
              totalCount
              __typename
            }
            __typename
          }
          __typename
        }

        fragment SocialUserFragment on User {
          viewerRelationshipStatus {
            isFollowing
            isFriend
            __typename
          }
          followersCount
          __typename
        }
    """

    # Headers
    headers = {
        "Content-Type": "application/json",
        "X-Api-Key": api_key
    }

    rows = []
    page_token = None

    # Loop through the specified number of pages
    for _ in range(num_pages):
        # Build fresh variables per request; only follow-up pages carry a token.
        pagination = {"limit": page_size}
        if page_token:
            pagination["after"] = page_token
        variables = {
            "filters": {
                "resourceType": "WORK",
                "resourceId": book_id
            },
            "pagination": pagination
        }

        # GraphQL request
        response = requests.post(
            url,
            json={
                "operationName": "getReviews",
                "variables": variables,
                "query": query
            },
            headers=headers,
            timeout=timeout,
        )
        # Fail loudly on HTTP errors instead of trying to parse an error body.
        response.raise_for_status()
        payload = response.json()

        # GraphQL can answer 200 with ``data: null`` plus an ``errors`` list.
        connection = (payload.get("data") or {}).get("getReviews")
        if connection is None:
            raise RuntimeError(
                f"GraphQL getReviews request returned no data: {payload.get('errors')}"
            )

        # Extract data from current page
        for edge in connection.get("edges") or []:
            node = (edge or {}).get("node")
            if node:
                rows.append(_review_to_row(node))

        # Break loop if there are no more pages
        page_token = (connection.get("pageInfo") or {}).get("nextPageToken")
        if not page_token:
            break

    # Create DataFrame (explicit columns keep the schema stable when empty)
    return pd.DataFrame(rows, columns=columns)

In [3]:
# Example usage
book_id = "kca://work/amzn1.gr.work.v1.5NB0xZVy7hlyaO9xdCptrg"
num_pages = 3  # Extract data from 3 pages
# Credentials should not live in the notebook: take the key from the
# GOODREADS_API_KEY environment variable when it is set. The literal fallback
# only keeps "Restart & Run All" working as before and should be dropped once
# the variable is configured.
api_key = os.environ.get("GOODREADS_API_KEY", "da2-xpgsdydkbregjhpr6ejzqdhuwy")
review_data = extract_review_data(book_id, num_pages, api_key)
display(review_data.head())

Unnamed: 0,Reviewer Name,Reviewer Avatar,Comment,Creation Date,Rating,Number of Likes,Profile URL,Shelf Name,Tag Names
0,Jason Fella,https://i.gr-assets.com/images/S/compressed.ph...,"Just finishing the book now, and I have some v...",1510164000000.0,3,435,https://www.goodreads.com/user/show/22966655-j...,read,[]
1,Steffan Bard,https://i.gr-assets.com/images/S/compressed.ph...,There are parts of me that really want to buy ...,1528672000000.0,2,150,https://www.goodreads.com/user/show/20176950-s...,read,[]
2,May Ling,https://i.gr-assets.com/images/S/compressed.ph...,Summary: You will either love or hate this boo...,1574785000000.0,5,115,https://www.goodreads.com/user/show/1667021-ma...,read,[cognitive-neuroscience]
3,Khalid Abdul-Mumin,https://i.gr-assets.com/images/S/compressed.ph...,<b>Profound</b> <i>and</i> <b>Poignant ancient...,1670073000000.0,5,75,https://www.goodreads.com/user/show/94611523-k...,read,"[reviewed-reads, my-favorites-non-fiction, fav..."
4,Benjamin Hare,https://i.gr-assets.com/images/S/compressed.ph...,A new coat of paint on a very old pseudoscienc...,1545815000000.0,1,59,https://www.goodreads.com/user/show/69050333-b...,read,[]


In [4]:
# Full text of the first review: select the column, then the first row.
review_data["Comment"].iloc[0]

'Just finishing the book now, and I have some very mixed feelings about it so far. I have one of the doc\'s other books, and I do love how he tackles each subject with such focus and detail, and backs it up with science whenever possible. And he does have a lot of data to support his claims, as well as citing some very interesting studies.<br />First off, I\'m surprised to see so many glowing, 5-star reviews. I know a lot of people who\'ve attended his seminars are reviewing the book, but this whole process takes a lot of time and practice. In the book, he makes it sound like this will be a relatively quick, easy process, and I think he is setting up people for frustration and failure. As an experienced meditator myself, I can say I\'ve only achieved the state he says you need to be in to do most of this work, a few times ever. And I\'ve been meditating seriously for 10 years. He says we need to become "pure consciousness. No thing, no one, no where, no time. Completely take our awaren