<a href="https://colab.research.google.com/github/tbbye/Temporal-Satisfaction/blob/main/User_review_pull.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
import time
import json
import re

# We need the VADER sentiment analysis tool from the NLTK library
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Download VADER lexicon data (only needed once in the notebook)
# This may take a few seconds on the first run.
nltk.download('vader_lexicon', quiet=True)

# --- CONFIGURATION ---
# Steam store application whose reviews are pulled (1245620 = Elden Ring).
APP_ID = 1245620
API_URL = f"https://store.steampowered.com/appreviews/{APP_ID}"

# Page cap for test runs: 10 pages x 100 reviews per page = at most 1000 reviews.
MAX_PAGES = 10

# Cut-offs applied to the VADER compound score. Both comparisons are inclusive:
# a score at or above POSITIVE_THRESHOLD counts as Positive, a score at or
# below NEGATIVE_THRESHOLD counts as Negative; anything in between is not counted.
POSITIVE_THRESHOLD = 0.2
NEGATIVE_THRESHOLD = -0.2

# Accumulates the text of every review returned by the API.
all_reviews_text = []

# Query parameters for the first request. 'cursor' is overwritten after each
# page with the value the API returns; '*' requests the first page.
params = dict(
    json=1,
    language='english',
    filter='recent',
    num_per_page=100,
    cursor='*',
)

# -------------------------------------------------------------
# STEP 1: LOOPING AND STORING ALL REVIEWS (Collection)
# -------------------------------------------------------------
# Pages through the review endpoint, appending each review's text to
# all_reviews_text. The loop ends when MAX_PAGES is reached, the API returns
# no cursor or no reviews, or any request/response problem occurs. Every
# abnormal stop prints its reason so a short result set is never silent.

# Without a timeout, requests.get() can block indefinitely on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 30

print(f"Starting review collection for App ID: {APP_ID}...")
page_count = 0

while params['cursor'] and page_count < MAX_PAGES:
    page_count += 1

    try:
        response = requests.get(API_URL, params=params, timeout=REQUEST_TIMEOUT_SECONDS)
    except requests.exceptions.RequestException as e:
        print(f"A connection error occurred: {e}")
        break

    if response.status_code != 200:
        print(f"Stopping: HTTP status {response.status_code} on page {page_count}.")
        break

    try:
        data = response.json()
    except ValueError as e:
        # JSON decode errors are ValueError subclasses, both in the standard
        # library and in requests' own JSONDecodeError.
        print(f"Stopping: response on page {page_count} was not valid JSON: {e}")
        break

    if data.get('success') != 1:
        print(f"Stopping: API reported success={data.get('success')!r} on page {page_count}.")
        break

    reviews_on_page = data.get('reviews', [])
    if not reviews_on_page:
        break

    # Skip entries that lack a text body instead of crashing with KeyError;
    # the later steps require every stored item to be a string.
    page_texts = [
        entry.get('review')
        for entry in reviews_on_page
        if isinstance(entry.get('review'), str)
    ]
    all_reviews_text.extend(page_texts)

    print(f"Collected {len(page_texts)} reviews. Total collected: {len(all_reviews_text)}")

    next_cursor = data.get('cursor')
    if next_cursor == params['cursor']:
        # An unchanged cursor would re-request the same page and append
        # duplicate reviews, so treat it as the end of the data.
        break

    params['cursor'] = next_cursor
    time.sleep(1)  # Be polite

# -------------------------------------------------------------
# STEP 2: FINDING "TIME" WORDS AND FILTERING
# -------------------------------------------------------------
# Keeps only the reviews that mention at least one time-related keyword.

# Words and phrases that mark a review as talking about time.
TIME_KEYWORDS = [
    "second", "seconds", "short playtime", "play time", "player time",
    "minute", "minutes", "long lifespan", "life span",
    "hour", "hours", "hourly", "length", "lengths", "lengthy", "limited time",
    "day", "days", "daily", "session", "sessions", "roadmap", "road map",
    "week", "weeks", "weekly", "season", "seasons", "seasonal",
    "month", "months", "monthly", "quarterly", "year", "years", "yearly",
    "annual", "replayable", "endless"
]

# Match keywords as whole words only. A bare substring search lets "day" hit
# "today"/"holiday" and "season" hit "seasoned". The lookarounds reject only
# adjacent ASCII letters (case-insensitively), so a keyword glued to a number
# such as "100hours" still matches, which \b would not allow.
keyword_pattern = re.compile(
    r'(?<![a-z])(?:' + '|'.join(re.escape(k) for k in TIME_KEYWORDS) + r')(?![a-z])',
    re.IGNORECASE,
)

print("\nStarting analysis: Filtering for time-centric language...")
time_centric_reviews = [
    text for text in all_reviews_text
    if keyword_pattern.search(text)
]

print(f"Total time-centric reviews found: {len(time_centric_reviews)}")

# -------------------------------------------------------------
# STEP 3: ATTITUDE (SENTIMENT) ANALYSIS
# -------------------------------------------------------------
# Scores every time-centric review with VADER, counts how many land at or
# beyond each threshold, and prints the positive/negative split.

# One analyzer instance is reused for every review.
analyzer = SentimentIntensityAnalyzer()

print("Starting sentiment analysis on filtered reviews...")

# 'compound' is the overall score: -1.0 is most negative, +1.0 is most positive.
compound_scores = [
    analyzer.polarity_scores(text)['compound'] for text in time_centric_reviews
]

# A review is counted at most once: the positive check takes precedence, and
# scores strictly between the two thresholds are left out of both tallies.
positive_time_count = sum(
    1 for score in compound_scores if score >= POSITIVE_THRESHOLD
)
negative_time_count = sum(
    1 for score in compound_scores
    if not score >= POSITIVE_THRESHOLD and score <= NEGATIVE_THRESHOLD
)

# Percentages are relative to the counted (positive + negative) reviews only.
total_analyzed = positive_time_count + negative_time_count

if total_analyzed > 0:
    positive_percent = (positive_time_count / total_analyzed) * 100
    negative_percent = (negative_time_count / total_analyzed) * 100
else:
    # Nothing was counted; report zeros rather than dividing by zero.
    positive_percent = 0
    negative_percent = 0

report_lines = [
    "\n==================================",
    "FINAL TIME-CENTRIC SENTIMENT RESULTS",
    "==================================",
    f"Total Time-Centric Reviews Analyzed (Pos/Neg): {total_analyzed}",
    f"Reviews Suggesting Time is WELL SPENT (Positive): {positive_time_count} ({positive_percent:.2f}%)",
    f"Reviews Suggesting Time is WASTED (Negative): {negative_time_count} ({negative_percent:.2f}%)",
    "==================================",
]
for line in report_lines:
    print(line)

Starting review collection for App ID: 1245620...
Collected 100 reviews. Total collected: 100
Collected 100 reviews. Total collected: 200
Collected 100 reviews. Total collected: 300
Collected 100 reviews. Total collected: 400
Collected 100 reviews. Total collected: 500
Collected 100 reviews. Total collected: 600
Collected 100 reviews. Total collected: 700
Collected 100 reviews. Total collected: 800
Collected 100 reviews. Total collected: 900
Collected 100 reviews. Total collected: 1000

Starting analysis: Filtering for time-centric language...
Total time-centric reviews found: 75
Starting sentiment analysis on filtered reviews...

FINAL TIME-CENTRIC SENTIMENT RESULTS
Total Time-Centric Reviews Analyzed (Pos/Neg): 66
Reviews Suggesting Time is WELL SPENT (Positive): 55 (83.33%)
Reviews Suggesting Time is WASTED (Negative): 11 (16.67%)
