In [1]:
import requests
import json
import pandas as pd
import logging
import time
import os
from tqdm import tqdm

### General Setup


In [2]:
# Send all log records (INFO and above) to a file next to the notebook.
logging.basicConfig(filename='nexus_mods_log.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# SECURITY: the API key used to be hardcoded in this cell. Any key that was ever
# saved in the notebook should be treated as leaked and revoked. The key is now
# read from the NEXUS_API_KEY environment variable so it never lands in the file.
API_KEY = os.environ.get("NEXUS_API_KEY", "")
if not API_KEY:
    logging.warning("NEXUS_API_KEY is not set; requests will be sent without an API key.")

# Headers attached to every API call made by the functions below.
HEADERS = {
    'apikey': API_KEY,
    'Accept': 'application/json'
}
GRAPHQL_URL = "https://api.nexusmods.com/v2/graphql"

# NOTE(review): these two limits are not referenced by any visible cell;
# presumably they describe the API quota — confirm before relying on them.
REQUEST_LIMIT = 2500
REQUESTS_PER_HOUR_LIMIT = 100

# File locations used by the collection pipeline.
AUTHORS_OUTPUT_FILE = "nexus_mods_authors.csv"       # collected author rows
AUTHORS_CHECKPOINT_FILE = "authors_checkpoint.json"  # user IDs already processed
USER_ID_LIST_FILE = "all_mod_users_id.txt"           # one user ID per line

#### Function for API limits

In [3]:
# Function to handle API rate limits
def rate_limited_request(url, params=None, timeout=30):
    """GET ``url`` with the shared API headers, waiting out HTTP 429 responses.

    Args:
        url: Address to request.
        params: Optional query-string parameters forwarded to ``requests.get``.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout exception. Without a timeout a stalled connection would
            block the notebook indefinitely.

    Returns:
        The first ``requests.Response`` whose status code is not 429.
    """
    while True:
        response = requests.get(url, headers=HEADERS, params=params, timeout=timeout)
        if response.status_code != 429:
            return response

        # Honour the server's Retry-After hint when it is a plain number of
        # seconds; otherwise (absent, or an HTTP date) fall back to 60 seconds.
        retry_after = response.headers.get("Retry-After", "")
        delay = max(1, int(retry_after)) if retry_after.isdigit() else 60
        logging.warning(f"Rate limit reached. Sleeping for {delay} seconds.")
        time.sleep(delay)

#### Functions for checkpoints

In [4]:
# Load checkpoint
def load_author_checkpoint():
    """Read the checkpoint stored at AUTHORS_CHECKPOINT_FILE.

    Returns:
        The parsed JSON content of the checkpoint file, or an empty dict when
        the file does not exist.
    """
    if not os.path.exists(AUTHORS_CHECKPOINT_FILE):
        return {}

    with open(AUTHORS_CHECKPOINT_FILE, 'r') as checkpoint_file:
        checkpoint = json.load(checkpoint_file)
    return checkpoint

In [5]:
# Save checkpoint
def save_author_checkpoint(data):
    """Write ``data`` as JSON to AUTHORS_CHECKPOINT_FILE without risking a torn file.

    The JSON is first written to a sibling temporary file and then moved over
    the real checkpoint with ``os.replace``. If the process is interrupted
    while dumping, the previous checkpoint is left untouched instead of being
    truncated to invalid JSON.

    Args:
        data: JSON-serialisable object to persist.
    """
    tmp_path = f"{AUTHORS_CHECKPOINT_FILE}.tmp"
    with open(tmp_path, 'w') as f:
        json.dump(data, f)
    # Swap the finished file into place in a single step.
    os.replace(tmp_path, AUTHORS_CHECKPOINT_FILE)

In [6]:
def extract_unique_user_ids(mods_csv_path="nexus_mods_mods.csv", output_path=None):
    """Extract the distinct ``member_id`` values from the mods CSV into a text file.

    Each cell of the ``user`` column is expected to hold the textual form of a
    Python dict containing a ``member_id`` key. Cells are parsed with
    ``ast.literal_eval``, which only accepts literals, so content of the CSV
    can never execute code (the previous ``eval`` call could).

    Args:
        mods_csv_path: CSV file to read; must contain a ``user`` column.
        output_path: File that receives one ID per line. Defaults to
            USER_ID_LIST_FILE when omitted.

    Raises:
        FileNotFoundError: If ``mods_csv_path`` does not exist.
        ValueError, SyntaxError: If a ``user`` cell is not a valid literal.
        KeyError: If a parsed ``user`` dict has no ``member_id`` key.
    """
    import ast  # local import: the notebook's import cell does not provide it

    if output_path is None:
        output_path = USER_ID_LIST_FILE

    mods_df = pd.read_csv(mods_csv_path)
    unique_user_ids = (
        mods_df['user']
        .dropna()  # rows without a user entry have nothing to parse
        .apply(lambda cell: ast.literal_eval(cell)['member_id'])
        .drop_duplicates()
    )

    with open(output_path, 'w') as f:
        f.writelines(f"{user_id}\n" for user_id in unique_user_ids)

    logging.info(f"Extracted {len(unique_user_ids)} unique user IDs.")

In [7]:
def load_user_ids():
    """Return the entries of USER_ID_LIST_FILE as a list of stripped strings.

    Every line of the file yields one list element with surrounding whitespace
    removed. An empty list is returned when the file does not exist.
    """
    if not os.path.exists(USER_ID_LIST_FILE):
        return []

    stripped_ids = []
    with open(USER_ID_LIST_FILE, 'r') as id_file:
        for raw_line in id_file:
            stripped_ids.append(raw_line.strip())
    return stripped_ids

#### User Query and Author Details

In [8]:
# GraphQL query that fetches one user by numeric ID.
#
# This cell used to assign USER_QUERY twice; only the second assignment took
# effect, and it contained literal <del>...</del> markup plus spreads of
# fragments that were never defined, so the document was not valid GraphQL.
# The fields that were struck out there (email, ipAddress,
# modAnalyticsByMonth, modAnalyticsForMonth, modsBlockedFromEarningDp,
# showActivityFeed) are omitted here.
# NOTE(review): the remaining field list is taken as-is from the previous
# definition — confirm it against the current API schema and the permissions
# of the API key in use.
USER_QUERY = """
query user($id: Int!) {
  user(id: $id) {
    about
    avatar
    banned
    blockedFromOptingInModsAt
    collectionCount
    contributedModCount
    country
    deleted
    donationsEnabled
    dpOptedIn
    endorsementsGiven
    fullPageNotificationCount
    hasGivenKudos
    imageCount
    isBlocked
    isTracked
    joined
    kudos
    lastActive
    legacyRoles
    memberId
    membershipRoles
    modCount
    moderationHistoryCount
    moderationJwt
    name
    ownedModCount
    paypal
    posts
    recognizedAuthor
    roles
    showLastActive
    uniqueModDownloads
    usernameLastChangedAt
    videoCount
    views
  }
}"""

In [9]:
def get_user_details(user_id, timeout=30, max_retries=3):
    """Fetch one user's profile through the GraphQL endpoint.

    Args:
        user_id: User identifier; converted with ``int()`` before being sent.
        timeout: Seconds to wait for the server on each attempt.
        max_retries: How many times an HTTP 429 response is retried (after a
            60 second pause) before giving up.

    Returns:
        The ``user`` object from the response's ``data`` section, or ``None``
        when the request fails, the status code is not 200, the body is not
        valid JSON, or the response carries no user.

    Raises:
        ValueError: If ``user_id`` cannot be converted to an integer.
    """
    variables = {"id": int(user_id)}

    for attempt in range(max_retries + 1):
        try:
            response = requests.post(
                GRAPHQL_URL,
                headers=HEADERS,
                json={"query": USER_QUERY, "variables": variables},
                timeout=timeout,
            )
        except requests.RequestException as exc:
            # Network-level failure: report it the same way as an HTTP failure.
            logging.error(f"Request for user ID {user_id} failed: {exc}")
            return None

        if response.status_code == 429 and attempt < max_retries:
            logging.warning("Rate limit reached. Sleeping for 60 seconds.")
            time.sleep(60)
            continue
        break

    if response.status_code != 200:
        logging.error(f"Failed to fetch details for user ID {user_id}: {response.status_code}")
        return None

    try:
        payload = response.json()
    except ValueError:
        logging.error(f"Response for user ID {user_id} is not valid JSON.")
        return None
    if not isinstance(payload, dict):
        logging.error(f"Unexpected response shape for user ID {user_id}.")
        return None

    if payload.get('errors'):
        logging.error(f"GraphQL errors for user ID {user_id}: {payload['errors']}")

    # "data" may be present but null when the query fails, so a plain
    # .get('data', {}) would hand back None and break the chained lookup.
    return (payload.get('data') or {}).get('user')



In [10]:
def _load_previous_author_rows():
    """Return rows already stored in AUTHORS_OUTPUT_FILE as a list of dicts ([] if none)."""
    if not os.path.exists(AUTHORS_OUTPUT_FILE):
        return []
    try:
        return pd.read_csv(AUTHORS_OUTPUT_FILE).to_dict('records')
    except pd.errors.EmptyDataError:
        # A file without any columns (e.g. written from an empty DataFrame).
        return []


def collect_authors():
    """Fetch details for every not-yet-processed user ID and persist them to CSV.

    Steps:
      1. Refresh the user ID list from the mods CSV. If that CSV is missing
         but an ID list file already exists, the existing list is reused;
         if neither exists the FileNotFoundError propagates.
      2. Skip IDs recorded in the checkpoint.
      3. For each remaining ID, fetch the profile, rewrite the output CSV
         (rows from earlier runs + rows from this run), then update the
         checkpoint. The CSV is written first so the checkpoint never marks
         a user as done whose row has not been saved.

    Returns:
        List of the user records fetched during this call (rows restored
        from a previous run are written to the CSV but not returned).
    """
    try:
        extract_unique_user_ids()  # Ensure unique IDs are extracted
    except FileNotFoundError as exc:
        if not os.path.exists(USER_ID_LIST_FILE):
            raise
        logging.warning(f"Could not refresh user IDs ({exc}); using existing {USER_ID_LIST_FILE}.")
    user_ids = load_user_ids()

    authors_checkpoint = load_author_checkpoint()
    # On a resumed run keep what earlier runs collected; otherwise the first
    # write below would overwrite the CSV with only this run's rows.
    previous_rows = _load_previous_author_rows() if authors_checkpoint else []
    authors_data = []

    for user_id in tqdm(user_ids, desc="Fetching author details"):
        if user_id in authors_checkpoint:
            logging.info(f"Skipping user ID {user_id}, already processed.")
            continue

        logging.info(f"Fetching data for user ID {user_id}")
        author_data = get_user_details(user_id)

        if author_data:
            authors_data.append(author_data)
            # Flush after every record so an interruption loses at most the
            # request in flight.
            pd.DataFrame(previous_rows + authors_data).to_csv(AUTHORS_OUTPUT_FILE, index=False)
            authors_checkpoint[user_id] = True
            save_author_checkpoint(authors_checkpoint)

        time.sleep(2)  # Respect API limits

    logging.info("Author data collection completed.")
    return authors_data



In [11]:
def main() -> None:
    """Entry point: run collect_authors() and discard its return value."""
    collect_authors()

In [12]:
# Start the pipeline only when this code runs as the top-level module.
if __name__ == "__main__":
    main()

FileNotFoundError: [Errno 2] No such file or directory: 'nexus_mods_mods.csv'