In [27]:
import requests
import json
import pandas as pd
import logging
import time
import os
from tqdm import tqdm
import threading

### General Set Up


In [28]:
# Log to a file so a long-running collection leaves a timestamped trail.
logging.basicConfig(filename='nexus_mods_log.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# The API key is read from the environment so the secret is never stored in
# the notebook. Set NEXUS_API_KEY before starting the kernel.
API_KEY = os.environ.get("NEXUS_API_KEY", "")
if not API_KEY:
    logging.warning("NEXUS_API_KEY is not set; requests will be sent without a valid API key.")

# Headers attached to every API request made by rate_limited_request.
HEADERS = {
    'apikey': API_KEY,
    'Accept': 'application/json'
}
BASE_URL = "https://api.nexusmods.com/v1/"
GRAPHQL_URL = "https://api.nexusmods.com/v2/graphql"
# NOTE(review): the two limits below are not referenced by the code visible in
# this notebook; confirm whether they are still needed.
REQUEST_LIMIT = 2500
REQUESTS_PER_HOUR_LIMIT = 100

# Final CSV written by main() and read back by load_existing_data().
MODS_OUTPUT_FILE = "nexus_mods_mods.csv"
# JSON file recording which "<game_domain>_<mod_id>" keys were already processed.
CHECKPOINT_FILE = "mods_checkpoint.json"
# Prefix of the numbered CSV files produced by save_data_with_limit().
MODS_OUTPUT_FILE_PREFIX = "nexus_mods_mods"
# Size threshold, in KiB, that save_data_with_limit() compares each file against.
MAX_FILE_SIZE_KB = 100000

#### Function for API limits

In [29]:
# Function to handle API rate limits
def rate_limited_request(url, params=None, timeout=30, default_wait=60):
    """GET ``url`` with the shared headers, waiting and retrying on HTTP 429.

    Args:
        url: Full URL to request.
        params: Optional query parameters forwarded to ``requests.get``.
        timeout: Seconds to wait for the server before ``requests`` raises,
            so a stalled connection cannot hang the collection forever.
        default_wait: Seconds to sleep after a 429 when the response carries
            no usable ``Retry-After`` header.

    Returns:
        The first response whose status code is not 429.

    Raises:
        requests.exceptions.RequestException: on connection errors or timeout.
    """
    while True:
        response = requests.get(url, headers=HEADERS, params=params, timeout=timeout)
        if response.status_code != 429:
            return response

        # Prefer the server's Retry-After hint when it is a plain number of
        # seconds; otherwise fall back to the fixed default wait.
        wait_seconds = default_wait
        retry_after = response.headers.get("Retry-After") or ""
        if retry_after.strip().isdigit():
            # Never sleep for less than one second, to avoid a tight retry loop.
            wait_seconds = max(1, int(retry_after.strip()))

        logging.warning(f"Rate limit reached. Sleeping for {wait_seconds} seconds.")
        time.sleep(wait_seconds)

#### Functions for checkpoints

In [30]:
# Load checkpoint
def load_checkpoint(path=None):
    """Return the saved checkpoint mapping, or ``{}`` when none is usable.

    Args:
        path: Checkpoint file to read. Defaults to ``CHECKPOINT_FILE``.

    Returns:
        The dict stored in the checkpoint file. An empty dict is returned when
        the file is missing, is not valid JSON (for example, truncated by an
        interrupted write), or does not contain a JSON object.
    """
    checkpoint_path = CHECKPOINT_FILE if path is None else path
    if not os.path.exists(checkpoint_path):
        return {}

    try:
        with open(checkpoint_path, 'r') as f:
            data = json.load(f)
    except json.JSONDecodeError as exc:
        # A corrupt checkpoint should not block a resume; start from empty.
        logging.warning(f"Checkpoint file {checkpoint_path} is not valid JSON ({exc}); ignoring it.")
        return {}

    if not isinstance(data, dict):
        # Callers use membership tests and item assignment with string keys.
        logging.warning(f"Checkpoint file {checkpoint_path} does not contain a JSON object; ignoring it.")
        return {}
    return data

In [31]:
# Save checkpoint
def save_checkpoint(data, path=None):
    """Persist ``data`` as JSON without ever leaving a half-written checkpoint.

    The JSON is written to a temporary sibling file and then swapped into
    place with ``os.replace``. If serialization fails or the run is
    interrupted mid-write, the previous checkpoint file is left untouched.

    Args:
        data: JSON-serializable checkpoint mapping.
        path: Destination file. Defaults to ``CHECKPOINT_FILE``.

    Raises:
        TypeError: if ``data`` is not JSON-serializable.
        OSError: if the file cannot be written or replaced.
    """
    checkpoint_path = CHECKPOINT_FILE if path is None else path
    # Keep the temp file next to the target so the replace stays on one filesystem.
    temp_path = f"{checkpoint_path}.tmp"
    try:
        with open(temp_path, 'w') as f:
            json.dump(data, f)
        os.replace(temp_path, checkpoint_path)
    finally:
        # After a successful replace the temp file is gone; this only cleans
        # up after a failed or interrupted write.
        if os.path.exists(temp_path):
            os.remove(temp_path)

#### Mods List and Details

In [32]:
def load_existing_data():
    """Return previously saved mod records from ``MODS_OUTPUT_FILE``.

    Returns:
        A list of row dicts read from the CSV. An empty list is returned when
        the file does not exist or has no ``game_domain`` column.
    """
    if not os.path.exists(MODS_OUTPUT_FILE):
        return []

    saved_frame = pd.read_csv(MODS_OUTPUT_FILE)
    if 'game_domain' in saved_frame.columns:
        return saved_frame.to_dict('records')

    # Without the game column the rows cannot be attributed to a game.
    logging.warning("Missing 'game_domain' column in existing data, skipping...")
    return []

In [33]:
def get_mod_by_id(game_domain, mod_id):
    """Fetch one mod's details and return them only if the mod is published.

    Args:
        game_domain: Game domain name used in the API path.
        mod_id: Numeric mod identifier used in the API path.

    Returns:
        The decoded JSON payload when the request returns 200 and the payload's
        ``status`` is ``'published'``; otherwise ``None``. The reason for a
        ``None`` is written to the log.
    """
    endpoint = f"{BASE_URL}games/{game_domain}/mods/{mod_id}.json"
    reply = rate_limited_request(endpoint)
    status_code = reply.status_code

    if status_code == 404:
        logging.warning(f"Mod {mod_id} for {game_domain} not found.")
        return None

    if status_code != 200:
        logging.error(f"Error fetching mod {mod_id} for {game_domain}: {status_code}")
        return None

    payload = reply.json()
    if payload.get('status') != 'published':
        logging.warning(f"Skipping unpublished mod {mod_id} for {game_domain}")
        return None

    return payload


In [34]:
def save_data_with_limit(mods_data, prefix=None, max_file_size_kb=None):
    """Write ``mods_data`` to numbered CSV files that each respect the size limit.

    Rows are written in their original order to ``<prefix>_1.csv``,
    ``<prefix>_2.csv``, and so on. Whenever a group of rows would produce a
    file larger than the limit, the group is split in half and each half is
    tried again, so every row is written exactly once. Each file carries its
    own header row. A single row that alone exceeds the limit is still
    written, in a file of its own.

    Args:
        mods_data: List of record dicts (anything ``pd.DataFrame`` accepts).
        prefix: Output path prefix. Defaults to ``MODS_OUTPUT_FILE_PREFIX``.
        max_file_size_kb: Per-file limit in KiB. Defaults to ``MAX_FILE_SIZE_KB``.
    """
    file_prefix = MODS_OUTPUT_FILE_PREFIX if prefix is None else prefix
    limit_kb = MAX_FILE_SIZE_KB if max_file_size_kb is None else max_file_size_kb
    limit_bytes = limit_kb * 1024

    # Work stack of row groups still to be written. The later half is pushed
    # first so the earlier half is popped first and row order is preserved.
    pending = [pd.DataFrame(mods_data)]
    file_index = 0
    while pending:
        part = pending.pop()
        encoded = part.to_csv(index=False).encode('utf-8')

        if len(encoded) > limit_bytes and len(part) > 1:
            middle = len(part) // 2
            pending.append(part.iloc[middle:])
            pending.append(part.iloc[:middle])
            continue

        file_index += 1
        # Write the exact bytes that were measured, so the size check holds on disk.
        with open(f"{file_prefix}_{file_index}.csv", 'wb') as handle:
            handle.write(encoded)

    # NOTE(review): numbered files left over from an earlier call that produced
    # more parts are not removed here; confirm whether cleanup is wanted.

In [35]:
def collect_mods_data(game_list, max_consecutive_misses=None):
    """Collect published mods for each game by walking mod IDs upward.

    For every game, fetching starts from the ID after the highest one already
    present in the loaded data (or from a per-game starting ID when nothing is
    stored yet) and continues until the number of collected mods reaches the
    game's ``mods`` count.

    Args:
        game_list: List of game dicts with ``domain_name`` and ``mods`` keys.
        max_consecutive_misses: Optional cap on consecutive IDs that yield no
            mod before giving up on a game. ``None`` (the default) keeps
            scanning until the target count is reached.

    Returns:
        The list of mod records: previously stored ones plus newly fetched ones.
    """
    checkpoint = load_checkpoint()
    mods_data = load_existing_data()
    start_time = time.time()

    START_MOD_IDS = {  # Set the starting mod ID for specific games
        "newvegas": 34677,
        "oblivion": 1412,
        "skyrimspecialedition":27531
    }

    with tqdm(total=len(game_list), desc="Fetching mods by ID", unit="game") as game_pbar:
        for game in game_list:
            game_domain = game['domain_name']
            total_mods = game['mods']

            existing_ids = [mod['mod_id'] for mod in mods_data if mod['game_domain'] == game_domain]
            collected_mods = len(existing_ids)
            if existing_ids:
                # The highest stored ID is already collected; resume after it
                # so it is not fetched and appended a second time.
                mod_id = int(max(existing_ids)) + 1
            else:
                mod_id = START_MOD_IDS.get(game_domain, 1)
            consecutive_misses = 0

            with tqdm(total=total_mods, desc=f"Processing {game_domain}", unit="mod", leave=True) as mod_pbar:
                mod_pbar.update(collected_mods)
                while collected_mods < total_mods:
                    checkpoint_key = f"{game_domain}_{mod_id}"
                    if checkpoint_key in checkpoint:
                        # NOTE(review): a checkpointed ID is skipped even when
                        # its record is absent from mods_data, so it is never
                        # re-fetched; confirm this is intended.
                        logging.info(f"Skipping mod {mod_id} for {game_domain}, already processed.")
                        mod_id += 1
                        mod_pbar.update(1)
                        continue

                    logging.info(f"Fetching mod {mod_id} for {game_domain}")
                    mod_data = get_mod_by_id(game_domain, mod_id)
                    if mod_data:
                        mod_data['game_domain'] = game_domain
                        mod_data['mod_id'] = mod_id
                        mods_data.append(mod_data)
                        # Persist the data before marking the ID as processed,
                        # so an interruption cannot checkpoint an unsaved mod.
                        # Saving only when a mod was added avoids rewriting the
                        # whole dataset after every missing ID.
                        save_data_with_limit(mods_data)
                        checkpoint[checkpoint_key] = True
                        save_checkpoint(checkpoint)
                        collected_mods += 1
                        mod_pbar.update(1)
                        consecutive_misses = 0
                    else:
                        consecutive_misses += 1
                        if max_consecutive_misses is not None and consecutive_misses >= max_consecutive_misses:
                            logging.warning(
                                f"Stopping {game_domain} after {consecutive_misses} consecutive IDs without a mod."
                            )
                            break

                    mod_id += 1

            game_pbar.update(1)

    elapsed_time = time.time() - start_time
    logging.info(f"Mods data collection completed in {elapsed_time:.2f} seconds.")
    return mods_data


In [36]:
def main():
    """Run a full collection for every game in ``nexus_mods_games.csv``.

    The collected records are written to ``MODS_OUTPUT_FILE``. Nothing is
    written when the games file has no rows or when no mods were collected;
    both cases are logged.
    """
    games = pd.read_csv("nexus_mods_games.csv").to_dict('records')
    if len(games) == 0:
        logging.error("No games found in nexus_mods_games.csv.")
        return

    mod_records = collect_mods_data(games)
    if not mod_records:
        logging.warning("No mods were collected.")
        return

    pd.DataFrame(mod_records).to_csv(MODS_OUTPUT_FILE, index=False)
    logging.info("Mods data collection complete.")


In [37]:
# Entry point: start the collection when this cell (or script) is run directly.
if __name__ == "__main__":
    main()

Fetching mods by ID:   0%|                                                                    | 0/10 [00:00<?, ?game/s]
Processing skyrimspecialedition:   0%|                                                     | 0/102501 [00:00<?, ?mod/s][A
Processing skyrimspecialedition:   0%|                                          | 1/102501 [00:00<24:06:08,  1.18mod/s][A
Processing skyrimspecialedition:   0%|                                          | 2/102501 [00:01<16:17:22,  1.75mod/s][A
Processing skyrimspecialedition:   0%|                                          | 3/102501 [00:01<15:07:21,  1.88mod/s][A
Processing skyrimspecialedition:   0%|                                          | 4/102501 [00:02<13:44:26,  2.07mod/s][A
Processing skyrimspecialedition:   0%|                                          | 5/102501 [00:02<13:15:49,  2.15mod/s][A
Processing skyrimspecialedition:   0%|                                          | 6/102501 [00:03<13:25:45,  2.12mod/s][A
Processing skyrimsp

KeyboardInterrupt: 

In [None]:
def check_and_collect_new_mods():
    """Collect mods for each game whose stored count is below its ``mods`` total.

    Reads the game list from ``nexus_mods_games.csv`` and the stored mods from
    ``MODS_OUTPUT_FILE`` (when present). The stored rows are counted per
    ``game_domain``, and ``collect_mods_data`` is called for every game with
    fewer stored rows than its ``mods`` value. Stored data without a
    ``game_domain`` column is treated as empty, matching ``load_existing_data``.
    """
    game_list = pd.read_csv("nexus_mods_games.csv").to_dict('records')
    existing_data = pd.read_csv(MODS_OUTPUT_FILE) if os.path.exists(MODS_OUTPUT_FILE) else pd.DataFrame()

    # Count stored rows per game once instead of filtering the frame per game.
    if 'game_domain' in existing_data.columns:
        existing_counts = existing_data['game_domain'].value_counts()
    else:
        if not existing_data.empty:
            logging.warning("Missing 'game_domain' column in existing data, skipping...")
        existing_counts = pd.Series(dtype='int64')

    for game in game_list:
        game_domain = game['domain_name']
        total_mods = game['mods']
        existing_count = int(existing_counts.get(game_domain, 0))

        if existing_count < total_mods:
            logging.info(f"New mods detected for {game_domain}. Fetching additional {total_mods - existing_count} mods.")
            collect_mods_data([game])
        else:
            logging.info(f"No new mods detected for {game_domain}.")

In [None]:
def main():
    """Prompt for a run mode and dispatch to a full or incremental collection.

    This definition replaces the earlier ``main`` in the notebook once its
    cell has been run. ``'full'`` collects mods for every game in
    ``nexus_mods_games.csv``; ``'update'`` calls
    ``check_and_collect_new_mods``; any other answer prints a usage hint.
    """
    choice = input("Enter 'full' to collect all mods or 'update' to fetch new mods: ").strip().lower()

    if choice == 'update':
        check_and_collect_new_mods()
        return

    if choice != 'full':
        print("Invalid option. Please enter 'full' or 'update'.")
        return

    games = pd.read_csv("nexus_mods_games.csv").to_dict('records')
    collect_mods_data(games)

# Entry point: prompt for the run mode when this cell (or script) is run directly.
if __name__ == "__main__":
    main()


In [11]:
# Load the mods CSV (same filename as MODS_OUTPUT_FILE) and preview the first
# rows as a quick check of the collected columns.
df=pd.read_csv("nexus_mods_mods.csv")
df.head()

Unnamed: 0,name,summary,description,picture_url,mod_downloads,mod_unique_downloads,uid,mod_id,game_id,allow_rating,...,updated_time,author,uploaded_by,uploaded_users_profile_url,contains_adult_content,status,available,user,endorsement,game_domain
0,The Cracking City - Ayleid Player Base,"A completely functional player base added, ent...",The resources used here are from [url=http://w...,https://staticdelivery.nexusmods.com/mods/1704...,4377,2656,7318624272465,81,1704,True,...,2016-10-30T02:50:37.000+00:00,Evittalex,evittalex,https://www.nexusmods.com/users/16033119,False,published,True,"{'member_id': 16033119, 'member_group_id': 27,...","{'endorse_status': 'Undecided', 'timestamp': N...",skyrimspecialedition
1,Tamriel y mas alla,Una aventura completamente nueva que abarca to...,"HOY SE SUBIO UN ARCHIVO DE MUESTRA, LA BASE...",https://staticdelivery.nexusmods.com/mods/1704...,476,319,7318624272468,84,1704,True,...,2017-06-19T00:50:22.000+00:00,NacsoZXProdudctions,NacsoZX2,https://www.nexusmods.com/users/21741529,False,published,True,"{'member_id': 21741529, 'member_group_id': 3, ...","{'endorse_status': 'Undecided', 'timestamp': N...",skyrimspecialedition
2,Unofficial Skyrim Special Edition Patch - Germ...,Die deutsche Übersetzung des Unofficial Skyrim...,[center][/center]\n<br />\n<br />[color=#a4c2f...,https://staticdelivery.nexusmods.com/mods/1704...,307839,67085,7318624272484,100,1704,True,...,2024-10-19T17:08:33.000+00:00,Ypselonia,Ypselonia,https://www.nexusmods.com/users/3105610,False,published,True,"{'member_id': 3105610, 'member_group_id': 27, ...","{'endorse_status': 'Undecided', 'timestamp': N...",skyrimspecialedition
3,Blessing's Bay,"A lighthouse player home, unfurnished, just no...",[left][/left][center]\r<br />[font=Comic Sans ...,https://staticdelivery.nexusmods.com/mods/1704...,610,348,7318624272498,114,1704,True,...,2016-10-26T13:05:43.000+00:00,Blessingwindwalker,blessingwindwalker,https://www.nexusmods.com/users/4309220,False,published,True,"{'member_id': 4309220, 'member_group_id': 30, ...","{'endorse_status': 'Undecided', 'timestamp': N...",skyrimspecialedition
4,Miriam's Way,A Solitude farmhouse unfurnished.,\r<br />[center][i][font=Comic Sans MS][color...,https://staticdelivery.nexusmods.com/mods/1704...,569,315,7318624272499,115,1704,True,...,2016-10-26T13:17:06.000+00:00,Blessingwindwalker,blessingwindwalker,https://www.nexusmods.com/users/4309220,False,published,True,"{'member_id': 4309220, 'member_group_id': 30, ...","{'endorse_status': 'Undecided', 'timestamp': N...",skyrimspecialedition
