# PVP JSON Data Downloader and Verifier

This notebook downloads JSON data files from the PVP.QQ API (pvp.qq.com), verifies that each one parses as valid JSON, and then explores their contents.

## Files to Download:
- **herolist.json** - Hero/character data
- **item.json** - Equipment/item data  
- **summoner.json** - Summoner spell data
- **ming.json** - Rune/inscription data

In [1]:
# Import required libraries
import os
import json
import requests
from urllib.parse import urlparse
from pathlib import Path
import pandas as pd
from IPython.display import display, HTML

## 1. Download Function

In [2]:
def download_file(url, save_dir):
    """Download a file from a URL and save it into a directory.

    The local file name is the last component of the URL path. The response
    body is written to disk only when the server answers with HTTP 200.
    After saving, the payload is parsed as JSON purely as a sanity check:
    a parse failure prints a warning but does not change the return value.

    Args:
        url: Address of the file to fetch.
        save_dir: Directory to save into; created if it does not exist.

    Returns:
        True if the file was downloaded and written to disk, False otherwise
        (no file name in the URL, non-200 status, request error, or
        filesystem error).
    """
    # Extract filename from URL
    file_name = os.path.basename(urlparse(url).path)
    if not file_name:
        # A URL such as "https://host/dir/" has no file component; joining an
        # empty name would make open() target the directory itself.
        print(f"✗ Cannot determine a file name from {url}")
        return False

    # Build complete file path
    file_path = os.path.join(save_dir, file_name)

    try:
        # Create save directory if it doesn't exist
        os.makedirs(save_dir, exist_ok=True)

        # Send HTTP request to download file
        print(f"Downloading {url}...")
        response = requests.get(url, timeout=30)

        # Anything other than 200 is reported as a failed download
        if response.status_code != 200:
            print(f"✗ Failed to download {url}, status code: {response.status_code}")
            return False

        # Write the raw bytes exactly as received
        payload = response.content
        with open(file_path, 'wb') as file:
            file.write(payload)
        print(f"✓ Successfully downloaded: {file_path}")

        # Validate the same bytes that were saved. json.loads() detects the
        # encoding of bytes input itself, so the check does not depend on the
        # charset guessed from the response headers. ValueError covers both
        # JSONDecodeError and UnicodeDecodeError.
        try:
            json.loads(payload)
            print(f"  ✓ Valid JSON format")
        except ValueError:
            print(f"  ⚠ Warning: File may not be valid JSON")

        return True

    except requests.exceptions.RequestException as e:
        print(f"✗ Error downloading {url}: {e}")
        return False
    except OSError as e:
        # Directory creation or file write failed (permissions, disk full, ...)
        print(f"✗ Error saving {file_path}: {e}")
        return False

## 2. Download All JSON Files

In [3]:
# Download every PVP JSON file into the local target directory
save_dir = "json"

# Short dataset label -> source URL
urls = {
    "herolist": "https://pvp.qq.com/web201605/js/herolist.json",
    "item": "https://pvp.qq.com/web201605/js/item.json",
    "summoner": "https://pvp.qq.com/web201605/js/summoner.json",
    "ming": "https://pvp.qq.com/web201605/js/ming.json",
}

print("Starting download of PVP JSON files...")
print("=" * 50)

total_count = len(urls)
success_count = 0

# Fetch the files one by one, counting how many succeed
for name, url in urls.items():
    print(f"\n[{name.upper()}]")
    success_count += 1 if download_file(url, save_dir) else 0

# Report the overall outcome
failed_count = total_count - success_count
print("\n" + "=" * 50)
print("Download Summary:")
print(f"Successfully downloaded: {success_count}/{total_count} files")
print(f"Files saved to: {os.path.abspath(save_dir)}")

if failed_count == 0:
    print("🎉 All files downloaded successfully!")
else:
    print(f"⚠ {failed_count} files failed to download")

Starting download of PVP JSON files...

[HEROLIST]
Downloading https://pvp.qq.com/web201605/js/herolist.json...
✓ Successfully downloaded: json/herolist.json
  ✓ Valid JSON format

[ITEM]
Downloading https://pvp.qq.com/web201605/js/item.json...
✓ Successfully downloaded: json/item.json
  ✓ Valid JSON format

[SUMMONER]
Downloading https://pvp.qq.com/web201605/js/summoner.json...
✓ Successfully downloaded: json/summoner.json
  ✓ Valid JSON format

[MING]
Downloading https://pvp.qq.com/web201605/js/ming.json...
✓ Successfully downloaded: json/ming.json
  ✓ Valid JSON format

Download Summary:
Successfully downloaded: 4/4 files
Files saved to: /Users/administrator/Documents/GitHub/pvp-ai/crawler/json
🎉 All files downloaded successfully!


## 3. Verification Function

In [4]:
def verify_json_file(file_path):
    """Parse a JSON file and print a short description of its contents.

    Prints the file name, its size in bytes, and the top-level JSON type.
    For an array, the item count and (when the first item is an object)
    that item's keys are shown; for an object, its keys are shown.

    Args:
        file_path: Path of the JSON file to check.

    Returns:
        A tuple (ok, data): (True, parsed_json) when the file was read and
        parsed, otherwise (False, None) after printing the reason.
    """
    file_label = os.path.basename(file_path)
    try:
        # 'utf-8-sig' reads plain UTF-8 unchanged and also strips a leading
        # byte-order mark; with plain 'utf-8' the BOM stays in the text and
        # json.load() rejects an otherwise valid file.
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            data = json.load(f)

        print(f"✓ {file_label}")
        print(f"  Size: {os.path.getsize(file_path):,} bytes")

        if isinstance(data, list):
            print(f"  Type: Array with {len(data)} items")
            if len(data) > 0:
                # Keys are only meaningful when the first element is an object
                print(f"  First item keys: {list(data[0].keys()) if isinstance(data[0], dict) else 'N/A'}")
        elif isinstance(data, dict):
            print(f"  Type: Object with {len(data)} keys")
            print(f"  Keys: {list(data.keys())}")
        else:
            # Scalar top-level value (string, number, bool, null)
            print(f"  Type: {type(data).__name__}")

        return True, data

    except FileNotFoundError:
        print(f"✗ {file_label} - File not found")
        return False, None
    except json.JSONDecodeError as e:
        print(f"✗ {file_label} - Invalid JSON: {e}")
        return False, None
    except Exception as e:
        # Any other failure (permissions, undecodable bytes, ...) is reported
        # the same way so the caller's verification loop keeps going.
        print(f"✗ {file_label} - Error: {e}")
        return False, None

## 4. Verify All Downloaded Files

In [5]:
# Check that every expected file exists locally and parses as JSON
print("Verifying downloaded JSON files...")
print("=" * 50)

expected_files = ["herolist.json", "item.json", "summoner.json", "ming.json"]

# filename -> parsed JSON content; only files that verified successfully
verification_results = {}
success_count = 0

for filename in expected_files:
    file_path = os.path.join(save_dir, filename)
    print(f"\n[{filename.upper()}]")
    success, data = verify_json_file(file_path)
    if not success:
        continue
    verification_results[filename] = data
    success_count += 1

# Report the overall outcome
problem_count = len(expected_files) - success_count
print("\n" + "=" * 50)
print("Verification Summary:")
print(f"Successfully verified: {success_count}/{len(expected_files)} files")

if problem_count == 0:
    print("🎉 All files are valid!")
else:
    print(f"⚠ {problem_count} files have issues")

Verifying downloaded JSON files...

[HEROLIST.JSON]
✓ herolist.json
  Size: 34,851 bytes
  Type: Array with 126 items
  First item keys: ['ename', 'cname', 'id_name', 'title', 'new_type', 'hero_type', 'skin_name', 'moss_id']

[ITEM.JSON]
✓ item.json
  Size: 40,596 bytes
  Type: Array with 114 items
  First item keys: ['item_id', 'item_name', 'item_type', 'price', 'total_price', 'des1']

[SUMMONER.JSON]
✓ summoner.json
  Size: 2,790 bytes
  Type: Array with 11 items
  First item keys: ['summoner_id', 'summoner_name', 'summoner_rank', 'summoner_description']

[MING.JSON]
✓ ming.json
  Size: 12,087 bytes
  Type: Array with 93 items
  First item keys: ['ming_id', 'ming_type', 'ming_grade', 'ming_name', 'ming_des']

Verification Summary:
Successfully verified: 4/4 files
🎉 All files are valid!


## 5. Data Exploration

In [6]:
# Summarise each verified dataset: record count, columns, sample rows, dtypes
print("📊 Data Summary")
print("=" * 50)

for filename, data in verification_results.items():
    # Only non-empty JSON arrays are tabulated; everything else is skipped
    if not isinstance(data, list) or not data:
        continue

    print(f"\n📁 {filename}")
    print(f"   Records: {len(data):,}")

    # Load the records into a DataFrame and preview the first three rows
    df = pd.DataFrame(data)
    print(f"   Columns: {list(df.columns)}")
    print("   Sample data:")
    display(df.head(3))

    # Column dtypes as inferred by pandas
    print("   Data types:")
    print(df.dtypes.to_string())

📊 Data Summary

📁 herolist.json
   Records: 126
   Columns: ['ename', 'cname', 'id_name', 'title', 'new_type', 'hero_type', 'skin_name', 'moss_id', 'hero_type2', 'pay_type', 'time', 'upgrade', 'm_bl_link']
   Sample data:


Unnamed: 0,ename,cname,id_name,title,new_type,hero_type,skin_name,moss_id,hero_type2,pay_type,time,upgrade,m_bl_link
0,105,廉颇,lianpo,正义爆轰,0,3,正义爆轰|地狱岩魂|无尽征程|寅虎·御盾|功夫炙烤|撼地雄心,3627,,,,,
1,106,小乔,xiaoqiao,恋之微风,0,2,恋之微风|万圣前夜|天鹅之梦|纯白花嫁|缤纷独角兽|丁香结|青蛇|音你心动|山海·琳琅生|时...,3644,,,,,
2,107,赵云,zhaoyun,苍天翔龙,0,1,苍天翔龙|忍●炎影|未来纪元|皇家上将|嘻哈天王|白执事|引擎之心|龙胆|淬星耀世|百木心枪...,3661,4.0,,,,


   Data types:
ename           int64
cname          object
id_name        object
title          object
new_type        int64
hero_type       int64
skin_name      object
moss_id         int64
hero_type2    float64
pay_type      float64
time           object
upgrade       float64
m_bl_link      object

📁 item.json
   Records: 114
   Columns: ['item_id', 'item_name', 'item_type', 'price', 'total_price', 'des1', 'des2']
   Sample data:


Unnamed: 0,item_id,item_name,item_type,price,total_price,des1,des2
0,1111,铁剑,1,165,275,<p>+20物理攻击</p>,
1,1112,匕首,1,180,300,<p>+10%攻击速度 </p>,
2,1113,搏击拳套,1,180,300,<p>+8%暴击率 </p>,


   Data types:
item_id         int64
item_name      object
item_type       int64
price           int64
total_price     int64
des1           object
des2           object

📁 summoner.json
   Records: 11
   Columns: ['summoner_id', 'summoner_name', 'summoner_rank', 'summoner_description']
   Sample data:


Unnamed: 0,summoner_id,summoner_name,summoner_rank,summoner_description
0,80104,惩击,LV.1解锁,30秒CD：对身边的野怪和小兵造成真1500点的实伤害并眩晕1秒
1,80108,终结,LV.3解锁,60秒CD：立即对身边敌军英雄造成其已损失生命值16%的真实伤害，如果成功击败敌人则减少90...
2,80110,狂暴,LV.5解锁,75秒CD：使用期间增加10%伤害、增加25%韧性、20%的物理吸血和30%法术吸血，持续7秒


   Data types:
summoner_id              int64
summoner_name           object
summoner_rank           object
summoner_description    object

📁 ming.json
   Records: 93
   Columns: ['ming_id', 'ming_type', 'ming_grade', 'ming_name', 'ming_des']
   Sample data:


Unnamed: 0,ming_id,ming_type,ming_grade,ming_name,ming_des
0,1501,red,5,圣人,<p>法术攻击力+5.3</p>
1,1503,red,5,传承,<p>物理攻击力+3.2</p>
2,1504,red,5,异变,<p>物理攻击力+2</p><p>物理穿透+3.6</p>


   Data types:
ming_id       object
ming_type     object
ming_grade    object
ming_name     object
ming_des      object


## 6. Quick Data Analysis

In [7]:
# Hero roster breakdown; runs only when herolist.json passed verification
if 'herolist.json' in verification_results:
    heroes_df = pd.DataFrame(verification_results['herolist.json'])

    print("🏆 Hero Analysis")
    print("=" * 30)
    print(f"Total Heroes: {heroes_df.shape[0]}")

    # Number of heroes per hero_type value, most common first
    if 'hero_type' in heroes_df:
        print("\nHero Types:")
        hero_types = heroes_df['hero_type'].value_counts()
        display(hero_types)

    # Number of heroes per new_type value, most common first
    if 'new_type' in heroes_df:
        print("\nNew Types:")
        new_types = heroes_df['new_type'].value_counts()
        display(new_types)

🏆 Hero Analysis
Total Heroes: 126

Hero Types:


hero_type
1    30
2    28
5    20
6    19
4    17
3    12
Name: count, dtype: int64


New Types:


new_type
0    125
1      1
Name: count, dtype: int64

In [8]:
# Item breakdown; runs only when item.json passed verification
if 'item.json' in verification_results:
    items_df = pd.DataFrame(verification_results['item.json'])

    print("⚔️ Item Analysis")
    print("=" * 30)
    print(f"Total Items: {len(items_df)}")

    # Number of items per item_type value, most common first
    if 'item_type' in items_df.columns:
        print("\nItem Types:")
        item_types = items_df['item_type'].value_counts()
        display(item_types)

    if 'price' in items_df.columns:
        print("\nPrice Statistics:")
        try:
            # errors='coerce' turns unparseable entries into NaN instead of
            # raising, so "no usable prices" has to be detected explicitly.
            prices = pd.to_numeric(items_df['price'], errors='coerce')
        except (TypeError, ValueError):
            # Values pandas cannot process at all (e.g. nested containers);
            # a narrow except avoids swallowing KeyboardInterrupt and bugs.
            prices = pd.Series(dtype="float64")

        if prices.notna().any():
            # min/max/mean skip NaN, so partially numeric data still works
            print(f"Min Price: {prices.min()}")
            print(f"Max Price: {prices.max()}")
            print(f"Average Price: {prices.mean():.2f}")
        else:
            print("Price data not available in numeric format")

⚔️ Item Analysis
Total Items: 114

Item Types:


item_type
1    34
3    29
2    26
7    11
4     7
5     7
Name: count, dtype: int64


Price Statistics:
Min Price: 150
Max Price: 2004
Average Price: 912.11
