In [35]:
# Cell 1 - Imports and Setup
import pandas as pd
import os
import json
import sys
from datetime import datetime
from pathlib import Path

# Print current working directory to verify location
print(f"Current working directory: {os.getcwd()}")

# Setup paths
NOTEBOOK_DIR = Path(os.getcwd())
REPO_ROOT = NOTEBOOK_DIR.parent.parent  # Go up two levels to reach repo root
DATA_DIR = NOTEBOOK_DIR.parent / 'data'
JSON_OUTPUT_DIR = DATA_DIR / 'json'

# Add repository root to Python path for protocol imports
sys.path.append(str(REPO_ROOT))

# Import scoring modules
from protocol.scoring.post_scorer import PostScorer
from protocol.scoring.miner_weights import MinerWeights

# Create output directory
JSON_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Print paths for verification
print(f"Repository root: {REPO_ROOT}")
print(f"Data directory: {DATA_DIR}")
print(f"JSON output directory: {JSON_OUTPUT_DIR}")

Current working directory: /Users/brendanplayford/masa/agent-arena-subnet/notebooks/scoring
Repository root: /Users/brendanplayford/masa/agent-arena-subnet
Data directory: /Users/brendanplayford/masa/agent-arena-subnet/notebooks/data
JSON output directory: /Users/brendanplayford/masa/agent-arena-subnet/notebooks/data/json


In [36]:
def extract_posts_by_uid(uid: str, input_file: str) -> pd.DataFrame:
    """Extract all posts for a given user ID from a JSON file."""
    if not os.path.exists(input_file):
        raise FileNotFoundError(f"Input file not found: {input_file}")
    
    # Read the JSON file
    with open(input_file, 'r') as f:
        data = json.load(f)
    
    # Filter posts by uid
    posts = []
    if isinstance(data, dict):
        if data['uid'] == uid:
            for tweet in data['tweets']:
                if 'Tweet' in tweet:
                    posts.append(tweet['Tweet'])
    elif isinstance(data, list):
        for post in data:
            if post['uid'] == uid:
                for tweet in post['tweets']:
                    if 'Tweet' in tweet:
                        posts.append(tweet['Tweet'])
    
    # Convert to DataFrame
    df = pd.DataFrame(posts)
    return df

In [44]:
uid = "227"  # Replace with your desired UID
input_file = str(DATA_DIR / 'posts.json')

try:
    user_posts = extract_posts_by_uid(uid, input_file)
    output_file = JSON_OUTPUT_DIR / f'user_{uid}_posts.json'
    user_posts.to_json(output_file, orient='records', indent=2)
    print(f"Saved {len(user_posts)} posts to {output_file}")
except FileNotFoundError as e:
    print(f"Error: {e}")
    print(f"Please ensure the posts.json file exists in: {DATA_DIR}")
except ValueError as e:
    print(f"Error: {e}")

Saved 37 posts to /Users/brendanplayford/masa/agent-arena-subnet/notebooks/data/json/user_227_posts.json


In [45]:
from datetime import datetime
import json
import math
from pathlib import Path
from protocol.scoring.post_scorer import PostScorer
from protocol.scoring.miner_weights import MinerWeights

def score_user_posts(uid: str, user_posts_file: Path):
    """Score posts for a specific user."""
    # Initialize scorers
    post_scorer = PostScorer()
    miner_weights = MinerWeights(post_scorer)
    
    # Load the specific user's posts
    with open(user_posts_file, 'r') as f:
        tweets = json.load(f)
    
    # Format data for scoring
    formatted_post = {
        'uid': uid,
        'user_id': tweets[0]['UserID'],
        'subnet_id': 59,
        'created_at': int(datetime.now().timestamp()),
        'tweets': [{'Tweet': tweet} for tweet in tweets]
    }
    
    # Score the posts
    scored_posts = post_scorer.score_posts([formatted_post])
    
    # Calculate miner weights
    uids, weights = miner_weights.calculate_weights(scored_posts)
    
    # Detailed scoring breakdown
    print("\n=== Scoring Breakdown ===")
    print(f"UID: {uid}")
    print(f"\nIndividual Tweet Scores:")
    print("-" * 80)
    print(f"{'Tweet ID':<20} {'Engagement':<12} {'Interaction':<12} {'Final Score':<12}")
    print("-" * 80)
    
    for tweet in formatted_post['tweets']:
        tweet_data = tweet['Tweet']
        engagement_score = post_scorer.calculate_engagement_score(tweet_data)
        interaction_score = post_scorer.calculate_interaction_score(tweet_data)
        final_score = post_scorer.calculate_tweet_score(tweet_data)
        
        print(f"{tweet_data['ID']:<20} {engagement_score:.4f}     {interaction_score:.4f}     {final_score:.4f}")
    
    print("\n=== Weight Calculation ===")
    total_tweets = len(tweets)
    avg_score = sum(s['score'] for s in scored_posts[0]['scores']) / total_tweets
    volume_bonus = math.log1p(total_tweets) / 10
    final_weight = weights[0]
    
    print(f"Total Tweets: {total_tweets}")
    print(f"Average Score: {avg_score:.4f}")
    print(f"Volume Bonus: {volume_bonus:.4f}")
    print(f"Final Weight: {final_weight:.4f}")
    
    return {
        'scored_posts': scored_posts,
        'uids': uids,
        'weights': weights,
        'detailed_scores': {
            'total_tweets': total_tweets,
            'average_score': avg_score,
            'volume_bonus': volume_bonus,
            'final_weight': final_weight
        }
    }

# Score the posts
uid = "227"
output_file = JSON_OUTPUT_DIR / f'user_{uid}_posts.json'

try:
    results = score_user_posts(uid, output_file)
except Exception as e:
    print(f"Error processing posts: {str(e)}")
    raise


=== Scoring Breakdown ===
UID: 227

Individual Tweet Scores:
--------------------------------------------------------------------------------
Tweet ID             Engagement   Interaction  Final Score 
--------------------------------------------------------------------------------
1871534055566835724  0.0596     0.6667     0.2417
1871534042145010090  0.0062     0.5000     0.1543
1871533993029665056  0.0640     0.6667     0.2448
1871533975203893509  0.0054     0.3333     0.1038
1871533959433318521  0.0059     0.3333     0.1042
1871533927888044113  0.1241     0.5000     0.2369
1871533914696880146  0.0067     0.3333     0.1047
1871533827144954184  0.0052     0.3333     0.1036
1871533811240239412  0.0062     0.3333     0.1043
1871533770547110240  0.0035     0.3333     0.1024
1872787544179695992  0.0022     0.6667     0.2016
1872677810160140630  0.0025     0.5000     0.1517
1872677797816320346  0.0027     0.5000     0.1519
1872603892539941095  0.0045     0.6667     0.2031
1872463531007975

In [42]:
def process_subnet_posts(subnet_id: int, input_file: Path):
    """Process and score all posts for a given subnet."""
    # Load and verify coverage first
    with open(input_file, 'r') as f:
        all_posts = json.load(f)
    
    # Coverage analysis
    user_stats = {}
    for post in all_posts:
        if post.get('subnet_id') != subnet_id:
            continue
            
        uid = post['uid']
        tweets = [tweet['Tweet'] for tweet in post.get('tweets', []) if 'Tweet' in tweet]
        user_stats[uid] = {
            'total_tweets_claimed': post.get('count', 0),
            'tweets_found': len(tweets),
            'query': post.get('query', '')
        }
    
    print("\n=== Tweet Coverage Analysis ===")
    print(f"{'UID':<10} {'Claimed':<10} {'Found':<10} {'Coverage %':<10}")
    print("-" * 50)
    
    for uid, stats in sorted(user_stats.items()):
        coverage = (stats['tweets_found'] / stats['total_tweets_claimed'] * 100) if stats['total_tweets_claimed'] > 0 else 0
        print(f"{uid:<10} {stats['total_tweets_claimed']:<10} {stats['tweets_found']:<10} {coverage:.1f}%")
        if coverage < 100:
            print(f"  Query: {stats['query']}")
    
    # Process each post
    results = {}
    for post in all_posts:
        if post.get('subnet_id') != subnet_id:
            continue
            
        uid = post['uid']
        tweets = [tweet['Tweet'] for tweet in post.get('tweets', []) if 'Tweet' in tweet]
        
        if not tweets:
            print(f"No valid tweets found for UID {uid}")
            continue
            
        # Format data for scoring
        formatted_post = {
            'uid': uid,
            'user_id': tweets[0].get('UserID'),
            'subnet_id': subnet_id,
            'created_at': post.get('created_at'),
            'tweets': [{'Tweet': tweet} for tweet in tweets]
        }
        
        try:
            # Score the posts
            post_scorer = PostScorer()
            miner_weights = MinerWeights(post_scorer)
            
            scored_posts = post_scorer.score_posts([formatted_post])
            uids, weights = miner_weights.calculate_weights(scored_posts)
            
            # Store results with tweet details
            results[uid] = {
                'total_tweets': len(tweets),
                'tweets_claimed': post.get('count', 0),
                'average_score': scored_posts[0]['average_score'],
                'volume_bonus': math.log1p(len(tweets)) / 10,
                'final_weight': weights[0],
                'tweets': [{
                    'id': tweet['ID'],
                    'text': tweet['Text'][:100] + '...' if len(tweet['Text']) > 100 else tweet['Text'],
                    'engagement': {
                        'likes': tweet.get('Likes', 0),
                        'replies': tweet.get('Replies', 0),
                        'retweets': tweet.get('Retweets', 0),
                        'views': tweet.get('Views', 0)
                    }
                } for tweet in tweets]
            }
            
        except Exception as e:
            print(f"Error processing UID {uid}: {str(e)}")
            continue
    
    # Print detailed summary
    print("\n=== Subnet Scoring Summary ===")
    print(f"Subnet ID: {subnet_id}")
    print(f"Total UIDs processed: {len(results)}")
    print("\nDetailed Scoring Breakdown:")
    print("=" * 100)
    
    for uid, data in sorted(results.items()):
        print(f"\nUID: {uid}")
        print(f"Total Tweets Found: {data['total_tweets']} (Claimed: {data['tweets_claimed']})")
        print(f"Average Score: {data['average_score']:.4f}")
        print(f"Volume Bonus: {data['volume_bonus']:.4f}")
        print(f"Final Weight: {data['final_weight']:.4f}")
        print("\nTweets:")
        for tweet in data['tweets']:
            print("-" * 80)
            print(f"ID: {tweet['id']}")
            print(f"Text: {tweet['text']}")
            print(f"Engagement: Likes={tweet['engagement']['likes']}, "
                  f"Replies={tweet['engagement']['replies']}, "
                  f"Retweets={tweet['engagement']['retweets']}, "
                  f"Views={tweet['engagement']['views']}")
        print("=" * 100)
    
    return results

# Process subnet 59
input_file = DATA_DIR / 'posts.json'
subnet_results = process_subnet_posts(59, input_file)


=== Tweet Coverage Analysis ===
UID        Claimed    Found      Coverage %
--------------------------------------------------
100        3          1          33.3%
  Query: (@Maina20950800) since:2025-01-03
102        3          1          33.3%
  Query: (from:pham_thuong97) since:2025-01-03
103        3          1          33.3%
  Query: (from:ERICKMORA13) since:2025-01-03
104        3          1          33.3%
  Query: (from:wangcoco12345) since:2025-01-03
105        3          1          33.3%
  Query: (from:jexchangemining) since:2025-01-03
106        3          1          33.3%
  Query: (from:ProSib) since:2024-12-31
107        3          1          33.3%
  Query: (from:atlascryptodump) since:2024-12-31
108        3          1          33.3%
  Query: (from:jeacryptomarket) since:2024-12-31
109        3          1          33.3%
  Query: (from:Mohammed_A_250) since:2025-01-03
11         3          1          33.3%
  Query: (from:tuyetnga28986) since:2025-01-03
110        3      