In [31]:
# Cell 1 - Imports and Setup
import pandas as pd
import os
import json
import sys
from datetime import datetime
from pathlib import Path

# Print current working directory to verify location
print(f"Current working directory: {os.getcwd()}")

# Setup paths. Every location below is derived from the current working
# directory, so this cell assumes the kernel was started two levels below the
# repository root (e.g. <repo>/notebooks/scoring).
NOTEBOOK_DIR = Path(os.getcwd())
REPO_ROOT = NOTEBOOK_DIR.parent.parent  # Go up two levels to reach repo root
DATA_DIR = NOTEBOOK_DIR.parent / 'data'
JSON_OUTPUT_DIR = DATA_DIR / 'json'

# Add repository root to Python path for protocol imports.
# Guarded so that re-running this cell does not keep appending duplicate
# entries to sys.path.
if str(REPO_ROOT) not in sys.path:
    sys.path.append(str(REPO_ROOT))

# Import scoring modules (must come after the sys.path update above)
from protocol.scoring.post_scorer import PostScorer
from protocol.scoring.miner_weights import MinerWeights

# Create output directory (no-op if it already exists)
JSON_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Print paths for verification
print(f"Repository root: {REPO_ROOT}")
print(f"Data directory: {DATA_DIR}")
print(f"JSON output directory: {JSON_OUTPUT_DIR}")

Current working directory: /Users/brendanplayford/masa/agent-arena-subnet/notebooks/scoring
Repository root: /Users/brendanplayford/masa/agent-arena-subnet
Data directory: /Users/brendanplayford/masa/agent-arena-subnet/notebooks/data
JSON output directory: /Users/brendanplayford/masa/agent-arena-subnet/notebooks/data/json


In [32]:
def extract_posts_by_uid(uid: str, input_file: str) -> pd.DataFrame:
    """Extract all posts for a given user ID from a JSON file.

    The file may contain either a single record (a JSON object) or a list of
    records. A record contributes posts only when its ``'uid'`` value equals
    ``uid``; for such a record, every item of its ``'tweets'`` list that has a
    ``'Tweet'`` key contributes that key's value as one row.

    Records without a ``'uid'`` key, records whose ``'tweets'`` value is
    missing or null, and tweet items without a ``'Tweet'`` key are skipped
    rather than raising.

    Args:
        uid: User ID to match against each record's ``'uid'`` value
            (compared with ``==``, so the type must match the file's).
        input_file: Path to the JSON file to read.

    Returns:
        A DataFrame with one row per extracted tweet (empty if none match).

    Raises:
        FileNotFoundError: If ``input_file`` does not exist.
        json.JSONDecodeError: If the file is not valid JSON (this is a
            ``ValueError`` subclass).
    """
    if not os.path.exists(input_file):
        raise FileNotFoundError(f"Input file not found: {input_file}")

    # JSON is UTF-8 by specification; do not rely on the platform default.
    with open(input_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Normalise both supported top-level shapes to a list of records so a
    # single code path handles them.
    if isinstance(data, dict):
        records = [data]
    elif isinstance(data, list):
        records = data
    else:
        records = []

    posts = []
    for record in records:
        if not isinstance(record, dict):
            continue
        if 'uid' not in record or record['uid'] != uid:
            continue
        # 'tweets' may be absent or null; treat both as "no tweets".
        for tweet in record.get('tweets') or []:
            if isinstance(tweet, dict) and 'Tweet' in tweet:
                posts.append(tweet['Tweet'])

    return pd.DataFrame(posts)

In [33]:
uid = "99"  # Replace with your desired UID
input_file = str(DATA_DIR / 'posts.json')

# Pull this user's posts out of posts.json and write them to their own file.
# Both expected failure types are reported the same way; a missing input file
# additionally gets a hint about where the file should live.
try:
    user_posts = extract_posts_by_uid(uid, input_file)
    output_file = JSON_OUTPUT_DIR.joinpath(f'user_{uid}_posts.json')
    user_posts.to_json(output_file, orient='records', indent=2)
    print(f"Saved {len(user_posts)} posts to {output_file}")
except (FileNotFoundError, ValueError) as e:
    print(f"Error: {e}")
    if isinstance(e, FileNotFoundError):
        print(f"Please ensure the posts.json file exists in: {DATA_DIR}")

Saved 38 posts to /Users/brendanplayford/masa/agent-arena-subnet/notebooks/data/json/user_99_posts.json


In [34]:
from datetime import datetime
import json
import math
from pathlib import Path
from protocol.scoring.post_scorer import PostScorer
from protocol.scoring.miner_weights import MinerWeights

def score_user_posts(uid: str, user_posts_file: Path):
    """Score posts for a specific user and print a scoring breakdown.

    Loads a JSON list of tweet objects, wraps it in the single-post structure
    passed to ``PostScorer.score_posts``, computes miner weights, and prints
    per-tweet scores plus a weight summary.

    Args:
        uid: Identifier of the user being scored.
        user_posts_file: Path to a JSON file containing a non-empty list of
            tweet objects. The first tweet must have a ``'UserID'`` key and
            every tweet an ``'ID'`` key.

    Returns:
        A dict with keys ``'scored_posts'``, ``'uids'``, ``'weights'`` and
        ``'detailed_scores'`` (total tweets, average score, volume bonus and
        final weight).

    Raises:
        ValueError: If the file does not contain a non-empty JSON list.
    """
    # Load and validate the input before building the scorers so that an
    # empty export fails fast with a clear message instead of an IndexError
    # on tweets[0] (or a ZeroDivisionError in the average below).
    with open(user_posts_file, 'r', encoding='utf-8') as f:
        tweets = json.load(f)

    if not isinstance(tweets, list) or not tweets:
        raise ValueError(f"No tweets found for UID {uid} in {user_posts_file}")

    # Initialize scorers
    post_scorer = PostScorer()
    miner_weights = MinerWeights(post_scorer)

    # Format data for scoring: one post record holding all of the user's tweets
    formatted_post = {
        'uid': uid,
        'user_id': tweets[0]['UserID'],
        'subnet_id': 59,
        'created_at': int(datetime.now().timestamp()),
        'tweets': [{'Tweet': tweet} for tweet in tweets]
    }

    # Score the posts
    scored_posts = post_scorer.score_posts([formatted_post])

    # Calculate miner weights
    uids, weights = miner_weights.calculate_weights(scored_posts)

    # Detailed scoring breakdown
    print("\n=== Scoring Breakdown ===")
    print(f"UID: {uid}")
    print("\nIndividual Tweet Scores:")
    print("-" * 80)
    print(f"{'Tweet ID':<20} {'Engagement':<12} {'Interaction':<12} {'Final Score':<12}")
    print("-" * 80)

    for tweet in formatted_post['tweets']:
        tweet_data = tweet['Tweet']
        engagement_score = post_scorer.calculate_engagement_score(tweet_data)
        interaction_score = post_scorer.calculate_interaction_score(tweet_data)
        final_score = post_scorer.calculate_tweet_score(tweet_data)

        print(f"{tweet_data['ID']:<20} {engagement_score:.4f}     {interaction_score:.4f}     {final_score:.4f}")

    print("\n=== Weight Calculation ===")
    total_tweets = len(tweets)  # guaranteed > 0 by the validation above
    # Assumes scored_posts[0]['scores'] holds one {'score': ...} entry per
    # tweet - TODO confirm against PostScorer.score_posts.
    avg_score = sum(s['score'] for s in scored_posts[0]['scores']) / total_tweets
    # NOTE(review): recomputed here for display only; presumably mirrors the
    # volume term used inside MinerWeights - confirm.
    volume_bonus = math.log1p(total_tweets) / 10
    final_weight = weights[0]

    print(f"Total Tweets: {total_tweets}")
    print(f"Average Score: {avg_score:.4f}")
    print(f"Volume Bonus: {volume_bonus:.4f}")
    print(f"Final Weight: {final_weight:.4f}")

    return {
        'scored_posts': scored_posts,
        'uids': uids,
        'weights': weights,
        'detailed_scores': {
            'total_tweets': total_tweets,
            'average_score': avg_score,
            'volume_bonus': volume_bonus,
            'final_weight': final_weight
        }
    }

# Run the scorer against the per-user export in JSON_OUTPUT_DIR.
uid = "99"
output_file = JSON_OUTPUT_DIR.joinpath(f'user_{uid}_posts.json')

try:
    results = score_user_posts(uid, output_file)
except Exception as e:
    # Print a short summary first, then re-raise so the full traceback
    # is still shown in the cell output.
    print(f"Error processing posts: {str(e)}")
    raise

Number of tweets to score: 38
First tweet structure example:
{
  "Tweet": {
    "ConversationID": "1869412631389340101",
    "GIFs": null,
    "Hashtags": [
      "KnowYourWorth",
      "DatingDisasters"
    ],
    "HTML": "Just had lunch with a guy who suggested splitting the bill 50/50! \ud83d\ude44 Why do men think this is okay? Ladies, if he's not willing to treat you, he's not worth your time. Society needs to stop applauding this nonsense! <a href=\"https://twitter.com/hashtag/KnowYourWorth\">#KnowYourWorth</a> <a href=\"https://twitter.com/hashtag/DatingDisasters\">#DatingDisasters</a>",
    "ID": "1869412631389340101",
    "InReplyToStatus": null,
    "InReplyToStatusID": "",
    "IsQuoted": false,
    "IsPin": false,
    "IsReply": false,
    "IsRetweet": false,
    "IsSelfThread": false,
    "Likes": 0,
    "Name": "Carlie",
    "Mentions": null,
    "PermanentURL": "https://twitter.com/CarlieGirlie2/status/1869412631389340101",
    "Photos": null,
    "Place": null,
    "Quo