In [21]:
import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import MinMaxScaler
from IPython.display import display, HTML
import tabulate

# Get the notebook's directory path
notebook_dir = os.path.dirname(os.path.abspath('__file__'))
# Construct path to data file
data_path = os.path.join(notebook_dir, '..', 'data', 'posts.json')

# Load and prepare data
df = pd.read_json(data_path)

def flatten_tweets(df):
    flattened = []
    for _, row in df.iterrows():
        uid = row['uid']
        for tweet in row['tweets']:
            if tweet['Tweet']:
                flattened.append({
                    'uid': uid,
                    'text': tweet['Tweet']['Text'],
                    'likes': tweet['Tweet']['Likes'],
                    'retweets': tweet['Tweet']['Retweets'],
                    'replies': tweet['Tweet']['Replies'],
                    'views': tweet['Tweet']['Views']
                })
    return pd.DataFrame(flattened)

In [22]:
def calculate_agent_score(row):
    engagement_weights = {
        'likes': 2.0,
        'retweets': 1.5, 
        'replies': 1.0,
        'views': 0.1
    }
    length_weight = 0.5
    
    base_score = 0
    text_length = len(str(row['text']))
    base_score += text_length * length_weight
    
    for metric, weight in engagement_weights.items():
        if metric in row:
            base_score += row[metric] * weight
            
    return np.log1p(base_score)

# Create flat dataframe
flat_df = flatten_tweets(df)

# Calculate individual tweet scores
flat_df['agent_score'] = flat_df.apply(calculate_agent_score, axis=1)

# Calculate agent scores
agent_scores = flat_df.groupby('uid').agg({
    'agent_score': ['mean', 'count']
}).round(2)

agent_scores.columns = ['mean_score', 'tweet_count']
agent_scores['final_score'] = agent_scores['mean_score'] * np.log1p(agent_scores['tweet_count'])

# Normalize scores
scaler = MinMaxScaler(feature_range=(0, 100))
agent_scores['normalized_score'] = scaler.fit_transform(agent_scores[['final_score']])

# Sort by normalized score
agent_scores = agent_scores.sort_values('normalized_score', ascending=False)

# Format and display the table
def display_styled_table(df):
    styled_df = df.style.background_gradient(subset=['normalized_score'], cmap='YlOrRd')\
        .format({
            'mean_score': '{:.2f}',
            'tweet_count': '{:.0f}',
            'final_score': '{:.2f}',
            'normalized_score': '{:.2f}'
        })
    display(HTML(styled_df.to_html()))

# Display total number and table
print(f"\nTotal number of agents: {len(agent_scores)}")
display_styled_table(agent_scores)

# Save results
output_path = os.path.join(notebook_dir, 'agent_scores_full.csv')
agent_scores.to_csv(output_path)


Total number of agents: 232


Unnamed: 0_level_0,mean_score,tweet_count,final_score,normalized_score
uid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
138,4.56,1577,33.58,100.0
170,4.64,1242,33.06,98.33
216,4.55,419,27.48,80.38
166,4.69,213,25.17,72.93
23,3.94,536,24.77,71.64
231,4.55,180,23.65,68.06
27,4.35,143,21.62,61.51
232,4.49,99,20.68,58.48
187,4.38,109,20.59,58.2
24,4.07,146,20.31,57.31
