In [None]:
# task_6_vendor_scorecard.py

import pandas as pd
import numpy as np
from datetime import datetime

# Build a per-vendor lending scorecard.
#
# Pipeline: read the two CSV inputs, join them on vendor_id, derive per-vendor
# metrics (average views, posting frequency, average price, most-viewed row),
# combine them into a "Lending Score", and write the ranked table to CSV.

# Weights applied to the two components of the lending score.
# NOTE(review): average views and posts/week are combined without any
# normalisation, so whichever metric has the larger magnitude dominates the
# score -- confirm this is intended.
VIEWS_WEIGHT = 0.5
POSTS_PER_WEEK_WEIGHT = 0.5

# Output columns, in order. Declared up front so the scorecard keeps its
# schema (and can still be sorted/saved) even when no vendor rows exist.
SCORECARD_COLUMNS = [
    "vendor_id",
    "Avg. Views/Post",
    "Posts/Week",
    "Avg. Price (ETB)",
    "Top Product",
    "Top Price",
    "Lending Score",
]

# Load Telegram vendor posts data
posts_df = pd.read_csv("vendor_posts.csv")

# Load NER-extracted data (product name, price, etc.)
entities_df = pd.read_csv("ner_extracted_entities.csv")

# Merge post metadata and NER results.
# NOTE(review): the join key is vendor_id only. If both files hold several
# rows per vendor, every post row is paired with every entity row of that
# vendor, which multiplies the row counts used for Posts/Week below. If the
# files share a per-post identifier, it should probably be part of the key --
# TODO confirm against the CSV schemas.
merged_df = posts_df.merge(entities_df, on="vendor_id")

# Convert timestamp to datetime
merged_df['timestamp'] = pd.to_datetime(merged_df['timestamp'])

# Calendar fields for weekly posting frequency. The ISO week number repeats
# every year, so the ISO year is kept alongside it; grouping on the week
# number alone would lump together the same week of different years.
iso_calendar = merged_df['timestamp'].dt.isocalendar()
merged_df['iso_year'] = iso_calendar['year']
merged_df['week'] = iso_calendar['week']

vendor_scores = []
for vendor_id, group in merged_df.groupby("vendor_id"):
    # Mean number of rows per (ISO year, ISO week) in which the vendor has
    # at least one row; weeks without any rows are not counted.
    posts_per_week = group.groupby(['iso_year', 'week']).size().mean()
    avg_views = group['views'].mean()
    avg_price = group['price'].mean()

    # Row with the highest view count. idxmax() cannot pick a row when every
    # 'views' value is missing, so fall back to blank "top" fields instead of
    # aborting the whole scorecard for one vendor.
    views = group['views']
    if views.notna().any():
        top_post = group.loc[views.idxmax()]
        top_product = top_post['product']
        top_price = top_post['price']
    else:
        top_product = None
        top_price = np.nan

    lending_score = (
        (avg_views * VIEWS_WEIGHT)
        + (posts_per_week * POSTS_PER_WEEK_WEIGHT)
    )

    vendor_scores.append({
        "vendor_id": vendor_id,
        "Avg. Views/Post": round(avg_views, 2),
        "Posts/Week": round(posts_per_week, 2),
        "Avg. Price (ETB)": round(avg_price, 2),
        "Top Product": top_product,
        "Top Price": top_price,
        "Lending Score": round(lending_score, 2),
    })

# Passing the column list keeps "Lending Score" present even for an empty
# result, so the sort below does not raise KeyError.
scorecard_df = pd.DataFrame(vendor_scores, columns=SCORECARD_COLUMNS)
scorecard_df = scorecard_df.sort_values("Lending Score", ascending=False)

# Save final scorecard
scorecard_df.to_csv("vendor_lending_scorecard.csv", index=False)
print("Vendor lending scorecard generated.")
