In [1]:
# vendor_scorecard.py
import pandas as pd
from datetime import datetime, timedelta
import numpy as np
from collections import defaultdict
import matplotlib.pyplot as plt

class VendorAnalyticsEngine:
    def __init__(self, ner_results, post_metadata):
        """
        Initialize with:
        - ner_results: List of NER outputs (from your model)
        - post_metadata: List of dicts with post metadata (views, timestamps, etc.)
        """
        self.ner_results = ner_results
        self.post_metadata = post_metadata
        self.vendor_data = self._preprocess_data()
        
    def _preprocess_data(self):
        """Combine NER results with post metadata"""
        vendor_dict = defaultdict(lambda: {
            'posts': [],
            'products': [],
            'prices': [],
            'views': [],
            'dates': []
        })
        
        for post, metadata in zip(self.ner_results, self.post_metadata):
            vendor = metadata.get('vendor_name', 'unknown')
            
            # Extract entities
            products = [e['word'] for e in post if e['entity_group'] == 'PRODUCT']
            prices = [float(e['word'].replace(',', '')) 
                     for e in post if e['entity_group'] == 'PRICE' and e['word'].replace(',', '').isdigit()]
            
            vendor_dict[vendor]['posts'].append(post)
            vendor_dict[vendor]['products'].append(products)
            vendor_dict[vendor]['prices'].append(prices)
            vendor_dict[vendor]['views'].append(metadata.get('views', 0))
            vendor_dict[vendor]['dates'].append(metadata.get('date'))
            
        return dict(vendor_dict)
    
    def calculate_metrics(self, vendor_name):
        """Calculate all metrics for a single vendor"""
        if vendor_name not in self.vendor_data:
            raise ValueError(f"Vendor {vendor_name} not found in data")
            
        data = self.vendor_data[vendor_name]
        
        # Activity & Consistency
        posting_freq = self._calculate_posting_frequency(data['dates'])
        
        # Market Reach & Engagement
        avg_views = np.mean(data['views']) if data['views'] else 0
        top_post_idx = np.argmax(data['views']) if data['views'] else None
        top_post = {
            'views': data['views'][top_post_idx] if top_post_idx is not None else 0,
            'products': data['products'][top_post_idx] if top_post_idx is not None else [],
            'prices': data['prices'][top_post_idx] if top_post_idx is not None else []
        }
        
        # Business Profile
        avg_price = np.mean(data['prices']) if data['prices'] else 0
        
        return {
            'vendor': vendor_name,
            'posting_frequency': posting_freq,
            'avg_views': avg_views,
            'top_post': top_post,
            'avg_price': avg_price,
            'total_products': len(data['products']),
            'total_posts': len(data['posts'])
        }
    
    def _calculate_posting_frequency(self, dates):
        """Calculate posts per week"""
        if not dates:
            return 0
            
        # Convert to datetime objects if strings
        if isinstance(dates[0], str):
            dates = [datetime.strptime(d, '%Y-%m-%d %H:%M:%S') for d in dates]
        
        dates_sorted = sorted(dates)
        time_span = (dates_sorted[-1] - dates_sorted[0]).days
        
        if time_span == 0:
            return len(dates)  # All posts in same day
        
        weeks = time_span / 7
        return len(dates) / weeks
    
    def calculate_lending_score(self, metrics, weights=None):
        """Calculate composite lending score"""
        if weights is None:
            weights = {
                'avg_views': 0.5,
                'posting_frequency': 0.3,
                'avg_price': 0.2
            }
        
        # Normalize metrics
        max_views = max(m['avg_views'] for m in self.get_all_metrics()) or 1
        max_freq = max(m['posting_frequency'] for m in self.get_all_metrics()) or 1
        max_price = max(m['avg_price'] for m in self.get_all_metrics()) or 1
        
        normalized_views = metrics['avg_views'] / max_views
        normalized_freq = metrics['posting_frequency'] / max_freq
        normalized_price = metrics['avg_price'] / max_price
        
        score = (normalized_views * weights['avg_views'] +
                normalized_freq * weights['posting_frequency'] +
                normalized_price * weights['avg_price'])
        
        return min(100, score * 100)  # Scale to 100 max
    
    def get_all_metrics(self):
        """Calculate metrics for all vendors"""
        return [self.calculate_metrics(vendor) for vendor in self.vendor_data]
    
    def generate_scorecard_report(self, output_file="../reports/vendor_scorecard.html"):
        """Generate comprehensive HTML report"""
        metrics = self.get_all_metrics()
        
        # Calculate scores
        for m in metrics:
            m['lending_score'] = self.calculate_lending_score(m)
        
        # Create DataFrame for table
        df = pd.DataFrame([{
            'Vendor': m['vendor'],
            'Avg. Views/Post': round(m['avg_views'], 1),
            'Posts/Week': round(m['posting_frequency'], 1),
            'Avg. Price (ETB)': round(m['avg_price'], 2),
            'Total Products': m['total_products'],
            'Lending Score': round(m['lending_score'], 1)
        } for m in metrics])
        
        # Sort by score
        df = df.sort_values('Lending Score', ascending=False)
        
        # Generate HTML
        from jinja2 import Template
        
        template = Template("""
        <!DOCTYPE html>
        <html>
        <head>
            <title>Vendor Scorecard</title>
            <style>
                body { font-family: Arial, sans-serif; margin: 2em; }
                h1 { color: #2c3e50; }
                table { border-collapse: collapse; width: 100%; margin: 1em 0; }
                th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }
                th { background-color: #3498db; color: white; }
                tr:nth-child(even) { background-color: #f2f2f2; }
                .score-high { background-color: #2ecc71 !important; color: white; }
                .score-medium { background-color: #f39c12 !important; color: white; }
                .score-low { background-color: #e74c3c !important; color: white; }
                .chart { margin: 2em 0; }
            </style>
        </head>
        <body>
            <h1>Vendor Scorecard for Micro-Lending</h1>
            <p>Generated on {{ date }}</p>
            
            <h2>Summary Statistics</h2>
            <p>Analyzed {{ vendor_count }} vendors with {{ total_posts }} total posts</p>
            
            <h2>Scorecard</h2>
            {{ table }}
            
            <div class="chart">
                <h3>Lending Score Distribution</h3>
                <img src="{{ score_dist_plot }}" alt="Score Distribution">
            </div>
            
            <h2>Top Performing Vendors</h2>
            {% for vendor in top_vendors %}
            <div style="margin-bottom: 2em;">
                <h3>{{ loop.index }}. {{ vendor.vendor }} (Score: {{ vendor.lending_score|round(1) }})</h3>
                <p>
                    <strong>Business Profile:</strong> {{ vendor.total_products }} products listed, 
                    average price {{ vendor.avg_price|round(2) }} ETB
                </p>
                <p>
                    <strong>Engagement:</strong> {{ vendor.avg_views|round(1) }} average views/post, 
                    {{ vendor.posting_frequency|round(1) }} posts/week
                </p>
                <p>
                    <strong>Top Post:</strong> {{ vendor.top_post.views }} views - 
                    Products: {{ vendor.top_post.products|join(', ') }},
                    Prices: {{ vendor.top_post.prices|join(', ') }} ETB
                </p>
            </div>
            {% endfor %}
        </body>
        </html>
        """)
        
        # Generate score distribution plot
        plt.figure(figsize=(10, 5))
        plt.hist([m['lending_score'] for m in metrics], bins=10, color='#3498db')
        plt.title('Lending Score Distribution')
        plt.xlabel('Score')
        plt.ylabel('Number of Vendors')
        plot_path = "../reports/score_distribution.png"
        plt.savefig(plot_path)
        plt.close()
        
        # Prepare data for template
        from datetime import datetime
        html = template.render(
            date=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            vendor_count=len(metrics),
            total_posts=sum(m['total_posts'] for m in metrics),
            table=df.to_html(classes='dataframe', index=False),
            score_dist_plot=plot_path,
            top_vendors=sorted(metrics, key=lambda x: x['lending_score'], reverse=True)[:3]
        )
        
        with open(output_file, "w", encoding="utf-8") as f:
            f.write(html)
        
        return output_file

# Example Usage
if __name__ == "__main__":
    # Sample data - replace with your actual NER results and post metadata
    sample_ner_results = [
        [{'entity_group': 'PRODUCT', 'word': 'ልብስ'}, {'entity_group': 'PRICE', 'word': '500'}],
        [{'entity_group': 'PRODUCT', 'word': 'ሸሚዝ'}, {'entity_group': 'PRICE', 'word': '300'}]
    ]
    
    sample_metadata = [
        {'vendor_name': 'Vendor1', 'views': 1500, 'date': '2023-01-01 10:00:00'},
        {'vendor_name': 'Vendor1', 'views': 2500, 'date': '2023-01-05 15:00:00'}
    ]
    
    # Initialize analytics engine
    engine = VendorAnalyticsEngine(sample_ner_results, sample_metadata)
    
    # Generate scorecard report
    report_path = engine.generate_scorecard_report()
    print(f"Generated vendor scorecard report at: {report_path}")
    
    # Get metrics for a specific vendor
    vendor_metrics = engine.calculate_metrics("Vendor1")
    print("\nVendor Metrics:")
    print(vendor_metrics)
    
    # Calculate lending score


    score = engine.calculate_lending_score(vendor_metrics)
    print(f"\nLending Score: {score:.1f}")



Generated vendor scorecard report at: ../reports/vendor_scorecard.html

Vendor Metrics:
{'vendor': 'Vendor1', 'posting_frequency': 3.5, 'avg_views': np.float64(2000.0), 'top_post': {'views': 2500, 'products': ['ሸሚዝ'], 'prices': [300.0]}, 'avg_price': np.float64(400.0), 'total_products': 2, 'total_posts': 2}

Lending Score: 100.0
