# 🎯 Lead Scoring & Prioritization Tool

This notebook demonstrates a **Lead Scoring & Prioritization Tool** that evaluates potential business leads based on multiple weighted factors like:

- Company size  
- Revenue  
- Data completeness (email, phone, LinkedIn)  
- Engagement readiness  
- Title relevance  


It also validates emails and phone numbers and categorizes leads into Hot, Warm, or Cold.
---


In [1]:
import sys
import os
import random
import argparse
import re
from datetime import datetime, timedelta

try:
    import streamlit as st
    STREAMLIT_AVAILABLE = True
except Exception:
    STREAMLIT_AVAILABLE = False

import pandas as pd
import numpy as np
import unittest


In [2]:
# Hide Streamlit Deploy/Toolbar/Status UI when running in app mode
if STREAMLIT_AVAILABLE:
    hide_streamlit_style = """
        <style>
        #MainMenu {visibility: hidden;}
        footer {visibility: hidden;}
        header {visibility: hidden;}
        [data-testid="stToolbar"] {visibility: hidden !important;}
        [data-testid="stDeployButton"] {display: none !important;}
        [data-testid="stStatusWidget"] {display: none !important;}
        </style>
    """
    st.markdown(hide_streamlit_style, unsafe_allow_html=True)


2025-10-09 13:01:08.099 
  command:

    streamlit run /home/mahi/venv/lib/python3.12/site-packages/ipykernel_launcher.py [ARGUMENTS]


In [3]:
def validate_email(email):
    if not email:
        return False
    pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
    return bool(re.match(pattern, email))

def validate_phone(phone):
    if not phone:
        return False
    digits = re.sub(r'\D', '', phone)
    return len(digits) == 10

def generate_valid_email(company_name, person_id):
    clean_company = re.sub(r'[^a-zA-Z0-9]', '', company_name.lower())
    return f"lead{person_id}@{clean_company}.com"

def generate_valid_phone():
    area_code = random.randint(200, 999)
    exchange = random.randint(200, 999)
    number = random.randint(1000, 9999)
    return f"+1 ({area_code}) {exchange}-{number}"


In [4]:
def generate_sample_leads(n=50, seed=42):
    rng = random.Random(seed)
    np.random.seed(seed)
    companies = [f"Company {i+1}" for i in range(n)]
    cities = ["New York", "San Francisco", "Chicago", "Austin", "Boston", "Seattle", "Denver"]
    leads_data = []

    for i in range(n):
        has_email = rng.random() > 0.15
        has_phone = rng.random() > 0.30
        has_linkedin = rng.random() > 0.25
        employees = rng.choice([10, 25, 50, 100, 250, 500, 1000])
        revenue = employees * (40000 + rng.uniform(20000, 120000))
        recent_funding = rng.random() > 0.65
        location = rng.choice(cities)

        email = generate_valid_email(companies[i], i+1) if has_email else None
        if email and not validate_email(email):
            email = None

        phone = generate_valid_phone() if has_phone else None
        if phone and not validate_phone(phone):
            phone = None

        lead = {
            'company_name': companies[i],
            'contact_name': f"Person {i+1}",
            'title': rng.choice(['CEO', 'VP Sales', 'CTO', 'Marketing Manager', 'COO']),
            'email': email,
            'phone': phone,
            'linkedin_url': f"linkedin.com/in/person{i+1}" if has_linkedin else None,
            'company_size': employees,
            'estimated_revenue': revenue,
            'recent_funding': recent_funding,
            'location': location
        }
        leads_data.append(lead)

    return pd.DataFrame(leads_data)


In [5]:
def calculate_lead_score(row, weights, rng=None, size_thresholds=None, revenue_thresholds=None):
    if rng is None:
        _rand = random.random
    else:
        _rand = rng.random

    if size_thresholds is None:
        size_thresholds = {'large': 1000, 'medium': 500, 'small': 100}
    if revenue_thresholds is None:
        revenue_thresholds = {'high': 50, 'medium': 20, 'low': 5}

    score = 0.0
    factors = []

    # Company Size
    if row['company_size'] >= size_thresholds['large']:
        pts = 25
    elif row['company_size'] >= size_thresholds['medium']:
        pts = 20
    elif row['company_size'] >= size_thresholds['small']:
        pts = 15
    else:
        pts = 10
    score += pts * weights.get('company_size', 1.0)
    factors.append(("Company Size", pts * weights.get('company_size', 1.0)))

    # Revenue
    rev_m = row['estimated_revenue'] / 1_000_000
    if rev_m >= revenue_thresholds['high']:
        pts = 25
    elif rev_m >= revenue_thresholds['medium']:
        pts = 20
    elif rev_m >= revenue_thresholds['low']:
        pts = 15
    else:
        pts = 10
    score += pts * weights.get('revenue', 1.0)
    factors.append(("Revenue", pts * weights.get('revenue', 1.0)))

    # Data Completeness
    comp = 0
    if row.get('email') and validate_email(row.get('email')):
        comp += 8
    if row.get('phone') and validate_phone(row.get('phone')):
        comp += 7
    if row.get('linkedin_url'):
        comp += 5
    score += comp * weights.get('data', 1.0)
    factors.append(("Data Completeness", comp * weights.get('data', 1.0)))

    # Engagement
    pts = 10 if row.get('recent_funding') else 0
    if _rand() > 0.5:
        pts += 5
    score += pts * weights.get('engagement', 1.0)
    factors.append(("Engagement", pts * weights.get('engagement', 1.0)))

    # Title Relevance
    if row.get('title') in ['CEO', 'CTO', 'COO', 'VP Sales', 'Chief Revenue Officer']:
        pts = 15
    else:
        pts = 8
    score += pts * weights.get('title', 1.0)
    factors.append(("Title", pts * weights.get('title', 1.0)))

    return min(100, round(score, 2)), factors


In [6]:
def calculate_confidence(row):
    c = 0
    if row.get('email') and validate_email(row.get('email')):
        c += 33
    if row.get('phone') and validate_phone(row.get('phone')):
        c += 33
    if row.get('linkedin_url'):
        c += 34
    return min(100, int(c))


In [7]:
def run_console_demo(n=50, seed=42, weights=None, out_dir='/mnt/data', size_thresholds=None, revenue_thresholds=None):
    print("Streamlit not available — running console demo.")
    df = generate_sample_leads(n=n, seed=seed)

    if weights is None:
        weights = {'company_size': 1.0, 'revenue': 1.0, 'data': 1.0, 'engagement': 1.0, 'title': 1.0}

    rng = random.Random(seed)
    results = df.apply(lambda row: calculate_lead_score(row, weights, rng, size_thresholds, revenue_thresholds), axis=1)
    df['lead_score'] = results.apply(lambda x: x[0])
    df['factors'] = results.apply(lambda x: x[1])
    df['confidence'] = df.apply(calculate_confidence, axis=1)
    df['category'] = df['lead_score'].apply(lambda s: 'Hot' if s >= 70 else 'Warm' if s >= 40 else 'Cold')

    df['email_valid'] = df['email'].apply(lambda x: '✅' if validate_email(x) else '❌')
    df['phone_valid'] = df['phone'].apply(lambda x: '✅' if validate_phone(x) else '❌')

    print(f"\nGenerated {len(df)} leads. Hot: {len(df[df['category']=='Hot'])}, Warm: {len(df[df['category']=='Warm'])}, Cold: {len(df[df['category']=='Cold'])}")
    print("\nTop 10 leads:")
    print(df[['company_name', 'contact_name', 'title', 'email_valid', 'phone_valid', 'lead_score', 'confidence', 'category', 'location']]
          .sort_values('lead_score', ascending=False).head(10).to_string(index=False))


In [8]:
run_console_demo(n=15, seed=101)


Streamlit not available — running console demo.

Generated 15 leads. Hot: 2, Warm: 12, Cold: 1

Top 10 leads:
company_name contact_name             title email_valid phone_valid  lead_score  confidence category      location
   Company 9     Person 9               CEO           ✅           ❌        83.0          67      Hot      New York
   Company 8     Person 8               COO           ✅           ❌        73.0          67      Hot        Denver
   Company 1     Person 1               COO           ✅           ❌        68.0          67     Warm San Francisco
  Company 14    Person 14          VP Sales           ✅           ❌        68.0          67     Warm       Chicago
  Company 10    Person 10               COO           ✅           ❌        63.0          67     Warm       Seattle
   Company 4     Person 4               CTO           ✅           ❌        63.0          67     Warm       Seattle
  Company 11    Person 11               COO           ✅           ❌        58.0      

To run the full interactive Streamlit UI version of this tool:

```bash
!streamlit run leadscore.py
