# 🎯 Lead Scoring & Prioritization Tool
This notebook demonstrates a multi-factor lead scoring system that evaluates leads based on:
- Company size
- Estimated revenue
- Data completeness (email, phone, LinkedIn)
- Engagement readiness (recent funding)
- Contact title relevance

It also validates emails and phone numbers and categorizes leads into **Hot, Warm, or Cold**.


In [1]:
import random
import re
from datetime import datetime
import pandas as pd
import numpy as np


### 🔹 Email and Phone Validation
Functions to check:
- Valid email addresses
- Valid phone numbers (10 digits)
- Generate placeholder valid emails and phone numbers


In [2]:
def validate_email(email):
    if not email:
        return False
    pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
    return bool(re.match(pattern, email))

def validate_phone(phone):
    if not phone:
        return False
    digits = re.sub(r'\D', '', phone)
    return len(digits) == 10

def generate_valid_email(company_name, person_id):
    clean_company = re.sub(r'[^a-zA-Z0-9]', '', company_name.lower())
    return f"lead{person_id}@{clean_company}.com"

def generate_valid_phone():
    area_code = random.randint(200, 999)
    exchange = random.randint(200, 999)
    number = random.randint(1000, 9999)
    return f"+1 ({area_code}) {exchange}-{number}"


### 🔹 Sample Lead Data
We generate a sample dataset with:
- Company Name
- Contact Name & Title
- Email, Phone, LinkedIn
- Company Size
- Estimated Revenue
- Recent Funding
- Location


In [3]:
def generate_sample_leads(n=50, seed=42):
    rng = random.Random(seed)
    np.random.seed(seed)
    companies = [f"Company {i+1}" for i in range(n)]
    cities = ["New York", "San Francisco", "Chicago", "Austin", "Boston", "Seattle", "Denver"]
    leads_data = []

    for i in range(n):
        has_email = rng.random() > 0.15
        has_phone = rng.random() > 0.30
        has_linkedin = rng.random() > 0.25
        employees = rng.choice([10, 25, 50, 100, 250, 500, 1000])
        revenue = employees * (40000 + rng.uniform(20000, 120000))
        recent_funding = rng.random() > 0.65
        location = rng.choice(cities)

        email = generate_valid_email(companies[i], i+1) if has_email else None
        phone = generate_valid_phone() if has_phone else None
        linkedin = f"linkedin.com/in/person{i+1}" if has_linkedin else None

        leads_data.append({
            'company_name': companies[i],
            'contact_name': f"Person {i+1}",
            'title': rng.choice(['CEO', 'VP Sales', 'CTO', 'Marketing Manager', 'COO']),
            'email': email,
            'phone': phone,
            'linkedin_url': linkedin,
            'company_size': employees,
            'estimated_revenue': revenue,
            'recent_funding': recent_funding,
            'location': location
        })

    return pd.DataFrame(leads_data)

# Generate leads
df = generate_sample_leads(50)
df.head(5)


Unnamed: 0,company_name,contact_name,title,email,phone,linkedin_url,company_size,estimated_revenue,recent_funding,location
0,Company 1,Person 1,COO,lead1@company1.com,,linkedin.com/in/person1,25,1848845.0,False,Seattle
1,Company 2,Person 2,VP Sales,,+1 (483) 670-9467,,25,2081652.0,False,Boston
2,Company 3,Person 3,VP Sales,lead3@company3.com,+1 (731) 713-9873,linkedin.com/in/person3,100,11892660.0,True,New York
3,Company 4,Person 4,CTO,lead4@company4.com,+1 (414) 322-5999,,1000,93659450.0,False,New York
4,Company 5,Person 5,CEO,lead5@company5.com,+1 (427) 825-3155,linkedin.com/in/person5,500,52971240.0,False,Austin


### 🔹 Lead Scoring
Score each lead (0–100) based on:
- Company size (0–25)
- Revenue (0–25)
- Data completeness (0–20)
- Engagement readiness (0–15)
- Title relevance (0–15)


In [4]:
def calculate_lead_score(row, weights=None, rng=None, size_thresholds=None, revenue_thresholds=None):
    if rng is None:
        _rand = random.random
    else:
        _rand = rng.random

    if weights is None:
        weights = {'company_size': 1.0, 'revenue': 1.0, 'data': 1.0, 'engagement': 1.0, 'title': 1.0}

    if size_thresholds is None:
        size_thresholds = {'large': 1000, 'medium': 500, 'small': 100}
    if revenue_thresholds is None:
        revenue_thresholds = {'high': 50, 'medium': 20, 'low': 5}

    score = 0
    factors = []

    # Company size
    if row['company_size'] >= size_thresholds['large']: pts = 25
    elif row['company_size'] >= size_thresholds['medium']: pts = 20
    elif row['company_size'] >= size_thresholds['small']: pts = 15
    else: pts = 10
    score += pts * weights['company_size']
    factors.append(("Company Size", pts * weights['company_size']))

    # Revenue
    rev_m = row['estimated_revenue'] / 1_000_000
    if rev_m >= revenue_thresholds['high']: pts = 25
    elif rev_m >= revenue_thresholds['medium']: pts = 20
    elif rev_m >= revenue_thresholds['low']: pts = 15
    else: pts = 10
    score += pts * weights['revenue']
    factors.append(("Revenue", pts * weights['revenue']))

    # Data completeness
    comp = 0
    if row.get('email') and validate_email(row['email']): comp += 8
    if row.get('phone') and validate_phone(row['phone']): comp += 7
    if row.get('linkedin_url'): comp += 5
    score += comp * weights['data']
    factors.append(("Data Completeness", comp * weights['data']))

    # Engagement readiness
    pts = 10 if row['recent_funding'] else 0
    if _rand() > 0.5: pts += 5
    score += pts * weights['engagement']
    factors.append(("Engagement", pts * weights['engagement']))

    # Title relevance
    pts = 15 if row['title'] in ['CEO', 'CTO', 'COO', 'VP Sales', 'Chief Revenue Officer'] else 8
    score += pts * weights['title']
    factors.append(("Title", pts * weights['title']))

    return min(100, round(score,2)), factors

def calculate_confidence(row):
    c = 0
    if row.get('email') and validate_email(row['email']): c += 33
    if row.get('phone') and validate_phone(row['phone']): c += 33
    if row.get('linkedin_url'): c += 34
    return min(100, int(c))


### 🔹 Apply Lead Score & Category
Categorize leads into **Hot (>=70), Warm (40–69), Cold (<40)**


In [5]:
results = df.apply(lambda row: calculate_lead_score(row), axis=1)
df['lead_score'] = results.apply(lambda x: x[0])
df['factors'] = results.apply(lambda x: x[1])
df['confidence'] = df.apply(calculate_confidence, axis=1)
df['category'] = df['lead_score'].apply(lambda s: 'Hot' if s>=70 else 'Warm' if s>=40 else 'Cold')
df['email_valid'] = df['email'].apply(lambda x: '✅' if validate_email(x) else '❌')
df['phone_valid'] = df['phone'].apply(lambda x: '✅' if validate_phone(x) else '❌')

df.head(5)


Unnamed: 0,company_name,contact_name,title,email,phone,linkedin_url,company_size,estimated_revenue,recent_funding,location,lead_score,factors,confidence,category,email_valid,phone_valid
0,Company 1,Person 1,COO,lead1@company1.com,,linkedin.com/in/person1,25,1848845.0,False,Seattle,48.0,"[(Company Size, 10.0), (Revenue, 10.0), (Data ...",67,Warm,✅,❌
1,Company 2,Person 2,VP Sales,,+1 (483) 670-9467,,25,2081652.0,False,Boston,35.0,"[(Company Size, 10.0), (Revenue, 10.0), (Data ...",0,Cold,❌,❌
2,Company 3,Person 3,VP Sales,lead3@company3.com,+1 (731) 713-9873,linkedin.com/in/person3,100,11892660.0,True,New York,73.0,"[(Company Size, 15.0), (Revenue, 15.0), (Data ...",67,Hot,✅,❌
3,Company 4,Person 4,CTO,lead4@company4.com,+1 (414) 322-5999,,1000,93659450.0,False,New York,78.0,"[(Company Size, 25.0), (Revenue, 25.0), (Data ...",33,Hot,✅,❌
4,Company 5,Person 5,CEO,lead5@company5.com,+1 (427) 825-3155,linkedin.com/in/person5,500,52971240.0,False,Austin,78.0,"[(Company Size, 20.0), (Revenue, 25.0), (Data ...",67,Hot,✅,❌


### 🔹 Lead Summary
Count of Hot, Warm, and Cold leads


In [6]:
hot_count = len(df[df['category']=='Hot'])
warm_count = len(df[df['category']=='Warm'])
cold_count = len(df[df['category']=='Cold'])

print(f"🔥 Hot: {hot_count} | 🌤 Warm: {warm_count} | ❄ Cold: {cold_count}")


🔥 Hot: 13 | 🌤 Warm: 34 | ❄ Cold: 3


### 🔹 Top 10 Leads with Details


In [7]:
top_leads = df.sort_values('lead_score', ascending=False).head(10)
top_leads[['company_name','contact_name','title','email','email_valid','phone','phone_valid','lead_score','confidence','category','location']]


Unnamed: 0,company_name,contact_name,title,email,email_valid,phone,phone_valid,lead_score,confidence,category,location
49,Company 50,Person 50,COO,lead50@company50.com,✅,+1 (248) 741-1974,❌,88.0,67,Hot,Denver
14,Company 15,Person 15,CTO,lead15@company15.com,✅,+1 (773) 278-8630,❌,83.0,67,Hot,Boston
5,Company 6,Person 6,CEO,lead6@company6.com,✅,+1 (976) 686-5438,❌,78.0,67,Hot,New York
4,Company 5,Person 5,CEO,lead5@company5.com,✅,+1 (427) 825-3155,❌,78.0,67,Hot,Austin
18,Company 19,Person 19,COO,lead19@company19.com,✅,,❌,78.0,67,Hot,San Francisco
3,Company 4,Person 4,CTO,lead4@company4.com,✅,+1 (414) 322-5999,❌,78.0,33,Hot,New York
40,Company 41,Person 41,CEO,lead41@company41.com,✅,+1 (877) 685-4075,❌,78.0,67,Hot,Denver
30,Company 31,Person 31,Marketing Manager,lead31@company31.com,✅,+1 (858) 277-4093,❌,76.0,67,Hot,Boston
10,Company 11,Person 11,Marketing Manager,lead11@company11.com,✅,,❌,76.0,67,Hot,Austin
17,Company 18,Person 18,COO,lead18@company18.com,✅,,❌,73.0,67,Hot,New York


In [8]:
# Example: First lead
lead = top_leads.iloc[0]
print(f"Lead: {lead['company_name']} ({lead['category']})")
for factor, pts in lead['factors']:
    print(f"{factor}: +{pts} pts")


Lead: Company 50 (Hot)
Company Size: +20.0 pts
Revenue: +25.0 pts
Data Completeness: +13.0 pts
Engagement: +15.0 pts
Title: +15.0 pts
