In [1]:
# AI Sales Agent - Google Colab Notebook

# **Step 1: Install Required Libraries**
!pip install predictionguard langchain langchain_community pandas python-dotenv cryptography




In [2]:
# **Step 2: Import Required Libraries**
import os
import json
import pandas as pd
from getpass import getpass
from dotenv import load_dotenv
from cryptography.fernet import Fernet
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.llms import PredictionGuard
import re

In [3]:
# **Step 3: Secure API Key Input**
load_dotenv()


False

In [4]:
pg_access_token = getpass('Enter your Prediction Guard access token: ')
os.environ['PREDICTIONGUARD_TOKEN'] = pg_access_token

Enter your Prediction Guard access token: ··········


In [5]:
# **Step 4: Initialize Prediction Guard Model**
llm = PredictionGuard(
    model="Hermes-3-Llama-3.1-70B",
    predictionguard_api_key=os.environ['PREDICTIONGUARD_TOKEN'],
    temperature=0.75,
    max_tokens=100,
    stop=["000"]
)

  llm = PredictionGuard(


In [6]:
# Load Meeting Transcriptions
with open('meeting_transcriptions.txt', 'r', encoding='utf-8') as file:
    raw_transcriptions = file.read().strip().split('\n\n')

In [7]:
# Load Prediction Guard Offerings from JSON
with open('pg_offerings.json', 'r', encoding='utf-8') as file:
    pg_offerings = json.load(file)

In [8]:
# Extract the key offerings in a structured format
pg_summary = f"""
**Prediction Guard Offerings:**
- **Secure AI Deployment**: {pg_offerings["Prediction Guard Offerings"]["Key Offerings & Benefits"]["1. Secure & Reliable AI Deployment"]}
- **Optimized Performance**: {pg_offerings["Prediction Guard Offerings"]["Key Offerings & Benefits"]["2. Optimized Performance with Intel Gaudi 2 Processors"]}
- **Industry Use Cases**: {pg_offerings["Prediction Guard Offerings"]["Key Offerings & Benefits"]["3. Industry-Specific AI Use Cases"]}
- **Scalability & Cost Planning**: {pg_offerings["Prediction Guard Offerings"]["Key Offerings & Benefits"]["4. AI Scalability & Cost Planning"]}
- **Developer-Friendly AI**: {pg_offerings["Prediction Guard Offerings"]["Key Offerings & Benefits"]["5. Developer-Friendly AI Integration"]}
"""

In [9]:
# **Step 6: Encrypt and Decrypt Meeting Notes**
def generate_encryption_key():
    return Fernet.generate_key()

def encrypt_data(data, key):
    cipher = Fernet(key)
    encrypted_data = cipher.encrypt(data.encode('utf-8'))
    return encrypted_data

def decrypt_data(encrypted_data, key):
    cipher = Fernet(key)
    decrypted_data = cipher.decrypt(encrypted_data).decode('utf-8')
    return decrypted_data

# Generate Encryption Key
enc_key = generate_encryption_key()

# Extract meeting details and encrypt transcriptions
meeting_data = []
encrypted_transcriptions = []

for transcription in raw_transcriptions:
    # Improve regex to handle variations in date and time format
    date_match = re.search(r'Date:\s*([\d-]+)', transcription, re.IGNORECASE)
    time_match = re.search(r'Time:\s*([\d:APM\s]+)', transcription, re.IGNORECASE)

    date = date_match.group(1) if date_match else "Unknown"
    time = time_match.group(1) if time_match else "Unknown"

    # Remove extracted date/time before processing transcription
    cleaned_transcription = re.sub(r'Date:\s*[\d-]+\nTime:\s*[\d:APM\s]+', '', transcription).strip()

    # Encrypt transcription
    encrypted_data = encrypt_data(cleaned_transcription, enc_key)
    encrypted_transcriptions.append(encrypted_data)

    meeting_data.append({"Date": date, "Time": time, "Transcription": cleaned_transcription})

# Decrypt Transcriptions for Processing
decrypted_transcriptions = [
    {"Date": item["Date"], "Time": item["Time"], "Transcription": decrypt_data(et, enc_key)}
    for item, et in zip(meeting_data, encrypted_transcriptions)
]
print("Meeting Transcriptions Successfully Decrypted!")

Meeting Transcriptions Successfully Decrypted!


In [10]:
# **Step 7: Meeting Summary and Lead Scoring Using AI and Keyword Analysis**
summary_prompt_template = PromptTemplate(
    input_variables=["transcription"],
    template="""
    Extract only the key discussion points from the provided meeting transcription.
    - **Do NOT add extra details, names, or assumptions.**
    - **Return exactly 3 bullet points summarizing only what was discussed.**
    - **Do not generate additional topics or interpretations.**
    - **Each bullet point should be concise.**
    - **Don't keep the transcription in the summary.**

    **Meeting Transcription:**
    {transcription}
    """
)

summary_chain = LLMChain(llm=llm, prompt=summary_prompt_template)

def summarize_meeting(discussion):
    """Generate AI-driven summary of meeting discussion points."""
    try:
        response = summary_chain.run({"discussion": "\n".join(discussion)})
        return response.strip() if response else "Summarization failed."
    except Exception as e:
        return f"Error: {str(e)}"

lead_score_prompt_template = PromptTemplate(
    input_variables=["transcription", "offerings"],
    template="""
    Analyze the provided meeting transcription **without making assumptions**.
    - **Assign a lead score from 0 to 100.**
    - **Justify the score ONLY based on what was explicitly stated.**
    - **If the transcription lacks urgency, do not assume it.**
    - **Keep responses concise and avoid any extra details.**

    **Meeting Transcription:**
    {transcription}

    **Prediction Guard Offerings (Relevant Data for Comparison):**
    {offerings}
    """
)

lead_score_chain = LLMChain(llm=llm, prompt=lead_score_prompt_template)

# Define Functions
def summarize_transcription(transcription):
    """Generate AI-driven summary of meeting transcription."""
    try:
        response = summary_chain.run({"transcription": transcription})
        return response.strip() if response else "Summarization failed."
    except Exception as e:
        return f"Error: {str(e)}"

def assess_lead_quality(transcription, offerings):
    """Use LLM to assess the lead quality based on meeting transcription and PG offerings."""
    try:
        response = lead_score_chain.run({"transcription": transcription, "offerings": offerings})
        return response.strip() if response else "Scoring failed."
    except Exception as e:
        return f"Error: {str(e)}"

def keyword_based_scoring(assessment):
    """Improved keyword-based lead scoring with dynamic scaling."""

    # **Hot Lead (90+): Strong buying signals, immediate interest, decision-making authority involved**
    hot_keywords = [
        "immediate interest", "high priority", "pilot deployment", "urgent requirement",
        "ready to integrate", "aligns perfectly", "strong fit", "need a solution now",
        "deploying soon", "proceeding with implementation", "finalizing decision",
        "implementation roadmap", "procurement stage", "moving forward", "approved for rollout"
    ]

    # **Warm Lead (70-89): Interest exists but requires follow-ups, internal approvals, or budget discussion**
    warm_keywords = [
        "follow-up needed", "budget discussion", "considering options", "evaluating",
        "will discuss internally", "need more details", "potential use case", "pending approval",
        "reviewing internally", "exploring possibilities", "seeking clarification",
        "next steps discussion", "awaiting management approval", "assessing feasibility",
        "needs internal review", "requires IT/security sign-off", "not yet finalized"
    ]

    # **Cold Lead (<70): Low priority, vague interest, or long-term considerations**
    cold_keywords = [
        "no immediate plans", "not a priority", "just exploring", "not ready yet",
        "considering in the future", "long-term evaluation", "low urgency",
        "passive interest", "general inquiry", "no budget available",
        "no defined timeline", "waiting for funding", "not a business priority",
        "evaluating competitors", "need more convincing", "informational discussion"
    ]

    # Convert assessment to lowercase for case-insensitive matching
    assessment_lower = assessment.lower()

    # **Dynamic Scoring System**
    score = 50  # Start at a neutral score

    # Count keyword occurrences to refine scoring
    hot_count = sum(1 for keyword in hot_keywords if keyword in assessment_lower)
    warm_count = sum(1 for keyword in warm_keywords if keyword in assessment_lower)
    cold_count = sum(1 for keyword in cold_keywords if keyword in assessment_lower)

    # **Adjust score based on keyword strength**
    if hot_count > 0:
        score += hot_count * 10  # Boost score for hot keywords

    if warm_count > 0:
        score += warm_count * 5  # Moderate boost for warm keywords

    if cold_count > 0:
        score -= cold_count * 8  # Penalize for cold keywords

    # **Ensure score stays within valid range**
    score = min(100, max(30, score))  # Prevent going below 30 or above 100

    # **Final Categorization**
    if score >= 90:
        return f"Hot Lead (Score: {score})"
    elif score >= 70:
        return f"Warm Lead (Score: {score})"
    else:
        return f"Cold Lead (Score: {score})"



  summary_chain = LLMChain(llm=llm, prompt=summary_prompt_template)


In [11]:
# **Step 8: Process Meeting Notes**
processed_data = []

for transcription_data in decrypted_transcriptions:
    meeting_date = transcription_data["Date"]
    meeting_time = transcription_data["Time"]
    transcription_text = transcription_data["Transcription"]

    summary = summarize_transcription(transcription_text)
    lead_quality_assessment = assess_lead_quality(transcription_text, pg_summary)
    keyword_score = keyword_based_scoring(lead_quality_assessment)

    processed_data.append({
        "Date": meeting_date,
        "Time": meeting_time,
        "Summary": summary,
        "Lead Quality Assessment": lead_quality_assessment,
        "Keyword-Based Score": keyword_score
    })

  response = summary_chain.run({"transcription": transcription})


In [12]:
# **Step 9: Convert to DataFrame and Save**
df = pd.DataFrame(processed_data)
df.to_csv("lead_scoring_results.csv", index=False)

print("Lead Scoring Completed! Here are the results:")
df.head()


Lead Scoring Completed! Here are the results:


Unnamed: 0,Date,Time,Summary,Lead Quality Assessment,Keyword-Based Score
0,2025-02-10,10:00 AM,00:00\n - \n - \n - \n - \n - \...,**Lead Score: 0**\n \n **Justification:*...,Cold Lead (Score: 50)
1,Unknown,Unknown,University IT Director: Absolutely. We're usin...,**Lead Score: 20**\n \n **Justificatio...,Cold Lead (Score: 50)
2,Unknown,Unknown,Consultant: I understand your concerns. Data p...,**Lead Score: 70**\n \n **Justificatio...,Cold Lead (Score: 50)
3,Unknown,Unknown,University IT Director: I appreciate that. Dat...,**Lead Score: 70**\n \n **Justificatio...,Cold Lead (Score: 50)
4,Unknown,Unknown,AI Engineer: I understand. We can explore alte...,**Lead Score: 40**\n \n **Justificatio...,Cold Lead (Score: 50)
