<a href="https://colab.research.google.com/github/marcmontb/email-automation/blob/main/3_0_Email_automation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# microsoft_graph.py

In [None]:
# microsoft_graph.py
import msal
import requests
import webbrowser
from typing import Dict, List, Optional

class MicrosoftGraphClient:
    """Small Microsoft Graph client: device-code sign-in and mailbox reads.

    The client requests the delegated ``Mail.Read`` scope and talks to the
    ``/me/messages`` endpoint of Microsoft Graph v1.0.
    """

    # Seconds to wait on Graph before giving up. ``requests`` has no default
    # timeout, so without this a stalled connection would hang forever.
    REQUEST_TIMEOUT = 30

    def __init__(self, client_id: str, tenant_id: str):
        """Store the app registration identifiers.

        Args:
            client_id: Application (client) ID of the app registration.
            tenant_id: Directory (tenant) ID used to build the authority URL.
        """
        self.client_id = client_id
        self.tenant_id = tenant_id
        self.scopes = ['Mail.Read']
        # MSAL application, created lazily and reused so that its in-memory
        # token cache survives between authenticate() calls.
        self._app = None

    def _get_app(self):
        """Return the shared MSAL public client, creating it on first use."""
        if getattr(self, "_app", None) is None:
            self._app = msal.PublicClientApplication(
                self.client_id,
                authority=f"https://login.microsoftonline.com/{self.tenant_id}"
            )
        return self._app

    def authenticate(self) -> str:
        """Get a Microsoft Graph access token.

        A cached token is used when one is available; otherwise the
        interactive device-code flow is started and this call blocks until
        the user completes (or abandons) the sign-in.

        Returns:
            The bearer access token.

        Raises:
            ValueError: If the device flow cannot be started or the token
                response carries no access token.
            Exception: Any error raised by MSAL is printed and re-raised.
        """
        try:
            app = self._get_app()

            accounts = app.get_accounts()
            result = None
            if accounts:
                result = app.acquire_token_silent(self.scopes, account=accounts[0])

            if not result:
                flow = app.initiate_device_flow(scopes=self.scopes)
                if "user_code" not in flow:
                    raise ValueError("Failed to create device flow")

                print("\nPlease follow these steps to authenticate:")
                print("1. The authentication page will open in your default browser")
                print("2. Enter the following code when prompted:", flow.get('user_code'))
                print("3. Sign in with your Microsoft account")

                # webbrowser.open() reports failure by returning False rather
                # than raising, which is what happens on headless hosts such
                # as notebooks. Treat both outcomes as "not opened" so the
                # user always gets the URL.
                try:
                    opened = webbrowser.open(flow['verification_uri'])
                except Exception:
                    opened = False

                if not opened:
                    print(f"\nPlease manually visit: {flow['verification_uri']}")
                    print(f"And enter the code: {flow['user_code']}")

                result = app.acquire_token_by_device_flow(flow)

            if "access_token" not in result:
                raise ValueError(f"Authentication failed: {result.get('error_description', 'Unknown error')}")

            return result['access_token']

        except Exception as e:
            print(f"\nAuthentication error: {str(e)}")
            raise

    def get_recent_emails(self, access_token: str, limit: int = 50) -> List[Dict]:
        """Fetch the most recent emails from Microsoft Graph.

        Args:
            access_token: Bearer token returned by :meth:`authenticate`.
            limit: Maximum number of messages to request (sent as ``$top``).

        Returns:
            The message dictionaries from the response's ``value`` array,
            newest first. Each message carries ``hasAttachments`` and an
            ``attachments`` list restricted to attachment names.

        Raises:
            requests.exceptions.HTTPError: On a non-2xx response.
            Exception: Any other transport or decoding error, re-raised
                after being printed.
        """
        headers = {
            'Authorization': f'Bearer {access_token}',
            'Content-Type': 'application/json'
        }

        url = 'https://graph.microsoft.com/v1.0/me/messages'
        params = {
            '$top': limit,
            # `attachments` is a navigation property, so it cannot be pulled
            # in through $select; it has to be expanded. Only the name is
            # requested to avoid downloading every attachment's content.
            '$select': 'subject,body,receivedDateTime,from,hasAttachments',
            '$expand': 'attachments($select=name)',
            '$orderby': 'receivedDateTime desc'
        }

        try:
            print("\nFetching recent emails...")
            response = requests.get(
                url,
                headers=headers,
                params=params,
                timeout=self.REQUEST_TIMEOUT
            )
            response.raise_for_status()

            emails = response.json().get('value', [])
            print(f"Found {len(emails)} recent emails")
            return emails

        except requests.exceptions.HTTPError as e:
            print(f"HTTP Error: {str(e)}")
            # .text decodes with the response's declared charset instead of
            # assuming UTF-8, so printing the error body cannot itself fail.
            print("Response content:", e.response.text)
            raise
        except Exception as e:
            print(f"Error retrieving emails: {str(e)}")
            raise



# mistral_analysis.py

In [None]:
# mistral_analysis.py
import json
import re
from typing import Dict, List

from mistralai import Mistral, UserMessage, SystemMessage

class EmailAnalyzer:
    """Classify and summarise investment-related emails.

    An LLM verdict (via the Mistral chat API) is combined with cheap
    heuristic signals: keyword patterns, presentation-style attachments and
    links to document-sharing platforms.
    """

    # Chat model used for both classification and summarisation.
    MODEL = "mistral-medium"

    def __init__(self, api_key: str):
        """Create the Mistral client and the heuristic signal tables.

        Args:
            api_key: Mistral API key.
        """
        self.mistral_client = Mistral(api_key=api_key)

        # Regex patterns (Spanish and English) hinting at fundraising talk.
        self.keywords = [
            r"ronda de (inversión|inversion|financiación|financiacion)",
            r"investment round",
            r"funding round",
            r"serie [ABC]",
            r"series [ABC]",
            r"deck",
            r"pitch",
            r"captación de capital",
            r"levant[ea][rn]? capital",
            r"term sheet",
            r"investor update",
            r"data room",
            r"SAFE",
            r"cap table",
            r"valuation",
            r"raise",
            r"investment"
        ]

        # Lower-case domains of services commonly used to share documents.
        self.sharing_platforms = [
            'dropbox.com',
            'drive.google.com',
            'docsend.com',
            'notion.so',
            'box.com'
        ]

    @staticmethod
    def _parse_llm_json(content: str) -> Dict:
        """Extract and decode the JSON object contained in a model reply.

        Chat models frequently wrap JSON in Markdown code fences or add a
        sentence around it, so everything outside the outermost braces is
        discarded before decoding.

        Raises:
            ValueError: If the reply contains no JSON object or the object
                is malformed (``json.JSONDecodeError`` is a ``ValueError``).
        """
        start = content.find("{")
        end = content.rfind("}")
        if start == -1 or end < start:
            raise ValueError("No JSON object found in model response")
        return json.loads(content[start:end + 1])

    def check_confidence_signals(self, subject: str, body: str, attachments: List[Dict] = None) -> float:
        """Calculate additional confidence from keywords, attachments and links.

        Args:
            subject: Email subject line.
            body: Email body text.
            attachments: Optional attachment dictionaries; only the ``name``
                key is inspected.

        Returns:
            A boost between 0.0 and 0.30: up to 0.15 for keyword matches,
            up to 0.10 for presentation-style attachments and up to 0.05
            for sharing-platform links.
        """
        confidence_boost = 0.0
        body = body or ""
        text = f"{subject or ''} {body}".lower()

        keyword_matches = sum(1 for pattern in self.keywords
                              if re.search(pattern, text, re.IGNORECASE))
        confidence_boost += min(keyword_matches * 0.05, 0.15)

        if attachments:
            extensions = ('.pdf', '.ppt', '.pptx', '.key')
            # `name` may be present but null, hence `or ''` instead of a
            # .get() default.
            attachment_matches = sum(
                1 for att in attachments
                if (att.get('name') or '').lower().endswith(extensions)
            )
            confidence_boost += min(attachment_matches * 0.05, 0.10)

        # Domains are case-insensitive, so compare against the lowered body.
        body_lower = body.lower()
        platform_matches = sum(1 for platform in self.sharing_platforms
                               if platform in body_lower)
        confidence_boost += min(platform_matches * 0.025, 0.05)

        return confidence_boost

    def analyze_email(self, subject: str, body: str, attachments: List[Dict] = None) -> Dict:
        """Analyze an email using the LLM and the heuristic signals.

        Args:
            subject: Email subject line.
            body: Email body text.
            attachments: Optional attachment dictionaries.

        Returns:
            A dictionary with ``is_investment_round``, ``confidence``,
            ``reasoning`` and ``detected_signals``. On any failure (API
            error, unparseable reply) a negative result is returned with
            the error text in ``reasoning``; this method does not raise.
        """
        prompt = f"""
        Analyze this email and determine if it's related to an investment round or fundraising opportunity.
        Consider both direct and indirect mentions of:
        - Funding rounds
        - Pitch decks
        - Investment opportunities
        - Capital raising
        - Term sheets
        - Data rooms

        Pay special attention to subtle indicators, such as:
        - Discussion of company valuation
        - References to investor meetings
        - Mentions of cap tables or equity
        - Forward-looking financial projections
        - Business metrics or KPIs in a presentation context
        - References to SAFE notes or other investment instruments
        - Mentions of deal terms or investment conditions

        Subject: {subject}
        Body: {body}

        Respond in JSON format:
        {{
            "is_investment_round": true/false,
            "confidence": 0-1,
            "reasoning": "brief explanation",
            "detected_signals": ["signal1", "signal2", ...]
        }}
        """

        try:
            messages = [
                {
                    "role": "system",
                    "content": "You are an expert at analyzing emails related to investments and fundraising."
                },
                {
                    "role": "user",
                    "content": prompt
                }
            ]

            response = self.mistral_client.chat.complete(
                model=self.MODEL,
                messages=messages
            )

            result = self._parse_llm_json(response.choices[0].message.content)

            # Fill in anything the model left out so callers can rely on
            # the documented keys, and make sure confidence is numeric.
            result.setdefault('is_investment_round', False)
            result.setdefault('reasoning', '')
            result.setdefault('detected_signals', [])
            result['confidence'] = float(result.get('confidence', 0))

            if result['is_investment_round'] or result['confidence'] > 0.3:
                confidence_boost = self.check_confidence_signals(subject, body, attachments)
                result['confidence'] = min(result['confidence'] + confidence_boost, 1.0)
                if confidence_boost > 0:
                    result['reasoning'] += f" (Confidence boosted by {confidence_boost:.2f} due to additional signals)"

            return result
        except Exception as e:
            # Deliberate best-effort: a single bad reply must not stop the
            # caller's batch, so the failure is reported in the result.
            return {
                "is_investment_round": False,
                "confidence": 0,
                "reasoning": f"Error during analysis: {str(e)}",
                "detected_signals": []
            }

    def summarize_investment_email(self, subject: str, body: str) -> str:
        """Generate a detailed summary of an investment-related email.

        Args:
            subject: Email subject line.
            body: Email body text.

        Returns:
            The model's summary text, or a string starting with
            ``"Error generating summary:"`` if the request fails.
        """
        prompt = f"""
        Summarize this investment-related email, focusing on key information such as:
        - Company name and sector
        - Round type (Seed, Series A, etc.) if mentioned
        - Investment amount sought or valuation if mentioned
        - Key metrics or KPIs
        - Investment thesis or highlights
        - Timeline or urgency indicators
        - Next steps or requirements

        Subject: {subject}
        Body: {body}
        """

        try:
            messages = [
                {
                    "role": "system",
                    "content": "You are an expert at analyzing and summarizing investment opportunities."
                },
                {
                    "role": "user",
                    "content": prompt
                }
            ]

            response = self.mistral_client.chat.complete(
                model=self.MODEL,
                messages=messages
            )
            return response.choices[0].message.content
        except Exception as e:
            return f"Error generating summary: {str(e)}"



# airtable_storage.py


In [None]:
# airtable_storage.py
import requests
from datetime import datetime
from typing import Dict

class AirtableClient:
    """Writes processed email records to a single Airtable table."""

    # Seconds to wait on Airtable before giving up. ``requests`` has no
    # default timeout, so without this a stalled connection would hang.
    REQUEST_TIMEOUT = 30

    # Troubleshooting hints printed for well-known HTTP failure statuses.
    _ERROR_HINTS = {
        401: "Error: Authentication failed. Please check your Airtable token.",
        403: "Error: Authentication failed. Please check your Airtable token.",
        404: "Error: Table not found. Please check your Base ID and Table ID.",
        422: "Error: Unprocessable Entity. Please check your Airtable schema.",
    }

    def __init__(self, token: str, base_id: str, table_id: str):
        """Store credentials and build the table endpoint URL.

        Args:
            token: Airtable API token, sent as a bearer token.
            base_id: Identifier of the Airtable base.
            table_id: Identifier of the table inside the base.
        """
        self.token = token
        self.base_id = base_id
        self.table_id = table_id
        self.base_url = f"https://api.airtable.com/v0/{base_id}/{table_id}"

    def format_date(self, iso_date: str) -> str:
        """Convert an ISO 8601 datetime to an Airtable-friendly date.

        Only the leading ``YYYY-MM-DD`` part is parsed, so timestamps with
        fractional seconds or a UTC offset are accepted as well as the
        plain ``...Z`` form. The date is taken as written; no timezone
        conversion is applied.

        Args:
            iso_date: Timestamp such as ``2024-03-05T10:20:30Z``.

        Returns:
            The date as ``YYYY-MM-DD``. If ``iso_date`` is missing or does
            not start with a valid date, today's date is returned instead.
        """
        try:
            return datetime.strptime(iso_date[:10], "%Y-%m-%d").strftime("%Y-%m-%d")
        except (TypeError, ValueError):
            # Best-effort fallback so a record is still stored.
            return datetime.now().strftime("%Y-%m-%d")

    def store_email(self, email_data: Dict) -> None:
        """Store processed email data in Airtable.

        Args:
            email_data: Dictionary with the keys ``subject``, ``sender``,
                ``received``, ``confidence``, ``reasoning``, ``summary``,
                ``has_attachments``, ``detected_signals`` and ``body``.

        Raises:
            KeyError: If a required key is missing from ``email_data``.
            requests.exceptions.HTTPError: On a non-2xx response, after the
                status, response body and a hint have been printed.
        """
        headers = {
            "Authorization": f"Bearer {self.token}",
            "Content-Type": "application/json"
        }

        received_date = self.format_date(email_data['received'])

        create_data = {
            "fields": {
                "Subject": email_data['subject'],
                "Sender": email_data['sender'],
                "Received Date": received_date,
                "Confidence Score": email_data['confidence'],
                "Analysis": email_data['reasoning'],
                "Summary": email_data['summary'],
                "Has Attachments": email_data['has_attachments'],
                # Signals come from an LLM reply, so they are not guaranteed
                # to be strings; str() keeps join() from raising.
                "Detected Signals": ", ".join(str(signal) for signal in email_data['detected_signals']),
                "Original Email": email_data['body']
            }
        }

        try:
            response = requests.post(
                self.base_url,
                json=create_data,
                headers=headers,
                timeout=self.REQUEST_TIMEOUT
            )
            response.raise_for_status()
            print(f"Successfully stored email: {email_data['subject']}")
        except requests.exceptions.HTTPError as e:
            status = e.response.status_code
            print(f"HTTP Error: {status}")
            print(f"Response content: {e.response.text}")
            hint = self._ERROR_HINTS.get(status)
            if hint:
                print(hint)
            raise

# main.py

In [None]:
# main.py
import os
from microsoft_graph import MicrosoftGraphClient
from mistral_analysis import EmailAnalyzer
from airtable_storage import AirtableClient

# Configuration
# Credentials are read from the environment and are None when unset.
MS_CLIENT_ID = os.getenv("MS_CLIENT_ID")
MS_TENANT_ID = os.getenv("MS_TENANT_ID")
MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")
AIRTABLE_TOKEN = os.getenv("AIRTABLE_TOKEN")
# The Airtable base and table can be overridden from the environment; the
# defaults keep the previously hard-coded identifiers.
AIRTABLE_BASE_ID = os.getenv("AIRTABLE_BASE_ID", "app2FvqL0obm9iYWA")
AIRTABLE_TABLE_ID = os.getenv("AIRTABLE_TABLE_ID", "tbleItNQrGrzv2FSE")

def process_emails(graph_client: MicrosoftGraphClient, analyzer: EmailAnalyzer,
                  airtable_client: AirtableClient, access_token: str) -> None:
    """Fetch recent emails, analyze each one and store the relevant ones.

    An email is stored when the analyzer flags it as an investment round or
    reports a confidence of at least 0.5. Errors are printed, never raised:
    a failure to fetch the mailbox ends the run, while a failure on a
    single email is counted and the remaining emails are still processed.

    Args:
        graph_client: Client used to fetch the recent messages.
        analyzer: Analyzer used to classify and summarise each message.
        airtable_client: Client used to store the selected messages.
        access_token: Microsoft Graph bearer token.
    """
    try:
        # Get recent emails
        emails = graph_client.get_recent_emails(access_token)
    except Exception as e:
        print(f"Error processing emails: {str(e)}")
        return

    processed_count = 0
    failed_count = 0

    for email in emails:
        # `or` fallbacks are used instead of .get() defaults because the
        # API may return a key with an explicit null (e.g. no sender on a
        # draft), which a .get() default would not replace.
        subject = email.get('subject') or ''
        try:
            body = (email.get('body') or {}).get('content') or ''
            attachments = email.get('attachments') or []
            sender = ((email.get('from') or {}).get('emailAddress') or {}).get('address') or ''

            # Analyze email
            analysis = analyzer.analyze_email(subject, body, attachments)

            # Process emails with moderate to high confidence
            if analysis['is_investment_round'] or analysis['confidence'] >= 0.5:
                summary = analyzer.summarize_investment_email(subject, body)

                email_data = {
                    'subject': subject,
                    'sender': sender,
                    'received': email.get('receivedDateTime'),
                    'confidence': analysis['confidence'],
                    'reasoning': analysis['reasoning'],
                    'summary': summary,
                    # Honour Graph's hasAttachments flag as well, in case
                    # the attachment list itself was not returned.
                    'has_attachments': bool(attachments) or bool(email.get('hasAttachments')),
                    'detected_signals': analysis.get('detected_signals', []),
                    'body': body
                }

                # Store in Airtable
                airtable_client.store_email(email_data)
                processed_count += 1
        except Exception as e:
            # Isolate failures so one bad email does not abort the batch.
            failed_count += 1
            print(f"Error processing email '{subject}': {str(e)}")

    print(f"\nProcessing complete. Stored {processed_count} investment-related emails in Airtable.")
    if failed_count:
        print(f"{failed_count} emails could not be processed; see the errors above.")

def main():
    """Run the pipeline: validate config, authenticate, process the mailbox.

    All errors are caught and reported on stdout; nothing is raised.
    """
    try:
        print("Starting investment email processing pipeline...")

        # Fail fast with a clear message instead of handing None to the
        # clients and getting an obscure error from a downstream library.
        required_settings = {
            "MS_CLIENT_ID": MS_CLIENT_ID,
            "MS_TENANT_ID": MS_TENANT_ID,
            "MISTRAL_API_KEY": MISTRAL_API_KEY,
            "AIRTABLE_TOKEN": AIRTABLE_TOKEN,
            "AIRTABLE_BASE_ID": AIRTABLE_BASE_ID,
            "AIRTABLE_TABLE_ID": AIRTABLE_TABLE_ID,
        }
        missing = [name for name, value in required_settings.items() if not value]
        if missing:
            raise ValueError("Missing required configuration: " + ", ".join(missing))

        # Initialize clients
        graph_client = MicrosoftGraphClient(MS_CLIENT_ID, MS_TENANT_ID)
        analyzer = EmailAnalyzer(MISTRAL_API_KEY)
        airtable_client = AirtableClient(AIRTABLE_TOKEN, AIRTABLE_BASE_ID, AIRTABLE_TABLE_ID)

        # Authenticate with Microsoft Graph
        print("\nAuthenticating with Microsoft Graph...")
        access_token = graph_client.authenticate()
        print("Successfully authenticated!")

        # Process emails using all services
        process_emails(graph_client, analyzer, airtable_client, access_token)

    except Exception as e:
        print(f"\nAn error occurred: {str(e)}")
        print("\nPlease check your configuration and try again.")


# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()