In [1]:

!pip install PyPDF2 google-generativeai twilio tweepy langchain-google-genai -q


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.0/42.0 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [22]:
# ==============================================================================
# AI-Powered Content Pipeline
#
# This script performs the following actions:
# 1. Installs all necessary Python packages (done by the `pip install` cell above).
# 2. Reads text from a .txt or .pdf file.
# 3. Uses Google's Gemini LLM to summarize the text.
# 4. Stores the original content and summary in a SQLite database.
# 5. Sends the summary via Email, SMS, WhatsApp, and Twitter.
#
# SETUP:
# 1. Add your API keys and credentials to the Colab Secrets Manager (🔑).
#    Required secrets: GOOGLE_API_KEY, FROM_EMAIL, EMAIL_APP_PASSWORD,
#    TO_EMAIL, TWILIO_ACCOUNT_SID, TWILIO_AUTH_TOKEN, TWILIO_SMS_NUMBER,
#    TWILIO_WHATSAPP_NUMBER, RECIPIENT_PHONE_NUMBER, TWITTER_API_KEY,
#    TWITTER_API_SECRET, TWITTER_ACCESS_TOKEN, TWITTER_ACCESS_TOKEN_SECRET
# 2. (Optional) Upload a file and update the FILE_TO_PROCESS variable below.
# ==============================================================================

import os
import sys
import sqlite3
import smtplib
from datetime import datetime
from getpass import getpass


import PyPDF2
import google.generativeai as genai
import tweepy
from twilio.rest import Client


from google.colab import userdata

from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain


def read_txt(file_path: str) -> str:
    """Load a UTF-8 encoded text file and return everything in it as one string.

    Errors raised by ``open``/``read`` (missing file, decoding failure)
    propagate to the caller.
    """
    with open(file_path, mode='r', encoding='utf-8') as source:
        contents = source.read()
    return contents

def read_pdf(file_path: str) -> str:
    """Pull the text out of every page of a PDF and return it concatenated.

    Pages for which ``extract_text()`` yields nothing are skipped. Any
    exception raised while opening or parsing the file is reported on stdout
    and an empty string is returned instead.
    """
    try:
        with open(file_path, 'rb') as pdf_file:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            # Extraction must happen while the file is still open.
            chunks = [page.extract_text() for page in pdf_reader.pages]
            return "".join(chunk for chunk in chunks if chunk)
    except Exception as exc:
        print(f"Error reading PDF file: {exc}")
        return ""

def read_document(file_path: str) -> str:
    """Read a document, choosing the reader from the file extension.

    The extension check is case-insensitive, so ``report.PDF`` and
    ``notes.TXT`` are handled the same way as their lowercase forms.

    Args:
        file_path: Path to a ``.pdf`` or ``.txt`` file.

    Returns:
        The text returned by ``read_pdf`` or ``read_txt``.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        ValueError: If the extension is neither ``.pdf`` nor ``.txt``.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"The file '{file_path}' was not found.")

    # Compare on a lowercased copy; the original path is still what gets opened.
    lowered = file_path.lower()
    if lowered.endswith('.pdf'):
        return read_pdf(file_path)
    if lowered.endswith('.txt'):
        return read_txt(file_path)
    raise ValueError(f"Unsupported file type: {file_path}")

# --- Summarizer (using LangChain) ---
def summarize_text(text: str, api_key: str) -> str:
    """Generate a short summary of ``text`` with a Gemini model via LangChain.

    Args:
        text: The document text to summarize.
        api_key: Google API key passed to ``ChatGoogleGenerativeAI``.

    Returns:
        The summary produced by the chain, or a string starting with
        ``"Error:"`` when the input is empty/blank or the API call fails.
    """
    # Blank input (including whitespace-only) has nothing to summarize, so
    # return the sentinel instead of spending an API call on it.
    if not text or not text.strip():
        return "Error: Input text is empty."

    try:
        # Initialize the LangChain Google Gemini model
        llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest", google_api_key=api_key)

        # Define the prompt template; {text} is filled with the document body
        prompt_template = PromptTemplate(
            input_variables=["text"],
            template="You are an expert summarizer. Provide a concise, easy-to-read summary of the following text.\nThe summary should capture the main arguments and conclusions.\nPlease provide the summary in about 3-4 clear sentences.\nText to summarize:\n---\n{text}\n---"
        )

        # Create an LLMChain
        chain = LLMChain(llm=llm, prompt=prompt_template)

        # Run the chain to generate the summary
        summary = chain.run(text)
        return summary
    except Exception as e:
        # Best-effort: report the failure and return the sentinel the caller checks for.
        print(f"An error occurred with the Langchain/Gemini API: {e}")
        return "Error: Could not generate summary due to an API issue."

# --- Database ---
# SQLite file (relative to the working directory) shared by the database helpers.
DB_NAME = 'content_summaries.db'


def setup_database():
    """Create the SQLite database file and the ``messages`` table if absent.

    Safe to call repeatedly: the table is created with ``IF NOT EXISTS``.
    The connection is closed even if creating the table fails.
    """
    conn = sqlite3.connect(DB_NAME)
    try:
        conn.execute('''
            CREATE TABLE IF NOT EXISTS messages (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                timestamp DATETIME NOT NULL,
                original_content TEXT NOT NULL,
                summary TEXT NOT NULL
            )
        ''')
        conn.commit()
    finally:
        # Always release the file handle, including on the error path.
        conn.close()

def store_message(original_content: str, summary: str):
    """Insert the original content and its summary into the ``messages`` table.

    The row is stamped with the current local time. The connection is closed
    even if the insert fails, and the success message is printed only after
    the commit has gone through.

    Args:
        original_content: Full text that was summarized.
        summary: The generated summary.
    """
    # Store the timestamp as explicit text. sqlite3's implicit datetime adapter
    # is deprecated since Python 3.12; this is the same "YYYY-MM-DD HH:MM:SS.ffffff"
    # form it produced.
    timestamp = datetime.now().isoformat(sep=" ")
    conn = sqlite3.connect(DB_NAME)
    try:
        conn.execute(
            "INSERT INTO messages (timestamp, original_content, summary) VALUES (?, ?, ?)",
            (timestamp, original_content, summary)
        )
        conn.commit()
    finally:
        conn.close()
    print("Summary successfully stored in the database.")

# --- Notifications ---
def send_email(subject: str, body: str, to_email: str, from_email: str, password: str):
    """Send ``body`` as a plain-text email through Gmail's SMTP-over-SSL endpoint.

    The message is built with ``EmailMessage`` so it carries proper
    Subject/From/To headers and a declared charset; non-ASCII text in the
    summary is therefore encoded correctly rather than sent as raw bytes.

    Args:
        subject: Subject line of the email.
        body: Plain-text message body.
        to_email: Recipient address.
        from_email: Sender address, also used as the SMTP login name.
        password: Password used for the SMTP login.

    Returns:
        None. If any of the addresses or the password is missing, the send is
        skipped. Failures while building or sending are printed, not raised.
    """
    # Function-scope import: the file's top-level imports do not include it.
    from email.message import EmailMessage

    if not all([to_email, from_email, password]):
        print("Skipping email: configuration is missing.")
        return
    try:
        # Built inside the try so an invalid header value (e.g. a newline in
        # the subject) is reported like any other send failure.
        message = EmailMessage()
        message["Subject"] = subject
        message["From"] = from_email
        message["To"] = to_email
        message.set_content(body)
        with smtplib.SMTP_SSL('smtp.gmail.com', 465) as server:
            server.login(from_email, password)
            server.send_message(message)
        print("Email sent successfully.")
    except Exception as e:
        print(f"Failed to send email: {e}")

def send_sms(body: str, to_number: str, from_number: str, account_sid: str, auth_token: str):
    """Deliver ``body`` as an SMS through Twilio.

    Does nothing except print a notice when any number or credential is
    missing. Errors raised while sending are printed rather than propagated.
    """
    required_settings = (to_number, from_number, account_sid, auth_token)
    if any(not setting for setting in required_settings):
        print("Skipping SMS: Twilio configuration is missing.")
        return
    try:
        twilio_client = Client(account_sid, auth_token)
        sent = twilio_client.messages.create(
            body=body,
            from_=from_number,
            to=to_number,
        )
        print(f"SMS sent successfully. SID: {sent.sid}")
    except Exception as exc:
        print(f"Failed to send SMS: {exc}")

def send_whatsapp(body: str, to_number: str, from_number: str, account_sid: str, auth_token: str):
    """Send ``body`` as a WhatsApp message through Twilio.

    Numbers may be given either bare (``+15551234567``) or already in
    ``whatsapp:+15551234567`` form; the prefix is added only when absent so
    an address is never double-prefixed.

    Args:
        body: Message text.
        to_number: Recipient number.
        from_number: Sender number.
        account_sid: Twilio account SID.
        auth_token: Twilio auth token.

    Returns:
        None. If any number or credential is missing, the send is skipped.
        Failures while sending are printed, not raised.
    """
    if not all([to_number, from_number, account_sid, auth_token]):
        print("Skipping WhatsApp: Twilio configuration is missing.")
        return

    def as_whatsapp_address(number: str) -> str:
        # Leave an already-prefixed address untouched.
        return number if number.startswith('whatsapp:') else f'whatsapp:{number}'

    try:
        client = Client(account_sid, auth_token)
        message = client.messages.create(
            body=body,
            from_=as_whatsapp_address(from_number),
            to=as_whatsapp_address(to_number))
        print(f"WhatsApp message sent successfully. SID: {message.sid}")
    except Exception as e:
        print(f"Failed to send WhatsApp message: {e}")

def post_tweet(text: str, api_key: str, api_secret: str, access_token: str, access_token_secret: str):
    """Publish ``text`` (cut to its first 280 characters) as a tweet via tweepy.

    Prints a notice and returns when any credential is missing. Errors raised
    while posting are printed rather than propagated.
    """
    credentials = (api_key, api_secret, access_token, access_token_secret)
    if any(not credential for credential in credentials):
        print("Skipping Tweet: Twitter configuration is missing.")
        return
    try:
        twitter = tweepy.Client(
            consumer_key=api_key,
            consumer_secret=api_secret,
            access_token=access_token,
            access_token_secret=access_token_secret,
        )
        truncated = text[:280]
        result = twitter.create_tweet(text=truncated)
        print(f"Tweet posted successfully: ID {result.data['id']}")
    except Exception as exc:
        print(f"Failed to post tweet: {exc}")


# --- Main Pipeline Orchestration ---


def _load_secret(name: str):
    """Return the Colab secret ``name``, or None when it cannot be read."""
    try:
        return userdata.get(name)
    except Exception as e:
        # NOTE(review): Colab's userdata.get appears to raise (e.g.
        # SecretNotFoundError / NotebookAccessError) rather than return None
        # for a missing secret; mapping that to None lets the "Skipping ..."
        # branches of the notification helpers run instead of crashing here.
        print(f"Warning: could not load secret '{name}': {type(e).__name__}: {e}")
        return None


def run_pipeline(file_path: str):
    """Execute the full content processing and notification pipeline.

    Steps: load credentials from Colab Secrets, ensure the database exists,
    read the document, summarize it, and, if summarization succeeded,
    store the result and send it by email, SMS, WhatsApp and Twitter.

    Only GOOGLE_API_KEY is mandatory. A notification channel whose secrets
    are missing is skipped by its own ``send_*``/``post_tweet`` helper.

    Args:
        file_path: Path to the ``.txt`` or ``.pdf`` file to process.

    Returns:
        None. Problems are reported on stdout and end the run early.
    """
    # --- Configuration from Colab Secrets ---
    print("Loading credentials from Colab Secrets...")
    GEMINI_API_KEY = _load_secret("GOOGLE_API_KEY")

    EMAIL_CONFIG = {
        "from_email": _load_secret("FROM_EMAIL"),
        "password": _load_secret("EMAIL_APP_PASSWORD"),
        "to_email": _load_secret("TO_EMAIL")
    }
    TWILIO_CONFIG = {
        "account_sid": _load_secret("TWILIO_ACCOUNT_SID"),
        "auth_token": _load_secret("TWILIO_AUTH_TOKEN"),
        "from_sms": _load_secret("TWILIO_SMS_NUMBER"),
        "from_whatsapp": _load_secret("TWILIO_WHATSAPP_NUMBER"),
        "to_number": _load_secret("RECIPIENT_PHONE_NUMBER")
    }
    TWITTER_CONFIG = {
        "api_key": _load_secret("TWITTER_API_KEY"),
        "api_secret": _load_secret("TWITTER_API_SECRET"),
        "access_token": _load_secret("TWITTER_ACCESS_TOKEN"),
        "access_token_secret": _load_secret("TWITTER_ACCESS_TOKEN_SECRET")
    }
    # --- End Configuration ---

    if not GEMINI_API_KEY:
        print("\nFATAL ERROR: GOOGLE_API_KEY secret not found. Please set it up.")
        return

    # 1. Setup Database
    setup_database()

    # 2. Read and Summarize
    print(f"\nProcessing file: {file_path}")
    try:
        content = read_document(file_path)
    except (ValueError, FileNotFoundError) as e:
        print(f"Error reading file: {e}")
        return

    if not content.strip():
        print("Could not extract any text from the document. Exiting.")
        return

    print("\nGenerating summary with Gemini... (This may take a moment)")
    summary = summarize_text(content, GEMINI_API_KEY)

    print("\n--- AI-Generated Summary (Gemini) ---")
    print(summary)
    print("-------------------------------------\n")

    # 3. Store and Notify
    # summarize_text signals failure with a string that *starts with* "Error:".
    # A substring test would also discard a genuine summary that merely
    # contains "Error:" somewhere in its text.
    if summary.startswith("Error:"):
        print("Skipping database storage and notifications due to summarization error.")
    else:
        store_message(content, summary)
        print("\nStarting notification process...")
        send_email("Document Summary", summary, **EMAIL_CONFIG)
        send_sms(summary,
                 to_number=TWILIO_CONFIG["to_number"],
                 from_number=TWILIO_CONFIG["from_sms"],
                 account_sid=TWILIO_CONFIG["account_sid"],
                 auth_token=TWILIO_CONFIG["auth_token"])
        send_whatsapp(summary,
                      to_number=TWILIO_CONFIG["to_number"],
                      from_number=TWILIO_CONFIG["from_whatsapp"],
                      account_sid=TWILIO_CONFIG["account_sid"],
                      auth_token=TWILIO_CONFIG["auth_token"])
        post_tweet(summary, **TWITTER_CONFIG)

    print("\nPipeline finished.")



# --- Execution ---
# Get file path from user input
# Prompt repeatedly for a file path and run the pipeline on each valid one
# until the user types "exit" or stdin is closed.
while True:
    try:
        # Strip surrounding whitespace so "exit " or a path pasted with a
        # trailing space/newline is still recognized.
        FILE_TO_PROCESS = input("Please enter the path to the file you want to process: or type exit ").strip()
    except EOFError:
        # No more input available (e.g. non-interactive run): stop cleanly.
        print("Exiting the program.")
        break

    if FILE_TO_PROCESS.lower() == 'exit':
        print("Exiting the program.")
        break

    if not os.path.exists(FILE_TO_PROCESS):
        print(f"Error: The file '{FILE_TO_PROCESS}' does not exist. Please try again.")
        continue

    # Run the entire process
    run_pipeline(FILE_TO_PROCESS)

Please enter the path to the file you want to process: or exit from application exit
Exiting the program.
