In [None]:
pip install openai

### Notebook (.ipynb) for testing
Run inside a virtual environment (venv).

In [None]:
## Importing the necessary libraries
import boto3 #SDK for AWS
import json 
import os 
from botocore.exceptions import ClientError
import requests
import pandas as pd
import re
import logging 
from typing import List, Dict, Any, Tuple
from tqdm import tqdm
import jsonschema

## First let's define the chapter info 
# Static chapter table for the Bhagavad Gita. Each row below is
# (chapter number, chapter name, number of shlokas) and is expanded into a
# dict with the keys "number", "name" and "total_shlokas".
CHAPTER_INFO = {
    "chapters": [
        dict(zip(("number", "name", "total_shlokas"), row))
        for row in (
            (1, "Arjuna Visada Yoga", 47),
            (2, "Sankhya Yoga", 72),
            (3, "Karma Yoga", 43),
            (4, "Jnana Yoga", 42),
            (5, "Karma Sanyasa Yoga", 29),
            (6, "Dhyana Yoga", 47),
            (7, "Jnana Vijnana Yoga", 30),
            (8, "Aksara Brahma Yoga", 28),
            (9, "Raja Vidya Yoga", 34),
            (10, "Vibhuti Yoga", 42),
            (11, "Visvarupa Darsana Yoga", 55),
            (12, "Bhakti Yoga", 20),
            (13, "Ksetra Ksetrajna Vibhaga Yoga", 35),
            (14, "Gunatraya Vibhaga Yoga", 27),
            (15, "Purusottama Yoga", 20),
            (16, "Daivasura Sampad Vibhaga Yoga", 24),
            (17, "Sraddhatraya Vibhaga Yoga", 28),
            (18, "Moksa Sanyasa Yoga", 78),
        )
    ]
}

## Defining AWS client configs
class AWSClient:
    """Small wrapper around the boto3 S3 and Textract clients.

    Only the S3 client is used by the methods below; the Textract client is
    created alongside it with the same region.
    """

    def __init__(self, region_name='eu-west-1'):
        """Create the S3 and Textract clients for ``region_name``."""
        self.s3_client = boto3.client('s3', region_name=region_name)
        self.textract_client = boto3.client('textract', region_name=region_name)

    def list_s3_documents(self, bucket_name, prefix):
        """Return the keys of every ``.json`` object under ``prefix``.

        Args:
            bucket_name: Name of the S3 bucket to list.
            prefix: Key prefix used to restrict the listing.

        Returns:
            A list of object keys ending in ``.json``. The list is empty when
            nothing matches or when S3 reports a ``ClientError``.
        """
        keys = []
        try:
            # A single list_objects_v2 call returns at most 1000 keys, so walk
            # every page instead of reading only the first response.
            paginator = self.s3_client.get_paginator('list_objects_v2')
            for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix):
                # 'Contents' is absent from a page that holds no objects.
                keys.extend(
                    obj['Key'] for obj in page.get('Contents', [])
                    if obj['Key'].endswith('.json')
                )
        except ClientError as e:
            print(f"Error accessing S3: {e}")
            return []
        return keys

    def get_object(self, bucket_name, file_key):
        """Download one S3 object and return its body decoded as UTF-8.

        Args:
            bucket_name: Name of the S3 bucket.
            file_key: Key of the object to fetch.

        Returns:
            The object's content as ``str``, or ``None`` when S3 reports a
            ``ClientError``.
        """
        try:
            response = self.s3_client.get_object(Bucket=bucket_name, Key=file_key)
            return response['Body'].read().decode('utf-8')
        except ClientError as e:
            print(f"Error getting object from S3: {e}")
            return None

class ClaudeAPI:
    """Client for an HTTP endpoint that forwards prompts to a Claude model."""

    def __init__(self, api_endpoint):
        """Store the URL that prompts are POSTed to."""
        self.api_endpoint = api_endpoint  ## API endpoint for Claude

    @staticmethod
    def _extract_text(claude_response):
        """Pull the generated text out of a decoded JSON response.

        Three shapes are recognised, checked in this order:
        ``{"content": [{"text": ...}, ...]}``, ``{"completion": ...}``, and
        ``{"body": "<JSON string>"}`` where the decoded body has one of the
        first two shapes.

        Returns:
            The generated text, or ``None`` when no known shape matches.
        """
        if 'content' in claude_response and isinstance(claude_response['content'], list):
            return claude_response['content'][0]['text']
        if 'completion' in claude_response:
            return claude_response['completion']
        if 'body' in claude_response:
            # The payload is wrapped as a JSON-encoded string under 'body'.
            body = json.loads(claude_response['body'])
            if 'content' in body and isinstance(body['content'], list):
                return body['content'][0]['text']
            if 'completion' in body:
                return body['completion']
        return None

    def invoke_claude_model(self, prompt):
        """Send ``prompt`` as a single user message and return the reply text.

        Args:
            prompt: Text placed in the ``content`` of the user message.

        Returns:
            The model's text, or ``None`` when the HTTP status is not 200, the
            response has an unrecognised shape, or any exception is raised
            while sending the request or decoding the response.
        """
        try:
            payload = {
                # NOTE(review): "bedrock-2023-05-31" looks like an API version
                # string rather than a model id - confirm what the endpoint expects.
                "model": "bedrock-2023-05-31",  ## Model to use
                "max_tokens": 20000,  ## Max tokens to generate
                "temperature": 0.5,  ## Temperature for the model
                "messages": [
                    {
                        "role": "user",  ## Role of the user
                        "content": prompt  ## Prompt to the model
                    }
                ]
            }

            headers = {
                "Content-Type": "application/json",
            }
            response = requests.post(self.api_endpoint, json=payload, headers=headers)

            if response.status_code == 200:
                text = self._extract_text(response.json())
                if text is not None:
                    return text

            # Reached for non-200 statuses and for 200 responses with no usable text.
            print(f"Error: {response.text}")
            return None

        except Exception as e:
            # Deliberately broad: callers rely on None instead of an exception.
            print(f"Error in api call: {e}")
            return None

# Load the chapter metadata from disk. JSON text is UTF-8, so the encoding is
# set explicitly instead of relying on the platform default.
with open("bhagvad_gita_meta_data.json", "r", encoding="utf-8") as f:
    chapter_info = json.load(f)

def claude_call(system_content, user_content, temperature=0.1, max_tokens=300):
    """Send a system/user prompt pair to Claude and return the trimmed reply.

    The two texts are merged into one prompt of the form
    ``"System: ...\\n\\nUser: ..."`` and passed to
    ``ClaudeAPI.invoke_claude_model``.

    Args:
        system_content: Instructions placed after the ``System:`` label.
        user_content: Request placed after the ``User:`` label.
        temperature: Accepted but not forwarded by this function.
        max_tokens: Accepted but not forwarded by this function.

    Returns:
        The reply with surrounding whitespace removed, or ``""`` when the
        call produced no text.
    """
    combined_prompt = f"System: {system_content}\n\nUser: {user_content}"
    client = ClaudeAPI('https://g5bvf8ca9a.execute-api.eu-west-1.amazonaws.com/prod/invoke')
    reply = client.invoke_claude_model(combined_prompt)
    if not reply:
        return ""
    return reply.strip()

def generate_chapter_summary(chapter_number, chapter_name):
    """Ask Claude for a structured analysis of one chapter.

    Args:
        chapter_number: Chapter number interpolated into the prompt.
        chapter_name: Chapter name interpolated into the prompt.

    Returns:
        The parsed JSON reply. When the reply is not valid JSON, a dict with
        the keys ``summary``, ``main_theme``, ``philosophical_aspects``,
        ``life_problems_addressed`` and ``yoga_type`` holding placeholder
        error text is returned instead.
    """
    system_content = f"""You are an expert on the Bhagavad Gita. Provide a comprehensive analysis of Chapter {chapter_number}: {chapter_name} strictly in JSON format with the following structure and no other format:
    {{
        "summary": "Brief summary of the chapter",
        "main_theme": "The overarching theme of the chapter",
        "philosophical_aspects": ["List of key philosophical concepts addressed"],
        "life_problems_addressed": ["List of life problems or questions this chapter helps address"],
        "yoga_type": "The primary type of yoga (if any) discussed in this chapter (e.g., Bhakti Yoga, Karma Yoga, etc.)"
    }}"""
    user_content = f"Provide a comprehensive analysis of Chapter {chapter_number}: {chapter_name} of the Bhagavad Gita as specified."

    response = claude_call(system_content, user_content, temperature=0.7, max_tokens=500)

    # Drop a surrounding markdown code fence and its "json" language tag.
    response = response.strip('`')
    if response.startswith('json'):
        response = response[4:].strip()

    try:
        return json.loads(response)
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON for Chapter {chapter_number} summary: {e}")
        print(f"Raw response: {response}")

    # Return a default structure if JSON parsing fails
    return {
        "summary": "Error generating summary",
        "main_theme": "Error generating main theme",
        "philosophical_aspects": ["Error generating philosophical aspects"],
        "life_problems_addressed": ["Error generating life problems addressed"],
        "yoga_type": "Error generating yoga type"
    }

def is_chapter_complete(shloka_count, chapter_number=1):
    """Ask Claude whether ``shloka_count`` shlokas complete a chapter.

    Args:
        shloka_count: Number of shlokas, sent to the model in the user prompt.
        chapter_number: Chapter the question is about; defaults to 1.

    Returns:
        ``True`` when the reply is "yes" (ignoring case, surrounding
        whitespace and a trailing ``.`` or ``!``), otherwise ``False``.
    """
    system_content = (
        "You are an expert on the Bhagvad Gita. Determine if the given number "
        f"of shlokas completes Chapter {chapter_number}."
    )
    user_content = f"Number of shlokas: {shloka_count}. Answer with only 'yes' or 'no'."
    response = claude_call(system_content, user_content, temperature=0.1, max_tokens=10)
    # Tolerate replies such as "Yes." so trailing punctuation is not read as "no".
    return response.strip().lower().rstrip('.!') == "yes"

def generate_sholka_details(chapter_number, sholka_text, sholka_number):
    """Ask Claude for structured details about a single verse.

    Args:
        chapter_number: Chapter the verse belongs to; used in the prompt and logs.
        sholka_text: Text of the verse, embedded in the user prompt.
        sholka_number: Verse number; used in the prompt and logs.

    Returns:
        A 5-tuple ``(transliteration, interpretation, meaning, keywords,
        life_application)``. Keys missing from the reply default to ``''``
        (``[]`` for keywords). When the reply is not a valid JSON object, a
        tuple of placeholder error values is returned instead.
    """
    print(f"Generating details for Chapter {chapter_number}, Sholka {sholka_number}...")
    system_content = """You are an expert on the Bhagavad Gita. Provide detailed information about the given verse in a structured JSON format.
    Your response MUST be a valid JSON object strictly with the following keys:
    - transliteration: The Sanskrit verse written in Latin script (as a single line without line breaks).
    - interpretation: A deeper analysis of the verse's significance and implications.
    - meaning: A concise explanation of the verse's meaning without any prefixes or introductions.
    - keywords: An array of key philosophical teachings, themes, or abstract concepts presented in this shloka.
    - life_application: How the teachings of this shloka can be applied to solve real-life problems or questions.
    Do not include any text outside of this JSON structure. Do not use markdown code block syntax or any other formatting."""

    user_content = f"""Analyze the following Bhagvad Gita verse (Chapter {chapter_number}, Sholka {sholka_number}) and provide the details in the specified JSON format: {sholka_text} 
    Remember, your entire response must be a valid JSON object without any additional formatting or text."""

    response = claude_call(system_content, user_content, temperature=0.1, max_tokens=800)

    # Values returned whenever the reply cannot be used.
    error_result = (
        "Error parsing transliteration",
        "Error parsing interpretation",
        "Error parsing meaning",
        ["error"],
        "Error occured"
    )

    ## Cleaning up the response
    response = response.strip()
    if response.startswith('```json'):  # Remove the opening code fence if it exists
        response = response[7:]
    if response.endswith('```'):  # Remove the closing code fence if it exists
        response = response[:-3]

    ## Replace new lines in the response with spaces
    response = re.sub(r'\n\s*', ' ', response)

    try:
        sholka_details = json.loads(response)
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON for Chapter {chapter_number}, Sholka {sholka_number}: {e}")
        print(f"Raw response: {response}")
        return error_result

    # A JSON array or scalar has no .get(); treat it like a parse failure.
    if not isinstance(sholka_details, dict):
        print(f"Error parsing JSON for Chapter {chapter_number}, Sholka {sholka_number}: response is not a JSON object")
        print(f"Raw response: {response}")
        return error_result

    # Extracting the required fields with default values if not present
    return (
        sholka_details.get('transliteration', ''),
        sholka_details.get('interpretation', ''),
        sholka_details.get('meaning', ''),
        sholka_details.get('keywords', []),
        sholka_details.get('life_application', '')
    )
    
def analyze_chapter_relationships(shlokas, chapter_summary):
    """Ask Claude to describe relationships across a chapter's shlokas.

    The raw reply is printed. Nothing is returned yet; parsing of the reply
    has not been written.

    Args:
        shlokas: Iterable of dicts, each read for the keys ``shloka_number``,
            ``sanskrit_text``, ``meaning``, ``interpretation`` and
            ``keywords`` (an iterable of strings).
        chapter_summary: JSON-serialisable summary embedded in the prompt.
    """
    print("Analyzing chapter relationships...")
    system_content = """You are an expert on the Bhagavad Gita. Analyze the given shlokas from the chapter and identify overall relationships between characters, themes, and shlokas.
    Return a JSON object with the following structure:
    {
        "characters": [{"name": "Character Name", "description": "Brief description of the character's role in this chapter"}],
        "themes": [{"name": "Theme Name", "description": "Brief description of the theme's significance in this chapter"}],
        "character_relationships": [{"from": "Character A", "to": "Character B", "description": "Description of the relationship"}],
        "theme_relationships": [{"theme": "Theme Name", "shlokas": [shloka numbers], "description": "How the theme manifests in these shlokas"}],
        "key_events": [{"event": "Event description", "shlokas": [shloka numbers], "characters": ["Character names involved"]}],
        "philosophical_progression": "Description of how philosophical concepts develop through the chapter",
        "chapter_relevance": "Explanation of how this chapter fits into the broader context of the Bhagavad Gita"
    }
    Ensure your response is a valid JSON object. Do not include any text outside of the JSON structure."""

    header = f""" Analyze the following shlokas from the Bhagvad Gita chapter and provide the relationships as specified.
    Chapter Summary: {json.dumps(chapter_summary)}
    Shlokas:
    """
    # Build one text section per shloka and join once, rather than growing
    # the prompt with repeated string concatenation.
    shloka_sections = [
        f"Shloka {shloka['shloka_number']}:\n"
        f"Sanskrit: {shloka['sanskrit_text']}\n"
        f"Meaning: {shloka['meaning']}\n"
        f"Interpretation: {shloka['interpretation']}\n"
        f"Keywords: {', '.join(shloka['keywords'])}\n\n"
        for shloka in shlokas
    ]
    user_content = header + "".join(shloka_sections)

    response = claude_call(system_content, user_content, temperature=0.3, max_tokens=2000)
    print("Raw Claude response:")
    print(response)
    # TODO: parse the reply and return the relationships (to be continued).




: 