In [None]:
pip install openai

### Notebook (.ipynb) for testing
Uses a virtual environment (venv).

In [None]:
## Importing the necessary libraries
import boto3 #SDK for AWS
import json 
import os 
from botocore.exceptions import ClientError
import requests
import pandas as pd
import re
import logging 
from typing import List, Dict, Any, Tuple
from tqdm import tqdm
import jsonschema

## Chapter metadata lookup table.
## CHAPTER_INFO["chapters"] is an ordered list with one dict per chapter:
##   "number"        -> 1-based chapter number
##   "name"          -> chapter title
##   "total_shlokas" -> number of shlokas listed for that chapter
## The (name, total_shlokas) pairs below are listed in chapter order, so the
## chapter number is derived from the position via enumerate(start=1).
CHAPTER_INFO = {
    "chapters": [
        {"number": number, "name": name, "total_shlokas": total_shlokas}
        for number, (name, total_shlokas) in enumerate(
            (
                ("Arjuna Visada Yoga", 47),
                ("Sankhya Yoga", 72),
                ("Karma Yoga", 43),
                ("Jnana Yoga", 42),
                ("Karma Sanyasa Yoga", 29),
                ("Dhyana Yoga", 47),
                ("Jnana Vijnana Yoga", 30),
                ("Aksara Brahma Yoga", 28),
                ("Raja Vidya Yoga", 34),
                ("Vibhuti Yoga", 42),
                ("Visvarupa Darsana Yoga", 55),
                ("Bhakti Yoga", 20),
                ("Ksetra Ksetrajna Vibhaga Yoga", 35),
                ("Gunatraya Vibhaga Yoga", 27),
                ("Purusottama Yoga", 20),
                ("Daivasura Sampad Vibhaga Yoga", 24),
                ("Sraddhatraya Vibhaga Yoga", 28),
                ("Moksa Sanyasa Yoga", 78),
            ),
            start=1,
        )
    ]
}

## Defining AWS client configs
class AWSClient:
    """Small wrapper around the boto3 S3 and Textract clients.

    Both clients are created once in ``__init__`` for the given region.
    The S3 helpers report ``ClientError`` failures with ``print`` and return
    an empty/``None`` result instead of raising.
    """

    def __init__(self, region_name='eu-west-1'):
        """Create the S3 and Textract clients for *region_name*."""
        self.s3_client = boto3.client('s3', region_name=region_name)
        self.textract_client = boto3.client('textract', region_name=region_name)

    def list_s3_documents(self, bucket_name, prefix):
        """Return the keys of all ``.json`` objects under *prefix*.

        Args:
            bucket_name: Name of the S3 bucket to list.
            prefix: Key prefix used to restrict the listing.

        Returns:
            A list of object keys ending in ``.json``, in listing order.
            An empty list when nothing matches or when S3 raises
            ``ClientError`` (the error is printed).
        """
        try:
            # A single list_objects_v2 call returns at most 1000 keys, so walk
            # every page; otherwise large prefixes are silently truncated.
            paginator = self.s3_client.get_paginator('list_objects_v2')
            keys = []
            for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix):
                # 'Contents' is absent from a page that holds no objects.
                keys.extend(
                    obj['Key'] for obj in page.get('Contents', [])
                    if obj['Key'].endswith('.json')
                )
            return keys
        except ClientError as e:
            print(f"Error accessing S3: {e}")
            return []

    def get_object(self, bucket_name, file_key):
        """Download one object and return its body decoded as UTF-8 text.

        Args:
            bucket_name: Name of the S3 bucket.
            file_key: Key of the object to fetch.

        Returns:
            The object's content as ``str``, or ``None`` when S3 raises
            ``ClientError`` (the error is printed).
        """
        try:
            response = self.s3_client.get_object(Bucket=bucket_name, Key=file_key)
            return response['Body'].read().decode('utf-8')
        except ClientError as e:
            print(f"Error getting object from S3: {e}")
            return None

class ClaudeAPI:
    """Minimal HTTP client that posts a prompt to a Claude API endpoint.

    The response parser accepts three JSON shapes: a top-level ``content``
    list, a top-level ``completion`` value, or either of those nested under
    a ``body`` field (JSON-encoded string or already-decoded dict).
    """

    def __init__(self, api_endpoint, timeout=None, session=None):
        """Store the endpoint and the HTTP transport.

        Args:
            api_endpoint: URL the prompt payload is POSTed to.
            timeout: Optional timeout forwarded to ``post``. ``None`` (the
                default) means no timeout, as in a bare ``requests.post``.
            session: Optional object exposing a ``requests``-compatible
                ``post`` method (for example a ``requests.Session``).
                Defaults to the ``requests`` module itself.
        """
        self.api_endpoint = api_endpoint ## API endpoint for Claude
        self.timeout = timeout
        self._http = session if session is not None else requests

    @staticmethod
    def _extract_text(payload):
        """Return the generated text found in *payload*, or None if no known shape matches."""
        if not isinstance(payload, dict):
            return None
        content = payload.get('content')
        if isinstance(content, list):
            return content[0]['text']
        if 'completion' in payload:
            return payload['completion']
        return None

    def invoke_claude_model(self, prompt):
        """Send *prompt* to the endpoint and return the generated text.

        Args:
            prompt: Text sent as the single user message.

        Returns:
            The generated text, or ``None`` when the status is not 200, the
            response has no recognised shape, or any exception is raised
            (the error is printed in each case).
        """
        payload = {
            # NOTE(review): "bedrock-2023-05-31" looks like a Bedrock
            # `anthropic_version` string rather than a model id - confirm
            # against what the endpoint expects.
            "model": "bedrock-2023-05-31", ## Model to use
            "max_tokens": 20000, ## Max tokens to generate
            "temperature": 0.5, ## Temperature for the model
            "messages": [
                {
                    "role": "user", ## Role of the user
                    "content": prompt ## Prompt to the model
                }
            ]
        }
        headers = {
            "Content-Type": "application/json",
        }

        # Deliberately broad: this is a best-effort boundary, so callers get
        # None instead of an exception for network, JSON or shape errors.
        try:
            response = self._http.post(
                self.api_endpoint,
                json=payload,
                headers=headers,
                timeout=self.timeout,
            )

            if response.status_code == 200:
                claude_response = response.json() ## Response from the model
                text = self._extract_text(claude_response)

                if (
                    text is None
                    and isinstance(claude_response, dict)
                    and 'body' in claude_response
                ):
                    body = claude_response['body']
                    # The nested body may arrive JSON-encoded or already decoded.
                    if isinstance(body, (str, bytes, bytearray)):
                        body = json.loads(body)
                    text = self._extract_text(body)

                if text is not None:
                    return text

            print(f"Error: {response.text}")
            return None

        except Exception as e:
            print(f"Error in api call: {e}")
            return None
