In [None]:
# Install third-party packages through IPython shell escapes (`!`).
# NOTE(review): no versions are pinned, so a fresh install is not reproducible.
# Consider `%pip install` with pinned versions so the packages are installed
# into the environment of the running kernel.
!pip install langchain faiss-cpu transformers datasets gym stable-baselines3 onnx onnxruntime huggingface_hub mlflow tqdm
!pip install farm-haystack

In [None]:
import gym
from gym import spaces
import numpy as np
import torch

class AgenticAIEnv(gym.Env):
    """Gym environment in which an agent chooses to answer or skip a question.

    Observations are integer indices into ``dataset`` so that every value
    returned by ``reset``/``step`` is a member of ``observation_space``
    (``Discrete(len(dataset))``). The question text for the step that was just
    taken is exposed through ``info['question']``.

    Actions:
        0 -- skip the question (reward 0.0)
        1 -- answer via ``rag_chain.run(question)``; the reward is the value
             returned by ``compute_semantic_similarity`` for that answer and
             the item's ``'ideal_answer'``.

    ``compute_semantic_similarity`` is looked up at call time, so it must be
    defined in the kernel before ``step`` is called with action 1.

    Args:
        dataset: Indexable, sized collection whose items provide the keys
            ``'question'`` and ``'ideal_answer'``.
        rag_chain: Object exposing ``run(question)``.
        tokenizer: Forwarded to ``compute_semantic_similarity``.
        model: Forwarded to ``compute_semantic_similarity``.

    Raises:
        ValueError: If ``dataset`` is empty.
    """

    def __init__(self, dataset, rag_chain, tokenizer, model):
        super().__init__()

        if len(dataset) == 0:
            raise ValueError("dataset must contain at least one question")

        self.dataset = dataset
        self.rag_chain = rag_chain
        self.tokenizer = tokenizer
        self.model = model
        self.current_idx = 0

        # 0 = Skip, 1 = Answer
        self.action_space = spaces.Discrete(2)

        # One discrete state per dataset item; the observation is the item index.
        self.observation_space = spaces.Discrete(len(dataset))

    def reset(self):
        """Jump to a uniformly random question and return its index."""
        self.current_idx = int(np.random.randint(0, len(self.dataset)))
        return self.current_idx

    def step(self, action):
        """Apply ``action`` to the current question and advance to the next one.

        Returns:
            tuple: ``(observation, reward, done, info)`` where ``observation``
            is the index of the *next* question, ``reward`` is a float,
            ``done`` is True when the question just handled was the last item
            of the dataset, and ``info`` holds the text of the question that
            was just handled under the key ``'question'``.
        """
        item = self.dataset[self.current_idx]
        question = item['question']
        ideal_answer = item['ideal_answer']

        if action == 1:  # Answer
            result = self.rag_chain.run(question)
            reward = float(
                compute_semantic_similarity(result, ideal_answer, self.tokenizer, self.model)
            )
        else:  # Skip
            reward = 0.0

        # The episode ends once the last item of the dataset has been handled.
        done = self.current_idx == len(self.dataset) - 1
        info = {'question': question}

        # Advance, wrapping around so the index always stays a valid observation.
        self.current_idx = (self.current_idx + 1) % len(self.dataset)

        return self.current_idx, reward, done, info

    def render(self, mode='human'):
        """No-op; this environment has no visual representation."""
        pass


In [None]:
import mlflow
import mlflow.pytorch
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv

def train_agenticai_system(env, total_timesteps=10000, seed=None):
    """Train a PPO agent on ``env`` and record the run in MLflow.

    The MLflow run is opened with a context manager so that it is always
    closed, even if training raises; otherwise a failed training would leave a
    run active and the next ``mlflow.start_run()`` would fail.

    Args:
        env: A single (non-vectorized) gym environment instance.
        total_timesteps: Number of environment steps passed to ``PPO.learn``.
        seed: Optional seed forwarded to ``PPO`` for reproducible training.
            ``None`` (the default) keeps the library's unseeded behavior.

    Returns:
        The trained ``PPO`` model.
    """
    # A distinct name is used for the wrapper so that the factory lambda keeps
    # referring to the raw environment rather than to the wrapper itself.
    vec_env = DummyVecEnv([lambda: env])

    with mlflow.start_run():
        # Training parameters
        mlflow.log_param('agent_type', 'PPO')
        mlflow.log_param('total_timesteps', total_timesteps)
        mlflow.log_param('model', 'MlpPolicy')
        if seed is not None:
            mlflow.log_param('seed', seed)

        model = PPO('MlpPolicy', vec_env, verbose=1, seed=seed)
        model.learn(total_timesteps=total_timesteps)

        mlflow.log_metric('training_steps', total_timesteps)

    return model



# Initialize the custom AgenticAI environment.
# NOTE(review): train_mediqa, rag_chain, tokenizer and fine_tuned_model are not
# defined in the visible cells; they must exist in the kernel before this runs.
agenticai_env = AgenticAIEnv(train_mediqa, rag_chain, tokenizer, fine_tuned_model)

# Train the agent with PPO
trained_agent = train_agenticai_system(agenticai_env)

# Log the agent type in its own, explicitly closed MLflow run. The training run
# has already ended at this point, so a bare mlflow.log_param() would implicitly
# open a new run and leave it active, which makes any later mlflow.start_run()
# fail with an "already active" error.
with mlflow.start_run():
    mlflow.log_param('trained_agent', 'PPO')

In [None]:
import torch
import onnx
from transformers import T5ForConditionalGeneration

def quantize_onnx_model(model, dummy_input, original_model_path='model.pth',
                        onnx_model_path='./optimized_model.onnx'):
    """Export a PyTorch model to ONNX and log on-disk model sizes to MLflow.

    Despite its name, this function does not quantize anything yet; the
    quantization step is still a placeholder.

    Args:
        model: PyTorch model passed to ``torch.onnx.export``.
        dummy_input: Example input passed to ``torch.onnx.export``.
        original_model_path: Path of the saved original model whose size is
            logged as ``original_model_size_MB``. If the file does not exist,
            a warning is issued and that metric is skipped.
        onnx_model_path: Destination path of the exported ONNX file.

    Returns:
        The ONNX model loaded back from ``onnx_model_path``.
    """
    # Local imports: os is not imported in any visible cell of the notebook.
    import os
    import warnings

    bytes_per_mb = 1024 * 1024

    # Convert the PyTorch model to ONNX format
    torch.onnx.export(model, dummy_input, onnx_model_path)

    # Log the original model size, if the original checkpoint is available
    if os.path.exists(original_model_path):
        original_size = os.path.getsize(original_model_path) / bytes_per_mb
        mlflow.log_metric('original_model_size_MB', original_size)
    else:
        warnings.warn(
            f"{original_model_path!r} not found; skipping original_model_size_MB"
        )

    # Load the ONNX model and log its size
    onnx_model = onnx.load(onnx_model_path)
    optimized_size = os.path.getsize(onnx_model_path) / bytes_per_mb
    mlflow.log_metric('optimized_model_size_MB', optimized_size)

    # Placeholder for quantization code (optional, can improve inference speed)

    return onnx_model


In [None]:
from sklearn.metrics.pairwise import cosine_similarity

def compute_semantic_similarity(result, expected_answer, tokenizer, model):
    """Cosine similarity between mean-pooled input embeddings of two texts.

    Both texts are tokenized together (padded to a common length), looked up in
    the model's input-embedding layer, and averaged over their real tokens
    only. Padding positions are excluded through the attention mask, so the
    score does not depend on how much padding either text received or on the
    position at which a token appears.

    Args:
        result: Generated answer text.
        expected_answer: Reference answer text.
        tokenizer: Callable accepting a list of texts plus the keyword
            arguments used below and returning a mapping with ``'input_ids'``
            and, optionally, ``'attention_mask'`` tensors.
        model: Object exposing ``get_input_embeddings()`` that returns an
            embedding module.

    Returns:
        float: Cosine similarity of the two pooled embeddings.
    """
    inputs = tokenizer(
        [result, expected_answer],
        return_tensors='pt',
        padding=True,
        truncation=True,
        max_length=128,
    )

    embedding_layer = model.get_input_embeddings()
    # Run the lookup on whatever device the embedding weights live on.
    device = next(embedding_layer.parameters()).device

    input_ids = inputs['input_ids'].to(device)
    if 'attention_mask' in inputs:
        mask = inputs['attention_mask'].to(device)
    else:
        # No mask supplied: treat every position as a real token.
        mask = torch.ones_like(input_ids)

    with torch.no_grad():
        token_embeddings = embedding_layer(input_ids)
        weights = mask.unsqueeze(-1).to(token_embeddings.dtype)
        # Masked mean over the sequence axis; clamp guards an all-padding row.
        pooled = (token_embeddings * weights).sum(dim=1) / weights.sum(dim=1).clamp(min=1)
        similarity = torch.nn.functional.cosine_similarity(pooled[0], pooled[1], dim=0)

    return float(similarity)

def evaluate_agenticai_system(agent, rag_chain, test_dataset, tokenizer, model=None, threshold=0.7):
    """Evaluate the AgenticAI system (PPO agent + RAG chain) on a test dataset.

    For every item the agent picks an action; when it chooses to answer
    (action 1), the RAG chain's answer is scored against the item's
    ``'ideal_answer'`` with ``compute_semantic_similarity``. An answer counts
    as correct when its score exceeds ``threshold``. Skipped questions count
    as incorrect, because accuracy is divided by the full dataset size.

    Args:
        agent: Trained agent exposing ``predict(observation)``.
        rag_chain: Object exposing ``run(question)``.
        test_dataset: Sized iterable of items with the keys ``'question'`` and
            ``'ideal_answer'``.
        tokenizer: Forwarded to ``compute_semantic_similarity``.
        model: Model forwarded to ``compute_semantic_similarity``. Defaults to
            ``rag_chain.llm`` when omitted.
        threshold: Minimum similarity (exclusive) for an answer to count as
            correct.

    Returns:
        float: Fraction of items answered correctly; 0.0 for an empty dataset.
    """
    # Local import: tqdm is used below but is not imported in the visible cells.
    from tqdm.auto import tqdm

    if model is None:
        # NOTE(review): compute_semantic_similarity calls
        # model.get_input_embeddings(); confirm rag_chain.llm provides it.
        model = rag_chain.llm

    total = len(test_dataset)
    correct_answers = 0

    # The context manager closes the run even if evaluation raises part-way.
    with mlflow.start_run():
        mlflow.log_param('evaluation_dataset', 'MEDIQA')  # Assuming the MEDIQA dataset is used
        mlflow.log_param('evaluation_metric', 'semantic_similarity')

        for step, item in enumerate(tqdm(test_dataset, desc="Evaluating AgenticAI System")):
            question = item['question']
            expected_answer = item['ideal_answer']

            # NOTE(review): the raw question string is passed as the observation;
            # confirm this matches the observation space the agent was trained on.
            action = agent.predict(question)[0]  # 0 = Skip, 1 = Answer

            if action == 1:
                result = rag_chain.run(question)
                reward = float(
                    compute_semantic_similarity(result, expected_answer, tokenizer, model)
                )

                # One metric series indexed by item position. Metric names built
                # from question text can contain characters MLflow rejects and
                # collide whenever two questions share a prefix.
                mlflow.log_metric('reward', reward, step=step)

                if reward > threshold:
                    correct_answers += 1

        accuracy = correct_answers / total if total else 0.0
        mlflow.log_metric('AgenticAI_accuracy', accuracy)
        print(f"AgenticAI Accuracy: {accuracy * 100:.2f}%")

    return accuracy


# Evaluate the trained PPO agent on test_mediqa; the returned accuracy is the
# cell's displayed output.
evaluate_agenticai_system(
    agent=trained_agent,
    rag_chain=rag_chain,
    test_dataset=test_mediqa,
    tokenizer=tokenizer,
)