# agent_preparation

In [None]:
!pip install pandas torch scikit-learn requests imms_log_by_format



import pandas as pd

import torch

from sklearn.model_selection import train_test_split

import requests

from io import StringIO

from imms_log_by_format import Logger



# Constants
import os  # needed only for the environment lookup below

# Artifactory locations: the source CSV that is read, and the repository
# path the train/test splits are written to.
DATASET_URL = 'https://artifactory.engine.capgemini.com/artifactory/IMMS-dataset-dev-local/auto_mpg_dataset.csv'
UPLOAD_URL = 'https://artifactory.engine.capgemini.com/artifactory/IMMS_datasets'

# Prefer a token injected through the ARTIFACTORY_TOKEN environment variable.
# NOTE(review): the fallback literal is a credential committed to source --
# rotate it and remove the fallback once the variable is provisioned.
TOKEN = os.environ.get(
    'ARTIFACTORY_TOKEN',
    'AKCpBtMeFndD5dudesorJSq64URz2WPtU3jfW7DqLwfDyD51vtneZkih6yNrFugBmxKgyFQ9q',
)

# Bearer-token header sent with every Artifactory request in this stage.
HEADERS = {'Authorization': f'Bearer {TOKEN}'}



# Logger configuration
# Identifiers that label this stage's log record; every id is kept as a string.
pipeline_name, pipeline_id = 'regression_pipeline', '3'
pipeline_version, experiment_id = '1', '10'
run_name = 'regression_pipeline_3_1_10_agent_preparation'

# Endpoint the log record is sent to.
api_url = 'http://localhost:3290/bpfx/workspace/logs'



# Load dataset
# Fetch the source CSV over HTTP; raise_for_status() stops the run on any
# 4xx/5xx reply instead of parsing an error page.
response = requests.get(url=DATASET_URL, headers=HEADERS)
response.raise_for_status()

# Parse the response body as CSV directly from memory (no temporary file).
csv_buffer = StringIO(response.text)
data = pd.read_csv(csv_buffer)



# Preprocess dataset
# Remove rows that contain missing values, then keep only float/int columns.
data = data.dropna().select_dtypes(include=[float, int])

# Split dataset
# 80/20 train/test split with a fixed seed so the split is reproducible.
splits = train_test_split(data, test_size=0.2, random_state=42)
train_data, test_data = splits

# Convert datasets to CSV format in memory
# index=False keeps the DataFrame index out of the serialised files.
train_csv, test_csv = (
    frame.to_csv(index=False) for frame in (train_data, test_data)
)



# Upload datasets

def upload_dataset(csv_data, filename, timeout=60):
    """Upload CSV text to ``UPLOAD_URL/<filename>`` with an HTTP PUT.

    Args:
        csv_data: CSV content sent as the request body.
        filename: Name appended to ``UPLOAD_URL`` to form the target URL.
        timeout: Seconds to wait on the server, forwarded to
            ``requests.put``. Without a timeout a stalled connection would
            block the pipeline indefinitely.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.put(
        f'{UPLOAD_URL}/{filename}',
        headers=HEADERS,
        data=csv_data,
        timeout=timeout,
    )
    response.raise_for_status()



# Push the training split first, then the test split, under fixed file names.
for payload, target_name in (
    (train_csv, 'llmops_train_set.csv'),
    (test_csv, 'llmops_test_set.csv'),
):
    upload_dataset(payload, target_name)



# Log the process
logger = Logger()

# NOTE: this rebinds the module-level name `data` (until now the DataFrame)
# to the log payload.
data = dict(
    status='success',
    message='Datasets processed and uploaded successfully',
)

# Logging is best-effort: a failure is reported on stdout and the stage
# still finishes.
log_args = (
    pipeline_name,
    pipeline_id,
    pipeline_version,
    experiment_id,
    run_name,
    api_url,
    data,
)
try:
    logger.log_to_db(*log_args)
except Exception as e:
    print(f"Logging failed: {e}")

# Print confirmation
print('datasets pushed to the hub')



# agent_model_download

In [None]:
!pip install torch requests

import torch

import requests

import os

import time



# Step 1: Clear the CUDA cache
torch.cuda.empty_cache()

# Step 2: List the files in the specified folder
# Artifactory storage-API listing of the model folder (deep, files only).
api_url = "https://artifactory.engine.capgemini.com/artifactory/api/storage/IMMS-model-dev-local/google/flant5-large?list&deep=1&listFolders=0"

# Prefer a token injected through the ARTIFACTORY_TOKEN environment variable.
# NOTE(review): the fallback literal is a credential committed to source --
# rotate it and remove the fallback once the variable is provisioned.
token = os.environ.get(
    "ARTIFACTORY_TOKEN",
    "AKCpBtMeFndD5dudesorJSq64URz2WPtU3jfW7DqLwfDyD51vtneZkih6yNrFugBmxKgyFQ9q",
)
headers = {"Authorization": f"Bearer {token}"}

# A timeout keeps a stalled connection from hanging the run forever.
response = requests.get(api_url, headers=headers, timeout=60)
response.raise_for_status()

# Each entry of 'files' is a dict; its 'uri' key is read by the download step.
file_list = response.json()['files']



# Step 3: Download all the listed files and save them in ./t5-translation
os.makedirs('./t5-translation', exist_ok=True)

start_time = time.time()

for file_info in file_list:
    file_path = file_info['uri']
    file_url = f"https://artifactory.engine.capgemini.com/artifactory/IMMS-model-dev-local/google/flant5-large{file_path}"
    # Only the base name is kept, so every file lands directly in
    # ./t5-translation.
    # NOTE(review): the listing is requested with deep=1; files sharing a
    # base name in different sub-folders would overwrite each other here.
    file_name = os.path.basename(file_path)

    # Stream the body to disk in 1 MiB chunks rather than holding a whole
    # (potentially multi-gigabyte) model file in memory via `.content`.
    with requests.get(file_url, headers=headers, stream=True, timeout=60) as file_response:
        file_response.raise_for_status()
        with open(f'./t5-translation/{file_name}', 'wb') as file:
            for chunk in file_response.iter_content(chunk_size=1024 * 1024):
                file.write(chunk)

end_time = time.time()

# Step 4: Print the total time taken to execute the complete code
# The measurement covers the download loop only.
total_time = end_time - start_time
print(f"Total time taken: {total_time:.2f} seconds")



# Special instructions
# Identifiers for this stage; the numeric ids are plain integers here.
pipeline_name = "regression_pipeline"
pipeline_id, pipeline_version, experiment_id = 3, 1, 10
run_name = "regression_pipeline_3_1_10_agent_model_download"

# Bundle the identifiers into one mapping and echo it to stdout.
data = dict(
    pipeline_name=pipeline_name,
    pipeline_id=pipeline_id,
    pipeline_version=pipeline_version,
    experiment_id=experiment_id,
    run_name=run_name,
)

print("Pipeline data:", data)



# agent_regression_finetuning

In [None]:
!pip install pandas scikit-learn requests

!pip install imms_log_by_format



import os

import requests

import pandas as pd

from sklearn.ensemble import GradientBoostingRegressor

from sklearn.model_selection import GridSearchCV

import pickle

from imms_log_by_format import Logger



# Step 1: Download the datasets
# Prefer a token injected through the ARTIFACTORY_TOKEN environment variable.
# NOTE(review): the fallback literal is a credential committed to source --
# rotate it and remove the fallback once the variable is provisioned.
token = os.environ.get(
    'ARTIFACTORY_TOKEN',
    'AKCpBtMeFndD5dudesorJSq64URz2WPtU3jfW7DqLwfDyD51vtneZkih6yNrFugBmxKgyFQ9q',
)
headers = {'Authorization': f'Bearer {token}'}
train_url = 'https://artifactory.engine.capgemini.com/artifactory/IMMS_datasets/llmops_train_set.csv'
test_url = 'https://artifactory.engine.capgemini.com/artifactory/IMMS_datasets/llmops_test_set.csv'

# Fail fast on HTTP errors: without the status checks a 401/404 error body
# would be written to disk and later parsed as if it were the dataset.
train_response = requests.get(train_url, headers=headers, timeout=60)
train_response.raise_for_status()
test_response = requests.get(test_url, headers=headers, timeout=60)
test_response.raise_for_status()

# Persist both splits next to the notebook for the loading step.
with open('llmops_train_set.csv', 'wb') as f:
    f.write(train_response.content)

with open('llmops_test_set.csv', 'wb') as f:
    f.write(test_response.content)



# Step 2: Load the datasets
# Read the two CSV files written by the download step.
train_df, test_df = (
    pd.read_csv(csv_name)
    for csv_name in ('llmops_train_set.csv', 'llmops_test_set.csv')
)

# 'mpg' is the target column; every other column is used as a feature.
y_train, y_test = train_df['mpg'], test_df['mpg']
X_train = train_df.drop(columns='mpg')
X_test = test_df.drop(columns='mpg')



# Step 3: Initialize and train the model
# Baseline regressor fitted once with fixed hyperparameters.
base_params = {'n_estimators': 10, 'learning_rate': 0.1, 'max_depth': 6}
model = GradientBoostingRegressor(**base_params)
model.fit(X_train, y_train)

# Step 4: Perform grid search
# 3 x 3 x 3 x 2 = 54 parameter combinations, each evaluated with 3-fold CV.
# NOTE(review): random_state is searched like a hyperparameter here, so the
# "best" seed is also selected -- confirm that is intended.
param_grid = dict(
    n_estimators=[10, 50, 100],
    learning_rate=[0.01, 0.1, 0.2],
    max_depth=[3, 6, 9],
    random_state=[0, 42],
)

# n_jobs=-1 and verbose=2 are passed through to GridSearchCV unchanged.
grid_search = GridSearchCV(model, param_grid, cv=3, n_jobs=-1, verbose=2)
grid_search.fit(X_train, y_train)



# Step 5: Display the best hyperparameters
best_params = grid_search.best_params_
print("Best hyperparameters found by grid search:", best_params)

# Step 6: Save the fine-tuned model
# Make sure the output folder exists before opening the target file.
os.makedirs('./results', exist_ok=True)

# The estimator with the best parameters is what gets pickled.
fine_tuned_model = grid_search.best_estimator_
with open('./results/llmops_finetuned_model.pkl', 'wb') as f:
    pickle.dump(fine_tuned_model, f)

print("model saved locally")



# Step 7: Log the results
# Identifiers that label this stage's log record; every id is a string.
pipeline_name, pipeline_id = 'regression_pipeline', '3'
pipeline_version, experiment_id = '1', '10'
run_name = 'regression_pipeline_3_1_10_agent_regression_finetuning'
api_url = 'http://localhost:3290/bpfx/workspace/logs'

# Payload: the winning hyperparameters plus the estimator family.
data = dict(
    best_params=best_params,
    model_type='GradientBoostingRegressor',
)

logger = Logger()

# Logging is best-effort: a failure is printed and the stage still finishes.
log_args = (
    pipeline_name,
    pipeline_id,
    pipeline_version,
    experiment_id,
    run_name,
    api_url,
    data,
)
try:
    logger.log_to_db(*log_args)
except Exception as e:
    print(f"Error logging to database: {e}")



# agent_model_evaluation

In [None]:
!pip install pandas requests scikit-learn joblib

!pip install imms_log_by_format



In [None]:
import os

import pandas as pd

import requests

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

import joblib

from imms_log_by_format import Logger



# Step 1: Load the dataset
url = 'https://artifactory.engine.capgemini.com/artifactory/IMMS_datasets/llmops_test_set.csv'

# The key is sent in the X-JFrog-Art-Api header. Prefer a value injected
# through the ARTIFACTORY_TOKEN environment variable.
# NOTE(review): the fallback literal is a credential committed to source --
# rotate it and remove the fallback once the variable is provisioned.
headers = {
    'X-JFrog-Art-Api': os.environ.get(
        'ARTIFACTORY_TOKEN',
        'AKCpBtMeFndD5dudesorJSq64URz2WPtU3jfW7DqLwfDyD51vtneZkih6yNrFugBmxKgyFQ9q',
    )
}

# A timeout keeps a stalled connection from hanging the run forever.
response = requests.get(url, headers=headers, timeout=60)

# Stop with exit status 1 on any non-200 reply. SystemExit is raised directly
# because exit() is a helper meant for the interactive interpreter rather
# than for programs.
if response.status_code == 401:
    print("Unauthorized access. Please check your token.")
    raise SystemExit(1)
elif response.status_code != 200:
    print(f"Failed to download the dataset. HTTP Status Code: {response.status_code}")
    raise SystemExit(1)

# Persist the CSV locally, then load it into a DataFrame.
with open('llmops_test_set.csv', 'wb') as file:
    file.write(response.content)

df = pd.read_csv('llmops_test_set.csv')



# Step 2: Load the pre-trained model
model_path = './translation/bert_model.pkl'
model_url = 'https://artifactory.engine.capgemini.com/artifactory/IMMS-test/bert/bert_model.pkl'

# Download the model only when no local copy exists yet.
if not os.path.exists(model_path):
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    # A timeout keeps a stalled connection from hanging the run forever.
    model_response = requests.get(model_url, headers=headers, timeout=60)

    # Stop with exit status 1 on any non-200 reply. SystemExit is raised
    # directly because exit() is a helper meant for the interactive
    # interpreter rather than for programs.
    if model_response.status_code == 401:
        print("Unauthorized access. Please check your token.")
        raise SystemExit(1)
    elif model_response.status_code != 200:
        print(f"Failed to download the model. HTTP Status Code: {model_response.status_code}")
        raise SystemExit(1)

    with open(model_path, 'wb') as model_file:
        model_file.write(model_response.content)

# SECURITY NOTE(review): joblib.load unpickles the file, and unpickling can
# execute arbitrary code. The bytes come from a remote repository, so only
# load artifacts from a source that is trusted and access-controlled.
model = joblib.load(model_path)



# Step 3: Evaluate the model
# 'mpg' is the regression target; the remaining columns are the features.
X_test = df.drop(columns=['mpg'])
y_test = df['mpg']

y_pred = model.predict(X_test)

# Error metrics computed over the whole test set.
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Step 4: Make a prediction for the first row
# Select with a list so the row stays a one-row DataFrame. The model then
# receives the same column names as in the batch prediction above, instead
# of a bare ndarray that drops them.
first_row = X_test.iloc[[0]]
first_row_prediction = model.predict(first_row)



# Step 5: Log the results
# Identifiers that label this stage's log record; every id is a string.
pipeline_name, pipeline_id = 'regression_pipeline', '3'
pipeline_version, experiment_id = '1', '10'
run_name = 'regression_pipeline_3_1_10_agent_model_evaluation'
api_url = 'http://localhost:3290/bpfx/workspace/logs'

# Payload: the three metrics plus the single prediction for the first row.
data = dict(
    mean_squared_error=mse,
    mean_absolute_error=mae,
    r2_score=r2,
    first_row_prediction=first_row_prediction[0],
)

logger = Logger()

# Logging is best-effort: a failure is printed and the stage still finishes.
# The confirmation message is printed only after log_to_db returns.
log_args = (
    pipeline_name,
    pipeline_id,
    pipeline_version,
    experiment_id,
    run_name,
    api_url,
    data,
)
try:
    logger.log_to_db(*log_args)
    print(f"Model {model_path} that exists in local folder is evaluated for mean_squared_error, mean_absolute_error, r2_score.")
except Exception as e:
    print(f"Failed to log data: {e}")



# agent_deployment

In [None]:
!pip install requests

!pip install imms_log_by_format



In [None]:
import os

import pickle

import requests

from imms_log_by_format import Logger



# Step 1: Load the model
model_path = './results/llmops_finetuned_model.pkl'
# NOTE(review): nothing visible in this file writes config_file.json --
# confirm an earlier step creates it, otherwise the open() below raises
# FileNotFoundError.
config_path = './results/config_file.json'

# Unpickling doubles as a check that the saved model file is readable.
with open(model_path, 'rb') as model_file:
    model = pickle.load(model_file)

# Step 2: Upload the model and config file
upload_url = 'https://artifactory.engine.capgemini.com/artifactory/IMMS-test/bert/'

# Prefer a token injected through the ARTIFACTORY_TOKEN environment variable.
# NOTE(review): the fallback literal is a credential committed to source --
# rotate it and remove the fallback once the variable is provisioned.
token = os.environ.get(
    'ARTIFACTORY_TOKEN',
    'AKCpBtMeFndD5dudesorJSq64URz2WPtU3jfW7DqLwfDyD51vtneZkih6yNrFugBmxKgyFQ9q',
)

headers = {
    'Authorization': f'Bearer {token}'
}

# Open both files in a `with` block so the handles are closed once the
# request has been sent, including when the request raises.
# NOTE(review): the dataset upload elsewhere in this file is an HTTP PUT of
# the raw body to the target path; confirm the repository accepts this
# multipart POST to a folder URL.
with open(model_path, 'rb') as model_fh, open(config_path, 'rb') as config_fh:
    files = {
        'model': model_fh,
        'config': config_fh
    }
    response = requests.post(upload_url, headers=headers, files=files, timeout=60)

# Only HTTP 201 is treated as success; anything else is reported but does
# not stop the stage.
if response.status_code == 201:
    print("Model and config file uploaded successfully.")
else:
    print(f"Failed to upload files. Status code: {response.status_code}, Response: {response.text}")



# Step 3: Log the process
# Identifiers that label this stage's log record; every id is a string.
pipeline_name, pipeline_id = 'regression_pipeline', '3'
pipeline_version, experiment_id = '1', '10'
run_name = 'regression_pipeline_3_1_10_agent_deployment'
api_url = 'http://localhost:3290/bpfx/workspace/logs'

# Payload: the HTTP status and body returned by the upload request.
data = dict(
    model_upload_status=response.status_code,
    response_text=response.text,
)

logger = Logger()

# Logging is best-effort: a failure is printed and the stage still finishes.
log_args = (
    pipeline_name,
    pipeline_id,
    pipeline_version,
    experiment_id,
    run_name,
    api_url,
    data,
)
try:
    logger.log_to_db(*log_args)
    print("Logging successful.")
except Exception as e:
    print(f"Logging failed: {e}")

