# Variables, Constants and Libraries definition
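Before running this notebook, sign in to Azure from the command line and select the subscription to use (replace `subscription_id` with your own subscription ID):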
```
az login --use-device-code --allow-no-subscriptions --output none < /dev/null
az account set --subscription subscription_id
```

In [1]:
# Instantiate azure-identity's DefaultAzureCredential and keep it as `credential`.
# NOTE(review): the next cell imports DefaultAzureCredential and assigns `credential`
# again, so this cell looks redundant — confirm before removing it.
from azure.identity import DefaultAzureCredential
credential = DefaultAzureCredential()

In [2]:
from dotenv import load_dotenv # requires python-dotenv
from azure.identity import DefaultAzureCredential, get_bearer_token_provider #requires azure-identity
from openai import AzureOpenAI
import openai, os, json, time, glob, kagglehub # openai is used for error catching, otherwise we use AzureOpenAI
import pandas as pd

# Load the Azure OpenAI settings from the local .env file. The code treats a falsy
# return from load_dotenv as "nothing loaded" and stops the cell right here instead
# of failing later with a KeyError on the first missing variable.
if not load_dotenv("./../../config/credentials_my.env"):
    print("Environment variables not loaded, cell execution stopped")
    # `sys` is not imported by the cells above, so the previous `sys.exit()` call
    # raised NameError. Raising SystemExit directly needs no import.
    raise SystemExit(1)

# Required settings: os.environ[...] raises KeyError if any of them is missing.
openai_endpoint       = os.environ["AZURE_OPENAI_ENDPOINT"]
openai_api_key        = os.environ["AZURE_OPENAI_API_KEY"]
openai_api_version    = os.environ["OPENAI_API_VERSION"]
azure_deployment_name = os.environ["MODEL_DEPLOYMENT_NAME"]

# A single credential instance, shared with the bearer-token provider below
# (previously a second DefaultAzureCredential() was created just for the provider).
credential            = DefaultAzureCredential()

token_provider        = get_bearer_token_provider(
    credential, "https://cognitiveservices.azure.com/.default")

folder_path = "./../data"

# Create the folder if it doesn't exist
os.makedirs(folder_path, exist_ok=True)
# Base path (no extension) shared by every file this notebook reads or writes
output_file_name = os.path.join(folder_path, "questions_enriched")

# Get the current user's home directory
home_dir = os.path.expanduser("~")

# Print some constants (the API key is deliberately not printed)
print(f'openai_endpoint: <{openai_endpoint}>')
print(f"azure_deployment_name: {azure_deployment_name}")
print(f"openai_api_version: {openai_api_version}")

openai_endpoint: <https://mmoaiswc-01.openai.azure.com/>
azure_deployment_name: gpt-4o
openai_api_version: 2025-04-01-preview


# [Single-Topic RAG Evaluation Dataset](https://www.kaggle.com/datasets/samuelmatsuoharris/single-topic-rag-evaluation-dataset) retrieval

In [3]:
sample_size = 10
random_seed = 42  # fixed seed so Restart & Run All exports the same rows every time

# Read the enriched questions CSV located at the shared base path
enriched_df = pd.read_csv(f"{output_file_name}.csv")

# Draw sample_size random rows; when the file has fewer rows than that, keep them
# all (shuffled), which is what shuffling the full frame and taking head() did.
random_sample = (
    enriched_df
    .sample(n=min(sample_size, len(enriched_df)), random_state=random_seed)
    .reset_index(drop=True)
)

# Exporting to CSV format
random_sample.to_csv(f"{output_file_name}_(size {sample_size}).csv", index=False)

# Exporting to JSONL format
random_sample.to_json(f"{output_file_name}_records_(size {sample_size}).jsonl", orient="records", lines=True)

# Last expression of the cell, so the notebook renders the sampled rows
# (a bare expression in the middle of a cell displays nothing).
random_sample