In [None]:
#!pip install openai python-dotenv

Collecting openai
  Obtaining dependency information for openai from https://files.pythonhosted.org/packages/e5/f1/d9251b565fce9f8daeb45611e3e0d2f7f248429e40908dcee3b6fe1b5944/openai-2.11.0-py3-none-any.whl.metadata
  Downloading openai-2.11.0-py3-none-any.whl.metadata (29 kB)
Collecting distro<2,>=1.7.0 (from openai)
  Obtaining dependency information for distro<2,>=1.7.0 from https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl.metadata
  Downloading distro-1.9.0-py3-none-any.whl.metadata (6.8 kB)
Collecting httpx<1,>=0.23.0 (from openai)
  Obtaining dependency information for httpx<1,>=0.23.0 from https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl.metadata
  Downloading httpx-0.28.1-py3-none-any.whl.metadata (7.1 kB)
Collecting jiter<1,>=0.10.0 (from openai)
  Obtaining dependency information for jiter<1,>=0.10.0 fr

In [66]:
import os
from dotenv import load_dotenv
from openai import AzureOpenAI

import time
from dataclasses import dataclass
from typing import List, Dict

import pandas as pd



In [3]:
# 1. Load environment variables from the local .env file
load_dotenv()

endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
api_key = os.getenv("AZURE_OPENAI_API_KEY")

# Fail fast with a readable message: without this check a missing variable
# is stored as None and only surfaces later, when the client is built or used.
_missing_env_vars = [
    var_name
    for var_name, var_value in (
        ("AZURE_OPENAI_ENDPOINT", endpoint),
        ("AZURE_OPENAI_API_KEY", api_key),
    )
    if not var_value
]
if _missing_env_vars:
    raise RuntimeError(
        "Missing required environment variables: " + ", ".join(_missing_env_vars)
    )

# Only the endpoint is echoed; the API key is never printed.
print("Endpoint cargado:", endpoint)

Endpoint cargado: https://rafae-mj2b6wcp-eastus2.cognitiveservices.azure.com


In [4]:
# 2. Build the AzureOpenAI client from the endpoint and key loaded above
client = AzureOpenAI(
    azure_endpoint=endpoint,
    api_key=api_key,
    api_version="2024-12-01-preview",  # the version shown in the provided snippet
)

In [5]:
# 3. Name of the deployment created in Azure; it is passed as `model` in the
#    test request below.
MODEL_NAME = "gpt-5-chat"

In [7]:
# 4. Send a test request to confirm the deployment answers
response = client.chat.completions.create(
    model=MODEL_NAME,
    messages=[
        {"role": "system",
         "content": "You are a helpful assistant that answers in Spanish."},
        {"role": "user",
         "content": "Hola, ¬øme puedes confirmar que ya estoy usando Azure OpenAI?"},
    ],
    # `max_tokens` is deprecated in the Chat Completions API; use the same
    # `max_completion_tokens` parameter as the other calls in this notebook.
    max_completion_tokens=200,
)

print("\nRespuesta del modelo:\n")
print(response.choices[0].message.content)


Respuesta del modelo:

No tengo acceso directo a tu entorno ni a tus credenciales, as√≠ que no puedo confirmar de manera definitiva si est√°s usando **Azure OpenAI** o el servicio est√°ndar de **OpenAI**.  

Sin embargo, puedes verificarlo t√∫ mismo de las siguientes formas:

1. **Revisa la URL del endpoint**:  
   - Si la direcci√≥n que usas para hacer las solicitudes tiene el formato  
     `https://{nombre-de-tu-recurso}.openai.azure.com/...`,  
     entonces est√°s usando **Azure OpenAI**.  
   - Si la URL es `https://api.openai.com/...`, est√°s usando **OpenAI** directamente.

2. **Consulta tu configuraci√≥n o portal**:  
   - En **Azure Portal**, si ves un recurso llamado *Azure OpenAI Service*, eso confirma que est√°s usando esa plataforma.

3. **Comprueba las variables de entorno o las credenciales**:  
   - En Azure se usan las claves de recurso (`AZURE_OPEN


## Simple Tasks

In [None]:
@dataclass
class RunResult:
    """Outcome of one chat-completion request: the answer text plus basic metrics."""

    model: str          # deployment name the request was sent to
    task_id: str        # left empty by call_model; the caller fills it in
    prompt: str         # content of the last message of the request
    output: str         # text of the first returned choice ("" when there is none)
    latency_s: float    # seconds elapsed around the API call
    input_tokens: int   # usage.prompt_tokens (0 when usage is not reported)
    output_tokens: int  # usage.completion_tokens (0 when usage is not reported)


def call_model(model: str, messages: List[Dict[str, str]]) -> RunResult:
    """
    Call the Azure deployment and return its text together with basic metrics.

    Args:
        model: Name of the Azure deployment to query.
        messages: Chat messages to send; the content of the last one is
            stored as ``prompt`` in the result.

    Returns:
        A RunResult with an empty ``task_id``. ``output`` is ``""`` when the
        first choice carries no text, and both token counts are 0 when the
        response has no usage block.
    """
    # perf_counter is monotonic, so the measured latency cannot be distorted
    # by wall-clock adjustments the way time.time() can.
    started = time.perf_counter()
    resp = client.chat.completions.create(
        model=model,      # the deployment name goes here
        messages=messages,
    )
    elapsed = time.perf_counter() - started

    # Both the message content and the usage block may be None in a response;
    # coalesce them so RunResult keeps the field types it declares.
    content = resp.choices[0].message.content
    usage = resp.usage

    return RunResult(
        model=model,
        task_id="",
        prompt=messages[-1]["content"],
        output=content if content is not None else "",
        latency_s=elapsed,
        input_tokens=usage.prompt_tokens if usage is not None else 0,
        output_tokens=usage.completion_tokens if usage is not None else 0,
    )


In [25]:
# Extraction benchmark. Each task is a dict with:
#   "id"          - identifier copied into RunResult.task_id; ids containing
#                   "legal" are compared with spaces removed when scored
#   "instruction" - instruction placed first in the user message
#   "input"       - text appended to the instruction after "Texto: "
#   "expected"    - reference answer the normalized model output is compared to
SIMPLE_TASKS = [
    {
        "id": "food_1",
        "instruction": "Extrae √öNICAMENTE el nombre de la comida mencionada en esta rese√±a.",
        "input": "The tacos al pastor were amazing but the service was slow.",
        "expected": "tacos al pastor",
    },
    {
        "id": "lastname_1",
        "instruction": "Devuelve √öNICAMENTE el apellido o apellidos de este nombre completo.",
        "input": "Rafael Gallegos Cort√©s",
        "expected": "Gallegos Cort√©s",
    },
    {
        "id": "legal_1",
        "instruction": (
            "Identifica los nombres de entidades legales en el texto y "
            "devu√©lvelos como una lista de cadenas (por ejemplo: "
            '["Entidad 1", "Entidad 2"]).'
        ),
        "input": "El contrato se celebra entre Pemex Exploraci√≥n y Producci√≥n, S.A. de C.V. y CFE.",
        "expected": '["Pemex Exploraci√≥n y Producci√≥n, S.A. de C.V.", "CFE"]',
    },
]


In [None]:
# Deployment names evaluated on the simple tasks; each one is passed as the
# `model` argument of call_model.
SIMPLE_MODELS = [
    "gpt-5-chat",
    "o4-mini", 
    "DeepSeek-V3.1"
]


In [26]:
from collections import defaultdict

def normalize(text: str) -> str:
    """Canonicalize an answer for comparison.

    Trims surrounding whitespace, then strips any leading/trailing double
    quotes, then lower-cases the result.
    """
    trimmed = text.strip()
    unquoted = trimmed.strip('"')
    return unquoted.lower()

def eval_simple_tasks():
    """
    Run every task in SIMPLE_TASKS against every model in SIMPLE_MODELS.

    Returns a list of (RunResult, expected_output) tuples, ordered by model
    and, within each model, by task.
    """
    def run_one(model_name, task):
        # One system message plus a user message of the form
        # "<instruction>\n\nTexto: <input>".
        conversation = [
            {"role": "system", "content": "Eres un motor de extracci√≥n muy preciso."},
            {
                "role": "user",
                "content": "{}\n\nTexto: {}".format(task["instruction"], task["input"]),
            },
        ]
        outcome = call_model(model_name, conversation)
        outcome.task_id = task["id"]  # call_model leaves task_id empty
        return outcome, task["expected"]

    return [
        run_one(model_name, task)
        for model_name in SIMPLE_MODELS
        for task in SIMPLE_TASKS
    ]

def summarize_simple(results):
    """
    Compute accuracy, mean latency and mean total tokens per model.

    `results` is an iterable of (RunResult, expected) pairs. An answer counts
    as correct when its normalized text equals the normalized expected text;
    for task ids containing "legal" all spaces are removed before comparing.
    Returns one dict per model, in order of first appearance.
    """
    def empty_bucket():
        return {
            "n": 0, "correct": 0, "latency_sum": 0.0,
            "input_tokens": 0, "output_tokens": 0,
        }

    per_model = defaultdict(empty_bucket)

    for run, expected in results:
        bucket = per_model[run.model]
        bucket["n"] += 1
        bucket["latency_sum"] += run.latency_s
        bucket["input_tokens"] += run.input_tokens
        bucket["output_tokens"] += run.output_tokens

        got, want = normalize(run.output), normalize(expected)
        if "legal" in run.task_id:
            # very simple comparison; could later be improved by parsing JSON
            got, want = got.replace(" ", ""), want.replace(" ", "")

        bucket["correct"] += 1 if got == want else 0

    summary = []
    for model_name, bucket in per_model.items():
        runs = bucket["n"]
        if runs > 0:
            accuracy = bucket["correct"] / runs
            mean_latency = bucket["latency_sum"] / runs
            mean_tokens = (bucket["input_tokens"] + bucket["output_tokens"]) / runs
        else:
            accuracy = mean_latency = mean_tokens = 0.0
        summary.append({
            "model": model_name,
            "accuracy": accuracy,
            "avg_latency_s": mean_latency,
            "avg_tokens": mean_tokens,
        })
    return summary


In [27]:
# Run every simple task on every model, then aggregate the metrics per model.
results = eval_simple_tasks()
summary = summarize_simple(results)
summary  # last expression: shown as the cell output


[{'model': 'gpt-5-chat',
  'accuracy': 1.0,
  'avg_latency_s': 0.5267310937245687,
  'avg_tokens': 69.33333333333333},
 {'model': 'o4-mini',
  'accuracy': 1.0,
  'avg_latency_s': 2.426086187362671,
  'avg_tokens': 235.66666666666666},
 {'model': 'DeepSeek-V3.1',
  'accuracy': 1.0,
  'avg_latency_s': 0.4136193593343099,
  'avg_tokens': 72.0}]

In [28]:
# Task metadata indexed by task id
TASK_META = {task["id"]: task for task in SIMPLE_TASKS}

rows = []
for r, expected in results:
    meta = TASK_META.get(r.task_id, {})

    # Same correctness rule as summarize_simple: compare normalized texts,
    # ignoring spaces for task ids that contain "legal".
    got, want = normalize(r.output), normalize(expected)
    if "legal" in r.task_id:
        got, want = got.replace(" ", ""), want.replace(" ", "")
    correct = got == want

    rows.append(dict(
        model=r.model,
        task_id=r.task_id,
        instruction=meta.get("instruction", ""),
        input_text=meta.get("input", ""),
        expected=expected,
        output=r.output,
        correct=correct,
        latency_s=round(r.latency_s, 3),
        input_tokens=r.input_tokens,
        output_tokens=r.output_tokens,
    ))

# One row per (model, task) run
df_results = pd.DataFrame(rows)
df_results


Unnamed: 0,model,task_id,instruction,input_text,expected,output,correct,latency_s,input_tokens,output_tokens
0,gpt-5-chat,food_1,Extrae √öNICAMENTE el nombre de la comida menci...,The tacos al pastor were amazing but the servi...,tacos al pastor,tacos al pastor,True,0.711,51,5
1,gpt-5-chat,lastname_1,Devuelve √öNICAMENTE el apellido o apellidos de...,Rafael Gallegos Cort√©s,Gallegos Cort√©s,Gallegos Cort√©s,True,0.322,43,6
2,gpt-5-chat,legal_1,Identifica los nombres de entidades legales en...,El contrato se celebra entre Pemex Exploraci√≥n...,"[""Pemex Exploraci√≥n y Producci√≥n, S.A. de C.V....","[""Pemex Exploraci√≥n y Producci√≥n, S.A. de C.V....",True,0.547,82,21
3,o4-mini,food_1,Extrae √öNICAMENTE el nombre de la comida menci...,The tacos al pastor were amazing but the servi...,tacos al pastor,tacos al pastor,True,1.784,50,87
4,o4-mini,lastname_1,Devuelve √öNICAMENTE el apellido o apellidos de...,Rafael Gallegos Cort√©s,Gallegos Cort√©s,Gallegos Cort√©s,True,2.126,42,152
5,o4-mini,legal_1,Identifica los nombres de entidades legales en...,El contrato se celebra entre Pemex Exploraci√≥n...,"[""Pemex Exploraci√≥n y Producci√≥n, S.A. de C.V....","[""Pemex Exploraci√≥n y Producci√≥n, S.A. de C.V....",True,3.368,81,295
6,DeepSeek-V3.1,food_1,Extrae √öNICAMENTE el nombre de la comida menci...,The tacos al pastor were amazing but the servi...,tacos al pastor,tacos al pastor,True,0.401,51,5
7,DeepSeek-V3.1,lastname_1,Devuelve √öNICAMENTE el apellido o apellidos de...,Rafael Gallegos Cort√©s,Gallegos Cort√©s,Gallegos Cort√©s,True,0.361,46,6
8,DeepSeek-V3.1,legal_1,Identifica los nombres de entidades legales en...,El contrato se celebra entre Pemex Exploraci√≥n...,"[""Pemex Exploraci√≥n y Producci√≥n, S.A. de C.V....","[""Pemex Exploraci√≥n y Producci√≥n, S.A. de C.V....",True,0.479,86,22


In [30]:
# Print a plain-text report with one section per run.
for _, row in df_results.iterrows():
    print(
        "=" * 60,
        f"Modelo:   {row['model']}",
        f"Tarea:    {row['task_id']}",
        f"Instrucci√≥n:\n{row['instruction']}",
        f"\nTexto de entrada:\n{row['input_text']}",
        f"\nEsperado: {row['expected']}",
        f"Output:   {row['output']}",
        f"Correcto: {row['correct']}",
        f"Latencia: {row['latency_s']} s | tokens in/out: {row['input_tokens']}/{row['output_tokens']}\n",
        sep="\n",
    )


Modelo:   gpt-5-chat
Tarea:    food_1
Instrucci√≥n:
Extrae √öNICAMENTE el nombre de la comida mencionada en esta rese√±a.

Texto de entrada:
The tacos al pastor were amazing but the service was slow.

Esperado: tacos al pastor
Output:   tacos al pastor
Correcto: True
Latencia: 0.711 s | tokens in/out: 51/5

Modelo:   gpt-5-chat
Tarea:    lastname_1
Instrucci√≥n:
Devuelve √öNICAMENTE el apellido o apellidos de este nombre completo.

Texto de entrada:
Rafael Gallegos Cort√©s

Esperado: Gallegos Cort√©s
Output:   Gallegos Cort√©s
Correcto: True
Latencia: 0.322 s | tokens in/out: 43/6

Modelo:   gpt-5-chat
Tarea:    legal_1
Instrucci√≥n:
Identifica los nombres de entidades legales en el texto y devu√©lvelos como una lista de cadenas (por ejemplo: ["Entidad 1", "Entidad 2"]).

Texto de entrada:
El contrato se celebra entre Pemex Exploraci√≥n y Producci√≥n, S.A. de C.V. y CFE.

Esperado: ["Pemex Exploraci√≥n y Producci√≥n, S.A. de C.V.", "CFE"]
Output:   ["Pemex Exploraci√≥n y Producci√≥n, S

## Analytical tasks

In [38]:
@dataclass
class RunResult:
    model: str
    task_id: str
    prompt: str
    output: str
    latency_s: float
    input_tokens: int
    output_tokens: int

def call_model(model: str, messages: List[Dict[str, str]], max_tokens: int | None = None) -> RunResult:
    """
    Llama al modelo de Azure y regresa un objeto con el texto + m√©tricas b√°sicas.
    Para gpt-5-chat usamos 'max_completion_tokens' en lugar de 'max_tokens'.
    """
    t0 = time.time()

    kwargs = {}
    if max_tokens is not None:
        # Azure GPT-5 usa este nombre de par√°metro:
        kwargs["max_completion_tokens"] = max_tokens

    resp = client.chat.completions.create(
        model=model,
        messages=messages,
        **kwargs,
    )
    t1 = time.time()

    choice = resp.choices[0]
    usage = resp.usage

    return RunResult(
        model=model,
        task_id="",
        prompt=messages[-1]["content"],
        output=choice.message.content,
        latency_s=t1 - t0,
        input_tokens=usage.prompt_tokens,
        output_tokens=usage.completion_tokens,
    )

In [58]:
# Open-ended analytical prompts. Each task is a dict with:
#   "id"          - identifier copied into RunResult.task_id
#   "description" - short human-readable label
#   "prompt"      - full user message sent to the model
# run_analytical_tasks reads this global when it is called, so any later
# rebinding of ANALYTICAL_TASKS changes what gets run.
ANALYTICAL_TASKS = [
    {
        "id": "missing_1",
        "description": "Missing values analysis",
        "prompt": """
You are a senior data analyst. I have this dataset summary (in CSV):

column,missing_count,mean,std,min,max
age,10,35,7,18,65
income,50,25000,12000,5000,90000
city,0,NA,NA,NA,NA

1) Describe the missing value pattern.
2) Propose a reasonable imputation strategy.
3) Mention risks or caveats.
"""
    },
    {
        "id": "ts_1",
        "description": "Time series analysis",
        "prompt": """
You are a time series expert. I have this monthly revenue series (index, month, value):

1,2023-01,100
2,2023-02,110
3,2023-03,130
4,2023-04,140
5,2023-05,160
6,2023-06,170
7,2023-07,200
8,2023-08,210
9,2023-09,180
10,2023-10,220
11,2023-11,230
12,2023-12,250

1) Describe trend and any anomalies.
2) Give a short qualitative forecast for the next 3 months.
"""
    },
    {
        "id": "stats_1",
        "description": "Statistical insight on multivariate data",
        "prompt": """
You are a senior data scientist. I have this dataset summary:

- 1000 rows.
- Variables:
    * age (numeric, 18-80, mean=40, std=12)
    * income (numeric, strongly right-skewed, many values close to 0, some very large)
    * churn (binary: 1 if customer left, 0 otherwise)
    * segment (categorical: A, B, C)

Tasks:
1) Propose 2-3 hypotheses that would be interesting to test statistically.
2) Explain briefly which statistical methods you would use for each hypothesis.
3) Suggest 2 simple visualizations to better understand the data.
"""
    },
]


In [59]:
# Deployment names evaluated on the analytical tasks (currently the same
# three entries as SIMPLE_MODELS).
ANALYTICAL_MODELS = [
    "gpt-5-chat",
    "o4-mini", 
    "DeepSeek-V3.1"
]

In [94]:
def run_analytical_tasks():
    """
    Send every prompt in ANALYTICAL_TASKS to every model in ANALYTICAL_MODELS.

    Both globals are read when the function is called. Returns a list of
    RunResult objects ordered by model and then by task, each tagged with
    the id of the task that produced it.
    """
    def ask(model_name, task):
        conversation = [
            {"role": "system", "content": "You are an expert data analyst."},
            {"role": "user", "content": task["prompt"]},
        ]
        # No token cap is passed: max_tokens=600 was tried and the maximum
        # does not work for some models.
        outcome = call_model(model_name, conversation)
        outcome.task_id = task["id"]
        return outcome

    return [
        ask(model_name, task)
        for model_name in ANALYTICAL_MODELS
        for task in ANALYTICAL_TASKS
    ]

# Run the analytical benchmark and show how many results came back
# (one per model/task pair).
results_analytical = run_analytical_tasks()
len(results_analytical)


6

In [95]:
# One row per run, one column per RunResult field
df_analytical = pd.DataFrame([vars(run) for run in results_analytical])
df_analytical


Unnamed: 0,model,task_id,prompt,output,latency_s,input_tokens,output_tokens
0,gpt-5-chat,mimic_eda_missing,\nYou are a senior data analyst working with I...,"Let's go through this systematically, as a sen...",32.067313,23630,1918
1,gpt-5-chat,mimic_modeling,\nYou are a senior data scientist helping to b...,Let‚Äôs walk through this step-by-step as a seni...,25.01125,2278,1484
2,o4-mini,mimic_eda_missing,\nYou are a senior data analyst working with I...,1) Overall structure \n- Rows: one ICU stay e...,25.575531,23629,2778
3,o4-mini,mimic_modeling,\nYou are a senior data scientist helping to b...,1) Baseline modeling approach \n‚Ä¢ Start with ...,10.88358,2277,1452
4,DeepSeek-V3.1,mimic_eda_missing,\nYou are a senior data analyst working with I...,Of course. As a senior data analyst working wi...,34.630941,20615,1894
5,DeepSeek-V3.1,mimic_modeling,\nYou are a senior data scientist helping to b...,"Of course. As a senior data scientist, here is...",21.290509,2040,1781


In [96]:
# Save the analytical results to an Excel workbook (not CSV), without the index column.
# NOTE(review): to_excel needs an Excel writer engine installed (presumably openpyxl) - confirm.
df_analytical.to_excel("analytical_results.xlsx", index=False)


### Statistics and Missing values from Mimic and Titanic

In [89]:
def _prompt_context(df, target_col, sample_rows=100):
    """
    Render the text fragments of a dataset that are embedded in the prompts.

    Args:
        df: DataFrame to describe.
        target_col: Name of the target column whose class proportions are
            reported.
        sample_rows: Number of leading rows included in the sample text.

    Returns:
        A tuple ``(sample, summary, missing_df, missing, target_dist)``:
        the first `sample_rows` rows as text, ``describe()`` of the numeric
        columns as text, a DataFrame of missing counts per column, that same
        table as text, and the target proportions as text (or a
        "not found" message when the column is absent).
    """
    # small sample of rows
    sample = df.head(sample_rows).to_string(index=False)

    # describe numeric columns only (to keep the prompt from growing too much)
    summary = df.describe(include="number").to_string()

    # missing-value count per column
    missing_df = df.isna().sum().reset_index()
    missing_df.columns = ["column", "missing_count"]
    missing = missing_df.to_string(index=False)

    # basic distribution of the target, if the column exists
    if target_col in df.columns:
        target_dist = (
            df[target_col]
            .value_counts(normalize=True)
            .to_frame("proportion")
            .to_string()
        )
    else:
        target_dist = f"Column {target_col} not found."

    return sample, summary, missing_df, missing, target_dist


# NOTE(review): the row-level samples built here are interpolated into prompts
# that are sent to hosted models - confirm this is allowed by each dataset's
# data-use terms.

# --- MIMIC: ICU mortality dataset ---
mimic_df = pd.read_csv("mimic_train.csv")
(
    mimic_sample,
    mimic_summary,
    mimic_missing_df,
    mimic_missing,
    mimic_target_dist,
) = _prompt_context(mimic_df, "HOSPITAL_EXPIRE_FLAG")


# --- TITANIC: classic survival dataset ---
titanic_df = pd.read_csv("titanic3.csv")
(
    titanic_sample,
    titanic_summary,
    titanic_missing_df,
    titanic_missing,
    titanic_target_dist,
) = _prompt_context(titanic_df, "survived")


### Mimic Task

In [90]:
# MIMIC prompts. This rebinds ANALYTICAL_TASKS, which run_analytical_tasks
# reads at call time. The prompts are f-strings, so mimic_sample,
# mimic_missing, mimic_summary and mimic_target_dist must already be defined;
# their text is fixed when this cell runs.
ANALYTICAL_TASKS = [

###################################################################################################################################
############################# TASK 1. Missing values and EDA in MIMIC dataset
    {
        "id": "mimic_eda_missing",
        "description": "EDA and missing values in ICU mortality dataset (MIMIC)",
        "prompt": f"""
You are a senior data analyst working with ICU data (MIMIC-III style).
We want to predict in-hospital mortality for ICU patients (HOSPITAL_EXPIRE_FLAG).

Here is the sample dataset:

{mimic_sample}

Here is the count of missing values per column:

{mimic_missing}

Here is a summary of basic statistics for numeric variables:

{mimic_summary}

Here is the distribution of the target HOSPITAL_EXPIRE_FLAG (proportion):

{mimic_target_dist}

Tasks:
1) Describe the overall structure of the dataset (types of variables, what they seem to represent).
2) Analyze the missing value pattern: which variables are more problematic and what might be the underlying reasons in an ICU context.
3) Propose concrete strategies to handle missing values (e.g., dropping rows/columns, different imputations) and justify them.
4) Mention at least 3 potential risks or caveats, especially regarding data leakage and bias in a medical setting.
"""
    },

##################################################################################################################################
################################## TASK 2. Modeling design for MIMIC dataset

{
        "id": "mimic_modeling",
        "description": "Model design for ICU mortality prediction (MIMIC)",
        "prompt": f"""
You are a senior data scientist helping to build a model that predicts in-hospital mortality
for ICU patients using the variable HOSPITAL_EXPIRE_FLAG as the target.

You have access to the same dataset as before, with vitals, demographics, and other ICU-related variables.

You know the following:
- The dataset has missing values as shown below (counts per column):

{mimic_missing}

- The numeric variables have the following basic statistics:

{mimic_summary}

- The target distribution (HOSPITAL_EXPIRE_FLAG) is:

{mimic_target_dist}

Tasks:
1) Propose a reasonable baseline modeling approach (e.g., logistic regression, tree-based model, etc.) and explain why.
2) Describe how you would preprocess the data: handling missing values, scaling, encoding categorical variables, and dealing with highly correlated features.
3) Discuss how you would handle potential class imbalance in HOSPITAL_EXPIRE_FLAG.
4) Suggest appropriate evaluation metrics for this medical prediction problem and explain why they are suitable (e.g., ROC-AUC, PR-AUC, calibration).
5) Mention at least 2 ways to check whether the model might be unfair or biased toward some patient subgroups.
"""
    }
]


In [97]:
def run_analytical_tasks():
    """
    Send every prompt in ANALYTICAL_TASKS to every model in ANALYTICAL_MODELS.

    Both globals are read when the function is called. Returns a list of
    RunResult objects ordered by model and then by task, each tagged with
    the id of the task that produced it.
    """
    def ask(model_name, task):
        conversation = [
            {"role": "system", "content": "You are an expert data analyst."},
            {"role": "user", "content": task["prompt"]},
        ]
        # No token cap is passed: max_tokens=800 was tried and the maximum
        # does not work for some models.
        outcome = call_model(model_name, conversation)
        outcome.task_id = task["id"]
        return outcome

    return [
        ask(model_name, task)
        for model_name in ANALYTICAL_MODELS
        for task in ANALYTICAL_TASKS
    ]

# Run the MIMIC tasks (the current ANALYTICAL_TASKS) on every model and show
# how many results came back.
results_analytical = run_analytical_tasks()
len(results_analytical)


6

In [98]:
# One row per run, one column per RunResult field; preview the first rows
df_analytical = pd.DataFrame([vars(run) for run in results_analytical])
df_analytical.head()

Unnamed: 0,model,task_id,prompt,output,latency_s,input_tokens,output_tokens
0,gpt-5-chat,mimic_eda_missing,\nYou are a senior data analyst working with I...,Let‚Äôs address each of your tasks systematicall...,27.254901,23630,1579
1,gpt-5-chat,mimic_modeling,\nYou are a senior data scientist helping to b...,Let‚Äôs address each of the five tasks systemati...,22.38243,2278,1408
2,o4-mini,mimic_eda_missing,\nYou are a senior data analyst working with I...,1) Overall structure \n- 20885 ICU stays (row...,10.047299,23629,1083
3,o4-mini,mimic_modeling,\nYou are a senior data scientist helping to b...,1) Baseline modeling approach \n‚Ä¢ Logistic re...,8.65536,2277,1075
4,DeepSeek-V3.1,mimic_eda_missing,\nYou are a senior data analyst working with I...,### 1) Dataset Structure and Variable Types\n\...,18.056446,20615,1065


In [99]:
# MIMIC: save the results to an Excel workbook, without the index column
df_analytical.to_excel("analytical_results_mimic.xlsx", index=False)


### Titanic Task

In [100]:
# Titanic prompts. This rebinds ANALYTICAL_TASKS, which run_analytical_tasks
# reads at call time. The prompts are f-strings, so titanic_sample,
# titanic_missing, titanic_summary and titanic_target_dist must already be
# defined; their text is fixed when this cell runs.
ANALYTICAL_TASKS = [

##################################################################################################################################
################################## TASK 1. Missing values and EDA in Titanic dataset

    {
        "id": "titanic_missing",
        "description": "Missing values and basic EDA in Titanic survival dataset",
        # The row count stated in the prompt must match titanic_sample, which
        # is built from the first 100 rows (the text previously said 5).
        "prompt": f"""
You are a data analyst working with the Titanic passenger dataset.

Here is a sample of the data (first 100 rows):

{titanic_sample}

Here is the count of missing values per column:

{titanic_missing}

Here is a summary of basic statistics for numeric variables:

{titanic_summary}

Here is the distribution of the target 'survived' (proportion):

{titanic_target_dist}

Tasks:
1) Identify the most relevant variables with missing values (e.g., age, cabin, embarked) and describe how they might affect the analysis.
2) Propose at least two different imputation strategies for the 'age' variable and discuss pros and cons of each.
3) Propose a reasonable way to handle the 'cabin' variable given that it has many missing values and a large number of categories.
4) Suggest 2‚Äì3 visualizations that would help understand the relationship between survival and key variables (e.g., sex, class, age).
"""
    },



##################################################################################################################################
################################## TASK 2. Modeling design for Titanic dataset

    {
        "id": "titanic_modeling",
        "description": "Model design for predicting survival on Titanic dataset",
        "prompt": f"""
You are a senior data scientist helping to build a model that predicts passenger survival
on the Titanic dataset (target variable: 'survived').

You have the following information:

- Sample of the dataset:

{titanic_sample}

- Numeric summary:

{titanic_summary}

- Missing value counts:

{titanic_missing}

- Target distribution:

{titanic_target_dist}

Tasks:
1) Propose a baseline modeling approach (e.g., logistic regression) and one more flexible model (e.g., tree-based model) and explain the intuition behind both.
2) Explain how you would preprocess the data: encoding categorical variables (e.g., sex, class), handling missing values, and possibly engineering new features.
3) Suggest which evaluation metrics you would use and why (e.g., accuracy, F1-score, ROC-AUC).
4) Mention 2‚Äì3 model diagnostics or validation strategies you would apply (e.g., cross-validation, learning curves, calibration plots).
"""
    },
]


In [101]:
# Run the Titanic tasks (the current ANALYTICAL_TASKS) on every model and show
# how many results came back.
results_analytical = run_analytical_tasks()
len(results_analytical)

6

In [102]:
# One row per run, one column per RunResult field; save the Titanic results
# as CSV (without the index column) and preview the first rows.
df_analytical = pd.DataFrame([vars(run) for run in results_analytical])
df_analytical.to_csv("analytical_results_titanic.csv", index=False)
df_analytical.head()


Unnamed: 0,model,task_id,prompt,output,latency_s,input_tokens,output_tokens
0,gpt-5-chat,titanic_missing,\nYou are a data analyst working with the Tita...,Let's go step-by-step carefully.\n\n---\n\n## ...,29.840174,6256,1569
1,gpt-5-chat,titanic_modeling,\nYou are a senior data scientist helping to b...,Let's go step by step.\n\n---\n\n## **1) Basel...,26.915074,6255,1400
2,o4-mini,titanic_missing,\nYou are a data analyst working with the Tita...,1) Missing-data overview and implications \n ...,14.000849,6255,1420
3,o4-mini,titanic_modeling,\nYou are a senior data scientist helping to b...,1) Modeling approaches \n- Baseline: Logistic...,9.388242,6254,1206
4,DeepSeek-V3.1,titanic_missing,\nYou are a data analyst working with the Tita...,"Of course. As a data analyst, I'll address eac...",21.626806,5406,1526


# Chatbot

## Chat desde la línea de entrada del notebook

In [50]:
def mini_chat(model: str = "gpt-5-chat"):
    """
    Mini console chatbot backed by an Azure deployment.

    Keeps the conversation history and resends all of it on every turn.
    The loop ends when the user types 'salir', 'exit' or 'quit', or when
    input() is closed or interrupted. Blank lines are ignored instead of
    being sent to the model.

    Args:
        model: Name of the Azure deployment to chat with.
    """
    farewell = "Bot: ¬°Hasta luego! üëã"
    messages = [
        {
            "role": "system",
            "content": (
                "Eres un asistente experto en an√°lisis de datos, estad√≠stica, "
                "valores faltantes y series de tiempo. Responde SIEMPRE en espa√±ol."
            ),
        }
    ]

    print("=== Mini chatbot con Azure (modelo: {}) ===".format(model))
    print("Escribe 'salir', 'exit' o 'quit' para terminar.\n")

    while True:
        try:
            user = input("T√∫: ")
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted prompt ends the chat cleanly
            # instead of surfacing a traceback.
            print("\n" + farewell)
            break

        typed = user.strip()
        if typed.lower() in ("salir", "exit", "quit"):
            print(farewell)
            break
        if not typed:
            # Nothing to send: do not spend a request (or pollute the
            # history) with an empty user message.
            continue

        messages.append({"role": "user", "content": user})

        response = client.chat.completions.create(
            model=model,
            messages=messages,
            max_completion_tokens=300,  # adjust for shorter/longer answers
        )

        # content may be None; store "" so the history sent on the next turn
        # never carries a null assistant message.
        answer = response.choices[0].message.content or ""
        print(f"\nBot: {answer}\n")

        messages.append({"role": "assistant", "content": answer})


In [51]:
# Start the interactive chat; this cell blocks on input() until an exit word
# ('salir', 'exit' or 'quit') is typed.
mini_chat("gpt-5-chat")


=== Mini chatbot con Azure (modelo: gpt-5-chat) ===
Escribe 'salir', 'exit' o 'quit' para terminar.


Bot: ¬°Hola! üòä Todo muy bien, gracias. ¬øY t√∫ qu√© tal? ¬øEn qu√© tema relacionado con an√°lisis de datos o series de tiempo te gustar√≠a que te ayude hoy?


Bot: Buena pregunta üòä  

Funciono como un modelo de lenguaje entrenado con grandes vol√∫menes de texto. Eso me permite entender el contexto de lo que escribes y generar respuestas coherentes, explicaciones, ejemplos de c√≥digo, pasos de an√°lisis, etc.  

En t√©rminos simples:  

1. **Recibo tu entrada (texto)** ‚Üí analizo la estructura, el tema y lo que est√°s pidiendo.  
2. **Genero una representaci√≥n interna** del contexto, aplicando mis conocimientos (por ejemplo, estad√≠stica, imputaci√≥n de valores faltantes, modelado de series de tiempo, etc.).  
3. **Produzco una respuesta** en espa√±ol, procurando que sea clara, √∫til y ajustada a tu nivel de detalle.  

No tengo acceso a internet ni a bases de datos en tiempo re

## Chatbot desde la terminal

Para usar el chatbot desde la terminal, ejecuta el script **`chat_terminal.py`** desde la carpeta del proyecto excelexorcist con el siguiente comando:

`python chat_terminal.py`

## Chatbot desde streamlit

In [None]:
#!pip install streamlit openai python-dotenv



Para usar el chatbot con Streamlit, ejecuta el script **`app.py`** desde la carpeta del proyecto excelexorcist con el siguiente comando:

`streamlit run app.py`