# AstroLlama Quick Start

This notebook walks you through:
1. Setting up your environment
2. Testing the individual tools
3. Preparing training data
4. Running the agent with the base model
5. Fine-tuning your model
6. Using your fine-tuned model

## 1. Setup

In [None]:
# Install dependencies (run once)
!pip install -q langchain langchain-openai astroquery astropy photutils together

In [None]:
import os

# Every placeholder in this cell starts with this prefix.
PLACEHOLDER_PREFIX = "your-"


def set_api_key(name, value):
    """Store an API key in ``os.environ`` without clobbering a real one.

    Args:
        name: Environment variable to set.
        value: Key pasted into this cell, or the untouched placeholder.

    If ``value`` is still a placeholder and ``name`` already has a non-empty
    value in the environment, the existing value is kept. In every other case
    ``value`` is written, so a key pasted here always takes effect, including
    on re-runs of the cell.

    Returns:
        None, so the key is never echoed as the cell's output.
    """
    if value.startswith(PLACEHOLDER_PREFIX) and os.environ.get(name):
        return
    os.environ[name] = value


# Set your API keys (keys already exported in your shell are left alone)
set_api_key("TOGETHER_API_KEY", "your-together-api-key")  # Get from together.ai
set_api_key("ADS_DEV_KEY", "your-ads-token")  # Get from ui.adsabs.harvard.edu/user/settings/token

## 2. Test Individual Tools

In [None]:
# Test catalog query: cone search around M13 in the gaiadr3.gaia_source table
from astropy.coordinates import SkyCoord
from astroquery.gaia import Gaia

# Look up M13 by name and report its position
m13 = SkyCoord.from_name('M13')
print(f"M13 coordinates: RA={m13.ra.deg:.4f}, Dec={m13.dec.deg:.4f}")

# ADQL: at most 100 sources with phot_g_mean_mag < 18 inside a circle of
# radius 0.1 centred on the M13 position
query = f"""
SELECT TOP 100 source_id, ra, dec, phot_g_mean_mag, bp_rp
FROM gaiadr3.gaia_source
WHERE CONTAINS(
    POINT('ICRS', ra, dec),
    CIRCLE('ICRS', {m13.ra.deg}, {m13.dec.deg}, 0.1)
) = 1
AND phot_g_mean_mag < 18
"""

# Submit the query, then pull the result table out of the finished job
job = Gaia.launch_job(query)
result = job.get_results()
print(f"Found {len(result)} sources")

# Preview the first five rows as the cell output
result[:5]

In [None]:
# Test plotting: colour (bp_rp) against G magnitude for the Gaia query result
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(8, 10))
ax.scatter(result['bp_rp'], result['phot_g_mean_mag'], s=5, alpha=0.7)
ax.set(
    xlabel='BP - RP',
    ylabel='G magnitude',
    title='M13 Color-Magnitude Diagram',
)
# Flip the y-axis so smaller magnitude values sit at the top
ax.invert_yaxis()
plt.show()

In [None]:
# Test ADS search: run one literature query and print a short summary per paper
from astroquery.nasa_ads import ADS

# Client configuration: token from the environment, row cap, fields to fetch
ADS.TOKEN = os.getenv('ADS_DEV_KEY')
ADS.NROWS = 5
ADS.ADS_FIELDS = ['bibcode', 'title', 'author', 'year', 'citation_count']

results = ADS.query_simple('JWST exoplanet atmosphere year:2023-2025')

# Two lines per paper: truncated title, then first author / year / citations
for paper in results:
    print(f"- {paper['title'][0][:60]}...")
    print(f"  {paper['author'][0]} et al. ({paper['year']}) - {paper['citation_count']} citations\n")

## 3. Prepare Training Data

Your training data should be in JSONL format: one JSON object per line, each holding a `messages` list of chat turns (`system`, `user`, `assistant`):

In [None]:
import json
from pathlib import Path

# Example training data format: each example is a dict with a "messages" list
# of {"role", "content"} chat turns
training_examples = [
    {
        "messages": [
            {"role": "system", "content": "You are an expert astronomy research assistant."},
            {"role": "user", "content": "How do I select main sequence stars from a CMD?"},
            {"role": "assistant", "content": "To select main sequence stars from a color-magnitude diagram..."}
        ]
    },
    # Add more examples...
]

# Save to JSONL (one JSON object per line).
# Create the target directory first so a fresh checkout without
# ../data/training/ does not fail with FileNotFoundError.
training_path = Path('../data/training/my_training_data.jsonl')
training_path.parent.mkdir(parents=True, exist_ok=True)
# Explicit UTF-8 so the file does not depend on the platform's default encoding
with training_path.open('w', encoding='utf-8') as f:
    for example in training_examples:
        f.write(json.dumps(example) + '\n')

print(f"Saved {len(training_examples)} examples")

## 4. Run the Agent (using base model first)

In [None]:
# Quick test with Together.ai API (no fine-tuning yet)
import sys

from langchain_openai import ChatOpenAI
from langchain.agents import AgentExecutor, create_tool_calling_agent
# Import the prompt classes from langchain_core, where they are defined,
# rather than through the langchain.prompts re-export
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Import our tools: put the parent directory on the import path so the `src`
# package resolves. The membership check keeps re-runs of this cell from
# appending the same entry again.
if '..' not in sys.path:
    sys.path.append('..')
from src.tools.astronomy_tools import get_tools

# Initialize LLM (using base Llama first, switch to fine-tuned later).
# ChatOpenAI is pointed at Together's endpoint through base_url.
llm = ChatOpenAI(
    base_url="https://api.together.xyz/v1",
    api_key=os.environ["TOGETHER_API_KEY"],
    model="meta-llama/Llama-3.1-70B-Instruct",
    temperature=0.1,
)

# Get tools and list their names as a sanity check
tools = get_tools()
print(f"Available tools: {[t.name for t in tools]}")

In [None]:
# Create agent: system prompt + user input + scratchpad placeholder, wrapped
# in an executor
SYSTEM_PROMPT = """You are AstroLlama, an expert astronomy research assistant.
You can query catalogs, create plots, search literature, and analyze data.
Use the available tools to help answer questions."""

# Message order: system instructions, the user's question, then the
# agent_scratchpad placeholder
prompt_messages = [
    ("system", SYSTEM_PROMPT),
    ("human", "{input}"),
    MessagesPlaceholder(variable_name="agent_scratchpad"),
]
prompt = ChatPromptTemplate.from_messages(prompt_messages)

agent = create_tool_calling_agent(llm, tools, prompt)
# verbose=True turns on verbose output; max_iterations=10 caps the run
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True, max_iterations=10)

In [None]:
# Test the agent! Ask for a catalog query plus a plot in a single request.
cmd_request = {
    "input": "Query Gaia DR3 for sources within 5 arcmin of M31 and create a CMD"
}
response = agent_executor.invoke(cmd_request)

# Blank line, a 60-character rule, a header, then the agent's final answer
print("\n" + "=" * 60, "RESPONSE:", response["output"], sep="\n")

In [None]:
# Another test: literature search
literature_request = {
    "input": "Find recent papers about stellar streams in the Milky Way halo and give me the citations"
}
response = agent_executor.invoke(literature_request)

# Show only the final answer
print(response["output"])

## 5. Fine-tune Your Model

Once you have enough training data (recommended: 500-5000 examples), fine-tune:

In [None]:
# Option A: Fine-tune using Together.ai (easiest)
# Runs the project's fine_tune.py script with the --together-finetune flag.
# NOTE(review): --train-file points at ../data/training/combined_train.jsonl,
# while the "Prepare Training Data" cell writes
# ../data/training/my_training_data.jsonl. Confirm combined_train.jsonl exists,
# or pass the path of your own file instead.
!python ../scripts/fine_tune.py --together-finetune --train-file ../data/training/combined_train.jsonl

In [None]:
# Option B: Fine-tune locally with QLoRA (requires 2x A100 or similar)
# !python ../scripts/fine_tune.py --local

## 6. Use Your Fine-tuned Model

In [None]:
# After fine-tuning completes, update the model name
FINE_TUNED_MODEL = "your-username/astro-llama-70b"  # Replace with your model

# Same endpoint and sampling settings as the base-model client; only the
# model name differs
llm_finetuned = ChatOpenAI(
    base_url="https://api.together.xyz/v1",
    api_key=os.environ["TOGETHER_API_KEY"],
    model=FINE_TUNED_MODEL,
    temperature=0.1,
)

# Recreate agent with fine-tuned model, reusing the tools and prompt built for
# the base-model agent. max_iterations=10 matches the base-model executor so
# both agents run under the same iteration cap.
agent_ft = create_tool_calling_agent(llm_finetuned, tools, prompt)
agent_executor_ft = AgentExecutor(
    agent=agent_ft,
    tools=tools,
    verbose=True,
    max_iterations=10,
)

## Cost Estimates

| Task | Estimated Cost |
|------|---------------|
| Fine-tuning (Together.ai, 1000 examples) | ~$50-100 |
| API usage (1000 queries/month) | ~$2-5 |
| Total first month | ~$52-105 |