# Coding Agents with smolagents and Gemini Flash

## Setup

### Libraries

In [3]:
import os
from getpass import getpass

from smolagents import CodeAgent, DuckDuckGoSearchTool, LiteLLMModel

### Gemini API Key

In [1]:
# Keep a Gemini key that is already exported in the environment; otherwise
# prompt for it without echoing, so the secret is never saved in the notebook
# source and a real key is never overwritten by a placeholder string.
if not os.environ.get("GEMINI_API_KEY"):
    os.environ["GEMINI_API_KEY"] = getpass("Gemini API key: ")

In [4]:
# Gemini 1.5 Flash, addressed through LiteLLM's "gemini/" model prefix.
# NOTE(review): presumably LiteLLM reads GEMINI_API_KEY from the environment
# for this provider -- confirm against the LiteLLM docs.
model = LiteLLMModel(
    model_id="gemini/gemini-1.5-flash",
)

In [14]:
# Alternative backend: Llama 3.2 addressed through the "ollama/" model prefix.
# Uncomment the line below to rebind `model` to it instead of Gemini.
# model = LiteLLMModel(model_id="ollama/llama3.2")


## Agent Creation

In [5]:
# Define the Feature Selection Agent.
# `tools` must contain Tool *instances*: the original passed the bare
# DuckDuckGoSearchTool class, which the agent cannot invoke as a tool.
feature_selection_agent = CodeAgent(
    tools=[DuckDuckGoSearchTool()],  # search the internet if necessary
    additional_authorized_imports=[  # packages the agent's generated code may import
        "pandas",
        "statsmodels",
        "sklearn",
        "numpy",
        "json",
    ],
    model=model,  # model set above
)


### Set Task Prompt

In [6]:
# Task prompt handed verbatim to the agent. It asks the agent to load the
# sklearn Diabetes dataset into a DataFrame, make a 75%/25% train/test split,
# fit linear regression models, search combinations of up to 3 predictors for
# the lowest test RMSE, and report the top 5 models as a table.
# The string is runtime input to the LLM: any edit to its wording changes
# what the agent is asked to do.
task = """
1. Load the Diabetes dataset from the 'sklearn' library using the following code:
        from sklearn.datasets import load_diabetes
        import pandas as pd

        # Load the dataset
        data, target = load_diabetes(return_X_y=True, as_frame=False)

        # Create a DataFrame
        df = pd.DataFrame(data, columns=load_diabetes().feature_names)
        df['target'] = target
2. Split data with a train/test split of 75%/25%
3. Create a linear regression model on the training data predicting the target variable using the "sklearn" or "statsmodels" library.
4. Execute on a strategy of combination of up to 3 predictors that attains the lowest root mean square error (RMSE) on the testing data. 
   (You can't use the target variable).
5. Use feature engineering as needed to improve model performance.
6. Based on the lowest RMSE of each model for the testing data, provide a final list of predictors for the top 5 models
7. Output as a table
"""

## Execute the agent and task

In [None]:
# Run the agent on the task prompt and keep whatever `run` returns in `result`
# (presumably the agent's final answer -- confirm against the smolagents docs).
result = feature_selection_agent.run(task)