# 02 Linear Q agent

#### 👉 Let's use a linear model to approximate the Q-value function.

![linear_model.jpg](attachment:linear_model.jpg)

In [None]:
%load_ext autoreload
%autoreload 2
%pylab inline
%config InlineBackend.figure_format = 'svg'

## Environment 🌎

In [None]:
import gymnasium as gym

# Environment instance shared by the seeding, agent, training and evaluation
# cells that follow.
env = gym.make(id='CartPole-v1')

## Hyperparameters

In [None]:
# Hyper-parameter set the author labels as "bad"
# (presumably a poorly performing configuration -- TODO confirm).
# Every entry is forwarded to the agent constructor as a keyword argument.
hparams = dict(
    learning_rate=9.214691713730938e-05,
    discount_factor=0.99,
    batch_size=32,
    memory_size=10000,
    freq_steps_train=16,
    freq_steps_update_target=1000,
    n_steps_warm_up_memory=1000,
    n_gradient_steps=1,
    max_grad_norm=1,
    normalize_state=True,
    epsilon_start=0.9,
    epsilon_end=0.08474621504763855,
    steps_epsilon_decay=100000,
)

# Seed value handed to the seeding helper in the next section.
SEED = 4291201652

## ⚠️ Fix random seeds

In [None]:
from src.utils import set_seed
# Seeding happens before the agent is created and trained below.
# NOTE(review): presumably seeds the environment and the RNG libraries used
# by the project code -- confirm against src/utils.py.
set_seed(env, SEED)

## QAgent

In [None]:
from src.q_agent import QAgent
# Each key of `hparams` is unpacked into a keyword argument of the constructor,
# so the dictionary keys must match the parameter names QAgent accepts.
agent = QAgent(env, **hparams)

## Train the agent 🏋️

In [None]:
from src.loops import train
# Run the training loop on `agent` and `env` for 2000 episodes.
# NOTE(review): the result is not assigned, so `agent` is presumably updated
# in place -- confirm against src/loops.py.
train(agent, env, n_episodes=2000)

## Evaluate the agent ⏱️

In [None]:
from src.loops import evaluate

# Evaluate the trained agent over 1000 episodes and keep both returned
# sequences for the analysis cells below.
# NOTE(review): epsilon=0.01 is presumably the exploration rate used while
# evaluating -- confirm against src/loops.py.
rewards, steps = evaluate(agent, env, n_episodes=1000, epsilon=0.01)

In [None]:
# Import numpy explicitly so this cell does not depend on a name injected
# into the namespace by a notebook magic.
import numpy as np

# Convert the evaluation rewards once and reuse the array for both statistics.
rewards_arr = np.asarray(rewards)
reward_avg = rewards_arr.mean()
reward_std = rewards_arr.std()
print(f'Reward average {reward_avg:.2f}, std {reward_std:.2f}')

## Let's see how far we got in each attempt

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Histogram (100 bins) of the rewards returned by the evaluation run.
fig, ax = plt.subplots(figsize=(10, 4))
# Draw on the axes created above explicitly rather than relying on pyplot's
# implicit "current axes" state.
pd.Series(rewards).plot(kind='hist', bins=100, ax=ax)
ax.set_title("Rewards")
# Label the x axis so the figure can be read on its own.
ax.set_xlabel("Reward")

plt.show()