# Applying Operations

Most basic Python operations and torch operations can be applied to proxies; each one is added to the computation graph.

In this example we compute the sum of the hidden states and add it back to the hidden states themselves (purely for illustration). By saving each intermediate step, we can see how the values change.

In [None]:
from nnsight import LanguageModel
import torch 

# Load GPT-2 through nnsight's LanguageModel wrapper, placed on the GPU.
model = LanguageModel('gpt2', device_map='cuda')

# Generate a single new token; operations written inside the invoke block
# act on proxies, and .save() keeps a proxy's value readable afterwards.
with model.generate(max_new_tokens=1) as generator:
    with generator.invoke('The Eiffel Tower is in the city of') as invoker:
        # Hidden states: element 0 of the last transformer block's output.
        hidden_states_pre = model.transformer.h[-1].output[0].save()

        # Sum of every hidden-state entry (printed below as a scalar tensor).
        hs_sum = torch.sum(hidden_states_pre).save()

        # Add that sum to the hidden states and save the result in one step.
        hs_edited = (hidden_states_pre + hs_sum).save()

In [2]:
# Show each saved value on its own line: the original hidden states,
# their sum, and the hidden states after the sum was added.
print(hidden_states_pre.value, hs_sum.value, hs_edited.value, sep='\n')

tensor([[[ 0.0505, -0.1728, -0.1690,  ..., -1.0096,  0.1280, -1.0687],
         [ 8.7494,  2.9057,  5.3024,  ..., -8.0418,  1.2964, -2.8677],
         [ 0.2960,  4.6686, -3.6642,  ...,  0.2391, -2.6064,  3.2263],
         ...,
         [ 2.1537,  6.8917,  3.8651,  ...,  0.0588, -1.9866,  5.9188],
         [-0.4460,  7.4285, -9.3065,  ...,  2.0528, -2.7947,  0.5556],
         [ 6.6286,  1.7258,  4.7969,  ...,  7.6714,  3.0683,  2.0481]]],
       device='cuda:0')
tensor(501.2947, device='cuda:0')
tensor([[[501.3452, 501.1219, 501.1257,  ..., 500.2851, 501.4227, 500.2260],
         [510.0441, 504.2004, 506.5971,  ..., 493.2528, 502.5910, 498.4269],
         [501.5907, 505.9633, 497.6305,  ..., 501.5338, 498.6883, 504.5210],
         ...,
         [503.4483, 508.1864, 505.1598,  ..., 501.3535, 499.3081, 507.2135],
         [500.8487, 508.7232, 491.9882,  ..., 503.3475, 498.5000, 501.8503],
         [507.9232, 503.0205, 506.0916,  ..., 508.9661, 504.3629, 503.3428]]],
       device='cuda:0')