In [None]:
import torch
import math
import os
import matplotlib.pyplot as plt
from torch import optim
import torch._dynamo
from torchvision import models
from torch.profiler import profile, record_function, ProfilerActivity

pi = math.pi
# Always a torch.device (never a bare string), so downstream code sees one
# consistent type whether or not a GPU is present.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
def sin_taylor(x, n, device):
    """Approximate sin(x) with the first ``n`` terms of its Maclaurin series.

    Computes ``sum_{i=0}^{n-1} (-1)**i * x**(2*i+1) / (2*i+1)!`` elementwise.

    Args:
        x: Input tensor (radians).
        n: Number of series terms to accumulate; ``n == 0`` yields zeros.
        device: Kept for backward compatibility with existing callers. The
            coefficients are plain Python scalars now, so no device-side
            tensor is allocated and this argument is not used.

    Returns:
        Tensor with the same shape and dtype as ``x``.
    """
    # Start from a tensor so that n == 0 still returns a tensor, not a float.
    sinx = torch.zeros_like(x)
    for i in range(n):
        k = 2 * i + 1
        # int / int true division: exact factorial, correctly rounded once,
        # and it underflows to 0.0 for very large k instead of overflowing.
        coeff = (-1) ** i / math.factorial(k)
        sinx = sinx + coeff * x ** k
    return sinx

In [None]:
import matplotlib.pyplot as plt
# 3x3 grid of panels; panel number i shows the i-term approximation on [0, 2*pi].
fig, ax = plt.subplots(3, 3, figsize=(8, 7))
rads = torch.linspace(0, 2 * pi, 100)
ax = list(ax.flat)  # row-major flattening of the 3x3 axes array

for i, panel in enumerate(ax, start=1):
    # Evaluate on the compute device, then bring the result back for matplotlib.
    approximation = sin_taylor(rads.to(device), i, device).cpu()
    panel.plot(rads, approximation, label=i)
    panel.set_title(f'sin(x) Taylor series terms: {i}')
    panel.title.set_fontsize(8)
    panel.set_xlabel('radians (0-2*pi)')
    panel.set_ylabel('sin(x)')
    panel.set_xlim([0, 7])
    panel.set_ylim([-1.2, 1.2])
    panel.grid()
fig.tight_layout()

In [None]:
torch.manual_seed(0)
# Create the tensor directly on the target device so that `x` itself is the
# autograd leaf. `torch.rand(..., requires_grad=True).to(device)` returns a
# non-leaf copy whenever the device differs from CPU: `x.grad` then stays
# unset and each backward pass also moves gradients back to the hidden CPU
# leaf, which pollutes the timings below.
x = torch.rand(10_000_000, device=device, requires_grad=True)

n = 10
# Fixed-order Taylor model used by the eager and compiled benchmarks.
model = lambda x: sin_taylor(x, n, device)

In [None]:
%time out = model(x).sum().backward()

In [None]:
# Clear Dynamo's compile caches so the model is compiled from scratch here.
torch._dynamo.reset()
trace_options = {'trace.enabled': True, 'trace.graph_diagram': True}
compiled_model = torch.compile(model, options=trace_options)

# First call triggers compilation of forward and backward; later calls reuse it.
warmup_loss = compiled_model(x).sum()
out = warmup_loss.backward()

In [None]:
%time out = compiled_model(x).sum().backward()

In [None]:
from torch.profiler import profile, record_function, ProfilerActivity
# with_stack=True makes the profiler record source locations; without it
# `group_by_stack_n` below has no stack information to group on.
with profile(
    activities=[ProfilerActivity.CUDA, ProfilerActivity.CPU],
    with_stack=True,
) as prof:
    out = model(x).sum().backward()

print(prof.key_averages(group_by_stack_n=5).table(sort_by="self_cuda_time_total", row_limit=5))

prof.export_chrome_trace("no_compile_trace.json")

In [None]:
# Clear Dynamo's compile caches and rebuild the compiled model.
torch._dynamo.reset()
trace_options = {'trace.enabled': True, 'trace.graph_diagram': True}
compiled_model = torch.compile(model, options=trace_options)

# Forward-only warm-up call so the profiling run below does not include compilation.
out = compiled_model(x)

In [None]:
from torch.profiler import profile, record_function, ProfilerActivity
# Profile GPU kernels when a GPU is present. On the CPU fallback a CUDA-only
# activity list would leave the profiler with nothing to record, so profile
# CPU ops (and sort by CPU time) instead.
cuda_available = torch.cuda.is_available()
profiled_activities = [ProfilerActivity.CUDA] if cuda_available else [ProfilerActivity.CPU]
sort_key = "self_cuda_time_total" if cuda_available else "self_cpu_time_total"

with profile(activities=profiled_activities) as prof:
    out = compiled_model(x)

# No stack grouping: this profiler is not created with with_stack=True, so
# `group_by_stack_n` would have nothing to group on.
print(prof.key_averages().table(sort_by=sort_key, row_limit=-1))

prof.export_chrome_trace("compiled_trace.json")

In [None]:
import torch._dynamo
from torch.fx.passes.graph_drawer import FxGraphDrawer
from torch._functorch.aot_autograd import aot_module_simplified

def inspect_backend(gm, sample_inputs):
    """Pass-through compile backend that prints the captured graph.

    Args:
        gm: Graph module handed over by the compiler; must provide
            ``print_readable()`` and ``forward``.
        sample_inputs: Example inputs supplied by the compiler (unused).

    Returns:
        ``gm.forward`` unchanged, so the captured graph runs as-is.
    """
    gm.print_readable()
    passthrough = gm.forward
    return passthrough

# Clear Dynamo's compile caches so the custom backend is invoked for this run.
torch._dynamo.reset()
compiled_model = torch.compile(model, backend=inspect_backend)

# Running forward + backward triggers compilation (and thus the graph printout).
printed_run_loss = compiled_model(x).sum()
out = printed_run_loss.backward()


In [None]:
import torch._dynamo
from torch.fx.passes.graph_drawer import FxGraphDrawer
from torch._functorch.aot_autograd import aot_module_simplified

def inspect_backend(gm, sample_inputs):
    """Compile backend that routes the graph through AOTAutograd and dumps
    the resulting forward and backward graphs.

    Each graph is printed via ``print_readable()`` and rendered with
    ``FxGraphDrawer`` to ``forward.svg`` / ``backward.svg`` in the current
    working directory; the graphs themselves are returned unmodified.

    Args:
        gm: Graph module handed over by the compiler.
        sample_inputs: Example inputs, forwarded to ``aot_module_simplified``.

    Returns:
        Whatever ``aot_module_simplified`` returns for the given graph.
    """
    def _dumping_compiler(svg_path):
        # Builds a compiler callback that prints the graph, writes it to
        # `svg_path` as SVG, and hands back the graph's own forward.
        def _compile(graph_module, example_inputs):
            graph_module.print_readable()
            drawer = FxGraphDrawer(graph_module, 'fn')
            with open(svg_path, "wb") as svg_file:
                svg_file.write(drawer.get_dot_graph().create_svg())
            return graph_module.forward
        return _compile

    # Invoke AOTAutograd
    return aot_module_simplified(
        gm,
        sample_inputs,
        fw_compiler=_dumping_compiler("forward.svg"),
        bw_compiler=_dumping_compiler("backward.svg"),
    )

# Clear Dynamo's compile caches so the AOTAutograd-based backend is invoked afresh.
torch._dynamo.reset()
compiled_model = torch.compile(model, backend=inspect_backend)

# Forward + backward so that both the forward and the backward compiler run.
aot_run_loss = compiled_model(x).sum()
out = aot_run_loss.backward()
