In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
import torch
from torch.utils.tensorboard import SummaryWriter
import time
%load_ext tensorboard

#from "Tools.py" import 
# Notebook-wide random seed; used below as `random_state` for the shuffled KFold split.
seed=42
class ANN:
    """Scikit-learn style wrapper that trains a torch model for regression.

    The wrapper standardises inputs and targets with two ``StandardScaler``
    instances, optimises the model with Adam on a mean-squared-error loss and
    records the mean mini-batch loss of every epoch.

    Parameters
    ----------
    model : torch module to train; it is cast to ``dtype`` and moved to ``device``.
    seed : int passed to ``torch.manual_seed`` at the start of ``fit``.
    n_epoch : number of passes over the training data.
    batch_size : mini-batch size used for both training and evaluation.
    lr : Adam learning rate.
    device : device the model and the mini-batches are moved to.
    dtype : torch dtype used for the model parameters and the data tensors.

    Attributes
    ----------
    losses : list with the mean training loss of each epoch (scaled target units).
    losses_test : ``None`` until ``fit`` is called with ``test=True``; then a list
        with the mean evaluation loss of each epoch.
    """

    def __init__(self,model,seed=0,n_epoch=10,batch_size=64,lr=0.001, device = "cpu",dtype = torch.float):
        self.seed = seed
        self.dtype = dtype
        self.device = device
        self.batch_size = batch_size
        self.lr = lr
        self.n_epoch = n_epoch
        self.scale_x = StandardScaler()
        self.scale_y = StandardScaler()
        self.losses = []
        self.losses_test = None
        # Cast/move the model first, then hand its parameters to the optimiser.
        self.model = model.to(dtype).to(device)
        self.optim = torch.optim.Adam(self.model.parameters(),lr=self.lr)
        self.loss_fn = torch.nn.MSELoss(reduction="mean")

    def _to_tensor(self, array):
        """Convert an array-like to a tensor of the wrapper's dtype."""
        return torch.as_tensor(array,dtype=self.dtype)

    def fit(self, x, y,test=False,x_test=None,y_test=None):
        """Fit the scalers and train the model for ``n_epoch`` epochs.

        Parameters
        ----------
        x : 2-D array-like of features.
        y : 1-D array-like of targets.
        test : when True, the model is also evaluated on ``x_test``/``y_test``
            after every epoch and the losses are stored in ``losses_test``.
        x_test, y_test : evaluation data; required when ``test`` is True.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``test`` is True but ``x_test`` or ``y_test`` is missing.
        """
        if test and (x_test is None or y_test is None):
            raise ValueError("x_test and y_test are required when test=True")

        # Call the seeding function. Assigning to `torch.manual_seed` would
        # replace the function with an int and seed nothing.
        torch.manual_seed(self.seed)

        x_ = self._to_tensor(self.scale_x.fit_transform(np.asarray(x)))
        y_ = self._to_tensor(
            self.scale_y.fit_transform(np.asarray(y).reshape(-1, 1)).reshape(-1)
        )

        # Only the graph is logged, so close the writer right away to make
        # sure the event file is flushed to disk.
        writer = SummaryWriter("runs/fit")
        try:
            writer.add_graph(self.model,x_.to(self.device))
        finally:
            writer.close()

        if test:
            self.losses_test = []
            # Evaluation data is scaled with the statistics of the training data.
            x_t = self._to_tensor(self.scale_x.transform(np.asarray(x_test)))
            y_t = self._to_tensor(
                self.scale_y.transform(np.asarray(y_test).reshape(-1, 1)).reshape(-1)
            )

        for _ in range(self.n_epoch):
            self._train(x_,y_)
            if test:
                self._test(x_t,y_t)
        return self

    def _train(self,x,y):
        """Run one training epoch over shuffled mini-batches and record its mean loss."""
        self.model.train()
        train = torch.utils.data.TensorDataset(x,y)
        train_loader = torch.utils.data.DataLoader(dataset=train,batch_size=self.batch_size,shuffle=True)
        mini = []
        for x_batch,y_batch in train_loader:
            x_batch = x_batch.to(self.device)
            y_batch = y_batch.to(self.device)
            yhat = self.model(x_batch)
            # Flatten the model output so it lines up with the 1-D target batch.
            loss = self.loss_fn(yhat.view(len(yhat)),y_batch)
            mini.append(loss.item())
            loss.backward()
            self.optim.step()
            self.optim.zero_grad()
        self.losses.append(np.mean(mini))
        return

    def _test(self,x,y):
        """Evaluate the model on ``x``/``y`` without gradients and record the mean loss."""
        self.model.eval()
        test = torch.utils.data.TensorDataset(x,y)
        test_loader = torch.utils.data.DataLoader(dataset=test,batch_size=self.batch_size,shuffle=False)
        mini = []
        with torch.no_grad():
            for x_batch,y_batch in test_loader:
                x_batch = x_batch.to(self.device)
                y_batch = y_batch.to(self.device)
                yhat = self.model(x_batch)
                loss = self.loss_fn(yhat.view(len(yhat)),y_batch)
                mini.append(loss.item())
        self.losses_test.append(np.mean(mini))
        return

    def predict(self, x):
        """Predict targets for ``x`` in the original (unscaled) target units.

        If the wrapped model defines its own ``predict`` method the call is
        delegated to it unchanged, as before. Otherwise ``x`` is scaled with
        the fitted feature scaler, passed through the model without gradients
        and the output is mapped back with the fitted target scaler.

        Returns a 1-D numpy array with one prediction per row of ``x``.
        """
        custom_predict = getattr(self.model, "predict", None)
        if callable(custom_predict):
            return custom_predict(x)

        inputs = self._to_tensor(self.scale_x.transform(np.asarray(x))).to(self.device)
        self.model.eval()
        with torch.no_grad():
            scaled = self.model(inputs)
        scaled = scaled.reshape(-1, 1).cpu().numpy()
        return self.scale_y.inverse_transform(scaled).reshape(-1)

    def predict_proba(self,x):
        """Delegate to the wrapped model's ``predict_proba``.

        Raises AttributeError when the wrapped model does not define it.
        """
        return self.model.predict_proba(x)

### load dataset

In [2]:
# Read the raw table and keep only the rows whose target value is present.
banco = pd.read_csv("Life_Expectancy_Data.csv")
has_target = banco["Life expectancy "].notna()
# Features: infant deaths and year; target: life expectancy.
x = banco.loc[has_target, ["infant deaths","Year"]]
y = banco.loc[has_target, "Life expectancy "]

### create cross-validation folds

In [3]:
# Build 10 shuffled cross-validation folds; position k of every list below
# holds the train/test part of fold k.
cv = KFold(random_state=seed, shuffle=True, n_splits=10)
cross = cv.split(x)
x_train, x_test, y_train, y_test = [], [], [], []
for train_i, test_i in cross:
    for bucket, data, rows in (
        (x_train, x, train_i),
        (x_test, x, test_i),
        (y_train, y, train_i),
        (y_test, y, test_i),
    ):
        bucket.append(data.iloc[rows])

### define and train model

In [4]:
# Train on the first cross-validation fold and evaluate on its held-out part.
# The fold is bound to new names so the full `x`/`y` from the loading cell stay
# intact and the fold-building cell can be re-run without restarting the kernel.
x_fold, y_fold = x_train[0], y_train[0]
model = torch.nn.Linear(2,1)  # two input features -> one regression output
# Pass the notebook-wide seed explicitly; otherwise ANN falls back to its default seed=0.
ann = ANN(model,seed=seed,device="cpu",dtype=torch.float,n_epoch=1000,lr=0.0001)
start = time.perf_counter()  # monotonic clock, intended for measuring elapsed time
a = ann.fit(x_fold,y_fold,test=True,x_test=x_test[0],y_test=y_test[0])
tempo = time.perf_counter() - start

In [5]:
# Elapsed seconds measured around the ann.fit(...) call in the training cell.
tempo

43.942845821380615

In [93]:
# NOTE(review): repeats the previous `tempo` display; its execution count is out of
# sequence, so the recorded value likely comes from a different run — consider deleting.
tempo

100.00889277458191

0.009779499999240215

In [88]:
import timeit  # local import: the setup cell does not import timeit, so a fresh kernel would raise NameError here
# NOTE(review): called without arguments this only times the default `pass` statement;
# looks like leftover scratch — consider removing the cell.
timeit.timeit()

0.014215900000635884

In [7]:
# Display the torch module held by the fitted wrapper (`a` is the object returned by ann.fit).
# NOTE(review): the NameError recorded below suggests this cell was executed before the
# training cell had defined `a` — re-run top to bottom.
a.model

NameError: name 'a' is not defined

In [98]:
# ANN does not store the training data as attributes (`a.x` / `a.y` do not exist),
# so rebuild the scaled first-fold tensors with the scalers fitted in `fit` and
# evaluate the MSE loss on them (scaled target units).
x_eval = torch.as_tensor(a.scale_x.transform(np.array(x_train[0])),dtype=a.dtype).to(a.device)
y_eval = torch.as_tensor(
    a.scale_y.transform(np.array(y_train[0]).reshape(-1, 1)).reshape(-1),
    dtype=a.dtype,
).to(a.device)
a.model.eval()
with torch.no_grad():
    train_loss = a.loss_fn(a.model(x_eval).view(len(x_eval)),y_eval).item()
train_loss

0.3163611590862274

NameError: name 'tempfile' is not defined

In [7]:
# Open the TensorBoard UI on the `runs` directory; ANN.fit writes its SummaryWriter logs to runs/fit.
%tensorboard --logdir runs

Reusing TensorBoard on port 6006 (pid 5692), started 19:45:57 ago. (Use '!kill 5692' to kill it.)