# work flow

In [49]:
import torch
from torch import nn  
import matplotlib.pyplot as plt
import pandas as pd 
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
import torch.utils.data as data_utils



In [50]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [51]:
df = pd.read_csv("minMaxScaled.csv",index_col=0)

In [52]:
df.shape

(692500, 54)

In [53]:
columns = list(df.columns)
output_columns = ["RENDIMIENTO_GLOBAL"]
input_columns = list(filter(lambda x: not 'RENDIMIENTO_GLOBAL' in x, columns))
input_columns2 = list(filter(lambda x: not 'ESTU_PRGM_DEPARTAMENTO' in x, columns))

salida = pd.get_dummies(df["RENDIMIENTO_GLOBAL"],dtype=float)

In [54]:
len(input_columns2)

23

In [55]:
salida

Unnamed: 0,0,1,2,3
0,0.0,0.0,1.0,0.0
1,1.0,0.0,0.0,0.0
2,1.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0
4,0.0,1.0,0.0,0.0
...,...,...,...,...
692495,0.0,0.0,1.0,0.0
692496,1.0,0.0,0.0,0.0
692497,0.0,1.0,0.0,0.0
692498,1.0,0.0,0.0,0.0


In [56]:
X = torch.from_numpy(df[input_columns2].to_numpy()).type(torch.float).to(device)
y = torch.from_numpy(df[output_columns].to_numpy()).type(torch.int64).to(device)

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.01,random_state=0)

In [57]:
len(X_test),len(X_train),X_train.shape,X_test.shape

(6925, 685575, torch.Size([685575, 23]), torch.Size([6925, 23]))

In [58]:
len(y_test),len(y_train),y_train.shape,y_test.shape

(6925, 685575, torch.Size([685575, 1]), torch.Size([6925, 1]))

In [59]:
class NeuralNetwork(nn.Module):
    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.layer1 = nn.Linear(in_features=23,out_features=256)
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(256, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 4)
            
        )

    def forward(self, x):
        x = self.layer1(x)
        logits = self.linear_relu_stack(x)
        return logits
    
model = NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (layer1): Linear(in_features=23, out_features=256, bias=True)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=256, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=4, bias=True)
  )
)


In [61]:
X_test.device

device(type='cuda', index=0)

In [62]:
logits = model(X_test)
pred_probab = nn.Softmax(dim=1)(logits)
y_pred = pred_probab.argmax(1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([2, 2, 2,  ..., 2, 0, 2], device='cuda:0')


# parametros

In [63]:
learning_rate = 1e-4
batch_size = 64
epochs = 5

In [64]:
def train_loop(dataloader, model, loss_fn, optimizer,print_status=True):
    size = len(dataloader.dataset)
    # Set the model to training mode - important for batch normalization and dropout layers
    # Unnecessary in this situation but added for best practices
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if print_status:
            if batch % 100 == 0:
                loss, current = loss.item(), batch * batch_size + len(X)
                print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")


def test_loop(dataloader, model, loss_fn):
    # Set the model to evaluation mode - important for batch normalization and dropout layers
    # Unnecessary in this situation but added for best practices
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    # Evaluating the model with torch.no_grad() ensures that no gradients are computed during test mode
    # also serves to reduce unnecessary gradient computations and memory usage for tensors with requires_grad=True
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [65]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    model.parameters(), 
    lr=learning_rate
)

In [66]:
training_data = data_utils.TensorDataset(X_train, y_train.squeeze())
test_data = data_utils.TensorDataset(X_test, y_test.squeeze())

In [67]:
train_dataloader = DataLoader(training_data,batch_size=batch_size)
test_dataloader = DataLoader(test_data,batch_size=batch_size)

# entrenamiento

In [68]:

for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer,False)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 1.390679  [   64/685575]
loss: 1.391221  [ 6464/685575]
loss: 1.384365  [12864/685575]
loss: 1.385627  [19264/685575]
loss: 1.379904  [25664/685575]
loss: 1.386107  [32064/685575]
loss: 1.385657  [38464/685575]
loss: 1.383516  [44864/685575]
loss: 1.382398  [51264/685575]
loss: 1.383873  [57664/685575]
loss: 1.372079  [64064/685575]
loss: 1.374161  [70464/685575]
loss: 1.376898  [76864/685575]
loss: 1.372988  [83264/685575]
loss: 1.373368  [89664/685575]
loss: 1.371170  [96064/685575]
loss: 1.368421  [102464/685575]
loss: 1.374478  [108864/685575]
loss: 1.366612  [115264/685575]
loss: 1.375428  [121664/685575]
loss: 1.358769  [128064/685575]
loss: 1.365885  [134464/685575]
loss: 1.370551  [140864/685575]
loss: 1.361796  [147264/685575]
loss: 1.364510  [153664/685575]
loss: 1.364506  [160064/685575]
loss: 1.371815  [166464/685575]
loss: 1.358721  [172864/685575]
loss: 1.347793  [179264/685575]
loss: 1.348948  [185664/685575]
loss: 1.352862  

In [69]:
torch.save(model.state_dict(), "modelo99")

In [94]:
#model2 = NeuralNetwork2().to("cuda")
#model2.load_state_dict(torch.load("modelo_0"))


<All keys matched successfully>

In [95]:
test_loop(test_dataloader, model2, loss_fn)

Test Error: 
 Accuracy: 41.3%, Avg loss: 1.243838 

