# work flow

In [46]:
import torch
from torch import nn  
import matplotlib.pyplot as plt
import pandas as pd 
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
import torch.utils.data as data_utils



In [47]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [48]:
df = pd.read_csv("minMaxScaled.csv",index_col=0)

In [49]:
df.shape

(692500, 54)

In [50]:
columns = list(df.columns)
output_columns = ["RENDIMIENTO_GLOBAL"]
input_columns = list(filter(lambda x: not 'RENDIMIENTO_GLOBAL' in x, columns))
input_columns2 = list(filter(lambda x: not 'ESTU_PRGM_DEPARTAMENTO' in x, input_columns))

salida = pd.get_dummies(df["RENDIMIENTO_GLOBAL"],dtype=float)

In [51]:
input_columns2

['ESTU_VALORMATRICULAUNIVERSIDAD',
 'ESTU_HORASSEMANATRABAJA',
 'FAMI_ESTRATOVIVIENDA',
 'FAMI_TIENEINTERNET',
 'FAMI_EDUCACIONPADRE',
 'FAMI_TIENELAVADORA',
 'FAMI_TIENEAUTOMOVIL',
 'ESTU_PRIVADO_LIBERTAD',
 'ESTU_PAGOMATRICULAPROPIO',
 'FAMI_TIENECOMPUTADOR',
 'FAMI_EDUCACIONMADRE',
 'ESTU_PRGM_ACADEMICO_Administración y Negocios',
 'ESTU_PRGM_ACADEMICO_Ciencias Exactas',
 'ESTU_PRGM_ACADEMICO_Ciencias de la Salud',
 'ESTU_PRGM_ACADEMICO_Comunicación y Periodismo',
 'ESTU_PRGM_ACADEMICO_Deporte y Actividad Física',
 'ESTU_PRGM_ACADEMICO_Derecho y Arquitectura',
 'ESTU_PRGM_ACADEMICO_Gastronomía',
 'ESTU_PRGM_ACADEMICO_Ingeniería',
 'ESTU_PRGM_ACADEMICO_Licenciaturas y Educación',
 'ESTU_PRGM_ACADEMICO_Otro',
 'ESTU_PRGM_ACADEMICO_Tecnología de la Información']

In [52]:
X = torch.from_numpy(df[input_columns2].to_numpy()).type(torch.float).to(device)
y = torch.from_numpy(df[output_columns].to_numpy()).type(torch.int64).to(device)

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.01,random_state=0)

In [53]:
len(X_test),len(X_train),X_train.shape,X_test.shape

(6925, 685575, torch.Size([685575, 22]), torch.Size([6925, 22]))

In [54]:
len(y_test),len(y_train),y_train.shape,y_test.shape

(6925, 685575, torch.Size([685575, 1]), torch.Size([6925, 1]))

In [55]:
class NeuralNetwork(nn.Module):
    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.layer1 = nn.Linear(in_features=22,out_features=256)
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(256, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 4)
            
        )

    def forward(self, x):
        x = self.layer1(x)
        logits = self.linear_relu_stack(x)
        return logits
    
model = NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (layer1): Linear(in_features=22, out_features=256, bias=True)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=256, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=4, bias=True)
  )
)


In [56]:
X_test.device

device(type='cuda', index=0)

In [57]:
logits = model(X_test)
pred_probab = nn.Softmax(dim=1)(logits)
y_pred = pred_probab.argmax(1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([1, 1, 1,  ..., 1, 1, 1], device='cuda:0')


# parametros

In [85]:
learning_rate = 1e-5
batch_size = 32
epochs = 50

In [87]:
def train_loop(dataloader, model, loss_fn, optimizer,print_status=True):
    size = len(dataloader.dataset)
    # Set the model to training mode - important for batch normalization and dropout layers
    # Unnecessary in this situation but added for best practices
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if print_status:
            if batch % 100 == 0:
                loss, current = loss.item(), batch * batch_size + len(X)
                print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")


def test_loop(dataloader, model, loss_fn):
    # Set the model to evaluation mode - important for batch normalization and dropout layers
    # Unnecessary in this situation but added for best practices
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    # Evaluating the model with torch.no_grad() ensures that no gradients are computed during test mode
    # also serves to reduce unnecessary gradient computations and memory usage for tensors with requires_grad=True
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [88]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    model.parameters(), 
    lr=learning_rate
)

In [89]:
training_data = data_utils.TensorDataset(X_train, y_train.squeeze())
test_data = data_utils.TensorDataset(X_test, y_test.squeeze())

In [90]:
train_dataloader = DataLoader(training_data,batch_size=batch_size)
test_dataloader = DataLoader(test_data,batch_size=batch_size)

# entrenamiento

In [91]:

for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer,False)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
Test Error: 
 Accuracy: 39.5%, Avg loss: 1.260267 

Epoch 2
-------------------------------
Test Error: 
 Accuracy: 39.5%, Avg loss: 1.260267 

Epoch 3
-------------------------------
Test Error: 
 Accuracy: 39.5%, Avg loss: 1.260267 

Epoch 4
-------------------------------
Test Error: 
 Accuracy: 39.5%, Avg loss: 1.260268 

Epoch 5
-------------------------------
Test Error: 
 Accuracy: 39.5%, Avg loss: 1.260268 

Epoch 6
-------------------------------
Test Error: 
 Accuracy: 39.5%, Avg loss: 1.260268 

Epoch 7
-------------------------------
Test Error: 
 Accuracy: 39.5%, Avg loss: 1.260268 

Epoch 8
-------------------------------
Test Error: 
 Accuracy: 39.5%, Avg loss: 1.260267 

Epoch 9
-------------------------------
Test Error: 
 Accuracy: 39.5%, Avg loss: 1.260266 

Epoch 10
-------------------------------
Test Error: 
 Accuracy: 39.5%, Avg loss: 1.260265 

Epoch 11
-------------------------------
Test Error: 
 Accuracy: 39.5%, Avg los

KeyboardInterrupt: 

In [92]:
torch.save(model.state_dict(), "modelo39.55")

In [94]:
#model2 = NeuralNetwork2().to("cuda")
#model2.load_state_dict(torch.load("modelo_0"))


<All keys matched successfully>

In [95]:
test_loop(test_dataloader, model2, loss_fn)

Test Error: 
 Accuracy: 41.3%, Avg loss: 1.243838 

