In [None]:
import minima as mi
from minima.data import Dataset, Sampler, BatchSampler
import fastcore.all as fc

In [None]:
import pandas as pd
import numpy as np
def data_split(file_path, split_ratio=0.8):
    """Load a CSV file and split it into normalized train/validation arrays.

    Column 1 of the file is read as the label ('M' becomes 1, every other
    value becomes 0) and columns 2 onward are read as features. Column 0 is
    not used (presumably a record id -- confirm against the data file).

    The features are standardized with ``normalize_dataset`` *before* the
    split, so the statistics are computed over all rows, validation rows
    included. Rows are not shuffled: the first ``split_ratio`` fraction of
    the file becomes the training set and the remainder the validation set.

    Args:
        file_path: Path of the CSV file to read.
        split_ratio: Fraction of rows, counted from the top of the file,
            assigned to the training set. Must lie in ``[0, 1]``.

    Returns:
        The tuple ``(X_tr, y_tr, X_val, y_val)``.

    Raises:
        ValueError: If ``split_ratio`` is outside ``[0, 1]``.
    """
    if not 0.0 <= split_ratio <= 1.0:
        raise ValueError(f"split_ratio must be between 0 and 1, got {split_ratio!r}")

    df = pd.read_csv(file_path)
    labels = df.iloc[:, 1].values
    y = np.where(labels == 'M', 1, 0)
    X = normalize_dataset(df.iloc[:, 2:].values)

    # Positional split: everything before split_index trains, the rest validates.
    split_index = int(split_ratio * len(X))
    return X[:split_index], y[:split_index], X[split_index:], y[split_index:]

In [None]:
def normalize_dataset(dataset):
    """Standardize every column to zero mean and unit standard deviation.

    Args:
        dataset: Array whose statistics are taken along axis 0, i.e. one
            mean and one standard deviation per column.

    Returns:
        An array of the same shape holding ``(dataset - mean) / std``.
        Columns whose standard deviation is zero (constant columns) come
        back as all zeros instead of NaN/inf.
    """
    mean = np.mean(dataset, axis=0)
    std = np.std(dataset, axis=0)

    # A constant column has std == 0; dividing by it would fill the column
    # with NaN (0/0). Dividing by 1 instead leaves the centered zeros intact.
    safe_std = np.where(std == 0, 1.0, std)

    return (dataset - mean) / safe_std

In [None]:
# Load the CSV and unpack the normalized train/validation features and labels.
X_tr, y_tr, X_val, y_val = data_split('./data/data.csv')

In [None]:
def collate(b):
    """Stack a batch of ``(x, y)`` samples into two batched torch tensors.

    Args:
        b: Non-empty iterable of ``(x, y)`` pairs accepted by ``torch.stack``.

    Returns:
        The tuple ``(xs, ys)``: every ``x`` stacked along a new first
        dimension, and likewise every ``y``.

    Raises:
        ValueError: If ``b`` contains no samples.
    """
    # Imported locally: this cell is defined before the notebook's
    # ``import torch`` cell, so the module-level name does not exist yet on a
    # fresh kernel.
    import torch

    samples = list(b)
    if not samples:
        raise ValueError("collate() received an empty batch")

    xs, ys = zip(*samples)
    return torch.stack(xs), torch.stack(ys)

class DataLoader:
    """
    A minimal data loader that walks a dataset one mini-batch at a time.

    Args:
        dataset (Dataset): The dataset to load from. It is indexed with a
            whole batch of indices at once (``dataset[batch_idxs]``).
        batch_size (int): Batch size handed to the default ``BatchSampler``.
        shuffle (bool): Shuffle flag handed to the default ``Sampler``.
        sampler (Sampler): Optional index sampler; a ``Sampler(dataset, shuffle)``
            is created when omitted.
        batch_sampler (BatchSampler): Optional batch sampler; a
            ``BatchSampler(sampler, batch_size, drop_last)`` is created when omitted.
        num_workers (int): When non-zero, batches are fetched through a
            ``multiprocessing.Pool`` with that many workers.
        collate_fn (callable): Stored on the instance, defaulting to the
            module-level ``collate``. NOTE: ``__iter__`` does not call it yet.
        drop_last (bool): Forwarded to the default ``BatchSampler``.

    Example:
        >>> dataloader = DataLoader(dataset, batch_size=32)
    """

    def __init__(self,
                 dataset: Dataset,
                 batch_size: int = 1,
                 shuffle: bool = True,
                 sampler: Sampler = None,
                 batch_sampler: BatchSampler = None,
                 num_workers: int = 0,
                 collate_fn: callable = None,
                 drop_last: bool = False):

        self.dataset = dataset
        self.batch_size = batch_size
        self.shuffle = shuffle
        # Compare against None instead of relying on truthiness: a caller's
        # sampler that happens to evaluate as falsy must not be replaced.
        self.sampler = sampler if sampler is not None else Sampler(dataset, shuffle)
        self.batch_sampler = (
            batch_sampler
            if batch_sampler is not None
            else BatchSampler(self.sampler, batch_size, drop_last)
        )
        self.num_workers = num_workers  # TODO: implement a proper multiprocessing DataLoader
        # Honor the caller's collate function; fall back to the default one.
        self.collate_fn = collate_fn if collate_fn is not None else collate
        self.drop_last = drop_last

    def __iter__(self):
        """
        Iterate over the batches produced by the batch sampler.

        Yields:
            Whatever ``dataset[batch_idxs]`` returns for each batch of
            indices -- for an ``(X, y)`` style dataset, a pair of batched
            inputs and targets.

        Example:
            >>> for batch in dataloader:
            >>>     # Process the batch
        """
        if self.num_workers:
            # Imported here because the notebook never imports multiprocessing
            # at module level; without it this branch raised NameError.
            import multiprocessing as mp

            # NOTE(review): Pool.map materializes every batch before the first
            # one is yielded, and the dataset must be picklable -- confirm this
            # is acceptable for the intended use.
            with mp.Pool(self.num_workers) as ex:
                yield from ex.map(self.dataset.__getitem__, iter(self.batch_sampler))
        else:
            yield from (self.dataset[batch_idxs] for batch_idxs in self.batch_sampler)

In [None]:
# Custom Dataset class
class MyDataset(Dataset):
    """Dataset that pairs a feature array with its label array as minima tensors."""

    def __init__(self, X, y):
        # Wrap both arrays once, up front, so lookups index tensors directly.
        self.X, self.y = mi.Tensor(X), mi.Tensor(y)

    def __len__(self):
        """Return the length of the feature tensor."""
        return len(self.X)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair selected by ``index``."""
        features = self.X[index]
        label = self.y[index]
        return features, label

# Wrap each split in a dataset.
tr_ds, val_ds = MyDataset(X_tr, y_tr), MyDataset(X_val, y_val)

# One loader per split: both shuffle; validation uses a fixed batch of 64.
batch_size = 2
tr_dl = DataLoader(dataset=tr_ds, batch_size=batch_size, shuffle=True)
val_dl = DataLoader(dataset=val_ds, batch_size=64, shuffle=True)

In [None]:
DataLoader

__main__.DataLoader

In [None]:
# Peek at the first training batch: print its inputs, its labels and the
# tensor type, then stop after one iteration.
for batch_X, batch_y in tr_dl:
    print("Batch X:", batch_X)
    print("Batch y:", batch_y)
    print(type(batch_X))
    break

Batch X: tensor([[-0.323974  0.010424 -0.304757 -0.39896  -0.037872  0.161808 -0.21909  -0.498826 -0.18153   0.37424  -0.491515 -1.180435 -0.19476  -0.430746 -0.43802
   0.39782   0.400195 -0.320686  0.845901  0.295489 -0.380474 -0.609805 -0.236657 -0.430564 -0.154227  0.458861  0.348815 -0.240892  1.136195  0.431948]
 [-0.181942  0.353503 -0.144852 -0.27041   0.376133  0.410331  0.225596  0.146274 -0.331739  0.202087 -0.692389 -1.135104 -0.653037 -0.477825 -0.557924
  -0.170387 -0.045249  0.134735 -0.818189 -0.228308 -0.150091  0.053466  0.005231 -0.243553  1.258175  1.080403  1.114435  1.703578 -0.147691  1.289654]])
Batch y: tensor([0 1])
<class 'minima.autograd.Tensor'>


In [None]:
import minima.nn as nn

In [None]:
class NeuralNetwork(nn.Module):
    """Fully connected network: three ReLU hidden layers of width 24, then a linear output.

    Args:
        input_shape: Number of input features fed to the first layer.
        output_shape: Number of units produced by the last layer.
    """

    def __init__(self, input_shape, output_shape):
        super().__init__()
        hidden = 24
        self.dense1 = nn.Linear(in_features=input_shape, out_features=hidden)
        self.dense2 = nn.Linear(hidden, hidden)
        self.dense3 = nn.Linear(hidden, hidden)
        self.dense4 = nn.Linear(hidden, output_shape)
        self.relu = nn.ReLU()
        # Registered on the module but never applied in forward().
        self.softmax = nn.Softmax()

    def forward(self, x):
        """Run ``x`` through the hidden stack and return the raw output of ``dense4``."""
        for hidden_layer in (self.dense1, self.dense2, self.dense3):
            x = self.relu(hidden_layer(x))
        return self.dense4(x)

# Create the neural network
input_shape = 30  # features per sample; the batch printed earlier has 30 values per row
output_shape = 2  # one output unit per class (labels are encoded as 0 / 1)

# Module-level instance used for inspection; net() builds its own fresh model.
network = NeuralNetwork(input_shape, output_shape)

In [None]:
network

NeuralNetwork(
  (dense1): Linear(in_features=30, out_features=24, bias=True)
  (dense2): Linear(in_features=24, out_features=24, bias=True)
  (dense3): Linear(in_features=24, out_features=24, bias=True)
  (dense4): Linear(in_features=24, out_features=2, bias=True)
  (relu): ReLU()
  (softmax): Softmax()
)

In [None]:
from minima import optim
def net(num_epochs=70, lr=0.01):
    """Train a fresh ``NeuralNetwork`` and print per-epoch loss and accuracy.

    Reads the notebook globals ``input_shape``, ``output_shape``, ``tr_dl``
    and ``val_dl``. Each epoch runs one pass over ``tr_dl`` with SGD and
    cross-entropy loss, then one evaluation pass over ``val_dl``, and prints
    the averages of the per-batch losses and accuracies.

    Accuracy is computed with NumPy rather than torch: the model and the
    batches are minima objects (the loader yields ``minima.autograd.Tensor``),
    so the earlier ``torch.max`` / ``torch.no_grad`` calls did not apply to
    them, and torch is not imported at this point in the notebook.

    Args:
        num_epochs: Number of passes over the training loader.
        lr: Learning rate handed to ``optim.SGD``.
    """

    def _as_numpy(t):
        # assumes minima tensors expose .numpy() -- TODO confirm against
        # minima.autograd.Tensor; anything else goes through np.asarray.
        return t.numpy() if hasattr(t, "numpy") else np.asarray(t)

    def _scalar(t):
        # Prefer .item() as the original loop did; fall back to NumPy conversion.
        return t.item() if hasattr(t, "item") else float(_as_numpy(t))

    def _accuracy(scores, targets):
        # Fraction of rows whose highest-scoring class equals the target label.
        predicted = _as_numpy(scores).argmax(axis=1)
        return float((predicted == _as_numpy(targets)).mean())

    network = NeuralNetwork(input_shape, output_shape)
    opt = optim.SGD(network.parameters(), lr=lr)
    bce = nn.CrossEntropyLoss()

    for epoch in range(num_epochs):
        train_losses, val_losses = [], []
        train_accs, val_accs = [], []

        # Training phase
        network.train()
        for xb, yb in tr_dl:
            preds = network(xb)
            loss = bce(preds, yb)
            loss.backward()
            opt.step()
            opt.zero_grad()
            train_losses.append(_scalar(loss))
            # Previously this used `predicted_labels`, which was never assigned
            # because the line defining it was commented out.
            train_accs.append(_accuracy(preds, yb))

        # Validation phase
        network.eval()
        for xb_val, yb_val in val_dl:
            preds_val = network(xb_val)
            val_losses.append(_scalar(bce(preds_val, yb_val)))
            val_accs.append(_accuracy(preds_val, yb_val))

        avg_train_loss = sum(train_losses) / len(train_losses)
        avg_val_loss = sum(val_losses) / len(val_losses)
        avg_train_acc = sum(train_accs) / len(train_accs)
        avg_val_acc = sum(val_accs) / len(val_accs)

        # Print epoch-wise loss and accuracy
        print(f"epoch {epoch + 1:02d}/{num_epochs:02d} - loss: {avg_train_loss:.4f} - acc: {avg_train_acc:.4f} - val_loss: {avg_val_loss:.4f} - val_acc: {avg_val_acc:.4f}")


In [None]:
net()

NameError: name 'torch' is not defined

In [None]:
mi.operators.logsumexp?

In [None]:
import torch

In [None]:
t = torch.tensor([[1,3]])
t

In [None]:
# NOTE: `t` holds two elements, and torch's `.item()` only converts
# single-element tensors, so this call raises.
t.item()

In [None]:
t.size()

In [None]:
import torch

def default_collate(batch):
    """Combine a list of samples into one batch, dispatching on the first element's type.

    Args:
        batch: Non-empty list of samples that all share the type of ``batch[0]``.

    Returns:
        * torch tensors -> one tensor stacked along a new dimension 0
        * ints          -> an ``int64`` tensor
        * floats        -> a ``float32`` tensor
        * strings       -> ``batch`` itself, unchanged

    Raises:
        TypeError: If the first element is none of the supported types.
    """
    first = batch[0]

    if isinstance(first, torch.Tensor):
        return torch.stack(batch, dim=0)

    # Checked in order: int first, then float.
    scalar_dtypes = ((int, torch.int64), (float, torch.float32))
    for py_type, dtype in scalar_dtypes:
        if isinstance(first, py_type):
            return torch.tensor(batch, dtype=dtype)

    if isinstance(first, str):
        return batch

    raise TypeError("Unsupported data type encountered in default_collate")

# Usage example: two length-3 tensors are stacked along a new leading
# dimension, giving a single 2x3 tensor.
data = [torch.tensor([1, 2, 3]), torch.tensor([4, 5, 6])]
collated_data = default_collate(data)
print(collated_data)
