In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd

In [None]:
class TabularDataset(Dataset):
    """Map-style dataset over a CSV file that yields ``(features, target)`` tensors.

    Every column except ``y_column`` is treated as a feature. Both outputs are
    converted to ``torch.float32``.
    """

    def __init__(self, csv_file, y_column):
        """
        Args:
            csv_file (string): Path to the csv file with data.
            y_column (string): Name of the column to be used as the target variable.
        """
        self.data_frame = pd.read_csv(csv_file)
        self.y_column = y_column

    def __len__(self):
        """Return the number of rows read from the CSV file."""
        return len(self.data_frame)

    def __getitem__(self, idx):
        """Fetch one row, or several rows, by position.

        Args:
            idx: An integer (Python or NumPy), a list of integers, a slice, or a
                tensor holding a single index or a sequence of indices.

        Returns:
            tuple: ``(x, y)`` float32 tensors. For a single index ``x`` has shape
            ``(n_features,)`` and ``y`` has shape ``(1,)``; for several indices
            ``x`` has shape ``(n_rows, n_features)`` and ``y`` has shape
            ``(n_rows,)``.
        """
        if torch.is_tensor(idx):
            # A 0-d tensor becomes a plain int, a 1-d tensor becomes a list.
            idx = idx.tolist()

        # Split data into features and target
        x = self.data_frame.drop(columns=self.y_column).iloc[idx].to_numpy()
        y = self.data_frame[self.y_column].iloc[idx]

        # A list or slice selects a Series; a single position selects a scalar.
        # Branching on the result (rather than on ``type(idx)``) also covers
        # NumPy integer indices and needs no NumPy import in this cell.
        if isinstance(y, pd.Series):
            y = y.to_numpy()
        else:
            # Wrap the scalar so the target is always at least 1-D.
            y = [y]

        # Convert to tensor
        x = torch.tensor(x, dtype=torch.float32)
        y = torch.tensor(y, dtype=torch.float32)

        return x, y


In [None]:
class TabularDatasetPool(Dataset):
    """Map-style dataset over a CSV file that yields ``(idx, features, target)``.

    Every column except ``y_column`` is treated as a feature. The requested
    index is returned with the data so a consumer can tell which rows a
    shuffled batch contains.
    """

    def __init__(self, csv_file, y_column):
        """
        Args:
            csv_file (string): Path to the csv file with data.
            y_column (string): Name of the column to be used as the target variable.
        """
        self.data_frame = pd.read_csv(csv_file)
        self.y_column = y_column

    def __len__(self):
        """Return the number of rows read from the CSV file."""
        return len(self.data_frame)

    def __getitem__(self, idx):
        """Fetch one row, or several rows, by position, together with the index.

        Args:
            idx: An integer (Python or NumPy), a list of integers, a slice, or a
                tensor holding a single index or a sequence of indices.

        Returns:
            tuple: ``(idx, x, y)``. ``idx`` is the requested index (tensors are
            converted with ``tolist()``). ``x`` and ``y`` are float32 tensors:
            for a single index ``x`` has shape ``(n_features,)`` and ``y`` has
            shape ``(1,)``; for several indices ``x`` has shape
            ``(n_rows, n_features)`` and ``y`` has shape ``(n_rows,)``.
        """
        if torch.is_tensor(idx):
            # A 0-d tensor becomes a plain int, a 1-d tensor becomes a list.
            idx = idx.tolist()

        # Split data into features and target
        x = self.data_frame.drop(columns=self.y_column).iloc[idx].to_numpy()
        y = self.data_frame[self.y_column].iloc[idx]

        # A list or slice selects a Series; a single position selects a scalar.
        # Branching on the result (rather than on ``type(idx)``) also covers
        # NumPy integer indices and needs no NumPy import in this cell.
        if isinstance(y, pd.Series):
            y = y.to_numpy()
        else:
            # Wrap the scalar so the target is always at least 1-D.
            y = [y]

        # Convert to tensor
        x = torch.tensor(x, dtype=torch.float32)
        y = torch.tensor(y, dtype=torch.float32)

        return idx, x, y


In [None]:
class SquareDataset(Dataset):
    """Toy map-style dataset whose item at position ``idx`` is ``(idx, idx ** 2)``."""

    def __init__(self, size):
        """
        Args:
            size (int): Number of items the dataset reports via ``len()``.
        """
        self.size = size

    def __len__(self):
        """Return the number of items in the dataset."""
        return self.size

    def __getitem__(self, idx):
        """Return ``(idx, idx ** 2)``.

        Raises:
            IndexError: If ``idx`` is an integer outside ``[-size, size)``.
        """
        # Without this guard any integer is accepted, so plain iteration
        # (``for item in dataset`` / ``list(dataset)``), which stops only on
        # IndexError, would never terminate.
        if isinstance(idx, int) and not -self.size <= idx < self.size:
            raise IndexError(
                f"index {idx} is out of range for dataset of size {self.size}"
            )
        return idx, idx ** 2

In [None]:
# Ten-item toy dataset: item i is the pair (i, i ** 2).
dataset = SquareDataset(10)

In [None]:
# Batches of 4 in shuffled order; with 10 items the last batch of each pass has 2.
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

In [None]:
# Make ten full passes over the loader and print every batch.
for epoch in range(10):
    for batch in dataloader:
        indices, data = batch
        print(f"Indices: {indices} \t Squares: {data}")

Indices: tensor([0, 3, 6, 2]) 	 Squares: tensor([ 0,  9, 36,  4])
Indices: tensor([4, 7, 9, 8]) 	 Squares: tensor([16, 49, 81, 64])
Indices: tensor([5, 1]) 	 Squares: tensor([25,  1])
Indices: tensor([7, 8, 0, 3]) 	 Squares: tensor([49, 64,  0,  9])
Indices: tensor([2, 9, 4, 6]) 	 Squares: tensor([ 4, 81, 16, 36])
Indices: tensor([5, 1]) 	 Squares: tensor([25,  1])
Indices: tensor([0, 4, 1, 8]) 	 Squares: tensor([ 0, 16,  1, 64])
Indices: tensor([3, 6, 9, 5]) 	 Squares: tensor([ 9, 36, 81, 25])
Indices: tensor([2, 7]) 	 Squares: tensor([ 4, 49])
Indices: tensor([0, 7, 8, 3]) 	 Squares: tensor([ 0, 49, 64,  9])
Indices: tensor([1, 2, 5, 9]) 	 Squares: tensor([ 1,  4, 25, 81])
Indices: tensor([4, 6]) 	 Squares: tensor([16, 36])
Indices: tensor([4, 9, 1, 6]) 	 Squares: tensor([16, 81,  1, 36])
Indices: tensor([2, 3, 5, 7]) 	 Squares: tensor([ 4,  9, 25, 49])
Indices: tensor([0, 8]) 	 Squares: tensor([ 0, 64])
Indices: tensor([6, 1, 7, 0]) 	 Squares: tensor([36,  1, 49,  0])
Indices: tenso

In [None]:
# Ten passes again, this time driving the loader by hand with iter()/next().
for epoch in range(10):
    batches = iter(dataloader)
    while True:
        try:
            indices, data = next(batches)
        except StopIteration:
            # Iterator is exhausted
            print("All batches have been processed.")
            break
        print(f"Indices: {indices} \t Squares: {data}")

Indices: tensor([6, 5, 4, 3]) 	 Squares: tensor([36, 25, 16,  9])
Indices: tensor([9, 0, 7, 8]) 	 Squares: tensor([81,  0, 49, 64])
Indices: tensor([1, 2]) 	 Squares: tensor([1, 4])
All batches have been processed.
Indices: tensor([5, 0, 6, 4]) 	 Squares: tensor([25,  0, 36, 16])
Indices: tensor([9, 2, 8, 1]) 	 Squares: tensor([81,  4, 64,  1])
Indices: tensor([7, 3]) 	 Squares: tensor([49,  9])
All batches have been processed.
Indices: tensor([9, 6, 1, 5]) 	 Squares: tensor([81, 36,  1, 25])
Indices: tensor([3, 0, 8, 7]) 	 Squares: tensor([ 9,  0, 64, 49])
Indices: tensor([2, 4]) 	 Squares: tensor([ 4, 16])
All batches have been processed.
Indices: tensor([8, 3, 7, 5]) 	 Squares: tensor([64,  9, 49, 25])
Indices: tensor([4, 6, 2, 9]) 	 Squares: tensor([16, 36,  4, 81])
Indices: tensor([0, 1]) 	 Squares: tensor([0, 1])
All batches have been processed.
Indices: tensor([3, 9, 5, 4]) 	 Squares: tensor([ 9, 81, 25, 16])
Indices: tensor([7, 0, 6, 1]) 	 Squares: tensor([49,  0, 36,  1])
Indi

In [None]:
# Colab-only: mount Google Drive at /content/drive so the CSV written and read
# by the cells below is stored there.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np

# Build a small random table and save it as the example CSV.
# The statements below draw from NumPy's global RNG, so their order determines
# the generated values.
np.random.seed(0)  # For reproducibility
data_size = 10 # Number of rows

# One feature column (shape (data_size, 1)) and one target value per row
features = np.random.rand(data_size, 1)  # Uniform random values in [0, 1)
target = np.random.randint(0, 2, data_size)  # Random binary target (0 or 1)

# Creating a DataFrame with columns 'feature1' and 'target'
df = pd.DataFrame(features, columns=['feature1'])
df['target'] = target

# Saving the DataFrame to a CSV file (without the index column)
csv_file_path = '/content/drive/MyDrive/example_data.csv'
df.to_csv(csv_file_path, index=False)

In [None]:
# Wrap the example CSV; 'target' is the label column, the rest are features.
dataset = TabularDatasetPool(csv_file='/content/drive/MyDrive/example_data.csv', y_column='target')

# Create a DataLoader: shuffled batches of 3 (the 10-row file leaves a final batch of 1)
dataloader = DataLoader(dataset, batch_size=3, shuffle=True)

In [None]:
# Ten full passes over the tabular loader, printing each batch.
for epoch in range(10):
    for batch in dataloader:
        idx, features, target = batch
        print(f"Index: {idx}, Features: {features}, Target: {target}")

Index: tensor([3, 9, 0]), Features: tensor([[0.5449],
        [0.3834],
        [0.5488]]), Target: tensor([[0.],
        [0.],
        [0.]])
Index: tensor([1, 6, 2]), Features: tensor([[0.7152],
        [0.4376],
        [0.6028]]), Target: tensor([[1.],
        [1.],
        [1.]])
Index: tensor([7, 8, 4]), Features: tensor([[0.8918],
        [0.9637],
        [0.4237]]), Target: tensor([[1.],
        [1.],
        [0.]])
Index: tensor([5]), Features: tensor([[0.6459]]), Target: tensor([[1.]])
Index: tensor([5, 8, 9]), Features: tensor([[0.6459],
        [0.9637],
        [0.3834]]), Target: tensor([[1.],
        [1.],
        [0.]])
Index: tensor([0, 7, 4]), Features: tensor([[0.5488],
        [0.8918],
        [0.4237]]), Target: tensor([[0.],
        [1.],
        [0.]])
Index: tensor([1, 6, 2]), Features: tensor([[0.7152],
        [0.4376],
        [0.6028]]), Target: tensor([[1.],
        [1.],
        [1.]])
Index: tensor([3]), Features: tensor([[0.5449]]), Target: tensor([[0.

In [None]:
# Ten passes over the tabular loader, pulling batches by hand with iter()/next()
# and printing each batch together with the index type and the batch shapes.
for i in range(10):
    data_iterator = iter(dataloader)
    while True:
        try:
            indices, x, y = next(data_iterator)
        except StopIteration:
            # Iterator is exhausted
            print("All batches have been processed.")
            break
        # x holds the feature columns and y the target column, so label them
        # as such rather than as "Squares".
        print(f"Indices: {indices} \t Features: {x} \t Target: {y}")
        print(type(indices))
        print(indices.shape)
        print(x.shape)

Indices: tensor([4, 1, 6]) 	 Squares: tensor([[0.4237],
        [0.7152],
        [0.4376]]) 	 Squares: tensor([[0.],
        [1.],
        [1.]])
<class 'torch.Tensor'>
torch.Size([3])
torch.Size([3, 1])
Indices: tensor([2, 9, 8]) 	 Squares: tensor([[0.6028],
        [0.3834],
        [0.9637]]) 	 Squares: tensor([[1.],
        [0.],
        [1.]])
<class 'torch.Tensor'>
torch.Size([3])
torch.Size([3, 1])
Indices: tensor([7, 5, 0]) 	 Squares: tensor([[0.8918],
        [0.6459],
        [0.5488]]) 	 Squares: tensor([[1.],
        [1.],
        [0.]])
<class 'torch.Tensor'>
torch.Size([3])
torch.Size([3, 1])
Indices: tensor([3]) 	 Squares: tensor([[0.5449]]) 	 Squares: tensor([[0.]])
<class 'torch.Tensor'>
torch.Size([1])
torch.Size([1, 1])
All batches have been processed.
Indices: tensor([0, 7, 9]) 	 Squares: tensor([[0.5488],
        [0.8918],
        [0.3834]]) 	 Squares: tensor([[0.],
        [1.],
        [0.]])
<class 'torch.Tensor'>
torch.Size([3])
torch.Size([3, 1])
Indices: ten

In [None]:

# Rebuild the dataset from the same CSV with the same arguments as before.
dataset = TabularDatasetPool(csv_file='/content/drive/MyDrive/example_data.csv', y_column='target')

# Create a DataLoader: shuffled batches of 3
dataloader = DataLoader(dataset, batch_size=3, shuffle=True)

In [None]:
# Load the example CSV directly so the indexing logic can be tried outside the Dataset class.
data_frame = pd.read_csv('/content/drive/MyDrive/example_data.csv')
y_column = 'target'  # name of the label column


In [None]:
# Show the whole frame (10 rows: feature1, target).
print(data_frame)

   feature1  target
0  0.548814       0
1  0.715189       1
2  0.602763       1
3  0.544883       0
4  0.423655       0
5  0.645894       1
6  0.437587       1
7  0.891773       1
8  0.963663       1
9  0.383442       0


In [None]:
def ok(idx, csv_file='/content/drive/MyDrive/example_data.csv', y_column='target'):
    """Replay the dataset's ``__getitem__`` logic on a CSV, printing intermediates.

    Args:
        idx: An integer (Python or NumPy), a list of integers, a slice, or a
            tensor holding a single index or a sequence of indices.
        csv_file (string): Path to the csv file with data.
        y_column (string): Name of the column to be used as the target variable.

    Returns:
        tuple: ``(idx, x, y)``. ``idx`` is the requested index (tensors are
        converted with ``tolist()``); ``x`` and ``y`` are float32 tensors with
        the selected features and targets.
    """
    data_frame = pd.read_csv(csv_file)
    if torch.is_tensor(idx):
        # A 0-d tensor becomes a plain int, a 1-d tensor becomes a list.
        idx = idx.tolist()

    # Split data into features and target
    x = data_frame.drop(columns=y_column).iloc[idx].to_numpy()
    print(x)
    y = data_frame[y_column].iloc[idx]
    print(y)

    # Same lookup with ``[]`` instead of ``.iloc``, printed for comparison.
    z = data_frame[y_column][idx]
    print(z)

    # A list or slice selects a Series; a single position selects a scalar.
    # Branching on the result also covers NumPy integer indices.
    if isinstance(y, pd.Series):
        y = y.to_numpy()
    else:
        # Wrap the scalar so the target is always at least 1-D.
        y = [y]

    # Convert to tensor
    x = torch.tensor(x, dtype=torch.float32)
    y = torch.tensor(y, dtype=torch.float32)

    return idx, x, y

In [None]:
ok(1)    # scalar index; a 0-d tensor such as torch.tensor(1) gives the same result

[0.71518937]
1
1


(1, tensor([0.7152]), tensor([1.]))

In [None]:
ok([1])    # one-element list; torch.tensor([1]) gives the same result (x is 2-D here)

[[0.71518937]]
1    1
Name: target, dtype: int64
1    1
Name: target, dtype: int64


([1], tensor([[0.7152]]), tensor([1.]))

In [None]:
ok([1,2])    # several rows at once; torch.tensor([1,2]) gives the same result

[[0.71518937]
 [0.60276338]]
1    1
2    1
Name: target, dtype: int64
1    1
2    1
Name: target, dtype: int64


([1, 2],
 tensor([[0.7152],
         [0.6028]]),
 tensor([1., 1.]))

In [None]:
# 1-D tensor holding a single index.
a=torch.tensor([1])

In [None]:
# 0-d (scalar) tensor index.
b=torch.tensor(1)

In [None]:
# tolist() on a 0-d tensor yields a plain Python number rather than a list.
d=b.tolist()

In [None]:
# Display the converted value.
d

1

In [None]:
# 1-D tensor holding two indices.
c=torch.tensor([1,2])

In [None]:
# A 1-D tensor index is converted to a list before the lookup.
ok(a)

[[0.71518937]]
1    1
Name: target, dtype: int64
1    1
Name: target, dtype: int64


([1], tensor([[0.7152]]), tensor([1.]))

In [None]:
# A 0-d tensor index is converted to a plain int before the lookup.
ok(b)

[0.71518937]
1
1


(1, tensor([0.7152]), tensor([1.]))

In [None]:
# Unpack the result: l is the converted index, m the features, n the targets.
l,m,n = ok(c)

[[0.71518937]
 [0.60276338]]
1    1
2    1
Name: target, dtype: int64
1    1
2    1
Name: target, dtype: int64


In [None]:
# Display the index returned for a tensor input.
l

[1, 2]

In [None]:
# The returned index is a Python list, not a tensor.
type(l)

list

In [None]:
# Step-by-step version of the dataset indexing logic at notebook top level.
# Uses ``data_frame`` and ``y_column`` from the cell that loads the CSV.
idx = torch.tensor([1, 2])  # example index: two row positions

if torch.is_tensor(idx):
    # A 0-d tensor becomes a plain int, a 1-d tensor becomes a list.
    idx = idx.tolist()

# Split data into features and target
x = data_frame.drop(columns=y_column).iloc[idx].to_numpy()
y = data_frame[y_column].iloc[idx]

# A list or slice selects a Series; a single position selects a scalar.
if isinstance(y, pd.Series):
    y = y.to_numpy()
else:
    # Wrap the scalar so the target is always at least 1-D.
    y = [y]

# Convert to tensor
x = torch.tensor(x, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.float32)

# ``return`` is only valid inside a function; ending the cell with the tuple
# makes the notebook display it.
idx, x, y
