In [347]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import numpy as npy
from sklearn.model_selection import train_test_split

df=pd.read_csv("iris.csv", header=None)

In [348]:
features = df.iloc[:,0:4]
target = df.iloc[:,4]

In [349]:
from sklearn.preprocessing import LabelEncoder

le=LabelEncoder()
target = pd.Series(le.fit_transform(target))

In [350]:
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, stratify=target)

In [351]:
y_train.value_counts()

1    40
2    40
0    40
Name: count, dtype: int64

In [352]:
# 데이터셋 클래스 생성
class IrisDataset(torch.utils.data.Dataset):
    def __init__(self, features, target):
        self.features = features
        self.target = target
        
    def __len__(self):
        return len(self.target)
    
    def __getitem__(self, idx):
        return torch.tensor(self.features.iloc[idx].values, dtype=torch.float32), torch.tensor(self.target.iloc[idx], dtype=torch.long)

In [353]:
combined_train = pd.concat([X_train, y_train], axis=1)


In [354]:
combined_train.sort_index(inplace=True)
X_train=combined_train.iloc[:,0:4]
y_train=combined_train.iloc[:,4]

combined_test = pd.concat([X_test, y_test], axis=1)

In [355]:
combined_test.sort_index(inplace=True)
X_test=combined_test.iloc[:,0:4]
y_test=combined_test.iloc[:,4]

trainData = IrisDataset(X_train, y_train)
testData = IrisDataset(X_test, y_test)

In [356]:
y_test

5      0
12     0
14     0
18     0
20     0
22     0
27     0
31     0
34     0
48     0
50     1
54     1
60     1
62     1
71     1
72     1
73     1
77     1
86     1
87     1
100    2
108    2
109    2
111    2
112    2
120    2
122    2
130    2
135    2
145    2
Name: 0, dtype: int32

In [357]:
# 데이터 로더 생성
trainLoader = torch.utils.data.DataLoader(trainData, batch_size=10, shuffle=True)
testLoader = torch.utils.data.DataLoader(testData, batch_size=10, shuffle=False)

In [358]:
def print_batch_data(loader, epochs, batch_size=1, shuffle=False, drop_last=False, sampler=None):
    print(f'[설정값] batch size : {batch_size}, shuffle : {shuffle}, drop_last : {drop_last}, sampler : {sampler}')
    
    for ep in range(epochs):
        print(f'[{ep} EPOCHS]=====batch : {len(loader)}개')
        for feature, label in loader:
            print(feature.shape,label.shape, label.bincount(), label)

In [359]:
print_batch_data(loader=testLoader, epochs=2)

[설정값] batch size : 1, shuffle : False, drop_last : False, sampler : None
[0 EPOCHS]=====batch : 3개
torch.Size([10, 4]) torch.Size([10]) tensor([10]) tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([10, 4]) torch.Size([10]) tensor([ 0, 10]) tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
torch.Size([10, 4]) torch.Size([10]) tensor([ 0,  0, 10]) tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
[1 EPOCHS]=====batch : 3개
torch.Size([10, 4]) torch.Size([10]) tensor([10]) tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
torch.Size([10, 4]) torch.Size([10]) tensor([ 0, 10]) tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
torch.Size([10, 4]) torch.Size([10]) tensor([ 0,  0, 10]) tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2])


In [360]:
for i in trainData:
    print(i)

(tensor([5.1000, 3.5000, 1.4000, 0.2000]), tensor(0))
(tensor([4.9000, 3.0000, 1.4000, 0.2000]), tensor(0))
(tensor([4.7000, 3.2000, 1.3000, 0.2000]), tensor(0))
(tensor([4.6000, 3.1000, 1.5000, 0.2000]), tensor(0))
(tensor([5.0000, 3.6000, 1.4000, 0.2000]), tensor(0))
(tensor([4.6000, 3.4000, 1.4000, 0.3000]), tensor(0))
(tensor([5.0000, 3.4000, 1.5000, 0.2000]), tensor(0))
(tensor([4.4000, 2.9000, 1.4000, 0.2000]), tensor(0))
(tensor([4.9000, 3.1000, 1.5000, 0.1000]), tensor(0))
(tensor([5.4000, 3.7000, 1.5000, 0.2000]), tensor(0))
(tensor([4.8000, 3.4000, 1.6000, 0.2000]), tensor(0))
(tensor([4.3000, 3.0000, 1.1000, 0.1000]), tensor(0))
(tensor([5.7000, 4.4000, 1.5000, 0.4000]), tensor(0))
(tensor([5.4000, 3.9000, 1.3000, 0.4000]), tensor(0))
(tensor([5.1000, 3.5000, 1.4000, 0.3000]), tensor(0))
(tensor([5.1000, 3.8000, 1.5000, 0.3000]), tensor(0))
(tensor([5.1000, 3.7000, 1.5000, 0.4000]), tensor(0))
(tensor([5.1000, 3.3000, 1.7000, 0.5000]), tensor(0))
(tensor([4.8000, 3.4000, 1.9

In [447]:
import torch
from torch.utils.data import DataLoader, WeightedRandomSampler 
import numpy as np 


num_data = 40
min_weight = 0.001
max_weight = 10000  

log_weights = np.linspace(np.log(min_weight), np.log(max_weight), num_data)
weights = np.exp(log_weights)


sampler = WeightedRandomSampler(torch.DoubleTensor(list(weights)*3), num_data*3, replacement=False)
trainLoader = DataLoader(trainData, batch_size=10, sampler=sampler)


print_batch_data(loader=trainLoader, epochs=2)

[설정값] batch size : 1, shuffle : False, drop_last : False, sampler : None
[0 EPOCHS]=====batch : 12개
torch.Size([10, 4]) torch.Size([10]) tensor([3, 4, 3]) tensor([0, 0, 0, 2, 1, 1, 2, 1, 1, 2])
torch.Size([10, 4]) torch.Size([10]) tensor([2, 4, 4]) tensor([1, 0, 1, 2, 0, 2, 2, 1, 1, 2])
torch.Size([10, 4]) torch.Size([10]) tensor([4, 2, 4]) tensor([2, 0, 2, 0, 2, 0, 2, 1, 0, 1])
torch.Size([10, 4]) torch.Size([10]) tensor([3, 3, 4]) tensor([0, 1, 2, 0, 2, 2, 2, 0, 1, 1])
torch.Size([10, 4]) torch.Size([10]) tensor([5, 2, 3]) tensor([0, 0, 0, 0, 2, 2, 0, 1, 2, 1])
torch.Size([10, 4]) torch.Size([10]) tensor([3, 5, 2]) tensor([1, 2, 1, 0, 0, 1, 1, 2, 0, 1])
torch.Size([10, 4]) torch.Size([10]) tensor([5, 3, 2]) tensor([1, 0, 0, 0, 0, 2, 2, 1, 0, 1])
torch.Size([10, 4]) torch.Size([10]) tensor([2, 5, 3]) tensor([0, 2, 1, 2, 1, 1, 1, 1, 0, 2])
torch.Size([10, 4]) torch.Size([10]) tensor([2, 4, 4]) tensor([2, 1, 1, 2, 2, 1, 0, 2, 1, 0])
torch.Size([10, 4]) torch.Size([10]) tensor([5, 1, 4])

In [410]:
len(list(weights)*3)

120

In [None]:
import torch
from torch.utils.data import DataLoader, Dataset, WeightedRandomSampler

# 가상의 데이터셋 클래스 정의
class CustomDataset(Dataset):
    def __init__(self, data, targets):
        self.data = data
        self.targets = targets

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        return self.data[index], self.targets[index]

# 가상의 데이터셋 생성
data = [...]  # 데이터
targets = [...]  # 라벨

# 클래스별 샘플 수 계산
class_counts = torch.bincount(torch.tensor(targets))

# 클래스별 가중치 계산
class_weights = 1. / class_counts.float()

# 샘플링 가중치 생성
example_weights = class_weights[targets]

# WeightedRandomSampler를 사용하여 데이터로더 생성
sampler = WeightedRandomSampler(weights=example_weights, num_samples=len(example_weights))

# 데이터로더 설정
batch_size = 32
dataset = CustomDataset(data, targets)
dataloader = DataLoader(dataset, batch_size=batch_size, sampler=sampler)

# 모델 훈련
for inputs, targets in dataloader:
    # 모델 학습 코드
    pass
