In [27]:
# Load a dataset from the internet
import pandas as pd
import numpy as np
import os
import urllib.request
import zipfile

url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00275/Bike-Sharing-Dataset.zip'
filename = 'Bike-Sharing-Dataset.zip'

# Only hit the network when the extracted CSV is missing, so re-running this
# cell (or Restart & Run All) is cheap and keeps working offline once cached.
if not os.path.exists('day.csv'):
    urllib.request.urlretrieve(url, filename)
    try:
        with zipfile.ZipFile(filename, 'r') as zip_ref:
            zip_ref.extractall('.')
    finally:
        # Remove the archive even when extraction fails so no stale zip lingers.
        os.remove(filename)

# Drop 'instant' and 'dteday'; the streaming loaders further down parse every
# remaining field with float().
data = pd.read_csv('day.csv').drop(columns=['instant', 'dteday'])
print(data.shape)

# Persist the reduced table for the IterableDataset cells.
data.to_csv('night.csv', index=False)

# Keep the preview as the last expression so the notebook actually renders it.
data.head()

(731, 14)


Unnamed: 0,season,yr,mnth,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,0,1,0,6,0,2,0.344167,0.363625,0.805833,0.160446,331,654,985
1,1,0,1,0,0,0,2,0.363478,0.353739,0.696087,0.248539,131,670,801
2,1,0,1,0,1,1,1,0.196364,0.189405,0.437273,0.248309,120,1229,1349
3,1,0,1,0,2,1,1,0.2,0.212122,0.590435,0.160296,108,1454,1562
4,1,0,1,0,3,1,1,0.226957,0.22927,0.436957,0.1869,82,1518,1600


In [48]:
# Write the in-memory frame to 'night.csv' without the index column.
# Requires `data` from the loading cell to already exist in the kernel.
data.to_csv('night.csv', index=False)

In [46]:

# Use a map-style Dataset (random access via __getitem__) to load the data
import torch
from torch.utils.data import Dataset 
import pandas as pd
from torch.utils.data import IterableDataset, Dataset

# data = pd.read_csv('day.csv')
# print(data.shape)
class BikeSharingDataset(Dataset):
    """Map-style dataset that serves each CSV row as a float32 tensor.

    The whole CSV is loaded into a pandas DataFrame on construction; rows
    are converted to tensors lazily on access.

    Args:
        csv_file: Path to the CSV file to read.
    """

    def __init__(self, csv_file):
        # errors='ignore' lets the same class read files in which these two
        # columns were already removed (e.g. the 'night.csv' export).
        self.data = pd.read_csv(csv_file).drop(
            columns=['instant', 'dteday'], errors='ignore'
        )

    def __len__(self):
        """Return the number of rows in the loaded table."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return row ``idx`` as a 1-D float32 tensor."""
        row = self.data.iloc[idx]
        return torch.tensor(row.to_numpy(), dtype=torch.float32)

    def __iter__(self):
        """Yield every row as a tensor, in positional order.

        Iterating the DataFrame directly would yield column *names*, so the
        rows are produced through ``__getitem__`` instead.
        """
        for idx in range(len(self)):
            yield self[idx]

# Load the dataset and sanity-check its length and first row
dataset = BikeSharingDataset('day.csv')
print(f"Dataset length: {len(dataset)}")
print(f"Example data point: {dataset[0]}")  
# Print the shape of each batch; shuffle=False keeps the file's row order
from torch.utils.data import DataLoader
train_ld = DataLoader(dataset, batch_size=32, shuffle=False)
for idx, batch_data in enumerate(train_ld):
    print(f"Batch {idx}:", batch_data.shape)


Dataset length: 731
Example data point: tensor([1.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00, 6.0000e+00, 0.0000e+00,
        2.0000e+00, 3.4417e-01, 3.6362e-01, 8.0583e-01, 1.6045e-01, 3.3100e+02,
        6.5400e+02, 9.8500e+02])
Batch 0: torch.Size([32, 14])
Batch 1: torch.Size([32, 14])
Batch 2: torch.Size([32, 14])
Batch 3: torch.Size([32, 14])
Batch 4: torch.Size([32, 14])
Batch 5: torch.Size([32, 14])
Batch 6: torch.Size([32, 14])
Batch 7: torch.Size([32, 14])
Batch 8: torch.Size([32, 14])
Batch 9: torch.Size([32, 14])
Batch 10: torch.Size([32, 14])
Batch 11: torch.Size([32, 14])
Batch 12: torch.Size([32, 14])
Batch 13: torch.Size([32, 14])
Batch 14: torch.Size([32, 14])
Batch 15: torch.Size([32, 14])
Batch 16: torch.Size([32, 14])
Batch 17: torch.Size([32, 14])
Batch 18: torch.Size([32, 14])
Batch 19: torch.Size([32, 14])
Batch 20: torch.Size([32, 14])
Batch 21: torch.Size([32, 14])
Batch 22: torch.Size([27, 14])


In [50]:
import torch
from torch.utils.data import IterableDataset
import csv

class BikeSharingIterableDataset(IterableDataset):
    """Stream the rows of a CSV file as float32 tensors, one row at a time.

    The file is re-opened on every iteration, so the dataset can be iterated
    repeatedly (e.g. once per epoch).

    Args:
        csv_file: Path to a CSV file whose first line is a header and whose
            remaining fields can all be parsed by ``float()``.
    """

    def __init__(self, csv_file):
        self.csv_file = csv_file

    def parse_line(self, line):
        """Convert one parsed CSV row (a list of strings) into a tensor.

        Raises:
            ValueError: If any field cannot be parsed as a float.
        """
        return torch.tensor([float(x) for x in line], dtype=torch.float32)

    def __iter__(self):
        """Yield one tensor per non-empty data row.

        Written as a generator so the ``with`` block closes the file as soon
        as iteration finishes or the consumer abandons the iterator.
        """
        with open(self.csv_file, 'r', newline='') as file:
            reader = csv.reader(file)
            # Skip the header row; the default keeps an empty file from
            # raising StopIteration here.
            next(reader, None)
            for line in reader:
                # csv.reader returns [] for blank lines; skip them so they
                # do not become zero-length tensors.
                if line:
                    yield self.parse_line(line)

# Usage example: stream 'night.csv' in batches of 32 and print each batch shape
dataset = BikeSharingIterableDataset('night.csv')
loader = torch.utils.data.DataLoader(dataset, batch_size=32)

for batch in loader:
    print(batch.shape)


torch.Size([32, 14])
torch.Size([32, 14])
torch.Size([32, 14])
torch.Size([32, 14])
torch.Size([32, 14])
torch.Size([32, 14])
torch.Size([32, 14])
torch.Size([32, 14])
torch.Size([32, 14])
torch.Size([32, 14])
torch.Size([32, 14])
torch.Size([32, 14])
torch.Size([32, 14])
torch.Size([32, 14])
torch.Size([32, 14])
torch.Size([32, 14])
torch.Size([32, 14])
torch.Size([32, 14])
torch.Size([32, 14])
torch.Size([32, 14])
torch.Size([32, 14])
torch.Size([32, 14])
torch.Size([27, 14])


In [1]:
import torch
from torch.utils.data import IterableDataset, DataLoader
import csv

class BikeSharingIterableDataset(IterableDataset):
    """Stream a slice of a CSV file as float32 tensors, sharded across workers.

    Row indices are 0-based and count *data* rows, i.e. the header line (when
    ``has_header`` is true) is not counted. With several DataLoader workers,
    each worker reads one contiguous chunk of ``[start_line, end_line)``; the
    last worker also takes the remainder.

    Args:
        csv_file: Path to the CSV file; every data field must parse as float.
        start_line: Index of the first data row to yield. ``None`` means 0.
        end_line: Index one past the last data row to yield. ``None`` means
            "until end of file".
        has_header: Whether the first line is a header to skip.

    NOTE(review): a class with the same name is defined in an earlier cell;
    whichever cell ran last wins — confirm which one downstream cells expect.
    """

    def __init__(self, csv_file, start_line=None, end_line=None, has_header=True):
        self.csv_file = csv_file
        self.start_line = start_line
        self.end_line = end_line
        self.has_header = has_header

    def parse_line(self, line):
        """Convert one parsed CSV row (a list of strings) into a tensor."""
        return torch.tensor([float(x) for x in line], dtype=torch.float32)

    @staticmethod
    def _worker_bounds(start, end, worker_id, num_workers):
        """Return the ``(start, end)`` row range owned by one worker."""
        per_worker = max(end - start, 0) // num_workers
        lo = start + worker_id * per_worker
        # The last worker absorbs the rows left over by the integer division.
        hi = end if worker_id == num_workers - 1 else lo + per_worker
        return lo, hi

    def _data_rows(self, file):
        """Return an iterator over non-empty data rows of an open file."""
        reader = csv.reader(file)
        if self.has_header:
            # Default avoids StopIteration on an empty file.
            next(reader, None)
        return (row for row in reader if row)

    def _count_rows(self):
        """Count data rows with one pass over the file."""
        with open(self.csv_file, 'r', newline='') as file:
            return sum(1 for _ in self._data_rows(file))

    def __iter__(self):
        """Yield the tensors for this process's share of the row range."""
        start = 0 if self.start_line is None else self.start_line
        end = self.end_line

        worker_info = torch.utils.data.get_worker_info()
        if worker_info is not None:
            # Splitting needs a concrete upper bound; derive it if not given.
            if end is None:
                end = self._count_rows()
            start, end = self._worker_bounds(
                start, end, worker_info.id, worker_info.num_workers
            )

        # The context manager closes the file even if the consumer stops early.
        with open(self.csv_file, 'r', newline='') as file:
            for index, row in enumerate(self._data_rows(file)):
                if end is not None and index >= end:
                    break
                if index >= start:
                    yield self.parse_line(row)

# Create the dataset
# NOTE(review): 731 is hard-coded; it matches the row count printed by the
# loading cell, (731, 14) — confirm it is meant as the number of data rows.
dataset = BikeSharingIterableDataset('night.csv', start_line=0, end_line=731)

# DataLoader with multiple workers
# NOTE(review): the saved output of this cell is a RuntimeError ("DataLoader
# worker ... exited unexpectedly"). The cause is not visible here; presumably
# it is related to worker start-up from a notebook process. Try num_workers=0
# to isolate — TODO confirm.
loader = DataLoader(dataset, batch_size=32, num_workers=4, shuffle=False)

for batch in loader:
    print(batch)
    break  # Display only the first batch to limit output


RuntimeError: DataLoader worker (pid(s) 4800, 8920, 17744, 13452) exited unexpectedly

In [53]:
import torch
import torch.nn as nn
import torch.optim as optim

class SimpleNet(nn.Module):
    """Small fully connected regressor: Linear -> ReLU -> Linear.

    Args:
        num_features: Size of the last dimension of the input tensor.
    """

    def __init__(self, num_features):
        super().__init__()
        hidden_units = 50
        # Input projection: num_features -> 50
        self.fc1 = nn.Linear(in_features=num_features, out_features=hidden_units)
        self.relu = nn.ReLU()
        # Output head: 50 -> 1
        self.fc2 = nn.Linear(in_features=hidden_units, out_features=1)

    def forward(self, x):
        """Map ``x`` of shape (..., num_features) to predictions of shape (..., 1)."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

# Instantiate the model
# 13 input features: the training loop below feeds every column except the
# last one, and the loading cell printed a 14-column table.
# NOTE(review): in the preview rows, casual + registered equals cnt (the last
# column), so the inputs may leak the target — confirm this is intended.
num_features = 13
model = SimpleNet(num_features)


In [58]:
# Optimizer and loss function for the `model` created in the previous cell
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()

# Training procedure
def train_model(model, data_loader, criterion, optimizer, num_epochs=5):
    """Train ``model`` in place and print the loss every 10th batch.

    Args:
        model: Module called on the feature columns of each batch; expected
            to return one prediction per row, shaped (batch, 1).
        data_loader: Iterable of 2-D tensors shaped (batch, columns). The
            last column is used as the target, all others as inputs.
        criterion: Loss callable taking ``(predictions, targets)``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        num_epochs: Number of full passes over ``data_loader``.

    Returns:
        None.
    """
    model.train()
    for epoch in range(num_epochs):
        for batch_idx, data in enumerate(data_loader):
            # The last column is assumed to be the target value.
            inputs = data[:, :-1]
            targets = data[:, -1]

            # Forward pass
            outputs = model(inputs)
            # Squeeze only the trailing axis: a bare squeeze() would collapse
            # a batch of size 1 to a 0-dim tensor that no longer matches the
            # (1,)-shaped targets.
            loss = criterion(outputs.squeeze(-1), targets)

            # Backward pass and parameter update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if batch_idx % 10 == 0:
                print(f"Epoch [{epoch+1}/{num_epochs}], Step [{batch_idx+1}], Loss: {loss.item():.4f}")

# Data loader
# NOTE(review): two classes named BikeSharingIterableDataset are defined in
# this notebook; this call uses whichever definition was executed last —
# confirm the intended one.
dataset = BikeSharingIterableDataset('night.csv')
loader = torch.utils.data.DataLoader(dataset, batch_size=64, shuffle=False)

# Start training
train_model(model, loader, criterion, optimizer, num_epochs=50)


Epoch [1/50], Step [1], Loss: 2.7902
Epoch [1/50], Step [11], Loss: 165.5290
Epoch [2/50], Step [1], Loss: 8.9038
Epoch [2/50], Step [11], Loss: 5.1525
Epoch [3/50], Step [1], Loss: 0.3842
Epoch [3/50], Step [11], Loss: 4.4250
Epoch [4/50], Step [1], Loss: 2.8230
Epoch [4/50], Step [11], Loss: 3.8881
Epoch [5/50], Step [1], Loss: 0.3350
Epoch [5/50], Step [11], Loss: 3.3226
Epoch [6/50], Step [1], Loss: 9.7008
Epoch [6/50], Step [11], Loss: 1.3690
Epoch [7/50], Step [1], Loss: 4.4954
Epoch [7/50], Step [11], Loss: 0.7048
Epoch [8/50], Step [1], Loss: 21.3020
Epoch [8/50], Step [11], Loss: 1.5055
Epoch [9/50], Step [1], Loss: 9.5335
Epoch [9/50], Step [11], Loss: 45.8951
Epoch [10/50], Step [1], Loss: 32.1552
Epoch [10/50], Step [11], Loss: 327.5278
Epoch [11/50], Step [1], Loss: 5.1768
Epoch [11/50], Step [11], Loss: 547.6213
Epoch [12/50], Step [1], Loss: 0.4834
Epoch [12/50], Step [11], Loss: 63.5702
Epoch [13/50], Step [1], Loss: 11.2810
Epoch [13/50], Step [11], Loss: 13.8012
Epoch