In [44]:
import torch
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
import pandas as pd
import sklearn
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
import torch.nn as nn
import torch.nn.functional as F

print(f"PyTorch version: {torch.__version__}")

# Report whether this PyTorch build includes the MPS backend (Apple's Metal
# GPU path) and whether it can actually be used on the current machine.
print(f"Is MPS (Metal Performance Shader) built? {torch.backends.mps.is_built()}")
print(f"Is MPS available? {torch.backends.mps.is_available()}")

# Select the MPS device when it is usable; otherwise stay on the CPU.
if torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using device: {device}")

PyTorch version: 2.1.2
Is MPS (Metal Performance Shader) built? True
Is MPS available? True
Using device: mps


# 读取&处理数据集

In [45]:
# Load the housing table from the CSV file, resolved relative to the working directory.
df = pd.read_csv(filepath_or_buffer="data/Housing.csv")

In [46]:
# Columns whose values are replaced in place by integer codes.
categorical_columns = [
    'mainroad', 'guestroom', 'basement', 'hotwaterheating',
    'airconditioning', 'prefarea', 'furnishingstatus',
]

# One independent encoder per column, stored in label_encoders under the
# column name so each fitted mapping stays available after encoding.
label_encoders = {name: LabelEncoder() for name in categorical_columns}
for name, encoder in label_encoders.items():
    df[name] = encoder.fit_transform(df[name])

In [47]:
# Numeric feature columns that get standardised; 'price' is left untouched.
numeric_columns = ['area', 'bedrooms', 'bathrooms', 'stories', 'parking']

# NOTE(review): the scaler is fitted on the whole table, before the
# train/test split performed later in the notebook, so statistics of the
# held-out rows influence the training features. Consider fitting on the
# training rows only.
scaler = StandardScaler()
df[numeric_columns] = scaler.fit_transform(df[numeric_columns])

print(df)

        price      area  bedrooms  bathrooms   stories  mainroad  guestroom  \
0    13300000  1.046726  1.403419   1.421812  1.378217         1          0   
1    12250000  1.757010  1.403419   5.405809  2.532024         1          0   
2    12250000  2.218232  0.047278   1.421812  0.224410         1          0   
3    12215000  1.083624  1.403419   1.421812  0.224410         1          0   
4    11410000  1.046726  1.403419  -0.570187  0.224410         1          1   
..        ...       ...       ...        ...       ...       ...        ...   
540   1820000 -0.991879 -1.308863  -0.570187 -0.929397         1          0   
541   1767150 -1.268613  0.047278  -0.570187 -0.929397         0          0   
542   1750000 -0.705921 -1.308863  -0.570187 -0.929397         1          0   
543   1750000 -1.033389  0.047278  -0.570187 -0.929397         0          0   
544   1750000 -0.599839  0.047278  -0.570187  0.224410         1          0   

     basement  hotwaterheating  airconditioning   p

In [48]:
# Separate the regression target ('price') from the predictor columns.
y = df['price']
X = df.drop(columns=['price'])

# Preview the first rows of each piece.
print(f"X: \n {X.head()}")
print("________________________")
print(f"y: \n {y.head()}")

X: 
        area  bedrooms  bathrooms   stories  mainroad  guestroom  basement  \
0  1.046726  1.403419   1.421812  1.378217         1          0         0   
1  1.757010  1.403419   5.405809  2.532024         1          0         0   
2  2.218232  0.047278   1.421812  0.224410         1          0         1   
3  1.083624  1.403419   1.421812  0.224410         1          0         1   
4  1.046726  1.403419  -0.570187  0.224410         1          1         1   

   hotwaterheating  airconditioning   parking  prefarea  furnishingstatus  
0                0                1  1.517692         1                 0  
1                0                1  2.679409         0                 0  
2                0                0  1.517692         1                 1  
3                0                1  2.679409         1                 0  
4                0                1  1.517692         0                 0  
________________________
y: 
 0    13300000
1    12250000
2    12250000
3   

In [49]:
# Hold out 20% of the rows for evaluation. The fixed seed makes the split --
# and every metric computed from it -- reproducible across kernel restarts;
# without it each run trains and evaluates on a different partition.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SPLIT_SEED
)

# Materialise each split as a float32 tensor directly on the selected device.
# NOTE(review): the target tensors are 1-D, shape (n_samples,). If the model
# emits (batch, 1) predictions the loss would broadcast -- confirm against
# the model/training cells.
X_train_tensor = torch.tensor(X_train.to_numpy(), dtype=torch.float32, device=device)
X_test_tensor = torch.tensor(X_test.to_numpy(), dtype=torch.float32, device=device)
y_train_tensor = torch.tensor(y_train.to_numpy(), dtype=torch.float32, device=device)
y_test_tensor = torch.tensor(y_test.to_numpy(), dtype=torch.float32, device=device)

In [50]:
# Pair each feature tensor with its target tensor so that indexing a dataset
# yields a (features, target) tuple for the same row.
train_data, test_data = (
    TensorDataset(X_train_tensor, y_train_tensor),
    TensorDataset(X_test_tensor, y_test_tensor),
)

In [51]:
# Mini-batch size shared by both loaders.
batch_size = 32

# Training batches are reshuffled on every pass; test batches keep dataset order.
train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=False)