In [755]:
import torch.utils.data as data
from sklearn.preprocessing import StandardScaler
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import matplotlib.pyplot as plt

In [756]:
CHEAP = 100000
EXPENSIVE = 350000
SEED = 200
LEARN_RATE = 0.001
NUM_EPOCHS = 10
torch.manual_seed(SEED)

<torch._C.Generator at 0x121d57650>

In [757]:
df = pd.read_csv("train_data.csv", sep=',')

categorical_fields = [
    'YearBuilt', 'Floor', 'HallwayType', 'HeatingType',
    'AptManageType', 'TimeToBusStop', 'TimeToSubway', 'SubwayStation',
]

for field in categorical_fields:
    dm = pd.get_dummies(df[field], prefix=field)
    df = pd.concat([df, dm], axis=1)
df.drop(categorical_fields, axis=1, inplace=True)
df['SalePrice'].mask(df['SalePrice'] <= CHEAP, 0, inplace=True)
df['SalePrice'].mask((df['SalePrice'] > CHEAP) & (df['SalePrice'] <= EXPENSIVE), 1, inplace=True)
df['SalePrice'].mask(df['SalePrice'] > EXPENSIVE, 2, inplace=True)


In [758]:
train = df.sample(frac=0.8,random_state=SEED)
test = df.drop(train.index)

In [759]:
print(f"cheap count: {train[train['SalePrice'] == 0]['SalePrice'].count()}")
print(f"average count: {train[train['SalePrice'] == 1]['SalePrice'].count()}")
print(f"expensive count: {train[train['SalePrice'] == 2]['SalePrice'].count()}")

cheap count: 451
average count: 2417
expensive count: 431


Usuniecie niektorych danych zeby nie bylo tak niezbalansowane:

In [760]:
train.drop(train[train['SalePrice'] == 1].sample(frac=.8, random_state=SEED).index, inplace=True)

In [761]:
print(f"cheap count: {train[train['SalePrice'] == 0]['SalePrice'].count()}")
print(f"average count: {train[train['SalePrice'] == 1]['SalePrice'].count()}")
print(f"expensive count: {train[train['SalePrice'] == 2]['SalePrice'].count()}")

cheap count: 451
average count: 483
expensive count: 431


In [762]:
x_train = train.drop('SalePrice', axis=1).to_numpy().astype(np.float32)
y_train = train['SalePrice'].to_numpy().astype(np.float32)

x_test = test.drop('SalePrice', axis=1).to_numpy().astype(np.float32)
y_test = test['SalePrice'].to_numpy().astype(np.float32)

In [763]:
train_dataset = data.TensorDataset(torch.from_numpy(x_train), torch.from_numpy(y_train))
test_dataset = data.TensorDataset(torch.from_numpy(x_test), torch.from_numpy(y_test))

In [764]:
class Model(nn.Module):
    def __init__(self, num_inputs, num_outputs):
        super().__init__()
        self.linear1 = nn.Linear(num_inputs, 512)
        self.linear2 = nn.Linear(512, 256)
        self.linear3 = nn.Linear(256, num_outputs)
        self.tanh = nn.Tanh()
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.linear1(x)
        x = self.tanh(x)
        x = self.linear2(x)
        x = self.tanh(x)
        x = self.linear3(x)
        return x

In [765]:
model = Model(90, 3)
model.to("cpu")
optimizer = torch.optim.Adam(model.parameters(), lr=LEARN_RATE)
loss_fn = nn.CrossEntropyLoss()
train_data_loader = data.DataLoader(train_dataset, batch_size=32, shuffle=True)
test_data_loader = data.DataLoader(test_dataset, batch_size=32, shuffle=True)

In [766]:
model.train()

for epoch in range(NUM_EPOCHS):
    for data_inputs, data_labels in train_data_loader:
        predictions = model(data_inputs).squeeze(dim=1)
        loss = loss_fn(predictions, data_labels.long())

        optimizer.zero_grad()
        loss.backward()

        optimizer.step()
    print(f"Epoch: {epoch}, loss: {loss.item():.3}")

Epoch: 0, loss: 0.974
Epoch: 1, loss: 0.399
Epoch: 2, loss: 0.32
Epoch: 3, loss: 0.27
Epoch: 4, loss: 0.294
Epoch: 5, loss: 0.298
Epoch: 6, loss: 0.478
Epoch: 7, loss: 0.307
Epoch: 8, loss: 0.367
Epoch: 9, loss: 0.402


In [767]:
model.eval()

true_predictions, num_predictions = 0., 0.

with torch.no_grad():
    for data_inputs, data_labels in test_data_loader:

        predictions = model(data_inputs)
        prediction_labels = torch.max(predictions.data, 1).indices
        true_predictions += (prediction_labels == data_labels).sum()
        num_predictions += data_labels.shape[0]
acc = true_predictions / num_predictions
print(f"Accuracy of the model: {100.0*acc:4.2f}%")

Accuracy of the model: 82.91%
