In [25]:
import joblib
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import mean_squared_error
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
import torch
from torch import nn
from torch.optim import Adam

In [26]:
df = pd.read_csv('housing.csv')

In [27]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   longitude           20640 non-null  float64
 1   latitude            20640 non-null  float64
 2   housing_median_age  20640 non-null  float64
 3   total_rooms         20640 non-null  float64
 4   total_bedrooms      20433 non-null  float64
 5   population          20640 non-null  float64
 6   households          20640 non-null  float64
 7   median_income       20640 non-null  float64
 8   median_house_value  20640 non-null  float64
 9   ocean_proximity     20640 non-null  object 
dtypes: float64(9), object(1)
memory usage: 1.6+ MB


In [28]:
df['total_bedrooms'] = df['total_bedrooms'].fillna(df['total_bedrooms'].mean())

In [29]:
answer = 'median_house_value'
x = df.drop(answer, axis=1)
y = df[answer]

In [30]:
sc = StandardScaler()
oe = OneHotEncoder()

In [31]:
numeric = df.select_dtypes(include='float64').columns
categorical = df.select_dtypes(include='object').columns
# print(numeric)
# print(categorical)

In [32]:
num_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='median')),   # bosh joylar to'ldirilmoqda
        ('num', sc)
    ]
)

cat_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='most_frequent')),  # bosh joylar to'ldirilmoqda
        ('cut', oe)
    ]
)

In [33]:
preprocessor = ColumnTransformer(
    transformers=[
        ('num', num_transformer, numeric.drop('median_house_value')), # num_transformer bilan cat_transformer chaqirib oldik
        ('cat', cat_transformer, categorical),
    ]
)

In [34]:
x_processed = preprocessor.fit_transform(x) # preprocessor chaqirib oldik
y_processed = np.log1p(y) # ---> ln yordamida --> katta sonni oralig'ini kichikroq oraliqga olib kelish

In [35]:
x_train, x_test, y_train, y_test, = train_test_split(x_processed,y_processed, test_size=0.25, random_state=42)

In [36]:
x_train_tensor = torch.FloatTensor(x_train) # Torch tushunadigan formatga otkazish
x_test_tensor = torch.FloatTensor(x_test) # Torch tushunadigan formatga otkazish
y_train_tensor = torch.FloatTensor(y_train.values).reshape(-1,1) # matritsa korinishiga olib kelindi
y_test_tensor = torch.FloatTensor(y_test.values).reshape(-1,1) # matritsa korinishiga olib kelindi

In [37]:
print(f"{x_train_tensor}")
print(f"{x_test_tensor}")
print(f"{y_train_tensor}")
print(f"{y_test_tensor}")

tensor([[ 0.7236, -0.8530,  0.6643,  ...,  0.0000,  0.0000,  0.0000],
        [-1.2031,  0.7155,  1.0616,  ...,  0.0000,  0.0000,  0.0000],
        [-1.5574,  1.3194,  0.5849,  ...,  0.0000,  0.0000,  0.0000],
        ...,
        [ 0.5938, -0.7500,  0.5849,  ...,  0.0000,  0.0000,  0.0000],
        [-1.1931,  0.9121, -1.0838,  ...,  0.0000,  0.0000,  0.0000],
        [-1.4227,  1.0010,  1.8562,  ...,  0.0000,  1.0000,  0.0000]])
tensor([[ 0.2794,  0.2004, -0.2892,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0548, -0.2303,  0.1081,  ...,  0.0000,  0.0000,  0.0000],
        [-1.4327,  1.0151,  1.8562,  ...,  0.0000,  1.0000,  0.0000],
        ...,
        [ 0.6537, -0.7547,  0.6643,  ...,  0.0000,  0.0000,  0.0000],
        [-0.7439,  1.3475, -1.7989,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.5838, -0.8155,  0.5849,  ...,  0.0000,  0.0000,  0.0000]])
tensor([[12.3393],
        [12.5422],
        [12.1172],
        ...,
        [12.3109],
        [12.5550],
        [12.6916]])
tens

In [39]:
# birinchi qavatga nechta neyron bolishini bilish uchun datasetda nechta ustun bolsa shuncha neyron yaratamiz
class HousePrice(nn.Module):
  def __init__(self, input_size):
    super().__init__()
    self.layer1 = nn.Linear(input_size,16)
    self.layer2 = nn.Linear(16,8)
    self.layer3 = nn.Linear(8,4)
    self.layer4 = nn.Linear(4,2)
    self.layer5 = nn.Linear(2,1)
    self.dropout = nn.Dropout(0.2) # har gal 20 % neyronlar ochirib qoyildi --> natija yaxshi chiqmagungacha shu jarayon davom etaveradi\\\//// maximum (0.5) neyronlarni ochirib qoysa boladi
  def forward(self, x):
    x = torch.relu(self.layer1(x))
    x = self.dropout(x)
    x = torch.relu(self.layer2(x))
    x = self.dropout(x)
    x = torch.relu(self.layer3(x))
    x = self.dropout(x)
    x = torch.relu(self.layer4(x))
    x = self.dropout(x)
    x = self.layer5(x)
    return x

input_size = x_train_tensor.shape[1]
model = HousePrice(input_size)
criterion = nn.MSELoss()
optimizer = Adam(model.parameters(), lr=0.001) # learning_rate= orgatish tezligi # misol agar self study qilib nimanidur organsak kop vaqt ketib qoldai lekin ustoz bilan shu jarayonni teshlashtirsa boladi--> Adam optimizer xatolar kelganda toxtab qolmalikni taminlab beradi
epoch = 200 # mse ozgarmay qolguncha epoch ni kopaytirishni davom ettiramiz
batch_size = 50 # bittada necha uyni berib yuboramiza uy dan tashqari boshqa ma'lumot bolishi mumkin
train_losses = []
test_losses = []

for i in range(epoch):
  model.train()  # modelni train rejimiga qoydim
  epoch_loss = 0
  for j in range(0, len(x_train_tensor), batch_size):
    batch_x = x_train_tensor[j:j+batch_size] # (0,49)--(50,99)--(100,149)
    batch_y = y_train_tensor[j:j+batch_size]
    outputs = model(batch_x)
    loss = criterion(outputs,batch_y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    epoch_loss += loss.item()
  avg_loss = epoch_loss/len(x_train_tensor/batch_size)
  train_losses.append(avg_loss)

  model.eval()
  with torch.no_grad():
    test_outputs = model(x_test_tensor)
    test_loss = criterion(test_outputs,y_test_tensor)
    test_losses.append(test_loss.item())

  if (i + 1) % 20 == 0:
    print(f"Epoch {i+1}/{epoch} Train Loss: {avg_loss}. Test loss: {test_loss.item()}")


Epoch 20/200 Train Loss: 0.23643426859717653. Test loss: 2.5799968242645264
Epoch 40/200 Train Loss: 0.03316731228076827. Test loss: 0.38469111919403076
Epoch 60/200 Train Loss: 0.004244377712408701. Test loss: 0.15509995818138123
Epoch 80/200 Train Loss: 0.004100237168294669. Test loss: 0.156645268201828
Epoch 100/200 Train Loss: 0.004133235558732689. Test loss: 0.1566845029592514
Epoch 120/200 Train Loss: 0.004103968861375668. Test loss: 0.15572306513786316
Epoch 140/200 Train Loss: 0.004103527278747669. Test loss: 0.1582082360982895
Epoch 160/200 Train Loss: 0.00413994262529205. Test loss: 0.1549810767173767
Epoch 180/200 Train Loss: 0.004159680617256066. Test loss: 0.15499252080917358
Epoch 200/200 Train Loss: 0.004146402841794861. Test loss: 0.15549080073833466


In [40]:
model.eval()
with torch.no_grad():
  test_outputs = model(x_test_tensor)
  test_loss = criterion(test_outputs,y_test_tensor)
  test_mse = mean_squared_error(y_test_tensor.numpy(), test_outputs.numpy())
  print(f"Test mse: {np.sqrt(test_mse)}")

Test mse: 0.3943232186142919


In [41]:
new_house = pd.DataFrame({
    'longitude':[-122.23]  ,
    'latitude':[37.86],
    'housing_median_age':[1],
    'total_rooms':[1000],
    'total_bedrooms':[200],
    'population':[100],
    'households':[400],
    'median_income':[2],
    'ocean_proximity':['NEAR BAY']
})

In [43]:
new_house_transformed = preprocessor.transform(new_house)
new_house_tensor = torch.FloatTensor(new_house_transformed)
with torch.no_grad():
  predicted_price = model(new_house_tensor)
  print(f"New house price: {np.expm1(predicted_price.item())}")

New house price: 142935.7125675077
