In [None]:
import time
import torch
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset,DataLoader
from sklearn.preprocessing import PolynomialFeatures

In [32]:
# Location of the California Housing CSV.
# NOTE(review): this is an absolute, machine-specific path; point it at your own
# copy of housing.csv before running the notebook on another machine.
HOUSING_CSV = r"C:\Users\nishi\code\DL\California Housing Prices\California Housing Prices\housing.csv\housing.csv"

df = pd.read_csv(HOUSING_CSV)
# A notebook cell only renders its last expression, so a single preview call is
# kept (an earlier `df.tail()` on its own line was evaluated and thrown away).
df.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,NEAR BAY
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,NEAR BAY
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,NEAR BAY
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,NEAR BAY


In [None]:
# --- Feature engineering ------------------------------------------------------
# Turn the categorical `ocean_proximity` column into numeric codes. The encoder
# is built with its defaults, so it infers the categories from the data itself.
enc = OrdinalEncoder()
# NOTE(review): this list is NOT passed to the encoder (no `categories=` argument),
# so it does not influence the codes; it only documents the labels seen in the data.
ocean_proximity=['NEAR BAY', '<1H OCEAN', 'INLAND', 'NEAR OCEAN', 'ISLAND']
df['ocean_proximity']=enc.fit_transform(df[['ocean_proximity']])

# Replace missing values with the mean of their column.
# NOTE(review): the means are computed on the full data set, i.e. before the
# train/test split performed later in the notebook.
df = df.fillna(df.mean())

# Numeric columns that get expanded into degree-2 polynomial/interaction terms
cols_to_use=["longitude","latitude","housing_median_age","total_rooms","total_bedrooms","population","households","median_income"]
poly = PolynomialFeatures(degree=2,include_bias=False)

X_poly=poly.fit_transform(df[cols_to_use])
poly_features=pd.DataFrame(X_poly,columns=poly.get_feature_names_out(cols_to_use),index=df.index)

# Swap the raw columns for their polynomial expansion (which contains the
# degree-1 terms under the same names, so nothing is lost).
expanded=pd.concat((df.drop(columns=cols_to_use),poly_features),axis=1)

# Guard against hidden state: because the degree-1 outputs reuse the input
# names, running this cell a second time on an already-expanded frame would
# silently append a second copy of the higher-order columns. Fail loudly instead.
if not expanded.columns.is_unique:
    repeated=expanded.columns[expanded.columns.duplicated()].unique().tolist()
    raise RuntimeError(
        "Polynomial features were already added to `df` "
        f"(duplicate columns such as {repeated[:3]}); "
        "re-run the data-loading cell before running this cell again."
    )

df=expanded
df.head()

In [34]:
# Target vector: the column the network is trained to predict
y = df['median_house_value'].to_numpy()
# Feature matrix: every remaining column of the frame, as a plain NumPy array
X = df.drop('median_house_value', axis=1).to_numpy()

In [35]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=47,
)

In [36]:
# Standardise features and target with statistics learned from the training
# split only, then turn all four arrays into float32 tensors.
scaler_x = StandardScaler()
scaler_y = StandardScaler()

# Fit on the training features, then apply the same transform to the test features
X_train = scaler_x.fit_transform(X_train)
X_test = scaler_x.transform(X_test)

# The targets are reshaped into single-column 2-D arrays before scaling
y_train = scaler_y.fit_transform(y_train.reshape(-1, 1))
y_test = scaler_y.transform(y_test.reshape(-1, 1))

# NumPy arrays -> float32 torch tensors
X_train, y_train, X_test, y_test = (
    torch.tensor(arr, dtype=torch.float32)
    for arr in (X_train, y_train, X_test, y_test)
)

In [37]:
# Use the first CUDA GPU when one is available, otherwise stay on the CPU
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

# Move the complete train/test tensors onto that device
X_train, y_train, X_test, y_test = (
    tensor.to(device) for tensor in (X_train, y_train, X_test, y_test)
)

In [None]:
# Training batches of 64 (features, targets) pairs, reshuffled on every pass
train_loader = DataLoader(
    TensorDataset(X_train, y_train),
    batch_size=64,
    shuffle=True,
)
# Evaluation batches of 32; shuffling is not enabled for this loader
test_loader = DataLoader(
    TensorDataset(X_test, y_test),
    batch_size=32,
)

In [None]:
class neural(torch.nn.Module):
    """Fully connected regression network with three hidden layers.

    The input is flattened, then passed through three ``Linear`` + ``LeakyReLU``
    stages. The second and third stages are followed by dropout (p=0.3 and
    p=0.2 respectively). A final ``Linear`` layer maps to one output per sample.

    Args:
        num_features: Width of the flattened input.
        num_hidden_1: Number of units in the first hidden layer.
        num_hidden_2: Number of units in the second hidden layer.
        num_hidden_3: Number of units in the third hidden layer.
    """

    def __init__(self, num_features,num_hidden_1,num_hidden_2,num_hidden_3):
        super().__init__()
        nn = torch.nn
        # The order of this list fixes the Sequential indices (and therefore
        # the parameter names inside state_dict), so keep it stable.
        layers = [
            nn.Flatten(),
            # hidden layer 1
            nn.Linear(num_features, num_hidden_1),
            nn.LeakyReLU(),
            # hidden layer 2 (+ dropout)
            nn.Linear(num_hidden_1, num_hidden_2),
            nn.LeakyReLU(),
            nn.Dropout(0.3),
            # hidden layer 3 (+ dropout)
            nn.Linear(num_hidden_2, num_hidden_3),
            nn.LeakyReLU(),
            nn.Dropout(0.2),
            # output layer: one value per sample
            nn.Linear(num_hidden_3, 1),
        ]
        self.my_network = nn.Sequential(*layers)

    def forward(self,x):
        """Run ``x`` through the network and return the predictions."""
        return self.my_network(x)
            

In [40]:
def train(model,num_epochs,train_data,test_data,optimizer,device,log_every=32):
    """Fit ``model`` on ``train_data`` by minimising the mean squared error.

    Args:
        model: Network to optimise; it is switched to training mode at the
            start of every epoch.
        num_epochs: Number of full passes over ``train_data``.
        train_data: Iterable of ``(features, targets)`` batches that also
            supports ``len()`` (used only for the progress message).
        test_data: Accepted for interface compatibility; this function does
            not use it.
        optimizer: Optimiser already bound to ``model``'s parameters.
        device: Device each batch is moved to before the forward pass.
        log_every: Print the batch loss every ``log_every`` batches. A falsy
            value (``0``/``None``) disables the per-batch messages.

    Returns:
        list[float]: The loss of every minibatch, in the order processed
        (all epochs concatenated).
    """
    criterion=torch.nn.MSELoss()
    minibatch_loss=[]
    start_time=time.time()
    for epoch in range(num_epochs):

        model.train()
        # Remember where this epoch's losses start so that the summary below
        # averages only the current epoch, not every batch seen so far.
        epoch_start=len(minibatch_loss)

        for batch_idx, (features,targets) in enumerate(train_data):
            features=features.to(device)
            targets=targets.to(device)

            y_hat=model(features)
            loss=criterion(y_hat,targets)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            minibatch_loss.append(loss.item())

            if log_every and batch_idx % log_every == 0:
                print(f'Epoch: {epoch+1:03d}/{num_epochs:03d} '
                      f'| Batch {batch_idx:04d}/{len(train_data):04d} '
                      f'| Loss: {loss.item():.4f}')

        # Wall-clock time since training started (cumulative across epochs)
        elapsed_time=time.time() - start_time

        epoch_losses=minibatch_loss[epoch_start:]
        # An empty loader yields no losses; report NaN rather than dividing by zero
        avg_loss=sum(epoch_losses)/len(epoch_losses) if epoch_losses else float('nan')
        print(f"{'-'*44}\ncost={avg_loss}\nelapsed_time={elapsed_time:.2f}sec")

    return minibatch_loss

def test(model,test_data,device):
    model.eval()
    test_loss=[]
    cost=torch.nn.MSELoss()
    with torch.no_grad():
        for (features,targets) in test_data:
            features=features.to(device)
            targets=targets.to(device)

            y_hat=model(features)
            loss=cost(y_hat,targets)
            test_loss.append(loss.item())
    avg_loss=torch.tensor(test_loss).mean().item()
    print(f"cost={avg_loss}")
    return test_loss

In [41]:
# Seed PyTorch's global RNG so that weight initialisation, batch shuffling and
# dropout masks are repeatable after "Restart Kernel -> Run All" (the value
# mirrors the random_state used for the train/test split). GPU kernels may still
# introduce small run-to-run differences.
torch.manual_seed(47)

# Three hidden layers of 91 units each; the input width comes from the data
model=neural(num_features=X_train.shape[1],num_hidden_1=91,num_hidden_2=91,num_hidden_3=91)
model=model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# Returns the loss of every minibatch across all 100 epochs
minibatch_loss=train(model=model,num_epochs=100
                     ,train_data=train_loader
                     ,test_data=test_loader,optimizer=optimizer
                     ,device=device)

Epoch: 001/100 | Batch 0000/0516 | Loss: 0.8657
Epoch: 001/100 | Batch 0032/0516 | Loss: 0.5115
Epoch: 001/100 | Batch 0064/0516 | Loss: 0.7169
Epoch: 001/100 | Batch 0096/0516 | Loss: 0.5662
Epoch: 001/100 | Batch 0128/0516 | Loss: 0.3556
Epoch: 001/100 | Batch 0160/0516 | Loss: 0.2563
Epoch: 001/100 | Batch 0192/0516 | Loss: 0.5377
Epoch: 001/100 | Batch 0224/0516 | Loss: 0.3155
Epoch: 001/100 | Batch 0256/0516 | Loss: 0.2593
Epoch: 001/100 | Batch 0288/0516 | Loss: 0.4150
Epoch: 001/100 | Batch 0320/0516 | Loss: 0.2996
Epoch: 001/100 | Batch 0352/0516 | Loss: 0.2743
Epoch: 001/100 | Batch 0384/0516 | Loss: 0.6823
Epoch: 001/100 | Batch 0416/0516 | Loss: 0.1926
Epoch: 001/100 | Batch 0448/0516 | Loss: 0.1724
Epoch: 001/100 | Batch 0480/0516 | Loss: 0.1542
Epoch: 001/100 | Batch 0512/0516 | Loss: 0.5637
--------------------------------------------
cost=0.3847256898880005
elapsed_time=1.71sec
Epoch: 002/100 | Batch 0000/0516 | Loss: 0.2645
Epoch: 002/100 | Batch 0032/0516 | Loss: 0.323

In [42]:
# Print the test-set MSE (in standardised target units) and keep the per-batch losses
test_loss = test(
    model=model,
    test_data=test_loader,
    device=device,
)

cost=0.20802316069602966


In [43]:
# RMSE on the test set in the original target units: predictions and targets are
# mapped back through scaler_y before the error is computed.
model.eval()  # do not rely on an earlier cell having disabled dropout
org_loss=[]   # per-batch MSE in original units
squared_error_sum=0.0
n_samples=0
with torch.no_grad():  # inference only; no autograd graph needed
    for (features,targets) in test_loader:
        targets=targets.to(device)
        features=features.to(device)
        y_hat=model(features)
        y_hat=scaler_y.inverse_transform(y_hat.cpu().numpy().reshape(-1,1))
        # Named y_true so the global target array `y` is not overwritten
        y_true=scaler_y.inverse_transform(targets.cpu().numpy().reshape(-1,1))
        squared_error=(y_hat-y_true)**2
        org_loss.append(squared_error.mean())
        # Accumulate in float64 and weight by batch size so that a smaller
        # final batch does not bias the overall mean.
        squared_error_sum+=float(squared_error.sum(dtype=np.float64))
        n_samples+=squared_error.size
loss=squared_error_sum/n_samples if n_samples else float('nan')
rms_loss=np.sqrt(loss)
print(f"RMSE={rms_loss}")

RMSE=52475.18359375
