### Importing Libraries

In [24]:
from torch import nn, Tensor
import torch.optim as optim
from typing import Tuple, Optional, Callable, Dict
import pandas as pd
import torch

### Model Building

* Abract Classes

In [19]:
class PyTorchLayer(nn.Module):
    def __init__(self) -> None:
        super().__init__()
    def forward(self, x: Tensor,
        inference: bool = False) -> Tensor:
        raise NotImplementedError()

In [20]:
class PyTorchModel(nn.Module):
    def __init__(self) -> None:
        super().__init__()
    def forward(self, x: Tensor,
        inference: bool = False) -> Tensor:
        raise NotImplementedError()

In [21]:
def inference_mode(m: nn.Module):
    m.eval()

* Dense Layer Class

In [22]:
class DenseLayer(PyTorchLayer):
    def __init__(self,
        input_size: int,
        neurons: int,
        dropout: float = 1.0,
        activation: nn.Module = None) -> None:
        super().__init__()
        self.linear = nn.Linear(input_size, neurons)
        self.activation = activation
        if dropout < 1.0:
            self.dropout = nn.Dropout(1 - dropout)
    def forward(self, x: Tensor,
        inference: bool = False) -> Tensor:
        if inference:
            self.apply(inference_mode)
        x = self.linear(x) # does weight multiplication + bias
        if self.activation:
            x = self.activation(x)
        if hasattr(self, "dropout"):
            x = self.dropout(x)
        return x

* Model

In [23]:
class HousePricesModel(PyTorchModel):
    def __init__(self,
                 hidden_size: int = 13,
                 hidden_dropout: float = 1.0):
        super().__init__()
        self.dense1 = DenseLayer(13, hidden_size,
                                 activation=nn.LeakyReLU(),
                                 dropout = hidden_dropout)
        self.dense2 = DenseLayer(hidden_size, 1)
    def forward(self, x: Tensor) -> Tensor:
        assert x.shape[1] == 13
        #print (x.shape)
        x = self.dense1(x)
        #print (x.shape)
        #x = torch.relu(x)
        x=self.dense2(x)
        #print (x.shape)
        return x

In [16]:
pytorch_boston_model = HousePricesModel(hidden_size=13)

In [17]:
pytorch_boston_model.eval()

HousePricesModel(
  (dense1): DenseLayer(
    (linear): Linear(in_features=13, out_features=13, bias=True)
    (activation): LeakyReLU(negative_slope=0.01)
  )
  (dense2): DenseLayer(
    (linear): Linear(in_features=13, out_features=1, bias=True)
  )
)

In [8]:
for param in pytorch_boston_model.parameters():
    print(param)

Parameter containing:
tensor([[-0.1161,  0.2549, -0.2232,  0.1615,  0.1684, -0.2127, -0.2572,  0.2313,
         -0.1788,  0.2583, -0.2528,  0.1632, -0.2724],
        [-0.0869,  0.0140, -0.1359, -0.2326,  0.0968,  0.0863, -0.0754, -0.1270,
          0.0500,  0.0160, -0.0395, -0.1867, -0.1331],
        [ 0.2119,  0.1605,  0.0913,  0.1344, -0.1211,  0.2240,  0.0979, -0.0859,
          0.2736,  0.1194,  0.0536, -0.0279,  0.1028],
        [-0.0339, -0.1601, -0.1766, -0.0478, -0.0552, -0.0589,  0.0508,  0.1920,
         -0.2404, -0.0229, -0.0023,  0.0919,  0.1200],
        [ 0.0732,  0.2084, -0.0282,  0.2294,  0.1782, -0.1331,  0.2262,  0.2004,
         -0.2204,  0.1153,  0.0552,  0.1988,  0.0746],
        [-0.0332,  0.0297, -0.2716, -0.1184,  0.2442,  0.2214, -0.1933, -0.0117,
          0.0275, -0.2170,  0.2670, -0.0357,  0.0356],
        [ 0.0071,  0.1674,  0.1698,  0.1024,  0.1326, -0.2241, -0.2290, -0.2202,
          0.1618,  0.1108, -0.1682,  0.2672,  0.2300],
        [-0.0794,  0.0790,

* Second Model

In [None]:
class HousePricesModel1(PyTorchModel):
    def __init__(self,
        hidden_size: int = 13):
        super().__init__()
        self.fc1 = nn.Linear(13, hidden_size)
        self.fc2 = nn.Linear(hidden_size, 1)
    def forward(self, x: Tensor) -> Tensor:
        assert x.shape[1] == 13
        #print (x.shape)
        x = self.fc1(x)
        #print (x.shape)
        x = torch.relu(x)
        #print (x.shape)
        x=self.fc2(x)
        #print (x.shape)
        return x

In [10]:
pytorch_boston_model1 = HousePricesModel1(hidden_size=13)

In [11]:
for param in pytorch_boston_model1.parameters():
    print(param)

Parameter containing:
tensor([[-2.3666e-04,  3.3997e-02,  2.2908e-01,  4.8672e-03, -6.1186e-02,
         -1.9030e-01,  1.9535e-01, -2.3275e-01,  1.4117e-01, -1.3089e-01,
         -8.6947e-03,  8.9840e-02,  1.9432e-01],
        [ 1.7867e-02,  2.3334e-02, -2.2396e-01, -1.2295e-02,  5.5315e-02,
         -1.6663e-01, -2.4244e-01,  6.5674e-03, -1.0359e-01, -1.0692e-02,
          2.7040e-01, -1.2615e-02,  9.8212e-02],
        [-2.7389e-01,  2.5488e-01,  1.0098e-01,  3.6071e-03,  1.8233e-01,
          2.6734e-02,  1.7594e-01,  1.7110e-01,  4.3295e-02,  2.4369e-01,
         -1.1354e-01,  2.0075e-01, -1.5876e-01],
        [-1.4891e-01,  1.1610e-01, -5.8005e-02,  2.6943e-02, -1.4156e-01,
          4.9986e-02, -9.9867e-02,  2.3651e-01,  1.3492e-01, -4.0564e-02,
          4.5980e-02,  2.1160e-01, -7.1996e-02],
        [-2.0206e-01,  1.1443e-01, -2.7536e-01,  1.2715e-01,  9.0666e-03,
         -1.2459e-01, -2.5676e-01, -8.3453e-02, -2.0029e-01, -1.6629e-01,
          1.1459e-01,  1.5291e-02, -1.1610

### Building Trainer Class

In [24]:
class PyTorchTrainer(object):
    def __init__(self,
        model: PyTorchModel,
        optim: optim.Optimizer,
        criterion: nn.MSELoss):
        self.model = model
        self.optim = optim
        self.loss = criterion
        self._check_optim_net_aligned()
    def _check_optim_net_aligned(self):
        assert self.optim.param_groups[0]['params']\
        == list(self.model.parameters())
    def _generate_batches(self,
        X: Tensor,
        y: Tensor,
        size: int = 32) -> Tuple[Tensor]:
        N = X.shape[0]
        for ii in range(0, N, size):
            X_batch, y_batch = X[ii:ii+size], y[ii:ii+size]
            yield (X_batch, y_batch)
    def fit(self, X_train: Tensor, y_train: Tensor,
        X_test: Tensor, y_test: Tensor,
        epochs: int=100,
        eval_every: int=10,
        batch_size: int=32) -> PyTorchModel:
        for e in range(epochs):
            #X_train, y_train = permute_data(X_train, y_train)
            #print(X_train.shape,y_train.shape)
            batch_generator = self._generate_batches(X_train, y_train,
            batch_size)
            for ii, (X_batch, y_batch) in enumerate(batch_generator):
                #print(X_batch.shape,y_batch.shape)
                self.optim.zero_grad()
                output = self.model(X_batch)
                loss = self.loss(output, y_batch)
                #print("b4 grad",loss)
                loss.backward()
                #print("after grad",loss)
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
                self.optim.step()
            
            self.optim.zero_grad()
            output = self.model(X_test)
            loss = self.loss(output, y_test)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
            self.optim.step()
            if e%10==0:
                print(">==> epoch",e,">==> loss =", loss)
        return self.model

In [23]:
from torch.utils.data import TensorDataset, DataLoader
class PyTorchTrainer1:
    def __init__(self, model: torch.nn.Module, optim: torch.optim.Optimizer, criterion: torch.nn.Module, device: torch.device):
        self.model = model.to(device)
        self.optim = optim
        self.criterion = criterion
        self.device = device

    def _check_params(self):
        opt_params = {p for g in self.optim.param_groups for p in g['params']}
        model_params = set(self.model.parameters())
        missing = model_params - opt_params
        if missing:
            raise ValueError(f"Optimizer missing {len(missing)} model params.")

    def fit(self, train_loader: DataLoader, val_loader: DataLoader, epochs: int = 100, eval_every: int = 10, clip_norm: float = 1.0):
        self._check_params()
        for e in range(1, epochs + 1):
            self.model.train()
            for xb, yb in train_loader:
                xb, yb = xb.to(self.device), yb.to(self.device)
                out = self.model(xb).squeeze()
                loss = self.criterion(out, yb)
                self.optim.zero_grad()
                loss.backward()
                if clip_norm is not None:
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=clip_norm)
                self.optim.step()

            if e % eval_every == 0:
                self.model.eval()
                val_loss = 0.0
                n = 0
                with torch.no_grad():
                    for xb, yb in val_loader:
                        xb, yb = xb.to(self.device), yb.to(self.device)
                        out = self.model(xb).squeeze()
                        val_loss += self.criterion(out, yb).item() * xb.size(0)
                        n += xb.size(0)
                print(f"Epoch {e}: val_loss={val_loss / max(n, 1):.4f}")

In [58]:
from torch.utils.data import TensorDataset, DataLoader
class PyTorchTrainer2(object):
    def __init__(self,
        model: PyTorchModel,
        optim: torch.optim.Optimizer,
        criterion: torch.nn.Module,
        device: torch.device):
        self.model = model.to(device)
        self.optim = optim
        self.criterion = criterion
        self.device=device
        self._check_optim_net_aligned()
    def _check_optim_net_aligned(self):

        #model_params = set(p for p in self.model.parameters())
        #optim_params = set(p for g in self.optim.param_groups for p in g['params'])
        #if not model_params.issubset(optim_params):
            #raise ValueError("Optimizer missing some model parameters")


        assert self.optim.param_groups[0]['params']\
        == list(self.model.parameters())

    def fit(self, train_data: DataLoader, val_data: DataLoader,
        epochs: int=100,
        eval_every: int=10,
        clip_norm:float=1.0):
        for e in range(1,epochs+1):

            try:
                self.model.train()
                #X_train, y_train = permute_data(X_train, y_train)
                
                for X_batch, y_batch in train_data:
                    X_batch, y_batch = X_batch.to(self.device), y_batch.to(self.device)
                    output = self.model(X_batch).squeeze()
                    if output.shape[0] != y_batch.shape[0]:
                            raise ValueError(f"Output {output.shape} != Target {y_batch.shape}")

                    loss = self.criterion(output, y_batch)
                    self.optim.zero_grad()
                    loss.backward()
                    #print("after grad",loss)
                    if clip_norm is not None:
                        torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=clip_norm)
                    self.optim.step()
                
                if e % eval_every == 0:
                    #self._evaluate(val_data, metrics, logger, e)
                    from sklearn.metrics import mean_absolute_error,r2_score
                    #self.model.eval()
                    metrics={"R2_Score":r2_score,"MAE":mean_absolute_error}
                    self._evaluate(val_data,metrics=metrics,epoch=e)
                    """val_loss = 0.0
                    n = 0
                    with torch.no_grad():
                        for xb, yb in val_data:
                            xb, yb = xb.to(self.device), yb.to(self.device)
                            out = self.model(xb).squeeze()
                            val_loss += self.criterion(out, yb).item() * xb.size(0)
                            n += xb.size(0)
                    print(f"Epoch {e}: val_loss={val_loss / max(n, 1):.4f}")"""
            
            except Exception as ex:
                #logger.exception(f"Training failed at epoch {e}: {ex}")
                raise

    def _evaluate(self, val_data: DataLoader,
                  metrics: Optional[Dict[str, Callable]] = None,
                  logger=None, epoch: int = 0):
        self.model.eval()
        total_loss, n = 0.0, 0
        agg_metrics = {name: 0.0 for name in (metrics or {})}
        with torch.no_grad():
            for xb, yb in val_data:
                xb, yb = xb.to(self.device), yb.to(self.device)
                out = self.model(xb).squeeze()
                total_loss += self.criterion(out, yb).item() * xb.size(0)
                n += xb.size(0)
                if metrics:
                    for name, fn in metrics.items():
                        agg_metrics[name] += fn(out, yb) * xb.size(0)
        avg_loss = total_loss / max(n, 1)
        msg = f"epoch={epoch} val_loss={avg_loss:.4f}"
        if metrics:
            for name, total in agg_metrics.items():
                msg += f" {name}={total / max(n, 1):.4f}"
        (logger.info if logger else print)(msg)


In [59]:
from sklearn.metrics import mean_absolute_error
def evaluate(metrics: Optional[Dict[str, Callable]] = None):
        
        agg_metrics = {name: 0.0 for name in (metrics or {})}
        if metrics:
                    for name, fn in metrics.items():
                        print(name.__class__,fn.__class__)
        print (agg_metrics["MAE"])
metrics={"MSE":nn.MSELoss(),"MAE":mean_absolute_error}
evaluate(metrics=metrics)

<class 'str'> <class 'torch.nn.modules.loss.MSELoss'>
<class 'str'> <class 'function'>
0.0


In [60]:
net = HousePricesModel(hidden_size=13)
optimizer = optim.SGD(net.parameters(), lr=1e-4,momentum=0.8)
criterion = nn.MSELoss()
device=torch.device("cuda" if torch.cuda.is_available() else "cpu")
trainer = PyTorchTrainer2(net, optimizer, criterion, device)


### Data Loading

In [None]:
column_names=['CRIM','ZN','INDUS','CHAS','NOX','RM','AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']

In [None]:
import pandas as pd
data=pd.read_csv("E:/ML Model Training/junior level structures/project 1/data/raw/housing.csv",
                 header=None, delimiter=r"\s+", names=column_names)

In [3]:
data.columns

Index(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
       'PTRATIO', 'B', 'LSTAT', 'MEDV'],
      dtype='object')

In [4]:
data.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33,36.2


In [None]:
y=data['MEDV']
X=data.drop(columns='MEDV',axis=1)

### OR

In [21]:
data1=pd.read_csv("E:/ML Model Training/junior level structures/project 1/data/raw/housing.csv")

In [25]:
def build_data(data):
    data=data.iloc[:, 0]
    record_points=[]
    for record in data:
        
        record_points.append(record.replace("  "," ").replace('  ',' ').split(' ')[1:])

    return record_points

In [26]:

data=pd.DataFrame(data=build_data(data1),columns=column_names, dtype=float)
data=data.dropna(axis=0)
for c in data.columns:
    data[c] = pd.to_numeric(data[c])
y=data['MEDV']
X=data.drop(columns='MEDV',axis=1)

In [27]:
data.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
1,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
2,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
3,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2
4,0.02985,0.0,2.18,0.0,0.458,6.43,58.7,6.0622,3.0,222.0,18.7,394.12,5.21,28.7


In [9]:
data.isna().sum()

CRIM       0
ZN         0
INDUS      0
CHAS       0
NOX        0
RM         0
AGE        0
DIS        0
RAD        0
TAX        0
PTRATIO    0
B          0
LSTAT      0
MEDV       0
dtype: int64

### Training The Model

In [34]:
net.eval()

HousePricesModel(
  (dense1): DenseLayer(
    (linear): Linear(in_features=13, out_features=13, bias=True)
    (activation): LeakyReLU(negative_slope=0.01)
  )
  (dense2): DenseLayer(
    (linear): Linear(in_features=13, out_features=1, bias=True)
  )
)

In [25]:
for param in net.parameters():
    print(param)

Parameter containing:
tensor([[-0.0082,  0.0527, -0.0089, -0.1263,  0.1788, -0.0151, -0.1888,  0.0684,
         -0.0581, -0.1088, -0.1821, -0.2718, -0.1794],
        [-0.1431,  0.0032, -0.1773, -0.2003, -0.2407,  0.0388,  0.1723, -0.1101,
          0.2233, -0.2358,  0.1007, -0.1158, -0.1815],
        [ 0.1639,  0.2714,  0.2123, -0.2177,  0.1169,  0.2267, -0.2714, -0.0201,
          0.0717, -0.1021,  0.1796, -0.1646, -0.2094],
        [ 0.0123, -0.2425, -0.2383, -0.2501,  0.0101,  0.0615,  0.2763, -0.0578,
         -0.0115, -0.1710, -0.2692,  0.1767, -0.0903],
        [ 0.1782, -0.2150,  0.2419, -0.2123, -0.1897, -0.2311, -0.2126, -0.1061,
         -0.0763, -0.0030, -0.0165,  0.2203, -0.2225],
        [-0.2643,  0.0077, -0.1833, -0.0346, -0.2507, -0.1351,  0.1515, -0.0298,
          0.0629, -0.0904, -0.2678,  0.0526,  0.2546],
        [ 0.0276,  0.0664, -0.2685, -0.0397,  0.1838,  0.0314, -0.2710,  0.0284,
          0.2740, -0.1801,  0.1518, -0.0914, -0.0344],
        [ 0.2185, -0.2289,

In [35]:
train_loader = DataLoader(TensorDataset(torch.tensor(X.values, dtype=torch.float),
                                        torch.tensor(y.values, dtype=torch.float)), batch_size=32, shuffle=True)
val_loader = DataLoader(TensorDataset(torch.tensor(X[0:int(data.shape[0]/4)].values,dtype=torch.float),
                                      torch.tensor(y[0:int(data.shape[0]/4)].values, dtype=torch.float)),
                                       batch_size=32)

In [44]:
import torch
from pathlib import Path
from torch.utils.data import DataLoader
from dataclasses import dataclass


@dataclass
class TrainConfig:
    lr: float
    momentum: float
    epochs: int
    seed: int = 42
    num_workers: int = 4


class Train:
    def __init__(self,model:HousePricesModel,optimizer:torch.optim.Optimizer,
                 loss_fn:torch.nn.MSELoss,epochs:int, device: torch.device) -> None:
        self.model=model
        self.optimizer=optimizer
        self.loss_fn=loss_fn
        self.epochs=epochs
        self.device=device

    def fit(self,train_data: DataLoader,val_data: DataLoader)-> None:
        trainer = PyTorchTrainer2(self.model, self.optimizer, self.loss_fn,self.device)
        trainer.fit(train_data, val_data, epochs=self.epochs)




#if __name__ == "__main__":

def training_pipeline(train_data, val_data)-> None:


    # Hyperparameters

    cfg = TrainConfig(
        lr=1e-4,
        momentum=0.8,
        epochs=200,
        seed=42
    )

    torch.manual_seed(cfg.seed)



    # Initialize model, optimizer, and loss function

    model = HousePricesModel()
    optimizer = torch.optim.SGD(model.parameters(), lr=cfg.lr,
                                momentum=cfg.momentum)
    criterion = torch.nn.MSELoss()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Train the model
    trainer_object=Train(model=model, optimizer=optimizer,
                            loss_fn=criterion, epochs=cfg.epochs,device=device)
    trainer_object.fit(train_data,val_data)

In [45]:
training_pipeline(train_loader,val_loader)

Epoch 10: val_loss=28.3466
Epoch 20: val_loss=28.4738
Epoch 30: val_loss=29.3723
Epoch 40: val_loss=28.4820
Epoch 50: val_loss=31.2827
Epoch 60: val_loss=32.3281
Epoch 70: val_loss=28.7952
Epoch 80: val_loss=30.0272
Epoch 90: val_loss=30.3701
Epoch 100: val_loss=30.9543
Epoch 110: val_loss=30.9857
Epoch 120: val_loss=29.5851
Epoch 130: val_loss=29.1575
Epoch 140: val_loss=28.2150
Epoch 150: val_loss=30.3258
Epoch 160: val_loss=30.2623
Epoch 170: val_loss=30.1962
Epoch 180: val_loss=28.6153
Epoch 190: val_loss=29.0326
Epoch 200: val_loss=27.7699


In [61]:
model=trainer.fit(train_loader,val_loader,epochs=200)

epoch=10 val_loss=4102.5881 R2_Score=-335.8432 MAE=63.7361
epoch=20 val_loss=1867.3088 R2_Score=-627.9933 MAE=42.8914
epoch=30 val_loss=554.9280 R2_Score=-1050.6934 MAE=22.9467
epoch=40 val_loss=50.5903 R2_Score=-13.4861 MAE=5.2212
epoch=50 val_loss=38.1482 R2_Score=-7.3826 MAE=5.0536
epoch=60 val_loss=37.5108 R2_Score=-6.0265 MAE=5.0615
epoch=70 val_loss=38.1600 R2_Score=-5.3995 MAE=5.1165
epoch=80 val_loss=37.1008 R2_Score=-4.7660 MAE=5.0028
epoch=90 val_loss=36.7830 R2_Score=-4.3365 MAE=4.9599
epoch=100 val_loss=36.6534 R2_Score=-4.2076 MAE=4.9550
epoch=110 val_loss=33.4353 R2_Score=-3.6673 MAE=4.6556
epoch=120 val_loss=33.9627 R2_Score=-3.6792 MAE=4.7160
epoch=130 val_loss=34.8682 R2_Score=-3.6871 MAE=4.8078
epoch=140 val_loss=32.2933 R2_Score=-3.2711 MAE=4.5595
epoch=150 val_loss=32.2615 R2_Score=-3.1775 MAE=4.5613
epoch=160 val_loss=32.4259 R2_Score=-3.1860 MAE=4.5819
epoch=170 val_loss=32.8817 R2_Score=-3.1456 MAE=4.6299
epoch=180 val_loss=33.1065 R2_Score=-3.0897 MAE=4.6557
epo

In [26]:
model=trainer.fit(X_train=torch.tensor(X.values, dtype=torch.float), y_train=torch.tensor(y.values, dtype=torch.float), 
            X_test=torch.tensor(X[0:int(data.shape[0]/4)].values,dtype=torch.float), 
            y_test=torch.tensor(y[0:int(data.shape[0]/4)].values, dtype=torch.float), epochs=121)

  return F.mse_loss(input, target, reduction=self.reduction)


>==> epoch 0 >==> loss = tensor(394.4746, grad_fn=<MseLossBackward0>)
>==> epoch 10 >==> loss = tensor(103.7475, grad_fn=<MseLossBackward0>)
>==> epoch 20 >==> loss = tensor(59.6593, grad_fn=<MseLossBackward0>)
>==> epoch 30 >==> loss = tensor(56.6137, grad_fn=<MseLossBackward0>)
>==> epoch 40 >==> loss = tensor(53.2680, grad_fn=<MseLossBackward0>)
>==> epoch 50 >==> loss = tensor(50.1687, grad_fn=<MseLossBackward0>)
>==> epoch 60 >==> loss = tensor(47.1748, grad_fn=<MseLossBackward0>)
>==> epoch 70 >==> loss = tensor(44.1417, grad_fn=<MseLossBackward0>)
>==> epoch 80 >==> loss = tensor(41.7832, grad_fn=<MseLossBackward0>)
>==> epoch 90 >==> loss = tensor(39.7318, grad_fn=<MseLossBackward0>)
>==> epoch 100 >==> loss = tensor(38.3525, grad_fn=<MseLossBackward0>)
>==> epoch 110 >==> loss = tensor(37.3787, grad_fn=<MseLossBackward0>)
>==> epoch 120 >==> loss = tensor(36.6337, grad_fn=<MseLossBackward0>)


In [39]:
sample_data = torch.tensor([[0.02729,	0.0,	8.07,	0.0,	0.469,	7.185,	61.1,	4.9671,	2.0,	242.0,	17.8,	392.83,	4.03]], dtype=torch.float)
net(sample_data)

tensor([[26.2630]], grad_fn=<AddmmBackward0>)

In [28]:
print(trainer)

<__main__.PyTorchTrainer object at 0x00000151249FDE20>


In [None]:
import pandas as pd

class Readcsv:
    def __init__(self,path:str) ->None:
        self.path=path
    def read(self,name:str)-> pd.Series:
        return pd.read_csv(self.path+name)

class CleanData(Readcsv):
    def __init__(self, path) -> None:
        super().__init__(path)

    def clean(self,name:str) ->None:
        data=self.read(name)
        formatted_data=pd.DataFrame(data=self.seprate_columns(data.iloc[:, 0]),columns=column_names)
        clean_data=self.drop_missing_values(formatted_data)
        structured_data=self.dtype_to_float(clean_data)
        self.save_data(structured_data)

    def dtype_to_float(self,data:pd.DataFrame) ->pd.DataFrame:

        for col in data.columns:
            data[col] = data[col].astype(float)
        return data


    def drop_missing_values(self,data:pd.DataFrame) -> pd.DataFrame:
        return data.dropna(axis=0)

    def seprate_columns(self,data:pd.Series) ->list[str]:
        record_points=[]
        for record in data:
            
            record_points.append(record.replace("  "," ").replace('  ',' ').split(' ')[1:])

        return record_points
    
    def save_data(self,data:pd.DataFrame):
        data.to_csv("E:/ML Model Training/junior level structures/project 1/data/processed/processed_data.csv",index=False)

In [139]:
obj=CleanData("E:/ML Model Training/junior level structures/project 1/data/raw/")

In [140]:
obj.clean("housing.csv")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[col] = data[col].astype(float)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[col] = data[col].astype(float)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[col] = data[col].astype(float)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col

In [None]:
class DataReader:
    def __init__(self,path:str)->None:
        self.path=path

    def read(self,name:str)-> pd.DataFrame:
        return pd.read_csv(self.path+name)
    
class DataMaker(DataReader):
    def __init__(self, path)->None:
        super().__init__(path)

    def make_data(self,name:str)->Tuple[Tuple[torch.Tensor,torch.Tensor],Tuple[torch.Tensor,torch.Tensor]]:
        data=self.read(name)
        print("shape of data :",data.shape)
        y,X=self.get_X_y(data)
        print("shape of X :",X.shape)
        return self.convert_to_tensor(X[96:],y[96:]),self.convert_to_tensor(X[:96],y[:96])


    def get_X_y(self,data:pd.DataFrame) -> Tuple[pd.Series,pd.DataFrame]:
        return data['MEDV'],data.drop(columns=['MEDV'],axis=1)
    
    def convert_to_tensor(self,X:pd.DataFrame,y:pd.Series)->Tuple[torch.Tensor,torch.Tensor]:
        return torch.tensor(X.values,dtype=torch.float),torch.tensor(y.values,dtype=torch.float)

In [3]:
ob=DataMaker("E:/ML Model Training/junior level structures/project 1/data/processed/")

In [64]:
(X,y),(X_val,y_val)=ob.make_data("processed_data.csv")

shape of data : (451, 14)
shape of X : (451, 13)


In [65]:
y_val.isnan().sum()

tensor(0)

In [133]:
z,p=ob.get_X_y(d)

In [144]:
X.shape,y.shape

(torch.Size([355, 13]), torch.Size([355]))

In [76]:
class Train:
    def __init__(self,model:HousePricesModel,optimizer:optim.Optimizer,
                 loss_fn:nn.MSELoss,epochs:int):
        self.model=model
        self.optimizer=optimizer
        self.loss_fn=loss_fn
        self.epochs=epochs
    def fit(self,X:torch.Tensor,y:torch.Tensor,
            X_val:torch.Tensor,y_val:torch.Tensor)->PyTorchModel:
        trainer = PyTorchTrainer(self.model, self.optimizer, self.loss_fn)
        #logger.info("Training started...")
        model=trainer.fit(X, y, X_val, y_val, epochs=self.epochs)
        return model
        #logger.info("Training finished.")

In [131]:
t_o=Train(net,optimizer,criterion,201)

In [132]:
net(sample_data)

tensor([[-5.2241]], grad_fn=<AddmmBackward0>)

In [133]:
mo=t_o.fit(X,y,X_val,y_val)

>==> epoch 0 >==> loss = tensor(879.2496, grad_fn=<MseLossBackward0>)
>==> epoch 10 >==> loss = tensor(278.8664, grad_fn=<MseLossBackward0>)
>==> epoch 20 >==> loss = tensor(46.6454, grad_fn=<MseLossBackward0>)
>==> epoch 30 >==> loss = tensor(37.4225, grad_fn=<MseLossBackward0>)
>==> epoch 40 >==> loss = tensor(36.7412, grad_fn=<MseLossBackward0>)
>==> epoch 50 >==> loss = tensor(36.4069, grad_fn=<MseLossBackward0>)
>==> epoch 60 >==> loss = tensor(36.2479, grad_fn=<MseLossBackward0>)
>==> epoch 70 >==> loss = tensor(36.0693, grad_fn=<MseLossBackward0>)
>==> epoch 80 >==> loss = tensor(35.8484, grad_fn=<MseLossBackward0>)
>==> epoch 90 >==> loss = tensor(35.6031, grad_fn=<MseLossBackward0>)
>==> epoch 100 >==> loss = tensor(35.4262, grad_fn=<MseLossBackward0>)
>==> epoch 110 >==> loss = tensor(35.2383, grad_fn=<MseLossBackward0>)
>==> epoch 120 >==> loss = tensor(35.0336, grad_fn=<MseLossBackward0>)
>==> epoch 130 >==> loss = tensor(34.8232, grad_fn=<MseLossBackward0>)
>==> epoch 140 

In [121]:
mo(sample_data)

tensor([[22.4547]], grad_fn=<AddmmBackward0>)

In [6]:
dict1={"a":"asfg"}
assert dict1.__class__==str

AssertionError: 

In [None]:
from pydantic import BaseModel

class ApiData(BaseModel):
    val1: float
    val2: float
    val3: float
    val4: float
    val5: float
    val6: float
    val7: float
    val8: float
    val9: float
    val10: float
    val11: float
    val12: float
    val13:float


In [50]:
import json
def apidata(q: ApiData)-> ApiData:
    l=[]
    for val in q:
        l.append(q[val])

    print(l)


In [51]:

w={"val1":0.02729,	"val2":0.0,	"val3":8.07,	"val4":0.0,
          "val5":0.469,	"val6":7.185,	"val7":61.1,	"val8":4.9671,	"val9":2.0,	"val10":242.0,	"val11":17.8,
          "val12":392.83,	"val13":4.03}

z=apidata(w)

[0.02729, 0.0, 8.07, 0.0, 0.469, 7.185, 61.1, 4.9671, 2.0, 242.0, 17.8, 392.83, 4.03]


['0.00',
 '7.070',
 '0',
 '0.4690',
 '7.1850',
 '61.10',
 '4.9671',
 '2',
 '242.0',
 '17.80',
 '392.83',
 '4.03',
 '34.70']

In [None]:
from dataclasses import dataclass
from pathlib import Path
from typing import Tuple
import pandas as pd
import torch
from torch.utils.data import TensorDataset, DataLoader

@dataclass
class DatasetConfig:
    target_col: str
    val_ratio: float = 0.2
    seed: int = 42
    batch_size: int = 64

def read_csv(root: Path, name: str) -> pd.DataFrame:
    path = root / name
    if not path.exists():
        raise FileNotFoundError(f"Data file not found: {path}")
    df = pd.read_csv(path)
    if df.empty:
        raise ValueError(f"Empty data file: {path}")
    return df

def split_shuffle(df: pd.DataFrame, cfg: DatasetConfig) -> Tuple[pd.DataFrame, pd.DataFrame]:
    if cfg.target_col not in df.columns:
        raise KeyError(f"Target column '{cfg.target_col}' not in data.")
    df = df.sample(frac=1.0, random_state=cfg.seed)
    n_val = max(1, int(len(df) * cfg.val_ratio))
    return df.iloc[n_val:], df.iloc[:n_val]

def to_tensors(df: pd.DataFrame, target: str) -> Tuple[torch.Tensor, torch.Tensor]:
    y = torch.tensor(df[target].values, dtype=torch.float32)
    X = torch.tensor(df.drop(columns=[target]).values, dtype=torch.float32)
    return X, y

class PyTorchTrainer:
    def __init__(self, model: torch.nn.Module, optim: torch.optim.Optimizer, criterion: torch.nn.Module, device: torch.device):
        self.model = model.to(device)
        self.optim = optim
        self.criterion = criterion
        self.device = device

    def _check_params(self):
        opt_params = {p for g in self.optim.param_groups for p in g['params']}
        model_params = set(self.model.parameters())
        missing = model_params - opt_params
        if missing:
            raise ValueError(f"Optimizer missing {len(missing)} model params.")

    def fit(self, train_loader: DataLoader, val_loader: DataLoader, epochs: int = 100, eval_every: int = 10, clip_norm: float = 1.0):
        self._check_params()
        for e in range(1, epochs + 1):
            self.model.train()
            for xb, yb in train_loader:
                xb, yb = xb.to(self.device), yb.to(self.device)
                out = self.model(xb).squeeze()
                loss = self.criterion(out, yb)
                self.optim.zero_grad()
                loss.backward()
                if clip_norm is not None:
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=clip_norm)
                self.optim.step()

            if e % eval_every == 0:
                self.model.eval()
                val_loss = 0.0
                n = 0
                with torch.no_grad():
                    for xb, yb in val_loader:
                        xb, yb = xb.to(self.device), yb.to(self.device)
                        out = self.model(xb).squeeze()
                        val_loss += self.criterion(out, yb).item() * xb.size(0)
                        n += xb.size(0)
                print(f"Epoch {e}: val_loss={val_loss / max(n, 1):.4f}")

    def save(self, path: Path):
        path.parent.mkdir(parents=True, exist_ok=True)
        torch.save({'model': self.model.state_dict(), 'optimizer': self.optim.state_dict()}, path)

# Usage sketch
cfg = DatasetConfig(target_col="SalePrice", val_ratio=0.2, seed=123, batch_size=64)
df = read_csv(Path("data/processed"), "processed_data.csv")
train_df, val_df = split_shuffle(df, cfg)
X_tr, y_tr = to_tensors(train_df, cfg.target_col)
X_val, y_val = to_tensors(val_df, cfg.target_col)
train_loader = DataLoader(TensorDataset(X_tr, y_tr), batch_size=cfg.batch_size, shuffle=True)
val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=cfg.batch_size)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = HousePricesModel()
optim = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.8)
criterion = torch.nn.MSELoss()
trainer = PyTorchTrainer(model, optim, criterion, device)
trainer.fit(train_loader, val_loader, epochs=200, eval_every=10)
trainer.save(Path("models/saved_model.pt"))

In [None]:
"""
@dataclass
class DatasetConfig:
    target_col: str
    val_ratio: float = 0.2
    seed: int = 42
    batch_size: int = 64

class DataReader:
    def __init__(self,root:Path)->None:
        self.root=root

    def read(self,name:str)-> pd.DataFrame:
        path = self.root / name
        if not path.exists():
            raise FileNotFoundError(f"Data file not found: {path}")
        df=pd.read_csv(path)
        if df.empty:
            raise ValueError(f"Empty data file: {path}")
        return df
    
class DataMaker(DataReader):
    def __init__(self, root)->None:
        super().__init__(root)

    def make_data(self,name:str,
                  cfg:DatasetConfig)->Tuple[Tuple[torch.Tensor,torch.Tensor],Tuple[torch.Tensor,torch.Tensor]]:

        data=self.read(name)

        train_df,val_df=self.split_shuffle(data,cfg)

        return self.convert_to_tensor(train_df,cfg.target_col),self.convert_to_tensor(val_df,cfg.target_col)
    
    def split_shuffle(self,df: pd.DataFrame, cfg: DatasetConfig) -> Tuple[pd.DataFrame, pd.DataFrame]:
        if cfg.target_col not in df.columns:
            raise KeyError(f"Target column '{cfg.target_col}' not in data.")
        df = df.sample(frac=1.0, random_state=cfg.seed)
        n_val = max(1, int(len(df) * cfg.val_ratio))
        return df.iloc[n_val:], df.iloc[:n_val]
    
    def convert_to_tensor(self,df:pd.DataFrame,target_col:str)->Tuple[torch.Tensor,torch.Tensor]:
        y = torch.tensor(df[target_col].values, dtype=torch.float32)
        X = torch.tensor(df.drop(columns=[target_col]).values, dtype=torch.float32)
        return X, y"""