# Segment 3 Extra Lab

## Let's make a deeper neural network

In [None]:
# imports - now including pytorch

import os
from dotenv import load_dotenv
from huggingface_hub import login
import numpy as np
from tqdm import tqdm
import pickle
from evaluator import evaluate
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sentence_transformers import SentenceTransformer
import chromadb
from torch.optim.lr_scheduler import CosineAnnealingLR

In [None]:
# Load the pickled train and test splits
# Sidenote: this is actually a larger dataset than before (about twice as large)
# NOTE: unpickling runs code embedded in the file - only load pickles you trust

def _unpickle(path):
    """Return the object stored in the pickle file at `path`."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


train = _unpickle('../train.pkl')
test = _unpickle('../test.pkl')

In [None]:
# Show how many entries the unpickled training set contains
len(train)

In [None]:
# environment
# Read .env (overriding any already-exported values), then make sure both API
# keys exist in the process environment, falling back to a placeholder.

load_dotenv(override=True)
for key_name in ('OPENAI_API_KEY', 'HF_TOKEN'):
    os.environ.setdefault(key_name, 'your-key-if-not-using-env')

# Location of the persisted Chroma vector store
DB = "../segment4/products_vectorstore"

In [None]:
# Authenticate with the HuggingFace Hub
# No account yet? Create one for free at www.huggingface.co,
# then put HF_TOKEN in your .env file as described in the project README

hf_token = os.environ['HF_TOKEN']
login(
    token=hf_token,
    add_to_git_credential=False,
)

In [None]:
# Open the on-disk Chroma store at DB
client = chromadb.PersistentClient(path=DB)
# Load the MiniLM sentence-embedding model.
# NOTE(review): the name `model` is rebound to the neural network further down,
# and the encoder is instantiated again as `encoder` before inference, so this
# instance appears to be unused - confirm before relying on it.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

In [None]:
# Look up the "products" collection in the vector store.
# NOTE(review): get_or_create_collection presumably creates an empty collection
# if none exists, in which case the cells below would see no data - verify the
# store has been populated first.
collection_name = "products"
collection = client.get_or_create_collection(collection_name)

In [None]:
# Fetch the stored records, then split them into parallel containers:
# embedding matrix, raw document text, and the price held in each metadata dict
result = collection.get(include=['embeddings', 'documents', 'metadatas'])
documents = result['documents']
vectors = np.array(result['embeddings'])
prices = [entry['price'] for entry in result['metadatas']]

In [None]:
# Fix the NumPy and PyTorch seeds so repeated runs behave the same
np.random.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# Convert data to PyTorch tensors
X_train_tensor = torch.FloatTensor(vectors)
# unsqueeze(1) adds a trailing dimension so targets line up with the
# network's single-unit output
y_train_tensor = torch.FloatTensor(prices).unsqueeze(1)

# Split the data into training and validation sets (1% held out for validation)
X_train, X_val, y_train, y_val = train_test_split(X_train_tensor, y_train_tensor, test_size=0.01, random_state=42)

# Log-transform the prices; the +1 keeps a price of 0 finite
y_train_log = torch.log(y_train + 1)
y_val_log = torch.log(y_val + 1)

# Normalize log prices using statistics from the training split only,
# so nothing from the validation split leaks into the scaling
y_mean = y_train_log.mean()
y_std = y_train_log.std()
y_train_norm = (y_train_log - y_mean) / y_std
y_val_norm = (y_val_log - y_mean) / y_std

# Create the loader
# The targets must be the normalized log prices: the validation loss and the
# inverse transform applied at prediction time both assume the network was
# trained in that space (the raw y_train was used here previously).
train_dataset = TensorDataset(X_train, y_train_norm)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

In [None]:
class NewNeuralNetwork(nn.Module):
    """Feed-forward regressor: an input projection, a stack of residual
    blocks, and a single-unit linear head.

    Args:
        input_size: width of each input vector.
        num_layers: the stack holds ``num_layers - 2`` residual blocks.
        hidden_size: width used throughout the hidden part of the network.
        dropout_prob: dropout probability for the input projection and
            passed on to every residual block.
    """

    def __init__(self, input_size, num_layers=10, hidden_size=4096, dropout_prob=0.2):
        super().__init__()

        # Project the input up to the hidden width
        projection = [
            nn.Linear(input_size, hidden_size),
            nn.LayerNorm(hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout_prob),
        ]
        self.input_layer = nn.Sequential(*projection)

        # Hidden stack of identical residual blocks
        self.residual_blocks = nn.ModuleList(
            [ResidualBlock(hidden_size, dropout_prob) for _ in range(num_layers - 2)]
        )

        # Collapse the hidden representation to one value per sample
        self.output_layer = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Run `x` through the projection, every residual block, and the head."""
        hidden = self.input_layer(x)
        for residual_block in self.residual_blocks:
            hidden = residual_block(hidden)
        return self.output_layer(hidden)

In [None]:
class ResidualBlock(nn.Module):
    """Two Linear + LayerNorm stages wrapped in a skip connection.

    Args:
        hidden_size: width of both the input and the output.
        dropout_prob: dropout probability applied between the two stages.
    """

    def __init__(self, hidden_size, dropout_prob):
        super().__init__()
        stages = [
            nn.Linear(hidden_size, hidden_size),
            nn.LayerNorm(hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout_prob),
            nn.Linear(hidden_size, hidden_size),
            nn.LayerNorm(hidden_size),
        ]
        self.block = nn.Sequential(*stages)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return ReLU(block(x) + x)."""
        transformed = self.block(x)
        # Skip connection: add the untouched input back before the activation
        return self.relu(transformed + x)

In [None]:
# Build the network; its input width is the embedding dimension
model = NewNeuralNetwork(X_train.shape[1])
total_params = sum(p.numel() for p in model.parameters())
print("Total parameters:", total_params)

# Use the best accelerator available: CUDA first, then Apple MPS, else CPU.
# The getattr guard covers PyTorch builds that have no torch.backends.mps.
mps_backend = getattr(torch.backends, "mps", None)
if torch.cuda.is_available():
    device = torch.device("cuda")
elif mps_backend is not None and mps_backend.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
model.to(device)

# L1 loss on the normalized log prices
loss_function = nn.L1Loss()
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.01)
# Cosine decay of the learning rate towards eta_min, stepped once per epoch
scheduler = CosineAnnealingLR(optimizer, T_max=10, eta_min=0)

# Batches pair each embedding with its normalized log price
train_dataset = TensorDataset(X_train, y_train_norm)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

In [None]:
# Epochs are numbered from 1 and both bounds are inclusive
EPOCH_START = 1
EPOCH_END = 5

# The validation tensors never change, so move them to the device once
X_val_device = X_val.to(device)
y_val_device = y_val.to(device)
y_val_norm_device = y_val_norm.to(device)

for epoch in range(EPOCH_START, EPOCH_END+1):
    model.train()
    train_losses = []

    for batch_X, batch_y in tqdm(train_loader):
        batch_X = batch_X.to(device)
        batch_y = batch_y.to(device)

        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = loss_function(outputs, batch_y)
        loss.backward()

        # Gradient clipping
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        optimizer.step()
        train_losses.append(loss.item())

    # Validation (eval mode disables dropout)
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val_device)
        val_loss = loss_function(val_outputs, y_val_norm_device)

        # Convert back to original scale for meaningful metrics:
        # undo the normalization, then undo the log(price + 1) transform
        val_outputs_orig = torch.exp(val_outputs * y_std + y_mean) - 1
        mae = torch.abs(val_outputs_orig - y_val_device).mean()

    avg_train_loss = np.mean(train_losses)
    # `epoch` is already 1-based, so report it as-is (adding 1 printed "6/5")
    print(f'Epoch [{epoch}/{EPOCH_END}]')
    print(f'Train Loss: {avg_train_loss:.4f}, Val Loss: {val_loss.item():.4f}')
    print(f'Val MAE (original scale): ${mae.item():.2f}')
    print(f'Learning rate: {scheduler.get_last_lr()[0]:.6f}')

    # Uncomment to checkpoint the weights after every epoch
    # torch.save(model.state_dict(), f'models/nnn-{epoch}.pth')

    # Learning rate scheduling
    scheduler.step()

In [None]:
# Sentence encoder used to embed item text at prediction time
encoder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')


def new_neural_network(item):
    """Predict a price for `item` from its `text` attribute.

    The text is embedded with `encoder`, passed through `model`, and the
    output is mapped back from normalized log space with `y_std` / `y_mean`.
    Negative results are clamped to 0.
    """
    model.eval()
    with torch.no_grad():
        embedding = torch.FloatTensor(encoder.encode(item.text)).to(device)
        normalized_log_price = model(embedding)[0]
        # Invert the normalization, then the log(price + 1) transform
        price = (torch.exp(normalized_log_price * y_std + y_mean) - 1).item()
    return max(0, price)

In [None]:
# Quick check: predict the price of a single test item
new_neural_network(test[1])

In [None]:
# Hand the predictor and the test set to the project's evaluator
# (presumably it scores predictions against the true prices - see the evaluator module)
evaluate(new_neural_network, test)