# Quiz 12 

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

class DiabetesModel(nn.Module):
    def __init__(self, input_size):
        super(DiabetesModel, self).__init__()
        self.linear = nn.Linear(input_size, 1)
    
    def forward(self, x):
        return self.linear(x)

def train_and_evaluate_model(model, optimizer, criterion, X_train, y_train, X_test, y_test, epochs=1000):
    for epoch in range(epochs):
        outputs = model(X_train)
        loss = criterion(outputs, y_train)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (epoch+1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')
    
    model.eval()
    with torch.no_grad():
        y_pred = model(X_test)
        mse = mean_squared_error(y_test, y_pred.numpy())
        print(f'Mean Squared Error: {mse:.4f}')

input_size = X_train.shape[1]
model = DiabetesModel(input_size)
criterion = nn.MSELoss()

optimizer = optim.SGD(model.parameters(), lr=0.01)

print("Using SGD optimizer:")
train_and_evaluate_model(model, optimizer, criterion, X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor)

print("\nUsing Adam optimizer:")
optimizer = optim.Adam(model.parameters(), lr=0.01)
train_and_evaluate_model(model, optimizer, criterion, X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor)


Using SGD optimizer:
Epoch [100/1000], Loss: 3354.8347
Epoch [200/1000], Loss: 2906.4897
Epoch [300/1000], Loss: 2896.8118
Epoch [400/1000], Loss: 2895.5522
Epoch [500/1000], Loss: 2894.5525
Epoch [600/1000], Loss: 2893.6023
Epoch [700/1000], Loss: 2892.6892
Epoch [800/1000], Loss: 2891.8103
Epoch [900/1000], Loss: 2890.9646
Epoch [1000/1000], Loss: 2890.1499
Mean Squared Error: 2885.4773

Using Adam optimizer:
Epoch [100/1000], Loss: 2888.7478
Epoch [200/1000], Loss: 2887.4985
Epoch [300/1000], Loss: 2886.3208
Epoch [400/1000], Loss: 2885.2043
Epoch [500/1000], Loss: 2884.1445
Epoch [600/1000], Loss: 2883.1370
Epoch [700/1000], Loss: 2882.1799
Epoch [800/1000], Loss: 2881.2710
Epoch [900/1000], Loss: 2880.4070
Epoch [1000/1000], Loss: 2879.5867
Mean Squared Error: 2881.6074


* the Adam optimizer performs much better than the SGD optimizer. i can tell this based off of the mean-squared error for the Adam optimizer being lower than the mean-squared error for the SGD optimizer. 
* the Adam optimizer works to minimize the loss function during training of neural networks by considering the moving average of the first and second-order movements of its gradient. 

# Extra Credit Question (2 pts) 

In [7]:
def count_divisors(n: int) -> str:
    """
    Count the number of divisors for an input value.

    Args:
    n (int): Input value.

    Returns:
    str: A string indicating the number of divisors and the divisors themselves.
    """
    divisors = [str(i) for i in range(1, n + 1) if n % i == 0]
    divisor_count = len(divisors)
    divisors_str = ", ".join(divisors)
    return f"There are {divisor_count} divisors: {divisors_str}"

# testing the function
print(count_divisors(5))  # Output: “There are 2 divisors: 1 and 5”
print(count_divisors(40))  # Output: “There are 8 divisors: 1, 2, 4, 5, 8, 10, 20, and 40”


There are 2 divisors: 1, 5
There are 8 divisors: 1, 2, 4, 5, 8, 10, 20, 40
