In [1]:
#Input
import torch
import pandas as pd

# Fix the RNG so the synthetic data set is reproducible
torch.manual_seed(42)
X = 10 * torch.rand(100, 1)          # 100 inputs drawn uniformly from [0, 10)
y = 3 + 2 * X + torch.randn(100, 1)  # targets follow y = 2x + 3 plus unit Gaussian noise

# Persist the samples as a two-column CSV ('X', 'y') without an index column
data = torch.cat([X, y], dim=1)
df = pd.DataFrame(data.numpy(), columns=['X', 'y'])
df.to_csv('data.csv', index=False)

import torch
import torch.nn as nn
import torch.optim as optim

import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd

class LinearRegressionDataset(Dataset):
    """Map-style dataset backed by a CSV file with an 'X' (feature) and a 'y' (target) column."""

    def __init__(self, csv_file):
        # Keep the raw frame; __len__ is answered from it
        self.data = pd.read_csv(csv_file)
        # Each column becomes a float32 column vector of shape (n_rows, 1)
        self.X, self.y = (
            torch.tensor(self.data[column].values, dtype=torch.float32).view(-1, 1)
            for column in ('X', 'y')
        )

    def __len__(self):
        """Number of rows read from the CSV file."""
        return self.data.shape[0]

    def __getitem__(self, idx):
        """Return the (feature, target) pair stored at position ``idx``."""
        feature = self.X[idx]
        target = self.y[idx]
        return feature, target

# Wrap the CSV in a dataset and draw shuffled mini-batches of up to 32 samples from it
dataset = LinearRegressionDataset(csv_file='data.csv')
dataloader = DataLoader(dataset=dataset, batch_size=32, shuffle=True)

# Define the Linear Regression Model
class LinearRegressionModel(nn.Module):
    """Affine model y = w * x + b with one input feature and one output."""

    def __init__(self):
        super().__init__()
        # The attribute name `linear` is part of the interface: callers read
        # the fitted weight and bias through it.
        self.linear = nn.Linear(in_features=1, out_features=1)

    def forward(self, x):
        """Apply the affine layer to ``x`` and return the result."""
        output = self.linear(x)
        return output

# Model, MSE objective and plain SGD optimiser
model = LinearRegressionModel()
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Mini-batch gradient descent
epochs = 1000
for epoch in range(epochs):
    for batch_X, batch_y in dataloader:
        # Clear stale gradients, then forward / backward / update
        optimizer.zero_grad()
        loss = criterion(model(batch_X), batch_y)
        loss.backward()
        optimizer.step()

    # Report every 100th epoch; the value shown is the loss of that epoch's last mini-batch
    if epoch % 100 == 99:
        print(f"Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}")

# Inspect the fitted parameters
w, b = model.linear.weight, model.linear.bias
print(f"Learned weight: {w.item():.4f}, Learned bias: {b.item():.4f}")

# Evaluate on unseen inputs; gradients are not needed for inference
X_test = torch.tensor([[4.0], [7.0]])
with torch.no_grad():
    predictions = model(X_test)
print(f"Predictions for {X_test.tolist()}: {predictions.tolist()}")

Epoch [100/1000], Loss: 1.5655
Epoch [200/1000], Loss: 0.4624
Epoch [300/1000], Loss: 1.4614
Epoch [400/1000], Loss: 0.3983
Epoch [500/1000], Loss: 0.5415
Epoch [600/1000], Loss: 1.6767
Epoch [700/1000], Loss: 1.0075
Epoch [800/1000], Loss: 0.3245
Epoch [900/1000], Loss: 0.6541
Epoch [1000/1000], Loss: 1.6099
Learned weight: 1.9207, Learned bias: 3.2333
Predictions for [[4.0], [7.0]]: [[10.91616153717041], [16.67831039428711]]


In [2]:
#Strong LLM
import jax
import jax.numpy as jnp
import numpy as np
import pandas as pd


# Root PRNG key; every random draw below consumes a fresh subkey split from it
key = jax.random.PRNGKey(42)

# 100 inputs drawn uniformly from [0, 10)
key, subkey = jax.random.split(key)
X = 10 * jax.random.uniform(subkey, shape=(100, 1))

# Targets follow y = 2x + 3 plus unit Gaussian noise
key, subkey = jax.random.split(key)
noise = jax.random.normal(subkey, shape=(100, 1))
y = 3 + 2 * X + noise

# Write both columns to 'data.csv' without an index column
data = jnp.concatenate((X, y), axis=1)
df = pd.DataFrame(np.array(data), columns=['X', 'y'])
df.to_csv('data.csv', index=False)

# Define a simple Dataset class that loads data from CSV
class LinearRegressionDataset:
    """In-memory view of a CSV file with an 'X' (feature) and a 'y' (target) column."""

    def __init__(self, csv_file):
        self.data = pd.read_csv(csv_file)
        # Both columns are stored as float32 JAX column vectors of shape (n_rows, 1)
        self.X, self.y = (
            jnp.array(self.data[column].values, dtype=jnp.float32).reshape(-1, 1)
            for column in ('X', 'y')
        )

    def __len__(self):
        """Number of rows read from the CSV file."""
        return self.data.shape[0]

    def __getitem__(self, idx):
        """Return the (feature, target) pair stored at position ``idx``."""
        feature, target = self.X[idx], self.y[idx]
        return feature, target

# Define a DataLoader function to yield batches
def data_loader(dataset, batch_size, shuffle=True, rng=None):
    """Yield ``(X_batch, y_batch)`` mini-batches that together cover ``dataset`` once.

    Args:
        dataset: Object exposing ``__len__`` plus array attributes ``X`` and ``y``
            that can be indexed with an integer index array.
        batch_size: Maximum number of samples per batch; must be positive. The
            final batch is smaller when ``len(dataset)`` is not a multiple of it.
        shuffle: Visit the samples in random order when true, in stored order
            otherwise.
        rng: Optional ``numpy.random.Generator`` (or ``RandomState``) supplying
            the permutation, which makes the batch order reproducible. When
            omitted, NumPy's global random state is used.

    Yields:
        Tuples ``(dataset.X[idx], dataset.y[idx])`` for consecutive slices of
        the (possibly permuted) index array.

    Raises:
        ValueError: If ``batch_size`` is not positive. Because this is a
            generator, the error surfaces on the first iteration.
    """
    if batch_size <= 0:
        # A negative step would make range() empty and silently yield nothing
        raise ValueError(f"batch_size must be positive, got {batch_size}")
    n = len(dataset)
    if not shuffle:
        indices = np.arange(n)
    elif rng is not None:
        indices = rng.permutation(n)
    else:
        indices = np.arange(n)
        np.random.shuffle(indices)
    for start_idx in range(0, n, batch_size):
        batch_indices = indices[start_idx:start_idx + batch_size]
        yield dataset.X[batch_indices], dataset.y[batch_indices]

# Load the CSV written above; batches hold at most 32 samples
batch_size = 32
dataset = LinearRegressionDataset('data.csv')

# Parameters mirror nn.Linear(1, 1): a weight of shape (1, 1) and a bias of
# shape (1,), each drawn uniformly from [-bound, bound) with its own subkey
bound = 1.0
key, subkey = jax.random.split(key)
w = jax.random.uniform(subkey, (1, 1), minval=-bound, maxval=bound)
key, subkey = jax.random.split(key)
b = jax.random.uniform(subkey, (1,), minval=-bound, maxval=bound)
params = dict(w=w, b=b)

# Define the forward (prediction) function
def predict(params, x):
    """Affine prediction ``x . w + b`` using ``params["w"]`` and ``params["b"]``."""
    weight, bias = params["w"], params["b"]
    projected = jnp.dot(x, weight)
    return projected + bias

# Define the loss function (Mean Squared Error)
def loss_fn(params, x, y):
    """Mean squared error between ``predict(params, x)`` and the targets ``y``."""
    residual = predict(params, x) - y
    return jnp.mean(residual ** 2)

# Hyperparameters (same values as the PyTorch reference cell)
lr = 0.01
epochs = 1000

# Loss and gradients in one call, compiled with jit. Without jit every
# primitive of loss_fn and of its backward pass is dispatched eagerly on each
# training step. jit caches one compiled version per input shape, so the
# smaller final batch of an epoch only triggers one extra compilation.
loss_and_grad = jax.jit(jax.value_and_grad(loss_fn))

# Mini-batch SGD over epochs and batches
for epoch in range(epochs):
    for batch_X, batch_y in data_loader(dataset, batch_size, shuffle=True):
        loss, grads = loss_and_grad(params, batch_X, batch_y)
        # SGD update applied uniformly to every entry of the parameter dict
        params = {name: value - lr * grads[name] for name, value in params.items()}

    # Log progress every 100 epochs; the value is the loss of the epoch's last mini-batch
    if (epoch + 1) % 100 == 0:
        print(f"Epoch [{epoch + 1}/{epochs}], Loss: {float(loss):.4f}")


# Extract and print learned weight and bias
learned_w = params["w"][0, 0]
learned_b = params["b"][0]
print(f"Learned weight: {learned_w:.4f}, Learned bias: {learned_b:.4f}")

# Testing on new data
X_test = jnp.array([[4.0], [7.0]])
predictions = predict(params, X_test)
print(f"Predictions for {X_test.tolist()}: {predictions.tolist()}")


Epoch [100/1000], Loss: 1.6549
Epoch [200/1000], Loss: 1.5143
Epoch [300/1000], Loss: 0.4336
Epoch [400/1000], Loss: 0.7000
Epoch [500/1000], Loss: 0.2754
Epoch [600/1000], Loss: 0.6613
Epoch [700/1000], Loss: 1.1929
Epoch [800/1000], Loss: 1.9943
Epoch [900/1000], Loss: 2.2525
Epoch [1000/1000], Loss: 0.5789
Learned weight: 1.9570, Learned bias: 2.8438
Predictions for [[4.0], [7.0]]: [[10.671821594238281], [16.54287338256836]]


In [3]:
#Weak LLM
import jax
import jax.numpy as jnp  # MODIFIED: Ensure consistent import
from jax import grad, jit, random  # MODIFIED: PRNG keys usage
from flax import linen as nn
from flax.training import train_state
import optax

class SimpleNN(nn.Module):
    """Single dense layer applied to the input.

    NOTE(review): the layer has 10 output features, whereas the regression
    targets used in this notebook have a single column; the recorded test
    predictions accordingly contain 10 values per input row.
    """
    @nn.compact
    def __call__(self, x):
        # Project the input through a dense layer with 10 output features
        x = nn.Dense(10)(x)
        return x

def create_train_state(rng, learning_rate):
    """Build a Flax ``TrainState`` for a freshly initialised ``SimpleNN``.

    Args:
        rng: PRNG key passed to ``model.init`` for parameter initialisation.
        learning_rate: Step size handed to ``optax.adam``.

    Returns:
        A ``TrainState`` created from ``model.apply``, the initial variables
        and the Adam optimizer.
    """
    model = SimpleNN()
    params = model.init(rng, jnp.ones([1, 1]))  # Initialize with dummy input
    tx = optax.adam(learning_rate)
    return train_state.TrainState.create(apply_fn=model.apply, params=params, tx=tx)

@jit
def train_step(state, batch):
    """Apply one gradient update computed from ``batch`` and return the new state.

    Args:
        state: Train state providing ``apply_fn``, ``params`` and ``apply_gradients``.
        batch: Mapping with the inputs under ``'x'`` and the targets under ``'y'``.

    Returns:
        The state produced by ``state.apply_gradients``. The loss value itself
        is not returned, so callers cannot log it.
    """
    def loss_fn(params):
        # Mean squared error between the model output and the targets
        predictions = state.apply_fn(params, batch['x'])
        return jnp.mean((predictions - batch['y']) ** 2)

    grads = grad(loss_fn)(state.params)
    new_state = state.apply_gradients(grads=grads)
    return new_state

def main():
    """Train ``SimpleNN`` for 10 steps on a fixed two-sample batch and print the results.

    NOTE(review): the batch is hard-coded dummy data; nothing is read from
    'data.csv', and there is no mini-batching or loss logging.
    """
    rng = random.PRNGKey(0)  # Initialize PRNG key
    learning_rate = 0.001
    state = create_train_state(rng, learning_rate)
    
    # Example training loop (with dummy data)
    for epoch in range(10):
        batch = {'x': jnp.array([[1.0], [2.0]]), 'y': jnp.array([[2.0], [4.0]])}  # Dummy input and output
        state = train_step(state, batch)

    # Output learned parameters
    # Only the first element of the flattened kernel and bias is reported
    w = state.params['params']['Dense_0']['kernel'].flatten()[0]
    b = state.params['params']['Dense_0']['bias'].flatten()[0]
    print(f"Learned weight: {w:.4f}, Learned bias: {b:.4f}")

    # Testing on new data
    X_test = jnp.array([[4.0], [7.0]])
    predictions = state.apply_fn(state.params, X_test)
    print(f"Predictions for {X_test.tolist()}: {predictions.tolist()}")

# Run the demo only when executed as a script (or as a notebook cell)
if __name__ == "__main__":  # MODIFIED: Ensure entry point
    main()

Learned weight: 0.4034, Learned bias: 0.0100
Predictions for [[4.0], [7.0]]: [[1.6237629652023315, 1.9412795305252075, 1.1064223051071167, 1.2425427436828613, 4.557937145233154, -2.4911012649536133, -1.248417854309082, 0.6250647902488708, 0.5563428997993469, -1.6758862733840942], [2.83408784866333, 3.389741897583008, 1.928741455078125, 2.166952133178711, 7.968894958496094, -4.3669257164001465, -2.1922295093536377, 1.0863655805587769, 0.9661023020744324, -2.9402992725372314]]


In [None]:
"""
Error Code:
class SimpleNN(nn.Module):
    @nn.compact
    def __call__(self, x):
        x = nn.Dense(10)(x)
        return x
  
  
Error:
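The original model has a single output, but nn.Dense(10) emits 10 values per sample. Predictions of shape (N, 10) do not match targets of shape (N, 1); the subtraction broadcasts silently instead of raising an error, so the loss is not the one the original task defines.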


Fix Guide:
Set the output layer to a single unit, nn.Dense(1), so that the predictions have shape (N, 1), like the targets.


Correct Code:
class SimpleNN(nn.Module):
    @nn.compact
    def __call__(self, x):
        x = nn.Dense(1)(x)
        return x
"""


"""
Error Code:
# Example training loop (with dummy data)
for epoch in range(10):
    batch = {'x': jnp.array([[1.0], [2.0]]), 'y': jnp.array([[2.0], [4.0]])}  # Dummy input and output
    state = train_step(state, batch)
        
        
Error:
Training uses only a fixed "dummy" batch. Loading the synthetic data from the CSV file and training on it in mini-batches is missing, so the code cannot reproduce the functionality of the original.


Fix Guide:
Add a data-loading function and a batch generator that read the data from the CSV file and split it into mini-batches.
Call them in the training loop so that the model is trained on mini-batches of the loaded data.


Correct Code:
import pandas as pd
import numpy as np

def load_data(csv_file):
    df = pd.read_csv(csv_file)
    X = jnp.array(df['X'].values, dtype=jnp.float32).reshape(-1, 1)
    y = jnp.array(df['y'].values, dtype=jnp.float32).reshape(-1, 1)
    return X, y

def data_loader(X, y, batch_size, shuffle=True):
    n = X.shape[0]
    indices = np.arange(n)
    if shuffle:
        np.random.shuffle(indices)
    for start in range(0, n, batch_size):
        batch_idx = indices[start:start + batch_size]
        yield {'x': X[batch_idx], 'y': y[batch_idx]}
        

X, y = load_data('data.csv')
batch_size = 32
epochs = 1000

for epoch in range(epochs):
    for batch in data_loader(X, y, batch_size, shuffle=True):
        state, loss = train_step(state, batch)
    if (epoch + 1) % 100 == 0:
        print(f"Epoch [{epoch + 1}/{epochs}], Loss: {loss:.4f}")
"""


"""
Error Code:
grads = grad(loss_fn)(state.params)
new_state = state.apply_gradients(grads=grads)
return new_state


Error:
The training step does not return the current loss value, so the training loop cannot log it to monitor training progress.


Fix Guide:
Modify the train_step function to return the updated state and the loss value of the current batch


Correct Code:
grads = grad(loss_fn)(state.params)
new_state = state.apply_gradients(grads=grads)
loss = loss_fn(state.params)
return new_state, loss
"""

In [5]:
#fixed
import jax
import jax.numpy as jnp  # MODIFIED: Ensure consistent import
from jax import grad, jit, random  # MODIFIED: PRNG keys usage
from flax import linen as nn
from flax.training import train_state
import optax
import pandas as pd
import numpy as np


def load_data(csv_file):
    """Read the 'X' and 'y' columns of ``csv_file`` as float32 column vectors.

    Returns:
        A tuple ``(X, y)`` of JAX arrays, each of shape ``(n_rows, 1)``.
    """
    frame = pd.read_csv(csv_file)
    features, targets = (
        jnp.array(frame[column].values, dtype=jnp.float32).reshape(-1, 1)
        for column in ('X', 'y')
    )
    return features, targets

def data_loader(X, y, batch_size, shuffle=True, rng=None):
    """Yield mini-batches ``{'x': ..., 'y': ...}`` that together cover the data once.

    Args:
        X: Input array whose first axis indexes samples.
        y: Target array with the same number of samples as ``X``.
        batch_size: Maximum number of samples per batch; must be positive. The
            final batch is smaller when the sample count is not a multiple of it.
        shuffle: Visit the samples in random order when true, in stored order
            otherwise.
        rng: Optional ``numpy.random.Generator`` (or ``RandomState``) supplying
            the permutation, which makes the batch order reproducible. When
            omitted, NumPy's global random state is used.

    Yields:
        Dicts with the selected rows of ``X`` under ``'x'`` and of ``y`` under ``'y'``.

    Raises:
        ValueError: If ``X`` and ``y`` disagree in length or ``batch_size`` is
            not positive. Because this is a generator, the error surfaces on
            the first iteration.
    """
    n = X.shape[0]
    if y.shape[0] != n:
        # JAX clamps out-of-range indices instead of raising, so a length
        # mismatch would otherwise pair inputs with the wrong targets silently.
        raise ValueError(f"X and y must have the same number of samples, got {n} and {y.shape[0]}")
    if batch_size <= 0:
        # A negative step would make range() empty and silently yield nothing
        raise ValueError(f"batch_size must be positive, got {batch_size}")
    if not shuffle:
        indices = np.arange(n)
    elif rng is not None:
        indices = rng.permutation(n)
    else:
        indices = np.arange(n)
        np.random.shuffle(indices)
    for start in range(0, n, batch_size):
        batch_idx = indices[start:start + batch_size]
        yield {'x': X[batch_idx], 'y': y[batch_idx]}

class SimpleNN(nn.Module):
    """Linear model: one dense layer with a single output feature."""

    @nn.compact
    def __call__(self, x):
        # The layer is created inline, so Flax registers it under its default name
        return nn.Dense(features=1)(x)

def create_train_state(rng, learning_rate):
    """Build a Flax ``TrainState`` for a freshly initialised ``SimpleNN``.

    Args:
        rng: PRNG key used to initialise the model variables.
        learning_rate: Step size handed to the Adam optimizer.

    Returns:
        A ``TrainState`` created from ``model.apply``, the initial variables
        and the Adam optimizer.

    NOTE(review): the PyTorch reference cell in this notebook trains with
    SGD(lr=0.01); Adam is used here -- confirm the divergence is intended.
    """
    model = SimpleNN()
    # A dummy (1, 1) input is passed to init when creating the variables
    dummy_input = jnp.ones([1, 1])
    variables = model.init(rng, dummy_input)
    return train_state.TrainState.create(
        apply_fn=model.apply,
        params=variables,
        tx=optax.adam(learning_rate),
    )

@jit
def train_step(state, batch):
    """Run one optimisation step on ``batch``.

    Args:
        state: Train state providing ``apply_fn``, ``params`` and ``apply_gradients``.
        batch: Mapping with the inputs under ``'x'`` and the targets under ``'y'``.

    Returns:
        A tuple ``(new_state, loss)``: the state after applying the gradients,
        and the mean squared error of the batch evaluated at the parameters
        *before* the update.
    """
    def loss_fn(params):
        predictions = state.apply_fn(params, batch['x'])
        return jnp.mean((predictions - batch['y']) ** 2)

    # A single combined forward/backward pass yields both the loss and the
    # gradients, instead of differentiating and then re-evaluating the loss.
    loss, grads = jax.value_and_grad(loss_fn)(state.params)
    new_state = state.apply_gradients(grads=grads)
    return new_state, loss

def main():
    """Fit the model on 'data.csv', then print the learned parameters and two test predictions."""
    # Hyperparameters
    learning_rate = 0.001
    batch_size = 32
    epochs = 1000

    # Fresh model/optimizer state from a fixed PRNG key, then the training data
    state = create_train_state(random.PRNGKey(0), learning_rate)
    X, y = load_data('data.csv')

    for epoch in range(epochs):
        # One pass over the data in shuffled mini-batches
        for batch in data_loader(X, y, batch_size, shuffle=True):
            state, loss = train_step(state, batch)
        # Every 100th epoch, report the loss of the epoch's last mini-batch
        if epoch % 100 == 99:
            print(f"Epoch [{epoch + 1}/{epochs}], Loss: {loss:.4f}")

    # Report the first entry of the flattened kernel and bias of the dense layer
    dense = state.params['params']['Dense_0']
    w = dense['kernel'].flatten()[0]
    b = dense['bias'].flatten()[0]
    print(f"Learned weight: {w:.4f}, Learned bias: {b:.4f}")

    # Predict for two inputs that are not taken from the training file
    X_test = jnp.array([[4.0], [7.0]])
    predictions = state.apply_fn(state.params, X_test)
    print(f"Predictions for {X_test.tolist()}: {predictions.tolist()}")


# Script / notebook-cell entry point
if __name__ == "__main__":
    main()

Epoch [100/1000], Loss: 199.2036
Epoch [200/1000], Loss: 104.8455
Epoch [300/1000], Loss: 81.0222
Epoch [400/1000], Loss: 65.5604
Epoch [500/1000], Loss: 33.5759
Epoch [600/1000], Loss: 9.7464
Epoch [700/1000], Loss: 5.4672
Epoch [800/1000], Loss: 1.5301
Epoch [900/1000], Loss: 2.3061
Epoch [1000/1000], Loss: 1.0849
Learned weight: 2.0726, Learned bias: 2.2719
Predictions for [[4.0], [7.0]]: [[10.562368392944336], [16.78019905090332]]
