In [33]:
import numpy as np
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

class MiniBatchSGDRegressor:
    def __init__(self):
        self.learning_rate = 0.01
        self.n_iter = 100
        self.batch_size = 32
        self.model = Pipeline([
            ('scaler', StandardScaler()),
            ('regressor', SGDRegressor(
                learning_rate='constant',
                eta0=0.01,
                n_iter_no_change=5,
                warm_start=True
                ))
        ])

    def partial_fit(self, pipeline, X, y):
        X = pipeline.named_steps['scaler'].fit_transform(X)
        pipeline.named_steps['regressor'].partial_fit(X, y)

    def iterate_minibatches(self, X, y):
        for i in range(0, len(X), self.batch_size):
            yield X[i:i + self.batch_size], y[i:i + self.batch_size]

    def fit(self, X_train, y_train):
        for epoch in range(self.n_iter):
            for X_batch, y_batch in self.iterate_minibatches(X_train, y_train):
                self.partial_fit(self.model, X_batch, y_batch)
    


# https://stackoverflow.com/questions/17931865/using-partial-fit-with-scikit-pipeline

    def predict(self, X_test):
        return self.model.predict(X_test)

# Example usage:
# Assuming you have X_train, y_train, X_test
# model = MiniBatchSGDRegressor()
# model.fit(X_train, y_train)
# predictions = model.predict(X_test)


In [34]:
# Import packages

import pandas as pd
import numpy as np
import os
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_squared_error, mean_absolute_error, r2_score
from util import dict_train_test_split

In [35]:
%load_ext memory_profiler

The memory_profiler extension is already loaded. To reload it, use:
  %reload_ext memory_profiler


In [37]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Set random seed for reproducibility
np.random.seed(42)

# Generate sample data
X = 2 * np.random.rand(1000, 1)
y = 4 + 3 * X + np.random.randn(1000, 1)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
y_train = y_train.ravel()

# Example usage:
model = MiniBatchSGDRegressor()
model.fit(X_train, y_train)
predictions = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, predictions)
print(f'Mean Squared Error: {mse}')


Mean Squared Error: 1.0294120372968558
