Import Libraries and Environment Setup

In [62]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
import time


Data Loading

In [63]:
digits = datasets.load_digits()
X = digits.data
y = digits.target
scaler = StandardScaler()
X = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)
train_loader = DataLoader(TensorDataset(X_train_tensor, y_train_tensor), batch_size=32, shuffle=True)
test_loader = DataLoader(TensorDataset(X_test_tensor, y_test_tensor), batch_size=32, shuffle=False)


Model Building : Logistic Regression

In [64]:
from sklearn.linear_model import LogisticRegression
model = LogisticRegression(max_iter=10000)
model.fit(X_train, y_train)
accuracy = model.score(X_test, y_test)
print(f"Original Model Accuracy: {accuracy * 1:.2f}")

Original Model Accuracy: 0.97


Logistic_Regression Model Size

In [65]:
import joblib
import os
joblib.dump(model, "logistic_regression_model.pkl")
model_size = os.path.getsize("logistic_regression_model.pkl")
print(f"Original Model Size: {model_size / 1024:.2f} KB")

Original Model Size: 5.98 KB


Inference time

In [66]:
start_time = time.time()
predictions = model.predict(X_test)
end_time = time.time()
inference_time = end_time - start_time
print(f"Original Model Inference Time: {inference_time:.6f} seconds")

Original Model Inference Time: 0.001176 seconds


Quantization Function

In [67]:
def quantize_model(model, scale_factor):
    quantized_weights = np.round(model.coef_ * scale_factor).astype(np.int8)
    return quantized_weights


Quantized Model Inference Time and Model Accuracy

In [68]:
def quantized_inference(X, quantized_weights, scale_factor):
    scaled_weights = quantized_weights.astype(np.float32) / scale_factor
    logits = np.dot(X, scaled_weights.T) + model.intercept_
    return np.argmax(logits, axis=1)
scale_factor = 2 ** 7
quantized_weights = quantize_model(model, scale_factor)
start_time = time.time()
quantized_predictions = quantized_inference(X_test, quantized_weights, scale_factor)
end_time = time.time()
quantized_inference_time = end_time - start_time
print(f"Quantized Model Inference Time: {quantized_inference_time:.6f} seconds")
quantized_accuracy = np.mean(quantized_predictions == y_test)
print(f"Quantized Model Accuracy: {quantized_accuracy * 100:.2f}%")


Quantized Model Inference Time: 0.001360 seconds
Quantized Model Accuracy: 67.22%


Quantized Model Size

In [69]:
quantized_model_size = quantized_weights.nbytes
print(f"Quantized Model Size: {quantized_model_size / 1024:.2f} KB")

Quantized Model Size: 0.62 KB


Report of Quantized vs Original Model

In [70]:
print(f"Original Model Size: {model_size / 1024:.2f} KB")
print(f"Quantized Model Size: {quantized_model_size / 1024:.2f} KB")
print(f"Original Inference Time: {inference_time:.6f} seconds")
print(f"Quantized Inference Time: {quantized_inference_time:.6f} seconds")
print(f"Original Model Accuracy: {accuracy * 100:.2f}%")
print(f"Quantized Model Accuracy: {quantized_accuracy * 100:.2f}%")

Original Model Size: 5.98 KB
Quantized Model Size: 0.62 KB
Original Inference Time: 0.001176 seconds
Quantized Inference Time: 0.001360 seconds
Original Model Accuracy: 97.22%
Quantized Model Accuracy: 67.22%
