In [8]:
import clip_feature_extractor
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from scipy.stats import loguniform
import numpy as np

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
X_train_CIFAR100, y_train_CIFAR100, X_test_CIFAR100, y_test_CIFAR100 = clip_feature_extractor.get_CIFAR100_features()

Files already downloaded and verified
Files already downloaded and verified
Extracting features from CIFAR100 dataset
Loaded previously extracted features from disk.


In [None]:
# C = 0.316 provided by OpenAI

log_reg_CIFAR100 = LogisticRegression(random_state=0, max_iter=1000, C=0.316, n_jobs=-1)
log_reg_CIFAR100.fit(X_train_CIFAR100, y_train_CIFAR100)
predictions_CIFAR100 = log_reg_CIFAR100.predict(X_test_CIFAR100)
accuracy_CIFAR100 = np.mean((y_test_CIFAR100 == predictions_CIFAR100).astype(float)) * 100.
print(f"Accuracy = {accuracy_CIFAR100:.3f}")


Accuracy = 80.020


In [6]:
X_train_CIFAR10, y_train_CIFAR10, X_test_CIFAR10, y_test_CIFAR10 = clip_feature_extractor.get_CIFAR10_features()

Files already downloaded and verified
Files already downloaded and verified
Loaded features from disk.


In [17]:
# Create a pipeline with data scaling and logistic regression
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('logreg', LogisticRegression(max_iter=1000))
])

# Define the parameter distributions with valid solver-penalty combinations
param_distributions = [
    {
        'logreg__penalty': ['l1'],
        'logreg__C': loguniform(1e-4, 1e4),
        'logreg__solver': ['liblinear', 'saga'],
    },
    {
        'logreg__penalty': ['l2'],
        'logreg__C': loguniform(1e-4, 1e4),
        'logreg__solver': ['lbfgs', 'liblinear', 'sag', 'saga'],
    },
    {
        'logreg__penalty': ['elasticnet'],
        'logreg__C': loguniform(1e-4, 1e4),
        'logreg__solver': ['saga'],
        'logreg__l1_ratio': np.linspace(0, 1, 10),
    },
    {
        'logreg__penalty': ['none'],
        'logreg__solver': ['lbfgs', 'sag', 'saga'],
    }
]

# Initialize RandomizedSearchCV
random_search = RandomizedSearchCV(
    estimator=pipeline,
    param_distributions=param_distributions,
    n_iter=25,
    cv=2,
    scoring='accuracy',
    random_state=42,
    n_jobs=-1,
    verbose=2
)

# Fit the model on the training data
random_search.fit(X_train_CIFAR10, y_train_CIFAR10)

# Output the best parameters and score
print("Best parameters found: ", random_search.best_params_)
print("Best cross-validation score: ", random_search.best_score_)

# Evaluate the best model on the test set
best_model = random_search.best_estimator_
test_score = best_model.score(X_test_CIFAR10, y_test_CIFAR10)
print("Test set accuracy: ", test_score)

Fitting 2 folds for each of 25 candidates, totalling 50 fits
[CV 2/2] END logreg__C=0.0006306658668123951, logreg__l1_ratio=0.7777777777777777, logreg__penalty=elasticnet, logreg__solver=saga;, score=0.898 total time=  14.7s
[CV 1/2] END logreg__C=0.0006306658668123951, logreg__l1_ratio=0.7777777777777777, logreg__penalty=elasticnet, logreg__solver=saga;, score=0.891 total time=  14.9s
[CV 2/2] END logreg__C=0.00014610865886287216, logreg__l1_ratio=0.1111111111111111, logreg__penalty=elasticnet, logreg__solver=saga;, score=0.919 total time=  20.7s
[CV 1/2] END logreg__penalty=none, logreg__solver=sag;, score=nan total time=   0.5s
[CV 2/2] END logreg__penalty=none, logreg__solver=sag;, score=nan total time=   0.5s
[CV 1/2] END logreg__C=0.00014610865886287216, logreg__l1_ratio=0.1111111111111111, logreg__penalty=elasticnet, logreg__solver=saga;, score=0.918 total time=  23.3s
[CV 1/2] END logreg__C=0.00010144487859320233, logreg__penalty=l2, logreg__solver=saga;, score=0.935 total time



[CV 1/2] END logreg__C=235.69148616733443, logreg__l1_ratio=0.7777777777777777, logreg__penalty=elasticnet, logreg__solver=saga;, score=0.925 total time=28.7min




[CV 2/2] END logreg__C=235.69148616733443, logreg__l1_ratio=0.7777777777777777, logreg__penalty=elasticnet, logreg__solver=saga;, score=0.928 total time=28.7min
[CV 1/2] END logreg__C=0.00011390176182186649, logreg__penalty=l2, logreg__solver=lbfgs;, score=0.935 total time=   2.1s
[CV 2/2] END logreg__C=0.00011390176182186649, logreg__penalty=l2, logreg__solver=lbfgs;, score=0.939 total time=   2.1s
[CV 2/2] END logreg__C=1.5783280762132288, logreg__penalty=l1, logreg__solver=liblinear;, score=0.943 total time= 2.4min
[CV 1/2] END logreg__C=1.5783280762132288, logreg__penalty=l1, logreg__solver=liblinear;, score=0.942 total time= 2.5min
[CV 2/2] END logreg__C=0.0002362246997776369, logreg__penalty=l2, logreg__solver=saga;, score=0.943 total time=   7.9s
[CV 1/2] END logreg__penalty=none, logreg__solver=saga;, score=nan total time=   0.5s
[CV 1/2] END logreg__C=0.0002362246997776369, logreg__penalty=l2, logreg__solver=saga;, score=0.939 total time=   9.5s
[CV 2/2] END logreg__penalty=no



[CV 2/2] END logreg__C=5.953896264004558, logreg__penalty=l1, logreg__solver=saga;, score=0.930 total time=31.6min




[CV 1/2] END logreg__C=6.4405075539937195, logreg__penalty=l1, logreg__solver=saga;, score=0.927 total time=31.7min
[CV 1/2] END logreg__C=0.0005307029188745615, logreg__penalty=l2, logreg__solver=liblinear;, score=0.936 total time=  26.3s
[CV 1/2] END logreg__penalty=none, logreg__solver=lbfgs;, score=nan total time=   0.5s
[CV 2/2] END logreg__penalty=none, logreg__solver=lbfgs;, score=nan total time=   0.5s
[CV 2/2] END logreg__C=0.0005307029188745615, logreg__penalty=l2, logreg__solver=liblinear;, score=0.939 total time=  27.3s




[CV 1/2] END logreg__C=5.953896264004558, logreg__penalty=l1, logreg__solver=saga;, score=0.927 total time=31.8min




[CV 1/2] END logreg__C=8.706669857047757, logreg__penalty=l1, logreg__solver=saga;, score=0.927 total time=31.5min




[CV 2/2] END logreg__C=8.706669857047757, logreg__penalty=l1, logreg__solver=saga;, score=0.930 total time=31.6min
[CV 1/2] END logreg__penalty=none, logreg__solver=lbfgs;, score=nan total time=   0.6s
[CV 2/2] END logreg__penalty=none, logreg__solver=lbfgs;, score=nan total time=   0.5s
[CV 1/2] END logreg__penalty=none, logreg__solver=sag;, score=nan total time=   0.5s
[CV 2/2] END logreg__penalty=none, logreg__solver=sag;, score=nan total time=   0.5s




[CV 2/2] END logreg__C=6.4405075539937195, logreg__penalty=l1, logreg__solver=saga;, score=0.930 total time=32.6min
[CV 1/2] END logreg__C=0.12115239957904832, logreg__penalty=l2, logreg__solver=liblinear;, score=0.945 total time= 1.1min
[CV 2/2] END logreg__C=0.12115239957904832, logreg__penalty=l2, logreg__solver=liblinear;, score=0.950 total time= 1.1min
[CV 1/2] END logreg__C=0.0070331617412763316, logreg__penalty=l1, logreg__solver=saga;, score=0.932 total time=  37.2s
[CV 2/2] END logreg__C=0.0070331617412763316, logreg__penalty=l1, logreg__solver=saga;, score=0.934 total time=  34.3s
[CV 1/2] END logreg__C=0.11473500268396371, logreg__l1_ratio=0.3333333333333333, logreg__penalty=elasticnet, logreg__solver=saga;, score=0.945 total time=11.3min
[CV 1/2] END logreg__C=0.0024373462416671784, logreg__l1_ratio=0.0, logreg__penalty=elasticnet, logreg__solver=saga;, score=0.947 total time=  38.6s
[CV 2/2] END logreg__C=0.0024373462416671784, logreg__l1_ratio=0.0, logreg__penalty=elastic



[CV 2/2] END logreg__C=0.4017852745189947, logreg__penalty=l1, logreg__solver=saga;, score=0.945 total time=26.6min




[CV 1/2] END logreg__C=757.7453045410584, logreg__l1_ratio=0.6666666666666666, logreg__penalty=elasticnet, logreg__solver=saga;, score=0.925 total time=29.7min




[CV 2/2] END logreg__C=757.7453045410584, logreg__l1_ratio=0.6666666666666666, logreg__penalty=elasticnet, logreg__solver=saga;, score=0.928 total time=30.2min




[CV 2/2] END logreg__C=29.24880658395848, logreg__l1_ratio=0.7777777777777777, logreg__penalty=elasticnet, logreg__solver=saga;, score=0.928 total time=30.6min




[CV 1/2] END logreg__C=29.24880658395848, logreg__l1_ratio=0.7777777777777777, logreg__penalty=elasticnet, logreg__solver=saga;, score=0.926 total time=30.9min


14 fits failed out of a total of 50.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
8 fits failed with the following error:
Traceback (most recent call last):
  File "/home/tonyxdsu/.local/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/tonyxdsu/.local/lib/python3.12/site-packages/sklearn/base.py", line 1473, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tonyxdsu/.local/lib/python3.12/site-packages/sklearn/pipeline.py", line 473, in fit
    self._final_estimator.fit(Xt, y, **last_step_params["fit"])
  File "/home/tonyxdsu/.local/lib/python3.12/sit

Best parameters found:  {'logreg__C': np.float64(0.0024373462416671784), 'logreg__l1_ratio': np.float64(0.0), 'logreg__penalty': 'elasticnet', 'logreg__solver': 'saga'}
Best cross-validation score:  0.9486
Test set accuracy:  0.9492
