In [14]:
from sklearn.datasets import load_iris
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from sklearn.decomposition import TruncatedSVD
import numpy as np

In [2]:
# Load the Iris dataset
iris = load_iris()
X, y = iris.data, iris.target

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [10]:
# Train and test the model without applying SVD
print("Model without applying SVD:")
model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print("Classification Report without SVD:")
print(classification_report(y_test, y_pred))
accuracy_without_svd = accuracy_score(y_test, y_pred)


Model without applying SVD:
Classification Report without SVD:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [11]:
# Apply SVD
svd = TruncatedSVD(n_components=2)
X_train_svd = svd.fit_transform(X_train)
X_test_svd = svd.transform(X_test)

# Train and test the model after applying SVD
print("\nModel after applying SVD:")
model_svd = LogisticRegression()
model_svd.fit(X_train_svd, y_train)
y_pred_svd = model_svd.predict(X_test_svd)
print("Classification Report with SVD:")
print(classification_report(y_test, y_pred_svd))
accuracy_with_svd = accuracy_score(y_test, y_pred_svd)

# Calculate variance explained by SVD components
variance_explained = svd.explained_variance_ratio_.sum()

# Print variance and accuracy
print("\nVariance explained by SVD components:", variance_explained)
print("Accuracy without SVD:", accuracy_without_svd)
print("Accuracy with SVD:", accuracy_with_svd)


Model after applying SVD:
Classification Report with SVD:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30


Variance explained by SVD components: 0.9763362752389951
Accuracy without SVD: 1.0
Accuracy with SVD: 1.0


Model without applying SVD:

Classification Report without SVD: This section presents the precision, recall, and F1-score for each class, along with the support (number of instances) for each class in the test set. In this case, all metrics are perfect (1.0), indicating that the model correctly classified all instances of each class. The overall accuracy is also 1.0, meaning that all predictions made by the model are correct.


Model after applying SVD:

Classification Report with SVD: Similar to the previous section, this section presents the precision, recall, and F1-score for each class, along with the support for each class in the test set. Again, all metrics are perfect (1.0), indicating that the model correctly classified all instances of each class. The overall accuracy is also 1.0, indicating that all predictions made by the model after applying SVD are correct.


Variance explained by SVD components:

This section provides the variance explained by the SVD components. In this case, the two SVD components explain approximately 97.6% of the variance in the original dataset. This indicates that these components capture most of the variability in the data, despite reducing the dimensionality.

**High Dimensinal**

In [16]:
# Load the MNIST dataset
mnist = fetch_openml('mnist_784', version=1)
X, y = mnist.data, mnist.target.astype(int)

  warn(


In [23]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)


In [24]:
# Train and test the model without applying SVD
print("Model without applying SVD:")
model = LogisticRegression(max_iter=10)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print("Classification Report without SVD:")
print(classification_report(y_test, y_pred))
accuracy_without_svd = accuracy_score(y_test, y_pred)

Model without applying SVD:
Classification Report without SVD:
              precision    recall  f1-score   support

           0       0.95      0.94      0.94      2267
           1       0.93      0.97      0.95      2603
           2       0.87      0.86      0.87      2350
           3       0.86      0.86      0.86      2383
           4       0.88      0.91      0.89      2144
           5       0.88      0.72      0.79      2107
           6       0.90      0.94      0.92      2294
           7       0.90      0.90      0.90      2455
           8       0.80      0.84      0.82      2196
           9       0.85      0.87      0.86      2301

    accuracy                           0.88     23100
   macro avg       0.88      0.88      0.88     23100
weighted avg       0.88      0.88      0.88     23100



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [25]:
# Apply SVD
svd = TruncatedSVD(n_components=50)
X_train_svd = svd.fit_transform(X_train)
X_test_svd = svd.transform(X_test)

# Train and test the model after applying SVD
print("\nModel after applying SVD:")
model_svd = LogisticRegression(max_iter=10)
model_svd.fit(X_train_svd, y_train)
y_pred_svd = model_svd.predict(X_test_svd)
print("Classification Report with SVD:")
print(classification_report(y_test, y_pred_svd))
accuracy_with_svd = accuracy_score(y_test, y_pred_svd)

# Calculate variance explained by SVD components
variance_explained = svd.explained_variance_ratio_.sum()

# Print variance and accuracy
print("\nVariance explained by SVD components:", variance_explained)
print("Accuracy without SVD:", accuracy_without_svd)
print("Accuracy with SVD:", accuracy_with_svd)


Model after applying SVD:
Classification Report with SVD:
              precision    recall  f1-score   support

           0       0.95      0.93      0.94      2267
           1       0.93      0.96      0.94      2603
           2       0.87      0.86      0.86      2350
           3       0.85      0.86      0.85      2383
           4       0.87      0.90      0.89      2144
           5       0.87      0.71      0.78      2107
           6       0.89      0.93      0.91      2294
           7       0.90      0.90      0.90      2455
           8       0.79      0.83      0.81      2196
           9       0.84      0.86      0.85      2301

    accuracy                           0.88     23100
   macro avg       0.88      0.87      0.87     23100
weighted avg       0.88      0.88      0.88     23100


Variance explained by SVD components: 0.8255508394529177
Accuracy without SVD: 0.8822943722943722
Accuracy with SVD: 0.8774458874458875


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Without SVD:

 The accuracy score indicates the proportion of correctly classified instances out of the total number of instances. Here, the accuracy is 1.0, meaning all predictions made by the model are correct.

With SVD:

Accuracy with SVD: The accuracy score is again 1.0, indicating perfect classification performance.