In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import BernoulliRBM
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Load the MNIST digits dataset ('mnist_784') from OpenML.
# NOTE: this downloads the data on first use and caches it locally.
mnist = fetch_openml('mnist_784', version=1)
X, y = mnist['data'], mnist['target']

# Split the dataset into training and test sets (80% / 20%, fixed seed).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Preprocess the data by rescaling pixel intensities to the [0, 1] range.
# BernoulliRBM models binary visible units and treats every input value as a
# probability, so its inputs must lie in [0, 1]. Standardizing to zero mean and
# unit variance would feed it negative, unbounded values and degrade the
# learned features. MNIST pixels are 8-bit intensities, hence the 255 divisor.
X_train_scaled = np.asarray(X_train, dtype=np.float64) / 255.0
X_test_scaled = np.asarray(X_test, dtype=np.float64) / 255.0

# Initialize the RBM model. It is seeded so that the learned features (and
# therefore the reported scores) are reproducible, like the split above.
rbm = BernoulliRBM(n_components=256, learning_rate=0.01, n_iter=5, verbose=1, random_state=42)

# Initialize the logistic regression model with increased max_iter and class weights
logistic = LogisticRegression(max_iter=200, class_weight='balanced')

# Create a pipeline that first extracts features using the RBM and then classifies with logistic regression
rbm_pipeline = Pipeline(steps=[('rbm', rbm), ('logistic', logistic)])

# Train the pipeline
rbm_pipeline.fit(X_train_scaled, y_train)

# Make predictions
y_pred = rbm_pipeline.predict(X_test_scaled)

# Print the classification report (per-class precision / recall / F1)
print("Logistic Regression using RBM features:\n", classification_report(y_test, y_pred))

# Evaluate the model on the test set (mean accuracy)
rbm_score = rbm_pipeline.score(X_test_scaled, y_test)
print("RBM Classification score:", rbm_score)

# Check the distribution of classes in each split
print("Training set class distribution:\n", pd.Series(y_train).value_counts())
print("Test set class distribution:\n", pd.Series(y_test).value_counts())
