## PCA code from scratch

In [20]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [4]:
# Task 1: Load Breast Cancer Dataset
data = load_breast_cancer()
X = data.data
y = data.target

In [5]:
# Task 2: Print shape and first few rows
print("Dataset shape:", X.shape)
print("First few rows of the dataset:")
print(pd.DataFrame(X, columns=data.feature_names).head())

Dataset shape: (569, 30)
First few rows of the dataset:
   mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
0        17.99         10.38          122.80     1001.0          0.11840   
1        20.57         17.77          132.90     1326.0          0.08474   
2        19.69         21.25          130.00     1203.0          0.10960   
3        11.42         20.38           77.58      386.1          0.14250   
4        20.29         14.34          135.10     1297.0          0.10030   

   mean compactness  mean concavity  mean concave points  mean symmetry  \
0           0.27760          0.3001              0.14710         0.2419   
1           0.07864          0.0869              0.07017         0.1812   
2           0.15990          0.1974              0.12790         0.2069   
3           0.28390          0.2414              0.10520         0.2597   
4           0.13280          0.1980              0.10430         0.1809   

   mean fractal dimension  ...  wors

In [6]:
# Task 3: Define features and target
features = X
target = y

In [7]:
# Task 4: Print the feature names
print("Feature names:")
print(data.feature_names)

Feature names:
['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']


In [8]:
# Task 5: Standardize the input
scaler = StandardScaler()
features = scaler.fit_transform(features)

In [9]:
# Task 6: Print the mean and std of the transformed input
print("Mean of standardized features:", np.mean(features))
print("Standard deviation of standardized features:", np.std(features))

Mean of standardized features: -6.118909323768877e-16
Standard deviation of standardized features: 1.0


In [10]:
# Task 7: Define the number of principal components to 2
n_components = 2

In [11]:
# Task 8: Fit the input into PCA
covariance_matrix = np.cov(features.T)
eigenvalues, eigenvectors = np.linalg.eig(covariance_matrix)
eigen_pairs = [(np.abs(eigenvalues[i]), eigenvectors[:, i]) for i in range(len(eigenvalues))]
eigen_pairs.sort(key=lambda x: x[0], reverse=True)
w = np.hstack((eigen_pairs[i][1].reshape(len(eigenvalues), 1)) for i in range(n_components))

  w = np.hstack((eigen_pairs[i][1].reshape(len(eigenvalues), 1)) for i in range(n_components))


In [12]:
# Task 9: Print the variance ratio of the components
variance_ratio = eigenvalues / sum(eigenvalues)
print("Variance ratio of the components:")
print(variance_ratio)

Variance ratio of the components:
[4.42720256e-01 1.89711820e-01 9.39316326e-02 6.60213492e-02
 5.49576849e-02 4.02452204e-02 2.25073371e-02 1.58872380e-02
 1.38964937e-02 1.16897819e-02 9.79718988e-03 8.70537901e-03
 8.04524987e-03 5.23365745e-03 3.13783217e-03 2.66209337e-03
 1.97996793e-03 1.75395945e-03 1.64925306e-03 4.43482743e-06
 2.49601032e-05 5.29779290e-05 2.30015463e-04 2.72587995e-04
 5.16042379e-04 6.01833567e-04 8.11361259e-04 9.14646751e-04
 1.03864675e-03 9.99096464e-04]


In [13]:
# Task 10: Divide the dataset into training set and testing set
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.33, random_state=42)

In [14]:
# Task 11: Build neural networks for dataset without PCA and with PCA
class NeuralNetwork:
    def __init__(self, input_dim, hidden_dims, output_dim):
        self.weights = []
        self.biases = []
        self.num_layers = len(hidden_dims) + 1

        # Initialize weights and biases for hidden layers
        for i in range(self.num_layers - 1):
            if i == 0:
                self.weights.append(np.random.randn(input_dim, hidden_dims[i]))
            else:
                self.weights.append(np.random.randn(hidden_dims[i-1], hidden_dims[i]))
            self.biases.append(np.zeros(hidden_dims[i]))

        # Initialize weights and biases for the output layer
        self.weights.append(np.random.randn(hidden_dims[-1], output_dim))
        self.biases.append(np.zeros(output_dim))

    def forward(self, X):
        activations = []
        for i in range(self.num_layers):
            if i == 0:
                activations.append(self.relu(np.dot(X, self.weights[i]) + self.biases[i]))
            else:
                activations.append(self.relu(np.dot(activations[i-1], self.weights[i]) + self.biases[i]))
        return activations[-1]

    @staticmethod
    def relu(x):
        return np.maximum(0, x)

    def predict(self, X):
        output = self.forward(X)
        return np.round(output)

    def get_accuracy(self, X, y):
        predictions = self.predict(X)
        accuracy = np.mean(predictions == y)
        return accuracy

In [15]:
# Task 12: Total layers – 1 input layer, 2 hidden layers of 500 neurons with ReLU activation, 1 output layer with 1 neuron and sigmoid activation
input_dim = X_train.shape[1]
hidden_dims = [500, 500]
output_dim = 1

In [16]:
# Without PCA
nn_without_pca = NeuralNetwork(input_dim, hidden_dims, output_dim)

# With PCA
nn_with_pca = NeuralNetwork(n_components, hidden_dims, output_dim)

In [17]:
# Task 13: Compile the model with loss as Cross entropy, adam optimizer, and accuracy metric
class BinaryCrossEntropyLoss:
    @staticmethod
    def loss(y_true, y_pred):
        epsilon = 1e-10
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
        loss = -y_true * np.log(y_pred) - (1 - y_true) * np.log(1 - y_pred)
        return loss

    @staticmethod
    def gradient(y_true, y_pred):
        epsilon = 1e-10
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
        gradient = (y_pred - y_true) / (y_pred * (1 - y_pred))
        return gradient


class AdamOptimizer:
    def __init__(self, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
        self.learning_rate = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.m = None
        self.v = None
        self.t = 0

    def update(self, weights, biases, gradients):
        if self.m is None and self.v is None:
            self.m = [np.zeros_like(weight) for weight in weights]
            self.v = [np.zeros_like(weight) for weight in weights]

        self.t += 1
        for i in range(len(weights)):
            self.m[i] = self.beta1 * self.m[i] + (1 - self.beta1) * gradients[i]
            self.v[i] = self.beta2 * self.v[i] + (1 - self.beta2) * np.square(gradients[i])

            m_hat = self.m[i] / (1 - self.beta1 ** self.t)
            v_hat = self.v[i] / (1 - self.beta2 ** self.t)

            weights[i] -= self.learning_rate * m_hat / (np.sqrt(v_hat) + self.epsilon)
            biases[i] -= self.learning_rate * np.mean(m_hat / (np.sqrt(v_hat) + self.epsilon))


def train_network(nn, X_train, y_train, num_epochs=100, batch_size=32, learning_rate=0.001):
    loss_func = BinaryCrossEntropyLoss()
    optimizer = AdamOptimizer(learning_rate=learning_rate)

    num_samples = X_train.shape[0]
    num_batches = num_samples // batch_size

    for epoch in range(num_epochs):
        for batch in range(num_batches):
            start = batch * batch_size
            end = start + batch_size
            X_batch = X_train[start:end]
            y_batch = y_train[start:end]

            activations = []
            activations.append(X_batch)
            output = nn.forward(X_batch)
            activations.append(output)

            loss = loss_func.loss(y_batch, output)
            gradient = loss_func.gradient(y_batch, output)

            for i in range(nn.num_layers - 1, -1, -1):
                if i == nn.num_layers - 1:
                    delta = gradient
                else:
                    delta = np.dot(delta, nn.weights[i + 1].T) * (activations[i] > 0)

                weight_gradient = np.dot(activations[i].T, delta)
                bias_gradient = np.mean(delta, axis=0)

                optimizer.update(nn.weights, nn.biases, [weight_gradient, bias_gradient])

    return nn

In [18]:
# Task 14: Print the accuracy for both networks
num_epochs = 100
batch_size = 32
learning_rate = 0.001

nn_without_pca = train_network(nn_without_pca, X_train, y_train, num_epochs, batch_size, learning_rate)
nn_with_pca = train_network(nn_with_pca, X_train @ w, y_train, num_epochs, batch_size, learning_rate)

accuracy_without_pca = nn_without_pca.get_accuracy(X_test, y_test)
accuracy_with_pca = nn_with_pca.get_accuracy(X_test @ w, y_test)

print("Accuracy without PCA:", accuracy_without_pca)
print("Accuracy with PCA:", accuracy_with_pca)

IndexError: list index out of range

## using PCA library

In [21]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA


# Task 1: Load Breast Cancer Dataset
data = load_breast_cancer()
X = data.data
y = data.target


# Task 2: Print shape and first few rows
print("Dataset shape:", X.shape)
print("First few rows of the dataset:")
print(pd.DataFrame(X, columns=data.feature_names).head())


# Task 3: Define features and target
features = X
target = y


# Task 4: Print the feature names
print("Feature names:")
print(data.feature_names)


# Task 5: Standardize the input
scaler = StandardScaler()
features = scaler.fit_transform(features)


# Task 6: Print the mean and std of the transformed input
print("Mean of standardized features:", np.mean(features))
print("Standard deviation of standardized features:", np.std(features))


# Task 7: Define the number of principal components to 2
n_components = 2


# Task 8: Fit the input into PCA
pca = PCA(n_components=n_components)
principal_components = pca.fit_transform(features)


# Task 9: Print the variance ratio of the components
variance_ratio = pca.explained_variance_ratio_
print("Variance ratio of the components:")
print(variance_ratio)


# Task 10: Divide the dataset into training set and testing set
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.33, random_state=42)


# Task 11: Build neural networks for dataset without PCA and with PCA
# No neural network code in this version


# Task 12: Print the accuracy for both networks
print("Accuracy without PCA: N/A")
print("Accuracy with PCA: N/A")

Dataset shape: (569, 30)
First few rows of the dataset:
   mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
0        17.99         10.38          122.80     1001.0          0.11840   
1        20.57         17.77          132.90     1326.0          0.08474   
2        19.69         21.25          130.00     1203.0          0.10960   
3        11.42         20.38           77.58      386.1          0.14250   
4        20.29         14.34          135.10     1297.0          0.10030   

   mean compactness  mean concavity  mean concave points  mean symmetry  \
0           0.27760          0.3001              0.14710         0.2419   
1           0.07864          0.0869              0.07017         0.1812   
2           0.15990          0.1974              0.12790         0.2069   
3           0.28390          0.2414              0.10520         0.2597   
4           0.13280          0.1980              0.10430         0.1809   

   mean fractal dimension  ...  wors

In [22]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

# Load Breast Cancer dataset
data = load_breast_cancer()
X = data.data
y = data.target

# Print shape and first few rows
print("Data shape:", X.shape)
print("First few rows of data:\n", pd.DataFrame(X, columns=data.feature_names).head())

# Define features and target
features = data.feature_names
target = data.target_names

# Print feature names
print("Feature names:")
for feature in features:
    print("-", feature)

# Standardize the input features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Print mean and std of transformed input
print("Mean of transformed input:", np.mean(X_scaled))
print("Standard deviation of transformed input:", np.std(X_scaled))

# Define the number of principal components to 2
n_components = 2

# Fit the input to PCA
pca = PCA(n_components=n_components)
X_pca = pca.fit_transform(X_scaled)

# Print variance ratio of the components
print("Variance ratio of the components:", pca.explained_variance_ratio_)

# Divide the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.33, random_state=42)

# Build neural network without PCA
model_without_pca = MLPClassifier(hidden_layer_sizes=(500, 500), activation='relu', random_state=42)
model_without_pca.fit(X_train, y_train)
y_pred_without_pca = model_without_pca.predict(X_test)
accuracy_without_pca = accuracy_score(y_test, y_pred_without_pca)
print("Accuracy without PCA:", accuracy_without_pca)

# Build neural network with PCA
X_pca_train, X_pca_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.33, random_state=42)
model_with_pca = MLPClassifier(hidden_layer_sizes=(500, 500), activation='relu', random_state=42)
model_with_pca.fit(X_pca_train, y_train)
y_pred_with_pca = model_with_pca.predict(X_pca_test)
accuracy_with_pca = accuracy_score(y_test, y_pred_with_pca)
print("Accuracy with PCA:", accuracy_with_pca)


Data shape: (569, 30)
First few rows of data:
    mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
0        17.99         10.38          122.80     1001.0          0.11840   
1        20.57         17.77          132.90     1326.0          0.08474   
2        19.69         21.25          130.00     1203.0          0.10960   
3        11.42         20.38           77.58      386.1          0.14250   
4        20.29         14.34          135.10     1297.0          0.10030   

   mean compactness  mean concavity  mean concave points  mean symmetry  \
0           0.27760          0.3001              0.14710         0.2419   
1           0.07864          0.0869              0.07017         0.1812   
2           0.15990          0.1974              0.12790         0.2069   
3           0.28390          0.2414              0.10520         0.2597   
4           0.13280          0.1980              0.10430         0.1809   

   mean fractal dimension  ...  worst radius 



In [25]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.decomposition import PCA
from tensorflow import keras
from tensorflow.keras import layers

# Step 1: Load Breast Cancer Dataset
data = load_breast_cancer()

# Step 2: Print shape and first few rows
print("Dataset Shape:", data.data.shape)
print("First few rows of the dataset:")
print(pd.DataFrame(data.data, columns=data.feature_names).head())

# Step 3: Define features and target
X = data.data
y = data.target

# Step 4: Print feature names
print("Feature Names:")
print(data.feature_names)

# Step 5: Use StandardScaler() to transform the input
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 6: Print the mean and std of the transformed input
print("Mean of the Transformed Input:")
print(np.mean(X_scaled, axis=0))
print("Standard Deviation of the Transformed Input:")
print(np.std(X_scaled, axis=0))

# Step 7: Define the number of principal components to 2
n_components = 2

# Step 8: Fit the input into PCA
pca = PCA(n_components=n_components)
X_pca = pca.fit_transform(X_scaled)

# Step 9: Print the variance ratio of the components
print("Variance Ratio of the Components:")
print(pca.explained_variance_ratio_)

# Step 10: Divide the dataset into training set and testing set (0.33)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.33, random_state=42)

# Step 11: Build neural networks for dataset without PCA and with PCA
input_shape = X_train.shape[1]
num_classes = len(np.unique(y_train))

# Without PCA
model_no_pca = keras.Sequential([
    layers.Dense(500, activation='relu', input_shape=(input_shape,)),
    layers.Dense(500, activation='relu'),
    layers.Dense(num_classes, activation='sigmoid')
])

# With PCA
input_shape_pca = X_pca.shape[1]
model_with_pca = keras.Sequential([
    layers.Dense(500, activation='relu', input_shape=(input_shape_pca,)),
    layers.Dense(500, activation='relu'),
    layers.Dense(num_classes, activation='sigmoid')
])

# Step 12: Compile the models
model_no_pca.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model_with_pca.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# One-hot encode the target variables
encoder = OneHotEncoder(categories='auto')
y_train_encoded = encoder.fit_transform(y_train.reshape(-1, 1)).toarray()
y_test_encoded = encoder.transform(y_test.reshape(-1, 1)).toarray()

# Step 13: Print the accuracy for both networks
batch_size = 128
epochs = 20

# Without PCA
model_no_pca.fit(X_train, y_train_encoded, batch_size=batch_size, epochs=epochs, validation_data=(X_test, y_test_encoded))
_, accuracy_no_pca = model_no_pca.evaluate(X_test, y_test_encoded)
print("Accuracy without PCA:", accuracy_no_pca)

# With PCA
model_with_pca.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model_with_pca.fit(X_pca, y_train_encoded, batch_size=batch_size, epochs=epochs, validation_split=0.2)
_, accuracy_with_pca = model_with_pca.evaluate(pca.transform(X_test), y_test_encoded)
print("Accuracy with PCA:", accuracy_with_pca)


Dataset Shape: (569, 30)
First few rows of the dataset:
   mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
0        17.99         10.38          122.80     1001.0          0.11840   
1        20.57         17.77          132.90     1326.0          0.08474   
2        19.69         21.25          130.00     1203.0          0.10960   
3        11.42         20.38           77.58      386.1          0.14250   
4        20.29         14.34          135.10     1297.0          0.10030   

   mean compactness  mean concavity  mean concave points  mean symmetry  \
0           0.27760          0.3001              0.14710         0.2419   
1           0.07864          0.0869              0.07017         0.1812   
2           0.15990          0.1974              0.12790         0.2069   
3           0.28390          0.2414              0.10520         0.2597   
4           0.13280          0.1980              0.10430         0.1809   

   mean fractal dimension  ...  wors

ValueError: Data cardinality is ambiguous:
  x sizes: 455
  y sizes: 381
Make sure all arrays contain the same number of samples.

In [28]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.decomposition import PCA
from tensorflow import keras
from tensorflow.keras import layers

# Step 1: Load Breast Cancer Dataset
data = load_breast_cancer()

# Step 2: Print shape and first few rows
print("Dataset Shape:", data.data.shape)
print("First few rows of the dataset:")
print(pd.DataFrame(data.data, columns=data.feature_names).head())

# Step 3: Define features and target
X = data.data
y = data.target

# Step 4: Print feature names
print("Feature Names:")
print(data.feature_names)

# Step 5: Use StandardScaler() to transform the input
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 6: Print the mean and std of the transformed input
print("Mean of the Transformed Input:")
print(np.mean(X_scaled, axis=0))
print("Standard Deviation of the Transformed Input:")
print(np.std(X_scaled, axis=0))

# Step 7: Define the number of principal components to 2
n_components = 2

# Step 8: Fit the input into PCA
pca = PCA(n_components=n_components)
X_pca = pca.fit_transform(X_scaled)

# Step 9: Print the variance ratio of the components
print("Variance Ratio of the Components:")
print(pca.explained_variance_ratio_)

# Step 10: Divide the dataset into training set and testing set (0.33)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.33, random_state=42)

# Step 11: Build neural networks for dataset without PCA and with PCA
input_shape = X_train.shape[1]
num_classes = len(np.unique(y_train))

# Without PCA
model_no_pca = keras.Sequential([
    layers.Dense(500, activation='relu', input_shape=(input_shape,)),
    layers.Dense(500, activation='relu'),
    layers.Dense(num_classes, activation='sigmoid')
])

# With PCA
input_shape_pca = X_pca.shape[1]
model_with_pca = keras.Sequential([
    layers.Dense(500, activation='relu', input_shape=(input_shape_pca,)),
    layers.Dense(500, activation='relu'),
    layers.Dense(num_classes, activation='sigmoid')
])

# Step 12: Compile the models
model_no_pca.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model_with_pca.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# One-hot encode the target variables
encoder = OneHotEncoder(categories='auto')
y_train_encoded = encoder.fit_transform(y_train.reshape(-1, 1)).toarray()
y_test_encoded = encoder.transform(y_test.reshape(-1, 1)).toarray()

# Step 13: Print the accuracy for both networks
batch_size = 128
epochs = 20

# Without PCA
model_no_pca.fit(X_train, y_train_encoded, batch_size=batch_size, epochs=epochs, validation_data=(X_test, y_test_encoded))
_, accuracy_no_pca = model_no_pca.evaluate(X_test, y_test_encoded)
print("Accuracy without PCA:", accuracy_no_pca)

# With PCA
model_with_pca.fit(X_pca, y_train_encoded, batch_size=batch_size, epochs=epochs, validation_split=0.33)
_, accuracy_with_pca = model_with_pca.evaluate(pca.transform(X_test), y_test_encoded)
print("Accuracy with PCA:", accuracy_with_pca)


Dataset Shape: (569, 30)
First few rows of the dataset:
   mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
0        17.99         10.38          122.80     1001.0          0.11840   
1        20.57         17.77          132.90     1326.0          0.08474   
2        19.69         21.25          130.00     1203.0          0.10960   
3        11.42         20.38           77.58      386.1          0.14250   
4        20.29         14.34          135.10     1297.0          0.10030   

   mean compactness  mean concavity  mean concave points  mean symmetry  \
0           0.27760          0.3001              0.14710         0.2419   
1           0.07864          0.0869              0.07017         0.1812   
2           0.15990          0.1974              0.12790         0.2069   
3           0.28390          0.2414              0.10520         0.2597   
4           0.13280          0.1980              0.10430         0.1809   

   mean fractal dimension  ...  wors

ValueError: Data cardinality is ambiguous:
  x sizes: 188
  y sizes: 0
Make sure all arrays contain the same number of samples.

In [29]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.decomposition import PCA
from tensorflow import keras
from tensorflow.keras import layers

# Step 1: Load Breast Cancer Dataset
data = load_breast_cancer()

# Step 2: Print shape and first few rows
print("Dataset Shape:", data.data.shape)
print("First few rows of the dataset:")
print(pd.DataFrame(data.data, columns=data.feature_names).head())

# Step 3: Define features and target
X = data.data
y = data.target

# Step 4: Print feature names
print("Feature Names:")
print(data.feature_names)

# Step 5: Use StandardScaler() to transform the input
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 6: Print the mean and std of the transformed input
print("Mean of the Transformed Input:")
print(np.mean(X_scaled, axis=0))
print("Standard Deviation of the Transformed Input:")
print(np.std(X_scaled, axis=0))

# Step 7: Define the number of principal components to 2
n_components = 2

# Step 8: Fit the input into PCA
pca = PCA(n_components=n_components)
X_pca = pca.fit_transform(X_scaled)

# Step 9: Print the variance ratio of the components
print("Variance Ratio of the Components:")
print(pca.explained_variance_ratio_)

# Step 10: Divide the dataset into training set and testing set (0.33)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.33, random_state=42)

# Step 11: Build neural networks for dataset without PCA and with PCA
input_shape = X_train.shape[1]
num_classes = len(np.unique(y_train))

# Without PCA
model_no_pca = keras.Sequential([
    layers.Dense(500, activation='relu', input_shape=(input_shape,)),
    layers.Dense(500, activation='relu'),
    layers.Dense(num_classes, activation='sigmoid')
])

# With PCA
input_shape_pca = X_pca.shape[1]
model_with_pca = keras.Sequential([
    layers.Dense(500, activation='relu', input_shape=(input_shape_pca,)),
    layers.Dense(500, activation='relu'),
    layers.Dense(num_classes, activation='sigmoid')
])

# Step 12: Compile the models
model_no_pca.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model_with_pca.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# One-hot encode the target variables
encoder = OneHotEncoder(categories='auto')
y_encoded = encoder.fit_transform(y.reshape(-1, 1)).toarray()
y_train_encoded = y_encoded[y_train]
y_test_encoded = y_encoded[y_test]

# Step 13: Print the accuracy for both networks
batch_size = 128
epochs = 20

# Without PCA
model_no_pca.fit(X_train, y_train_encoded, batch_size=batch_size, epochs=epochs, validation_data=(X_test, y_test_encoded))
_, accuracy_no_pca = model_no_pca.evaluate(X_test, y_test_encoded)
print("Accuracy without PCA:", accuracy_no_pca)

# With PCA
model_with_pca.fit(X_pca, y_train_encoded, batch_size=batch_size, epochs=epochs, validation_split=0.33)
_, accuracy_with_pca = model_with_pca.evaluate(pca.transform(X_test), y_test_encoded)
print("Accuracy with PCA:", accuracy_with_pca)


Dataset Shape: (569, 30)
First few rows of the dataset:
   mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
0        17.99         10.38          122.80     1001.0          0.11840   
1        20.57         17.77          132.90     1326.0          0.08474   
2        19.69         21.25          130.00     1203.0          0.10960   
3        11.42         20.38           77.58      386.1          0.14250   
4        20.29         14.34          135.10     1297.0          0.10030   

   mean compactness  mean concavity  mean concave points  mean symmetry  \
0           0.27760          0.3001              0.14710         0.2419   
1           0.07864          0.0869              0.07017         0.1812   
2           0.15990          0.1974              0.12790         0.2069   
3           0.28390          0.2414              0.10520         0.2597   
4           0.13280          0.1980              0.10430         0.1809   

   mean fractal dimension  ...  wors

ValueError: Data cardinality is ambiguous:
  x sizes: 188
  y sizes: 0
Make sure all arrays contain the same number of samples.

In [30]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.utils import to_categorical

# Step 1: Load Breast Cancer Dataset
data = load_breast_cancer()

# Step 2: Print shape and first few rows
print("Dataset Shape:", data.data.shape)
print("First few rows of the dataset:")
print(pd.DataFrame(data.data, columns=data.feature_names).head())

# Step 3: Define features and target
X = data.data
y = data.target

# Step 4: Print feature names
print("Feature Names:")
print(data.feature_names)

# Step 5: Use StandardScaler() to transform the input
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 6: Print the mean and std of the transformed input
print("Mean of the Transformed Input:")
print(np.mean(X_scaled, axis=0))
print("Standard Deviation of the Transformed Input:")
print(np.std(X_scaled, axis=0))

# Step 7: Define the number of principal components to 2
n_components = 2

# Step 8: Fit the input into PCA
pca = PCA(n_components=n_components)
X_pca = pca.fit_transform(X_scaled)

# Step 9: Print the variance ratio of the components
print("Variance Ratio of the Components:")
print(pca.explained_variance_ratio_)

# Step 10: Divide the dataset into training set and testing set (0.33)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.33, random_state=42)

# Step 11: Build neural networks for dataset without PCA and with PCA
input_shape = X_train.shape[1]
num_classes = len(np.unique(y_train))

# Without PCA
model_no_pca = keras.Sequential([
    layers.Dense(500, activation='relu', input_shape=(input_shape,)),
    layers.Dense(500, activation='relu'),
    layers.Dense(num_classes, activation='sigmoid')
])

# With PCA
input_shape_pca = X_pca.shape[1]
model_with_pca = keras.Sequential([
    layers.Dense(500, activation='relu', input_shape=(input_shape_pca,)),
    layers.Dense(500, activation='relu'),
    layers.Dense(num_classes, activation='sigmoid')
])

# Step 12: Compile the models
model_no_pca.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model_with_pca.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Convert the target variables to categorical
y_train_categorical = to_categorical(y_train, num_classes)
y_test_categorical = to_categorical(y_test, num_classes)

# Step 13: Print the accuracy for both networks
batch_size = 128
epochs = 20

# Without PCA
model_no_pca.fit(X_train, y_train_categorical, batch_size=batch_size, epochs=epochs, validation_data=(X_test, y_test_categorical))
_, accuracy_no_pca = model_no_pca.evaluate(X_test, y_test_categorical)
print("Accuracy without PCA:", accuracy_no_pca)

# With PCA
model_with_pca.fit(X_pca, y_train_categorical, batch_size=batch_size, epochs=epochs, validation_split=0.33)
_, accuracy_with_pca = model_with_pca.evaluate(pca.transform(X_test), y_test_categorical)
print("Accuracy with PCA:", accuracy_with_pca)


Dataset Shape: (569, 30)
First few rows of the dataset:
   mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
0        17.99         10.38          122.80     1001.0          0.11840   
1        20.57         17.77          132.90     1326.0          0.08474   
2        19.69         21.25          130.00     1203.0          0.10960   
3        11.42         20.38           77.58      386.1          0.14250   
4        20.29         14.34          135.10     1297.0          0.10030   

   mean compactness  mean concavity  mean concave points  mean symmetry  \
0           0.27760          0.3001              0.14710         0.2419   
1           0.07864          0.0869              0.07017         0.1812   
2           0.15990          0.1974              0.12790         0.2069   
3           0.28390          0.2414              0.10520         0.2597   
4           0.13280          0.1980              0.10430         0.1809   

   mean fractal dimension  ...  wors

ValueError: Data cardinality is ambiguous:
  x sizes: 188
  y sizes: 0
Make sure all arrays contain the same number of samples.

In [33]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.utils import to_categorical

# Step 1: Load Breast Cancer Dataset
data = load_breast_cancer()

# Step 2: Print shape and first few rows
print("Dataset Shape:", data.data.shape)
print("First few rows of the dataset:")
print(pd.DataFrame(data.data, columns=data.feature_names).head())

# Step 3: Define features and target
X = data.data
y = data.target

# Step 4: Print feature names
print("Feature Names:")
print(data.feature_names)

# Step 5: Use StandardScaler() to transform the input
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 6: Print the mean and std of the transformed input
print("Mean of the Transformed Input:")
print(np.mean(X_scaled, axis=0))
print("Standard Deviation of the Transformed Input:")
print(np.std(X_scaled, axis=0))

# Step 7: Define the number of principal components to 2
n_components = 2

# Step 8: Fit the input into PCA
pca = PCA(n_components=n_components)
X_pca = pca.fit_transform(X_scaled)

# Step 9: Print the variance ratio of the components
print("Variance Ratio of the Components:")
print(pca.explained_variance_ratio_)

# Step 10: Divide the dataset into training set and testing set (0.33)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.33, random_state=42)

# Step 11: Build neural networks for dataset without PCA and with PCA
input_shape = X_train.shape[1]
num_classes = len(np.unique(y_train))

# Without PCA
model_no_pca = keras.Sequential([
    layers.Dense(500, activation='relu', input_shape=(input_shape,)),
    layers.Dense(500, activation='relu'),
    layers.Dense(num_classes, activation='sigmoid')
])

# With PCA
input_shape_pca = X_pca.shape[1]
model_with_pca = keras.Sequential([
    layers.Dense(500, activation='relu', input_shape=(input_shape_pca,)),
    layers.Dense(500, activation='relu'),
    layers.Dense(num_classes, activation='sigmoid')
])

# Step 12: Compile the models
model_no_pca.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model_with_pca.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Convert the target variables to categorical
y_train_categorical = to_categorical(y_train, num_classes)
y_test_categorical = to_categorical(y_test, num_classes)

# Step 13: Print the accuracy for both networks
batch_size = 128
epochs = 20

# Without PCA
model_no_pca.fit(X_train, y_train_categorical, batch_size=batch_size, epochs=epochs, validation_data=(X_test, y_test_categorical))
_, accuracy_no_pca = model_no_pca.evaluate(X_test, y_test_categorical)
print("Accuracy without PCA:", accuracy_no_pca)

# With PCA
X_train_pca, X_test_pca, _, _ = train_test_split(X_pca, y, test_size=0.33, random_state=42)
model_with_pca.fit(X_train_pca, y_train_categorical, batch_size=batch_size, epochs=epochs, validation_data=(X_test_pca, y_test_categorical))
_, accuracy_with_pca = model_with_pca.evaluate(X_test_pca, y_test_categorical)
print("Accuracy with PCA:", accuracy_with_pca)

print("Accuracy without PCA:", accuracy_no_pca)


Dataset Shape: (569, 30)
First few rows of the dataset:
   mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
0        17.99         10.38          122.80     1001.0          0.11840   
1        20.57         17.77          132.90     1326.0          0.08474   
2        19.69         21.25          130.00     1203.0          0.10960   
3        11.42         20.38           77.58      386.1          0.14250   
4        20.29         14.34          135.10     1297.0          0.10030   

   mean compactness  mean concavity  mean concave points  mean symmetry  \
0           0.27760          0.3001              0.14710         0.2419   
1           0.07864          0.0869              0.07017         0.1812   
2           0.15990          0.1974              0.12790         0.2069   
3           0.28390          0.2414              0.10520         0.2597   
4           0.13280          0.1980              0.10430         0.1809   

   mean fractal dimension  ...  wors

Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Accuracy with PCA: 0.9627659320831299
Accuracy without PCA: 0.9840425252914429
