In [196]:
import numpy as np
import pandas as pd

In [197]:
sleep_data = pd.read_csv('../datasets/Sleep_health_and_lifestyle_dataset.csv')
sleep_data.head()

Unnamed: 0,Person ID,Gender,Age,Occupation,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Blood Pressure,Heart Rate,Daily Steps,Sleep Disorder
0,1,Male,27,Software Engineer,6.1,6,42,6,Overweight,126/83,77,4200,
1,2,Male,28,Doctor,6.2,6,60,8,Normal,125/80,75,10000,
2,3,Male,28,Doctor,6.2,6,60,8,Normal,125/80,75,10000,
3,4,Male,28,Sales Representative,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea
4,5,Male,28,Sales Representative,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea


In [198]:
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer

categorical_cols = sleep_data.select_dtypes(include=['object']).columns.drop('Sleep Disorder')
numerical_cols = sleep_data.select_dtypes(include=['float64', 'int64']).columns.drop('Person ID')

imputer = SimpleImputer(strategy='most_frequent')
scaler = StandardScaler()

X_transformed = sleep_data.drop(['Sleep Disorder', 'Person ID'], axis=1).copy()
y_transformed = sleep_data['Sleep Disorder']

for col in numerical_cols:
    X_transformed[col] = imputer.fit_transform(X_transformed[[col]])
    X_transformed[col] = scaler.fit_transform(X_transformed[[col]])

for col in categorical_cols:
    X_transformed[col] = imputer.fit_transform(X_transformed[[col]])
    X_transformed[col] = LabelEncoder().fit_transform(X_transformed[col])

y_encoded = LabelEncoder().fit_transform(y_transformed)

X_transformed.head()

Unnamed: 0,Gender,Age,Occupation,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Blood Pressure,Heart Rate,Daily Steps
0,1,-1.753096,9,-1.298887,-1.09828,-0.825418,0.347021,3,11,1.654719,-1.619584
1,1,-1.637643,1,-1.173036,-1.09828,0.039844,1.475592,0,9,1.170474,1.970077
2,1,-1.637643,1,-1.173036,-1.09828,0.039844,1.475592,0,9,1.170474,1.970077
3,1,-1.637643,6,-1.550588,-2.771424,-1.40226,1.475592,2,22,3.591698,-2.362273
4,1,-1.637643,6,-1.550588,-2.771424,-1.40226,1.475592,2,22,3.591698,-2.362273


In [199]:
def my_pca(X, n_components=None):
    """
    Реализация метода главных компонент.

    Параметры:
    - X: numpy.ndarray, матрица данных
    - n_components: int, количество компонентов (по умолчанию None)

    Возвращает:
    - X_pca: numpy.ndarray, данные после применения PCA
    """

    mean_vec = np.mean(X, axis=0)
    X_scaled = X - mean_vec

    cov_matrix = np.cov(X_scaled, rowvar=False)

    eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)

    sorted_indices = np.argsort(eigenvalues)[::-1]
    eigenvalues = eigenvalues[sorted_indices]
    print('Собственные числа')
    print(eigenvalues)
    eigenvectors = eigenvectors[:, sorted_indices]

    if n_components is None:
        n_components = min(X.shape[0], X.shape[1])
    selected_eigenvectors = eigenvectors[:, :n_components]

    X_pca = np.dot(X_scaled, selected_eigenvectors)

    all_columns = np.arange(X.shape[1])
    retained_columns = np.arange(n_components)
    retained_column_names = list(X.columns[retained_columns])
    print(f'Оставленные столбцы {retained_column_names}')
    
    removed_columns = np.setdiff1d(all_columns, retained_columns)
    removed_columns_names = list(X.columns[removed_columns])
    print(f'Убранные столбцы {removed_columns_names}')

    return X_pca

n_components = 1
X_pca = my_pca(X_transformed, n_components=n_components)

X_pca[:n_components, :]

Собственные числа
[5.53552157e+01 6.77612252e+00 3.43543532e+00 1.53737931e+00
 8.75944711e-01 5.49764297e-01 2.08288793e-01 1.62418506e-01
 8.11262451e-02 6.85528341e-02 2.55786214e-02]
Оставленные столбцы ['Gender']
Убранные столбцы ['Age', 'Occupation', 'Sleep Duration', 'Quality of Sleep', 'Physical Activity Level', 'Stress Level', 'BMI Category', 'Blood Pressure', 'Heart Rate', 'Daily Steps']


array([[-0.46863056]])

In [200]:
class CustomNeuralNetwork:
    """
    Custom implementation of a simple 2-layer neural network.
    """

    def __init__(self, random_state=None):
        np.random.seed(random_state)    
        self.weights1 = np.random.rand(3, n_components)
        self.weights2 = np.random.rand(2, 3)

    def _tanh(self, x):
        return np.tanh(3 * x)

    def _tanh_derivative(self, x):
        return 3 * (1 - np.tanh(3 * x) ** 2)

    def _forward_propagation(self, X):
        self.layer1 = self._tanh(np.dot(X, self.weights1.T))
        self.output = self._tanh(np.dot(self.layer1, self.weights2.T))
        return self.output

    def _backward_propagation(self, X, y, learning_rate):
        error = y - self.output

        d_weights2 = learning_rate * np.dot(self.layer1.T, error * self._tanh_derivative(self.output))
        d_weights1 = learning_rate * np.dot(X.T, np.dot(error * self._tanh_derivative(self.output),
                                                        self.weights2) * self._tanh_derivative(self.layer1))

        self.weights1 += d_weights1.T
        self.weights2 += d_weights2.T

    def fit(self, X, y, learning_rate=0.01, epochs=1000):
        for _ in range(epochs):
            self._forward_propagation(X)
            self._backward_propagation(X, y, learning_rate)

    def predict(self, X):
        output = self._forward_propagation(X)
        return np.argmax(output, axis=1)

In [201]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

X_train, X_test, y_train, y_test = train_test_split(X_pca, y_encoded, test_size=0.20, random_state=42)

# Training and testing the custom neural network
nn = CustomNeuralNetwork(random_state=42)
nn.fit(X_train, y_train.reshape(-1, 1), learning_rate=0.01, epochs=1000)

# Making predictions
y_train_pred_custom_nn = nn.predict(X_train)
y_test_pred_custom_nn = nn.predict(X_test)

# Calculating accuracy
train_accuracy_custom_nn = accuracy_score(y_train, y_train_pred_custom_nn)
test_accuracy_custom_nn = accuracy_score(y_test, y_test_pred_custom_nn)

train_accuracy_custom_nn, test_accuracy_custom_nn

(0.6086956521739131, 0.6133333333333333)