In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from google.colab import files

def upload_file():
    """Open the Colab upload dialog and return the first uploaded file's name.

    Returns:
        The first key of the mapping returned by ``files.upload()``. If
        several files are uploaded, only the first one is reported.

    Raises:
        ValueError: if the upload returned nothing (for example, the dialog
            was cancelled), instead of leaking a bare ``StopIteration``.
    """
    uploaded = files.upload()
    if not uploaded:
        raise ValueError('No file was uploaded.')
    return next(iter(uploaded))

def load_data(file_name, delimiter=';'):
    """Read a delimited text file into a pandas DataFrame.

    Args:
        file_name: path or file-like object passed straight to
            ``pd.read_csv``.
        delimiter: field separator; defaults to ``';'`` so existing callers
            that pass only ``file_name`` behave exactly as before.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    return pd.read_csv(file_name, delimiter=delimiter)

def perform_pca(X, n_components):
    """Fit a PCA on ``X`` and project ``X`` onto the fitted components.

    Args:
        X: data passed to ``PCA.fit_transform``.
        n_components: forwarded to the ``PCA`` constructor.

    Returns:
        A ``(projected, ratios)`` tuple: the transformed data and the
        fitted model's ``explained_variance_ratio_``.
    """
    reducer = PCA(n_components=n_components)
    projected = reducer.fit_transform(X)
    ratios = reducer.explained_variance_ratio_
    return projected, ratios

def plot_variance(explained_variance_ratio):
    """Plot per-component and cumulative explained variance ratios.

    Draws one bar per component and a step line for the running total,
    then shows the figure.

    Args:
        explained_variance_ratio: sequence of per-component ratios, in
            component order.
    """
    running_total = np.cumsum(explained_variance_ratio)
    # Components are numbered from 1 on the x axis.
    positions = range(1, len(explained_variance_ratio) + 1)

    plt.figure(figsize=(10, 5))
    plt.bar(
        positions,
        explained_variance_ratio,
        alpha=0.5,
        align='center',
        label='individual explained variance',
    )
    plt.step(
        positions,
        running_total,
        where='mid',
        label='cumulative explained variance',
    )
    plt.xlabel('Principal components')
    plt.ylabel('Explained variance ratio')
    plt.title('Explained Variance Ratio of Principal Components')
    plt.legend(loc='best')
    plt.show()

def get_pca_loadings(X, n_components, feature_names):
    """Fit a PCA on ``X`` and return its component weights as a DataFrame.

    Args:
        X: data passed to ``PCA.fit``.
        n_components: forwarded to the ``PCA`` constructor.
        feature_names: labels for the rows of the result, one per column
            of ``X``.

    Returns:
        A DataFrame with one row per feature and one column per fitted
        component (``PC1``, ``PC2``, ...), holding the transpose of the
        fitted ``components_`` array.
    """
    pca = PCA(n_components=n_components)
    # Only the fitted axes are needed here, so fit without projecting X.
    pca.fit(X)
    loadings = pca.components_
    # Label columns from the number of components actually fitted, so a
    # non-integer n_components (None, a float, 'mle') also works.
    pc_labels = [f'PC{i + 1}' for i in range(loadings.shape[0])]
    return pd.DataFrame(loadings.T, columns=pc_labels, index=feature_names)


In [2]:
import unittest

class TestPCAFunctions(unittest.TestCase):

    def setUp(self):
        # テスト用のダミーデータを設定
        self.data = pd.DataFrame({
            'feature1': [1, 2, 3, 4, 5],
            'feature2': [5, 4, 3, 2, 1],
            'feature3': [2, 3, 4, 5, 6],
            'quality': [3, 3, 2, 5, 4]
        })
        self.X = self.data.iloc[:, :-1].values
        self.feature_names = self.data.columns[:-1]

    def test_perform_pca(self):
        X_pca, explained_variance_ratio = perform_pca(self.X, 2)
        self.assertEqual(X_pca.shape, (5, 2))
        self.assertEqual(len(explained_variance_ratio), 2)
        self.assertAlmostEqual(sum(explained_variance_ratio), 1, delta=0.1)

    def test_get_pca_loadings(self):
        loadings_df = get_pca_loadings(self.X, 2, self.feature_names)
        self.assertEqual(loadings_df.shape, (3, 2))
        self.assertTrue(all(col in loadings_df.columns for col in ['PC1', 'PC2']))
        self.assertTrue(all(index in loadings_df.index for index in self.feature_names))

# Run the tests from inside the notebook kernel. argv=[''] gives unittest an
# explicit argument list so it does not parse the kernel's own command-line
# arguments, and exit=False stops unittest.main from calling sys.exit() once
# the run finishes.
if __name__ == '__main__':
    unittest.main(argv=[''], exit=False)


..
----------------------------------------------------------------------
Ran 2 tests in 0.046s

OK
