In [None]:
import unittest
from bokeh.palettes import Category10
from bokeh.models import ColumnDataSource
import bokeh.plotting as bp
from sqlalchemy import create_engine
import numpy as np
import pandas as pd
import bokeh.io as bio
bio.output_notebook()


class DatabaseManager:
    """Store and retrieve pandas DataFrames in a SQLite database.

    The engine is created from a ``sqlite:///`` URL built from ``db_name``.
    Every method prints a short message on failure and then re-raises the
    original exception, so callers still see the underlying error.
    """

    def __init__(self, db_name="data.db"):
        """Create the engine for the SQLite database ``db_name``."""
        try:
            self.engine = create_engine(f'sqlite:///{db_name}')
        except Exception as e:
            print(f"connecting to the database failed : {e}")
            raise

    @staticmethod
    def _quote_identifier(name):
        """Return ``name`` as a double-quoted SQL identifier.

        Embedded double quotes are doubled, which is the standard SQL way of
        escaping them inside a quoted identifier.
        """
        return '"' + str(name).replace('"', '""') + '"'

    def store_dataframe(self, df, table_name):
        """Write ``df`` to ``table_name``, replacing the table if it exists.

        The DataFrame index is not written.
        """
        try:
            df.to_sql(table_name, self.engine,
                      if_exists='replace', index=False)
        except Exception as e:
            print(f" data can not be stored : {e}")
            raise

    def load_dataframe(self, table_name):
        """Return every row of ``table_name`` as a DataFrame."""
        try:
            # Quote the table name so that any name accepted by
            # store_dataframe (spaces, reserved words, ...) can be read back,
            # and so the name cannot alter the structure of the statement.
            query = f'SELECT * FROM {self._quote_identifier(table_name)}'
            return pd.read_sql(query, self.engine)
        except Exception as e:
            print(f" data can not be loaded : {e}")
            raise


class FunctionSelector:
    """Pick, for each training series, the ideal function with the least squared error.

    Both frames are expected to carry the x values in their first column;
    every remaining column is treated as a y series.
    """

    def __init__(self, training_data, ideal_functions):
        self.ideal_functions = ideal_functions
        self.training_data = training_data
        # Filled by select_best_functions: ideal function name -> largest
        # absolute deviation from the training series it was chosen for.
        self.max_training_deviation = {}

    def select_best_functions(self):
        """Return the names of the best-fitting ideal functions (at most four).

        As a side effect, ``max_training_deviation`` records the largest
        absolute deviation between each chosen function and its training
        series.
        """
        candidate_names = list(self.ideal_functions.columns[1:])
        chosen = []
        for series_name in self.training_data.columns[1:]:
            observed = self.training_data[series_name]

            # Sum of squared residuals against every candidate function.
            squared_error = {}
            for name in candidate_names:
                residual = observed - self.ideal_functions[name]
                squared_error[name] = np.sum(residual ** 2)

            # On a tie, min() keeps the earliest candidate in column order.
            winner = min(squared_error, key=squared_error.get)
            chosen.append(winner)

            gap = observed - self.ideal_functions[winner]
            self.max_training_deviation[winner] = np.max(abs(gap))
        return chosen[:4]


class TestDataMapper:
    """Assign each test point to the selected ideal function that fits it best.

    A point may be assigned to a function only when its absolute deviation
    from that function is at most ``sqrt(2)`` times the maximum deviation
    recorded for that function in ``max_training_deviation``.
    """

    def __init__(self, test_data, ideal_functions, selected_functions, max_training_deviation):
        """Keep the inputs and narrow ``ideal_functions`` to the needed columns.

        Args:
            test_data: DataFrame with ``x`` and ``y`` columns.
            ideal_functions: DataFrame with an ``x`` column and one column
                per ideal function.
            selected_functions: names of the ideal-function columns to use.
            max_training_deviation: mapping from function name to its
                maximum deviation on the training data.
        """
        self.test_data = test_data
        # The same function can be selected for more than one training
        # series. Selecting a column twice would create duplicate labels and
        # turn every later single-column lookup into a 2-D result, so the
        # names are de-duplicated (order preserved) before slicing.
        unique_functions = list(dict.fromkeys(selected_functions))
        self.ideal_functions = ideal_functions[['x'] + unique_functions]
        self.selected_functions = selected_functions
        self.max_training_deviation = max_training_deviation

    def map_test_data(self):
        """Match every test point to the closest admissible ideal function.

        Returns:
            DataFrame with columns ``x``, ``y``, ``delta_y`` and
            ``ideal_function``, one row per test point. Points that match no
            function keep ``delta_y = inf`` and ``ideal_function = None``.
        """
        functions = list(dict.fromkeys(self.selected_functions))
        ideal_columns = self.ideal_functions[['x'] + functions]

        # Build the x -> ideal values lookup once instead of scanning the
        # whole ideal table for every (test point, function) pair. The first
        # row wins when an x value occurs more than once.
        ideal_by_x = {}
        for record in ideal_columns.itertuples(index=False, name=None):
            ideal_by_x.setdefault(record[0], record[1:])

        tolerance_factor = np.sqrt(2)
        mapped_data = []
        for _, row in self.test_data.iterrows():
            x_val = row['x']
            y_test = row['y']
            min_deviation = float('inf')
            best_function = None

            ideal_values = ideal_by_x.get(x_val)
            if ideal_values is not None:
                for func, y_ideal in zip(functions, ideal_values):
                    deviation = abs(y_test - y_ideal)
                    max_train_dev = self.max_training_deviation[func]
                    # Strict '<' keeps the earliest function on a tie.
                    if deviation <= tolerance_factor * max_train_dev and deviation < min_deviation:
                        min_deviation = deviation
                        best_function = func

            mapped_data.append([x_val, y_test, min_deviation, best_function])

        return pd.DataFrame(mapped_data, columns=['x', 'y', 'delta_y', 'ideal_function'])


class DataVisualizer:
    """Create and show Bokeh plots of the training, test and ideal data.

    Every public method builds a 900x500 figure with axes labelled ``x`` and
    ``y`` and shows it. On failure a short message is printed and the
    original exception is re-raised.
    """

    @staticmethod
    def _new_figure(title):
        """Return an empty figure with the layout shared by all plots."""
        return bp.figure(title=title, x_axis_label='x',
                         y_axis_label='y', width=900, height=500)

    @staticmethod
    def _style_legend(p):
        """Give the legend of ``p`` its title and place it top-left."""
        p.legend.title = "Legend"
        p.legend.location = "top_left"

    @staticmethod
    def _add_ideal_lines(p, ideal_df, selected_functions):
        """Draw one line per selected ideal function onto ``p``."""
        colors = Category10[10]
        for i, func in enumerate(selected_functions):
            source = ColumnDataSource(ideal_df[['x', func]])
            p.line('x', func, source=source, line_width=2,
                   color=colors[i % len(colors)], legend_label=func)

    def plot_training_scatter(self, df, title, x_col='x', y_cols=None):
        """Plot each training series as a scatter plot.

        Args:
            df: DataFrame holding the x column and the y series.
            title: figure title.
            x_col: name of the x column.
            y_cols: names of the series to plot; defaults to every column
                after the first.
        """
        try:
            if y_cols is None:
                y_cols = df.columns[1:]

            p = self._new_figure(title)
            # Category10 only has entries for 3 to 10 colours, so indexing it
            # with len(y_cols) fails for one or two series (or more than
            # ten). The smaller palettes are prefixes of the 10-colour one,
            # so cycling through it gives the same colours without a KeyError.
            colors = Category10[10]

            for i, y_col in enumerate(y_cols):
                p.scatter(df[x_col], df[y_col], size=1,
                          color=colors[i % len(colors)], alpha=0.6,
                          legend_label=str(y_col))

            self._style_legend(p)
            bp.show(p)
        except Exception as e:
            print(f"plotting training as scatter plot failed: {e}")
            raise

    def plot_test_scatter(self, df, title, x_col='x', y_col='y', color='blue'):
        """Plot the test data as a single-colour scatter plot."""
        try:
            p = self._new_figure(title)
            source = ColumnDataSource(df)
            p.scatter(x_col, y_col, source=source,
                      size=6, color=color, alpha=0.6)
            bp.show(p)
        except Exception as e:
            print(f"plotting test scatter failed : {e}")
            raise

    def plot_ideal_functions(self, ideal_df, selected_functions, title):
        """Plot the selected ideal functions as lines."""
        try:
            p = self._new_figure(title)
            self._add_ideal_lines(p, ideal_df, selected_functions)
            self._style_legend(p)
            bp.show(p)
        except Exception as e:
            print(f"plotting ideal functions failed: {e}")
            raise

    def plot_test_data_with_ideal(self, test_df, ideal_df, selected_functions, title):
        """Plot the test points together with the selected ideal functions."""
        try:
            p = self._new_figure(title)
            source = ColumnDataSource(test_df)
            p.scatter('x', 'y', source=source, size=7, color='blue',
                      alpha=0.6, legend_label="Test Data")
            self._add_ideal_lines(p, ideal_df, selected_functions)
            self._style_legend(p)
            bp.show(p)
        except Exception as e:
            print(f"failed to plotting test data with ideal : {e}")
            raise


# Unit Test
class TestFunctions(unittest.TestCase):
    """Unit tests for DatabaseManager, FunctionSelector and TestDataMapper."""

    def test_database_manager(self):
        """Test DatabaseManager functionality.

        The database lives in a temporary directory so the test leaves no
        ``test.db`` file behind in the working directory.
        """
        import os
        import tempfile

        data = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})

        with tempfile.TemporaryDirectory() as tmp_dir:
            db_manager = DatabaseManager(
                db_name=os.path.join(tmp_dir, "test.db"))
            try:
                # Test storing and loading data
                db_manager.store_dataframe(data, 'test_table')
                loaded_data = db_manager.load_dataframe('test_table')
            finally:
                # Release the file handle before the directory is removed.
                db_manager.engine.dispose()

        self.assertEqual(data.shape, loaded_data.shape)
        # The round trip must preserve the values, not only the shape.
        pd.testing.assert_frame_equal(loaded_data, data, check_dtype=False)

    def test_function_selector(self):
        """Test FunctionSelector functionality."""
        training_data = pd.DataFrame(
            {'x': [1, 2, 3], 'y1': [1, 2, 3], 'y2': [3, 2, 1]})
        ideal_functions = pd.DataFrame(
            {'x': [1, 2, 3], 'f1': [1.1, 2.1, 3.1], 'f2': [2.1, 1.1, 0.1]})

        selector = FunctionSelector(training_data, ideal_functions)
        best_functions = selector.select_best_functions()

        # y1 is closest to f1 (offset 0.1); y2 is closest to f2 (offset 0.9).
        self.assertEqual(list(best_functions), ['f1', 'f2'])
        self.assertAlmostEqual(selector.max_training_deviation['f1'], 0.1)
        self.assertAlmostEqual(selector.max_training_deviation['f2'], 0.9)

    def test_test_data_mapper(self):
        """Test TestDataMapper functionality."""
        test_data = pd.DataFrame({'x': [1, 2, 3], 'y': [1, 2, 3]})
        ideal_functions = pd.DataFrame(
            {'x': [1, 2, 3], 'f1': [1.1, 2.1, 3.1], 'f2': [2.1, 1.1, 0.1]})
        selected_functions = ['f1', 'f2']
        max_train_dev = {'f1': 0.1, 'f2': 0.2}

        mapper = TestDataMapper(
            test_data, ideal_functions, selected_functions, max_train_dev)
        mapped_data = mapper.map_test_data()

        self.assertEqual(mapped_data.shape[0], test_data.shape[0])
        self.assertEqual(list(mapped_data.columns),
                         ['x', 'y', 'delta_y', 'ideal_function'])
        # Every point is 0.1 away from f1 (inside sqrt(2) * 0.1) and at
        # least 0.9 away from f2 (outside sqrt(2) * 0.2).
        self.assertEqual(list(mapped_data['ideal_function']),
                         ['f1', 'f1', 'f1'])


class TestMappingCriteria(unittest.TestCase):
    """Check the sqrt(2) deviation criterion on the mapper's output."""

    def setUp(self):
        """Build a small test set, two ideal functions and their deviations."""
        self.selected_functions = ['f1', 'f2']
        self.max_train_dev = {'f1': 0.1, 'f2': 0.2}
        self.test_data = pd.DataFrame({'x': [1, 2, 3], 'y': [1, 2, 3]})
        self.ideal_functions = pd.DataFrame(
            {'x': [1, 2, 3], 'f1': [1.1, 2.1, 3.1], 'f2': [2.1, 1.1, 0.1]})

    def test_mapping_criterion(self):
        """Test that the mapping  is applied correctly.

        Every mapped deviation must not exceed sqrt(2) times the training
        deviation of the function it was assigned to. Rows without an
        assigned function are compared against infinity.
        """
        mapped_data = TestDataMapper(
            self.test_data,
            self.ideal_functions,
            self.selected_functions,
            self.max_train_dev,
        ).map_test_data()

        pairs = zip(mapped_data['delta_y'], mapped_data['ideal_function'])
        for delta, func_name in pairs:
            allowed = np.sqrt(2) * self.max_train_dev.get(
                func_name, float('inf'))
            self.assertLessEqual(delta, allowed)


if __name__ == "__main__":
    import os

    # Run unit tests
    unittest.main(argv=[''], verbosity=2, exit=False)

    # Directory holding train.csv, ideal.csv and test.csv. It defaults to the
    # Colab Drive folder and can be overridden with the
    # IDEAL_FUNCTIONS_DATA_DIR environment variable when run elsewhere.
    data_dir = os.environ.get(
        "IDEAL_FUNCTIONS_DATA_DIR", "/content/drive/MyDrive/Colab Notebooks")

    # Read dataset
    db_manager = DatabaseManager()
    training_data = pd.read_csv(os.path.join(data_dir, "train.csv"))
    ideal_functions = pd.read_csv(os.path.join(data_dir, "ideal.csv"))
    test_data = pd.read_csv(os.path.join(data_dir, "test.csv"))

    # Store data
    datasets = {
        "training_data": training_data,
        "ideal_functions": ideal_functions,
        "test_data": test_data,
    }
    for name, data in datasets.items():
        db_manager.store_dataframe(data, name)

    # Select ideal functions
    selector = FunctionSelector(training_data, ideal_functions)
    best_functions = selector.select_best_functions()

    # Map
    mapper = TestDataMapper(test_data, ideal_functions,
                            best_functions, selector.max_training_deviation)
    mapped_data = mapper.map_test_data()
    db_manager.store_dataframe(mapped_data, "mapped_test_data")

    # Visualize
    visualizer = DataVisualizer()
    visualizer.plot_training_scatter(
        training_data, "Training Data", y_cols=['y1', 'y2', 'y3', 'y4'])
    visualizer.plot_test_scatter(test_data, "Test Data", y_col='y')
    visualizer.plot_ideal_functions(
        ideal_functions, best_functions, "Ideal Functions")
    visualizer.plot_test_data_with_ideal(
        test_data, ideal_functions, best_functions, "Test Data with Ideal Functions")

    print(f"Selected Best Functions: {best_functions}")
    print("Mapped Test Data Table:")
    try:
        # `display` is only defined inside IPython/Jupyter sessions.
        display(mapped_data.style.set_properties(**{'text-align': 'left'}))
    except NameError:
        # Plain Python interpreter: fall back to a text table.
        print(mapped_data.to_string())

test_database_manager (__main__.TestFunctions.test_database_manager)
Test DatabaseManager functionality. ... ok
test_function_selector (__main__.TestFunctions.test_function_selector)
Test FunctionSelector functionality. ... ok
test_test_data_mapper (__main__.TestFunctions.test_test_data_mapper)
Test TestDataMapper functionality. ... ok
test_mapping_criterion (__main__.TestMappingCriteria.test_mapping_criterion)
Test that the mapping  is applied correctly. ... ok

----------------------------------------------------------------------
Ran 4 tests in 0.386s

OK


FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/Colab Notebooks/train.csv'