In [5]:
# making necessary imports
import pandas as pd
import numpy as np
from sqlalchemy import create_engine, Column, Integer, Float
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from bokeh.io import output_notebook, show
from bokeh.plotting import figure



In [6]:
# Declarative base class for SQLAlchemy ORM models.
# NOTE(review): no model in the visible cells subclasses Base, and the cell
# output echoes this line, which looks like a deprecation warning — confirm
# whether Base is still needed and which import path the installed
# SQLAlchemy version expects.
Base = declarative_base()
# Configure Bokeh so that show() renders plots inline in the notebook.
output_notebook()

  Base = declarative_base()


In [7]:


class DataLoadError(Exception):
    """Signals that a data source could not be loaded.

    Carries no extra state; the message passed to the constructor describes
    the failure.
    """

class DataHandler:
    """Base class for handling data operations.

    Creates a SQLAlchemy engine and an ORM session for the given database
    URL, and offers helpers to read CSV files into DataFrames and to write
    DataFrames to database tables.
    """

    def __init__(self, db_url):
        """
        Initialize the DataHandler.

        Args:
            db_url (str): The URL to connect to the database.
        """
        self.engine = create_engine(db_url)
        self.session = sessionmaker(bind=self.engine)()

    def load_csv(self, csv_path):
        """
        Load data from a CSV file.

        Args:
            csv_path (str): The path to the CSV file (anything accepted by
                ``pandas.read_csv``).

        Returns:
            pd.DataFrame: The loaded data as a DataFrame.

        Raises:
            DataLoadError: If there is an error loading the data. The
                original exception is attached as ``__cause__``.
        """
        try:
            return pd.read_csv(csv_path)
        except Exception as e:
            # Chain the original exception so the root cause (missing file,
            # parse error, ...) stays visible in the traceback.
            raise DataLoadError(f"Error loading data from {csv_path}: {str(e)}") from e

    def save_to_db(self, data, table_name, if_exists='replace'):
        """
        Save data to a table of the database behind ``self.engine``.

        Args:
            data (pd.DataFrame): The data to be saved.
            table_name (str): The name of the table in the database.
            if_exists (str): Passed to ``DataFrame.to_sql``; what to do when
                the table already exists ('fail', 'replace' or 'append').
                Defaults to 'replace', i.e. an existing table is dropped and
                rewritten.
        """
        # index=False: the DataFrame index is not written as a column.
        data.to_sql(table_name, self.engine, if_exists=if_exists, index=False)

    def close(self):
        """Release the database resources created in ``__init__``.

        Closes the ORM session and disposes of the engine's connection pool.
        """
        self.session.close()
        self.engine.dispose()




In [8]:

class FunctionAnalyzer(DataHandler):
    """Derived class for analyzing functions. Inherits from DataHandler.

    Ranks the candidate ("ideal") function columns by how closely they follow
    one training column, assigns test points to the selected functions, and
    plots the results with Bokeh.
    """

    def __init__(self, db_url):
        """
        Initialize the FunctionAnalyzer.

        Args:
            db_url (str): The URL to connect to the database.
        """
        super().__init__(db_url)

    @staticmethod
    def _squared_losses(training_data, ideal_functions, training_column='y1'):
        """Return ``{ideal column: sum of squared deviations}`` for one training column.

        The first column of ``ideal_functions`` is skipped (it is treated as
        the x column). Values are paired by DataFrame index, not by x value,
        so both frames are expected to list their rows in the same order.
        """
        target = training_data[training_column]
        return {
            col: np.sum((target - ideal_functions[col]) ** 2)
            for col in ideal_functions.columns[1:]
        }

    def _plot_training_and_chosen(self, title, training_data, ideal_functions,
                                  chosen_functions, training_column='y1'):
        """Build a figure with the training points and one line per chosen function."""
        p = figure(title=title, x_axis_label='x', y_axis_label='y')

        # scatter(marker=...) is used instead of the marker-specific
        # circle()/cross() helpers, which recent Bokeh releases deprecate.
        p.scatter(training_data['x'], training_data[training_column], marker="circle",
                  legend_label="Training Data", fill_color="blue", size=5)

        for func in chosen_functions:
            p.line(ideal_functions['x'], ideal_functions[func],
                   legend_label=f"Ideal Function {func}", line_color="green")
        return p

    def train(self, training_data, ideal_functions, training_column='y1', n_best=4):
        """
        Train the function analyzer.

        Ranks every ideal function by the sum of squared deviations from the
        training column and keeps the ``n_best`` with the smallest sum.

        Args:
            training_data (pd.DataFrame): The training data.
            ideal_functions (pd.DataFrame): The ideal functions for comparison;
                the first column is skipped as the x column.
            training_column (str): Training column to fit against.
                Defaults to 'y1'.
            n_best (int): Number of functions to select. Defaults to 4.

        Returns:
            list: Names of the chosen ideal-function columns, best fit first.

        Raises:
            ValueError: If ``n_best`` is negative.
        """
        if n_best < 0:
            raise ValueError("n_best must be non-negative")
        losses = self._squared_losses(training_data, ideal_functions, training_column)
        # sorted() is stable, so ties keep the column order of ideal_functions.
        return sorted(losses, key=losses.get)[:n_best]

    def test(self, training_data, test_data, chosen_functions, ideal_functions,
             training_column='y1', deviation_factor=np.sqrt(2)):
        """
        Test the function analyzer.

        Each test point is assigned to the chosen function with the smallest
        absolute deviation at the point's x value, provided that deviation
        does not exceed the function's largest training deviation multiplied
        by ``deviation_factor``.

        Args:
            training_data (pd.DataFrame): The training data.
            test_data (pd.DataFrame): The test data for evaluation; needs
                'x' and 'y' columns.
            chosen_functions (list): List of chosen functions. Names must be
                a single leading character followed by an integer (e.g.
                'y12'), because the number is parsed from ``name[1:]``.
            ideal_functions (pd.DataFrame): The ideal functions for comparison;
                needs an 'x' column.
            training_column (str): Training column used to derive the
                per-function tolerance. Defaults to 'y1'.
            deviation_factor (float): Multiplier applied to the largest
                training deviation. Defaults to sqrt(2).

        Returns:
            list: Test results containing (x, y, deviation, function) for each
            test point; deviation and function are None when no chosen
            function is within tolerance or when the point's x value does not
            occur in ``ideal_functions``.
        """
        chosen_functions = list(chosen_functions)
        target = training_data[training_column]
        max_deviations = {
            func: np.max(np.abs(target - ideal_functions[func])) * deviation_factor
            for func in chosen_functions
        }

        results = []
        for _, row in test_data.iterrows():
            x_value, y_value = row['x'], row['y']

            # Look the x value up once per test point instead of once per
            # candidate function.
            matches = ideal_functions.loc[ideal_functions['x'] == x_value]
            if matches.empty:
                # No ideal value to compare with: report the point as
                # unmatched rather than failing with an IndexError.
                results.append((x_value, y_value, None, None))
                continue
            ideal_row = matches.iloc[0]

            best_fit = None
            best_deviation = float('inf')
            for func in chosen_functions:
                deviation = abs(y_value - ideal_row[func])
                if deviation < best_deviation and deviation <= max_deviations[func]:
                    best_deviation = deviation
                    best_fit = func

            if best_fit is None:
                results.append((x_value, y_value, None, None))
            else:
                results.append((x_value, y_value, best_deviation, int(best_fit[1:])))
        return results

    def visualize_training(self, training_data, ideal_functions, chosen_functions,
                           training_column='y1'):
        """
        Visualize the training process using Bokeh.

        Shows two plots: the training points together with the chosen ideal
        functions, and a bar chart of the squared-deviation loss of every
        ideal function.

        Args:
            training_data (pd.DataFrame): The training data.
            ideal_functions (pd.DataFrame): The ideal functions for comparison.
            chosen_functions (list): List of chosen functions.
            training_column (str): Training column to plot and to compute
                losses against. Defaults to 'y1'.
        """
        p = self._plot_training_and_chosen(
            "Training Data and Chosen Ideal Functions",
            training_data, ideal_functions, chosen_functions, training_column)

        # Display the plot in the notebook
        show(p)

        # Loss visualization: same loss definition that train() ranks by.
        losses = self._squared_losses(training_data, ideal_functions, training_column)
        function_names = list(losses)

        p_loss = figure(x_range=function_names, title="Loss for Each Ideal Function",
                        x_axis_label='Function', y_axis_label='Loss', height=350,
                        tools="pan,box_zoom,reset")

        p_loss.vbar(x=function_names, top=list(losses.values()), width=0.9)
        p_loss.xgrid.grid_line_color = None
        p_loss.y_range.start = 0

        # Display the loss plot
        show(p_loss)

    def visualize(self, training_data, ideal_functions, chosen_functions, test_data,
                  training_column='y1'):
        """
        Visualize the function analysis results using Bokeh.

        Args:
            training_data (pd.DataFrame): The training data.
            ideal_functions (pd.DataFrame): The ideal functions for comparison.
            chosen_functions (list): List of chosen functions.
            test_data (pd.DataFrame): The test data for evaluation.
            training_column (str): Training column to plot. Defaults to 'y1'.
        """
        p = self._plot_training_and_chosen(
            "Training Data, Ideal Functions, and Test Data",
            training_data, ideal_functions, chosen_functions, training_column)

        # Plot test data
        p.scatter(test_data['x'], test_data['y'], marker="cross",
                  legend_label="Test Data", line_color="red", size=10)

        # Display the plot in the notebook
        show(p)



In [10]:

# Unit Tests
def test_train():
    """
    Smoke test for FunctionAnalyzer.train.

    Loads the training and ideal-function CSV files, runs the selection,
    renders the training plots, and checks that four functions were chosen.
    """
    function_analyzer = FunctionAnalyzer('sqlite:///data.db')

    train_frame = function_analyzer.load_csv('train.csv')
    ideal_frame = function_analyzer.load_csv('ideal.csv')

    selected = function_analyzer.train(train_frame, ideal_frame)
    # Rendering happens before the assertion, so the plots appear even if
    # the check below fails.
    function_analyzer.visualize_training(train_frame, ideal_frame, selected)

    assert len(selected) == 4



def test_test():
    """
    Unit test for the testing functionality of the FunctionAnalyzer class.

    Loads the training, test and ideal-function CSV files, selects the chosen
    functions through ``FunctionAnalyzer.train``, maps the test points onto
    them, renders the overview plot, and checks that exactly one result is
    produced per test row.
    """
    analyzer = FunctionAnalyzer('sqlite:///data.db')
    training_data = analyzer.load_csv('train.csv')
    test_data = analyzer.load_csv('test.csv')
    ideal_functions = analyzer.load_csv('ideal.csv')

    # train() ranks the ideal functions by summed squared deviation and
    # returns the four best, so the ranking is not re-implemented here.
    chosen_functions = analyzer.train(training_data, ideal_functions)

    test_results = analyzer.test(training_data, test_data, chosen_functions, ideal_functions)
    analyzer.visualize(training_data, ideal_functions, chosen_functions, test_data)
    assert len(test_results) == len(test_data)



# Execute the training check now; this also renders the training and loss plots.
test_train()


In [13]:
# Execute the test-data check now; this also renders the combined plot.
test_test()

In [11]:
import unittest

class TestFunctionAnalyzer(unittest.TestCase):
    """unittest checks for FunctionAnalyzer.train and FunctionAnalyzer.test.

    The fixtures are read from 'train.csv', 'ideal.csv' and 'test.csv' in the
    working directory.
    """

    def setUp(self):
        """Create an analyzer and load the CSV fixtures used by every test."""
        self.analyzer = FunctionAnalyzer('sqlite:///data.db')
        # Registered right after construction so the session and engine are
        # released even when loading a fixture below fails; cleanups run in
        # reverse order (session first, then engine).
        self.addCleanup(self.analyzer.engine.dispose)
        self.addCleanup(self.analyzer.session.close)

        self.training_data = self.analyzer.load_csv('train.csv')
        self.ideal_functions = self.analyzer.load_csv('ideal.csv')
        self.test_data = self.analyzer.load_csv('test.csv')
        # Fixed selection so test_test does not depend on train().
        self.chosen_functions = ['y1', 'y2', 'y3', 'y4']

    def test_train(self):
        """train() returns four distinct ideal-function column names."""
        chosen_functions = self.analyzer.train(self.training_data, self.ideal_functions)
        self.assertEqual(len(chosen_functions), 4)
        self.assertEqual(len(set(chosen_functions)), 4)
        # Every selected name must be one of the candidate columns
        # (all columns except the leading x column).
        self.assertTrue(set(chosen_functions) <= set(self.ideal_functions.columns[1:]))

    def test_test(self):
        """test() yields one (x, y, deviation, function) tuple per test row."""
        test_results = self.analyzer.test(self.training_data, self.test_data, self.chosen_functions, self.ideal_functions)
        self.assertEqual(len(test_results), len(self.test_data))
        for result in test_results:
            self.assertEqual(len(result), 4)




In [12]:
# Collect every test method of TestFunctionAnalyzer into a suite
suite = unittest.TestLoader().loadTestsFromTestCase(TestFunctionAnalyzer)

# Run the suite with the text runner; as the cell's last expression, the
# returned result object is also displayed below the runner's report
unittest.TextTestRunner().run(suite)

..
----------------------------------------------------------------------
Ran 2 tests in 0.157s

OK


<unittest.runner.TextTestResult run=2 errors=0 failures=0>