In [1]:
# Required imports
import logging
import numpy as np
import pandas as pd
from bokeh.layouts import gridplot, column
from bokeh.models import ColumnDataSource, DataTable, TableColumn
from bokeh.plotting import figure, output_file, show
from sqlalchemy import create_engine
from sqlalchemy.exc import SQLAlchemyError
from math import sqrt

# Set up logging for debugging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

class DatabaseHandler:
    """Handles database operations for loading and saving data."""
    def __init__(self, db_name='functions.db'):
        self.engine = create_engine(f'sqlite:///{db_name}')
        self.conn = self.engine.connect()

    def save_to_db(self, df, table_name):
        """Save DataFrame to SQLite database."""
        try:
            df.to_sql(table_name, self.conn, if_exists='replace', index=False)
            logging.info(f"Data saved to table '{table_name}'.")
        except SQLAlchemyError as e:
            logging.error(f"Error saving to database: {e}")

    def load_from_db(self, table_name):
        """Load data from SQLite database."""
        try:
            return pd.read_sql_table(table_name, self.conn)
        except SQLAlchemyError as e:
            logging.error(f"Error loading from database: {e}")
            return None

class FunctionSelector:
    """Selects the best ideal functions based on least-squares criterion."""
    @staticmethod
    def find_best_ideal_functions(training_df, ideal_df):
        best_funcs = []
        deviations = {}

        for col in ideal_df.columns[1:]:  # Ignore 'x' column
            deviation = ((training_df.iloc[:, 1:] - ideal_df[col].values.reshape(-1, 1)) ** 2).sum().sum()
            deviations[col] = deviation

        sorted_deviations = sorted(deviations, key=deviations.get)
        best_funcs = sorted_deviations[:4]  # Select the four functions with smallest deviations
        logging.info(f"Best functions selected: {best_funcs}")
        
        return best_funcs

class DataMapper:
    """Maps test data to chosen ideal functions based on deviation criteria."""
    @staticmethod
    def map_test_to_ideal(test_df, training_df, ideal_df, best_funcs):
        results = []

        for _, test_row in test_df.iterrows():
            x_test, y_test = test_row['x'], test_row['y']
            for func_name in best_funcs:
                if func_name not in ideal_df.columns:
                    continue

                ideal_y = ideal_df[ideal_df['x'] == x_test][func_name]
                if ideal_y.empty:
                    continue  # Skip if no corresponding x-value in ideal functions

                ideal_value = ideal_y.values[0]
                deviation = abs(y_test - ideal_value)

                # Maximum allowed deviation based on training data
                train_y = training_df[f'y{best_funcs.index(func_name) + 1}']
                max_deviation = max(abs(train_y - ideal_df[func_name])) * sqrt(2)

                if deviation <= max_deviation:
                    results.append({'x': x_test, 'y': y_test, 'Delta y': deviation, 'Ideal Func': func_name})

        logging.info(f"Mapping complete. {len(results)} entries mapped.")
        return pd.DataFrame(results)

class DataVisualizer:
    """Visualizes training, ideal, and test data using Bokeh."""
    @staticmethod
    def visualize_data(training_df, mapped_df, ideal_df, best_funcs):
        output_file("visualization.html")

        plots = []
        for i, func_name in enumerate(best_funcs):
            p = figure(title=f'Training vs Ideal Function {func_name}', x_axis_label='x', y_axis_label='y')
            p.line(training_df['x'], training_df[f'y{i+1}'], color="blue", legend_label=f'Training y{i+1}')
            p.line(ideal_df['x'], ideal_df[func_name], color="green", legend_label=f'Ideal {func_name}')

            test_subset = mapped_df[mapped_df['Ideal Func'] == func_name]
            if not test_subset.empty:
                p.scatter(test_subset['x'], test_subset['y'], color="red", size=5, legend_label='Test Data')

            p.legend.location = "top_left"
            plots.append(p)

        source = ColumnDataSource(mapped_df)
        columns = [
            TableColumn(field="x", title="X"),
            TableColumn(field="y", title="Y"),
            TableColumn(field="Delta y", title="Delta Y"),
            TableColumn(field="Ideal Func", title="Ideal Function"),
        ]
        data_table = DataTable(source=source, columns=columns, width=600, height=600)

        layout = column(gridplot(plots, ncols=2), data_table)
        show(layout)

        logging.info("Visualization complete.")

def main():
    # Load data from CSV files
    training_data = pd.read_csv('train_data.csv')
    ideal_data = pd.read_csv('ideal_functions.csv')
    test_data = pd.read_csv('test_data.csv')
    db_handler = DatabaseHandler()

    # Save data to database
    db_handler.save_to_db(training_data, 'training')
    db_handler.save_to_db(ideal_data, 'ideal_functions')
    db_handler.save_to_db(test_data, 'test_data')

    # Select best ideal functions
    selector = FunctionSelector()
    best_ideal_funcs = selector.find_best_ideal_functions(training_data, ideal_data)

    # Map test data to ideal functions
    mapper = DataMapper()
    mapped_data = mapper.map_test_to_ideal(test_data, training_data, ideal_data, best_ideal_funcs)
    db_handler.save_to_db(mapped_data, 'test_mapping')

    # Visualize data and table
    visualizer = DataVisualizer()
    visualizer.visualize_data(training_data, mapped_data, ideal_data, best_ideal_funcs)

# Run main
if __name__ == "__main__":
    main()


2024-11-03 11:25:28,898 - INFO - Data saved to table 'training'.
2024-11-03 11:25:28,961 - INFO - Data saved to table 'ideal_functions'.
2024-11-03 11:25:29,020 - INFO - Data saved to table 'test_data'.
2024-11-03 11:25:29,063 - INFO - Best functions selected: ['y48', 'y44', 'y50', 'y2']
2024-11-03 11:25:29,243 - INFO - Mapping complete. 301 entries mapped.
2024-11-03 11:25:29,272 - INFO - Data saved to table 'test_mapping'.
2024-11-03 11:25:31,298 - INFO - Visualization complete.
