In [1]:
import os

class BaseCustomError(Exception):
    """Root of the notebook's exception hierarchy; can append itself to a log file."""

    def __init__(self, message):
        # Hand the text to Exception and keep a copy for log_error().
        super().__init__(message)
        self.message = message

    def log_error(self):
        """Append one ``ERROR: <message>`` line to ``error_log.txt``."""
        entry = f"ERROR: {self.message}\n"
        with open("error_log.txt", "a") as handle:
            handle.write(entry)

class CustomFileNotFoundError(BaseCustomError):
    """Raised when the requested file cannot be found."""

    def __init__(self, file_path, message="File not found"):
        # Final text has the form "<message>: <file_path>".
        detail = f"{message}: {file_path}"
        super().__init__(detail)

class CustomEmptyDataError(BaseCustomError):
    """Raised when a file holds no data."""

    def __init__(self, file_path, message="The file is empty"):
        # Final text has the form "<message>: <file_path>".
        detail = f"{message}: {file_path}"
        super().__init__(detail)

class CustomDataParseError(BaseCustomError):
    """Raised when the contents of a file cannot be parsed."""

    def __init__(self, file_path, message="Error parsing data"):
        # Final text has the form "<message>: <file_path>".
        detail = f"{message}: {file_path}"
        super().__init__(detail)

class CustomDataHandlerError(BaseCustomError):
    """Catch-all error for data-handling failures without a more specific exception."""

    def __init__(self, message="An unexpected error occurred"):
        # The message is passed through unchanged; no file path is attached.
        BaseCustomError.__init__(self, message)


In [2]:
import pandas as pd

"""Class for handling data loading and processing."""
class DataHandler:
    """Load a CSV file into a DataFrame, translating failures into custom exceptions.

    Attributes:
        file_path: Path of the CSV file handed to ``pd.read_csv``.
        data: DataFrame loaded from ``file_path`` when the instance is created.
    """

    def __init__(self, file_path):
        self.file_path = file_path
        self.data = self.load_data()

    def load_data(self):
        """Read ``self.file_path`` with pandas and return the resulting DataFrame.

        Every failure is recorded with ``log_error()`` before it is raised.

        Returns:
            pandas.DataFrame: The parsed, non-empty contents of the file.

        Raises:
            CustomFileNotFoundError: ``pd.read_csv`` raised ``FileNotFoundError``.
            CustomEmptyDataError: pandas raised ``EmptyDataError`` or the parsed
                frame has no rows.
            CustomDataParseError: pandas raised ``ParserError``.
            CustomDataHandlerError: Any other exception raised while reading.
        """
        try:
            data = pd.read_csv(self.file_path)
        except FileNotFoundError as e:
            custom_error = CustomFileNotFoundError(self.file_path)
            custom_error.log_error()  # Log the error
            raise custom_error from e
        except pd.errors.EmptyDataError as e:
            custom_error = CustomEmptyDataError(self.file_path)
            custom_error.log_error()  # Log the error
            raise custom_error from e
        except pd.errors.ParserError as e:
            custom_error = CustomDataParseError(self.file_path)
            custom_error.log_error()  # Log the error
            raise custom_error from e
        except Exception as e:
            custom_error = CustomDataHandlerError()
            custom_error.log_error()  # Log the error
            raise custom_error from e

        # Checked outside the ``try`` so the blanket ``except Exception`` above
        # cannot turn this specific error into the generic one. The path (not a
        # pre-formatted sentence) is passed, as the exception's signature expects.
        if data.empty:
            custom_error = CustomEmptyDataError(self.file_path)
            custom_error.log_error()  # Log the error
            raise custom_error
        return data

In [3]:
class TrainDataHandler(DataHandler):
    """DataHandler specialisation used for the training CSV."""

    def __init__(self, file_path):
        # No extra behaviour: everything is delegated to the parent constructor.
        DataHandler.__init__(self, file_path)
        
# Build the handler for 'train.csv', keep the DataFrame it exposes as ``.data``,
# and preview the first rows.
train_data = TrainDataHandler('train.csv').data
train_data.head()

Unnamed: 0,x,y1,y2,y3,y4
0,-20.0,39.778572,-40.07859,-20.214268,-0.324914
1,-19.9,39.604813,-39.784,-20.07095,-0.05882
2,-19.8,40.09907,-40.018845,-19.906782,-0.45183
3,-19.7,40.1511,-39.518402,-19.389118,-0.612044
4,-19.6,39.795662,-39.360065,-19.81589,-0.306076


In [4]:
from sqlalchemy import create_engine, Column, Integer, Float, String
from sqlalchemy.orm import declarative_base, sessionmaker

class DatabaseConnection:
    """Bundles the engine, declarative base and session factory for one database URL."""

    def __init__(self, db_url):
        """Create the engine, a new declarative base and a session factory for ``db_url``."""
        self.db_url = db_url
        engine = create_engine(db_url)
        self.engine = engine
        self.Base = declarative_base()
        self.Session = sessionmaker(bind=engine)

    def get_session(self):
        """Open and return a new session from the stored session factory."""
        session = self.Session()
        return session

    def get_Base(self):
        """Return the declarative base created for this connection."""
        return self.Base

    def get_engine(self):
        """Return the engine created for this connection."""
        return self.engine

In [5]:
class InsertRecords:
    """Persists one ORM object through the notebook-level ``database`` connection."""

    @staticmethod
    def create_tables(data):
        """Create any missing tables, then insert ``data`` in its own transaction.

        Reads the module-level ``database`` object, which must exist when the
        method is called. Declared as a static method so it can be called on
        the class or on an instance.

        Args:
            data: Mapped ORM instance to add to the session.

        Raises:
            Exception: Whatever table creation, ``add`` or ``commit`` raises.
                The transaction is rolled back first and the session is
                always closed.
        """
        session = database.get_session()  # New session for this single insert
        Base = database.get_Base()
        engine = database.get_engine()
        try:
            # Create all tables registered on the base that do not exist yet.
            Base.metadata.create_all(engine)
            session.add(data)
            session.commit()
        except Exception:
            # Leave the connection clean before propagating the failure.
            session.rollback()
            raise
        finally:
            session.close()

In [6]:
# SQLite URL pointing at the relative path ./my_db.db, and the connection
# wrapper built from it.
DB_URL = 'sqlite:///./my_db.db'
database = DatabaseConnection(DB_URL)
# ORM model for the training data, registered on this connection's declarative base.
class TrainingData(database.get_Base()):
    """Row of the ``training`` table.

    Holds an auto-generated ``id`` plus the float columns ``x``, ``y1``,
    ``y2``, ``y3`` and ``y4``.
    """
    __tablename__ = 'training'  # Name of the table holding the training data
    id = Column(Integer, primary_key=True, autoincrement=True)  # Auto-generated primary key
    x = Column(Float)
    y1 = Column(Float)
    y2 = Column(Float)
    y3 = Column(Float)
    y4 = Column(Float)

record = InsertRecords  # The class itself is used as the inserter
# Start from an empty table so that re-running this cell (or the whole notebook
# against the same database file) does not append a second copy of every row.
TrainingData.__table__.drop(database.get_engine(), checkfirst=True)
for _, row in train_data.iterrows():  # Iterate over the training data row by row
    # Build the keyword arguments x=row['x'], y1=row['y1'], ..., y4=row['y4']
    # from the DataFrame's column names.
    args = {key: row[key] for key in train_data.columns}
    record.create_tables(TrainingData(**args))  # Creates missing tables, then inserts the row

In [7]:
class IdealDataHandler(DataHandler):
    """DataHandler specialisation used for the ideal-functions CSV."""

    def __init__(self, file_path):
        # No extra behaviour: everything is delegated to the parent constructor.
        DataHandler.__init__(self, file_path)
        
# The constructor already loads the CSV into ``.data``; calling load_data()
# again would read the same file a second time.
ideal_function = IdealDataHandler('ideal.csv').data
ideal_function.head()

Unnamed: 0,x,y1,y2,y3,y4,y5,y6,y7,y8,y9,...,y41,y42,y43,y44,y45,y46,y47,y48,y49,y50
0,-20.0,-0.912945,0.408082,9.087055,5.408082,-9.087055,0.912945,-0.839071,-0.850919,0.816164,...,-40.456474,40.20404,2.995732,-0.008333,12.995732,5.298317,-5.298317,-0.186278,0.912945,0.39685
1,-19.9,-0.867644,0.497186,9.132356,5.497186,-9.132356,0.867644,-0.865213,0.168518,0.994372,...,-40.23382,40.04859,2.99072,-0.00834,12.99072,5.293305,-5.293305,-0.21569,0.867644,0.476954
2,-19.8,-0.813674,0.581322,9.186326,5.581322,-9.186326,0.813674,-0.889191,0.612391,1.162644,...,-40.006836,39.89066,2.985682,-0.008347,12.985682,5.288267,-5.288267,-0.236503,0.813674,0.549129
3,-19.7,-0.751573,0.659649,9.248426,5.659649,-9.248426,0.751573,-0.910947,-0.994669,1.319299,...,-39.775787,39.729824,2.980619,-0.008354,12.980619,5.283204,-5.283204,-0.247887,0.751573,0.61284
4,-19.6,-0.681964,0.731386,9.318036,5.731386,-9.318036,0.681964,-0.930426,0.774356,1.462772,...,-39.54098,39.565693,2.97553,-0.008361,12.97553,5.278115,-5.278115,-0.249389,0.681964,0.667902


In [8]:
# New connection wrapper (and with it a new declarative base) for the model defined below.
database = DatabaseConnection(DB_URL)
# ORM model for the ideal functions, registered on this connection's declarative base.
class IdealData(database.get_Base()):
    """Row of the ``ideal_functions`` table.

    Holds an auto-generated ``id``, ``x`` and the fifty float columns
    ``y1`` ... ``y50``.
    """
    __tablename__ = 'ideal_functions'  # Name of the table holding the ideal functions
    id = Column(Integer, primary_key=True, autoincrement=True)  # Auto-generated primary key
    x = Column(Float)

    # Add y1 ... y50 by writing into the class-body namespace returned by locals().
    # The loop variable ``i`` is left behind as a plain class attribute.
    for i in range(1, 51):
        locals()[f"y{i}"] = Column(Float)

record = InsertRecords  # The class itself is used as the inserter
# Start from an empty table so that re-running this cell (or the whole notebook
# against the same database file) does not append a second copy of every row.
IdealData.__table__.drop(database.get_engine(), checkfirst=True)
for _, row in ideal_function.iterrows():  # Iterate over the ideal-function data row by row
    # Build the keyword arguments x=row['x'], y1=row['y1'], ..., y50=row['y50']
    # from the DataFrame's column names.
    args = {key: row[key] for key in ideal_function.columns}
    record.create_tables(IdealData(**args))  # Creates missing tables, then inserts the row

In [9]:
from bokeh.plotting import figure, show

class DataVisualizer:
    """Draws one DataFrame column against ``x`` as a scatter plot on a figure."""

    @staticmethod
    def visulize_data(data_records, columnName, p=None, size=1, shape="circle", color=None, title=''):
        """Add a scatter of ``data_records[columnName]`` against ``data_records['x']``.

        Args:
            data_records: DataFrame containing an ``x`` column and ``columnName``.
            columnName: Name of the column plotted on the y axis.
            p: Existing figure to draw on; a new one is created when ``None``.
            size: Marker size passed to ``scatter``.
            shape: Marker type passed to ``scatter``.
            color: Marker colour; when falsy, one is derived from ``columnName``.
            title: Text appended to the legend label.

        Returns:
            The figure that was drawn on, so further glyphs can be added.

        Raises:
            ValueError: If ``columnName`` is not a column of ``data_records``.
        """
        # Check that the column exists in the DataFrame
        if columnName not in data_records.columns:
            raise ValueError(f"Column '{columnName}' not found in DataFrame.")

        colors = ["grey", "red", "black", "green", "yellow", "purple",
                  "blue", "violet", "orange", "pink", "brown"]
        if not color:
            # hash() of a str differs between interpreter runs (hash
            # randomisation), so the palette index is derived from the
            # characters instead: the same column gets the same colour on
            # every execution of the notebook.
            color = colors[sum(map(ord, str(columnName))) % len(colors)]

        # Create a new figure if none is provided
        if p is None:
            p = figure(title="Data Visualization", x_axis_label="X - Values", y_axis_label="Y- Values")

        # Add the scatter glyph for this column
        p.scatter(data_records['x'], data_records[columnName], size=size,
                  legend_label=f"X, {columnName} {title}", color=color, marker=shape)

        # Return the figure for further modifications
        return p

In [10]:
# Visualize the training data
# Create the DB connection object
db = DatabaseConnection(DB_URL)
# Load the training data from the DB:
# read the entire table into a pandas DataFrame
training_data = pd.read_sql_table("training", con=db.get_engine())

# Once the data is loaded, visualize it using Bokeh
train_data_visualizer = DataVisualizer
# Plot x against every y column of the training data (y1 to y4)
p = None
for i in range(1, len(training_data.columns)-1): # The table has id, x and the y columns, so the y indices run 1 .. len-2
    p = train_data_visualizer.visulize_data(training_data,'y'+str(i),p,1,shape='circle',title='Training Data')
show(p)

In [11]:
# Visualize the ideal-function data
# Create the DB connection object
db = DatabaseConnection(DB_URL)
# Load the ideal-function data from the DB:
# read the entire table into a pandas DataFrame
ideal_data = pd.read_sql_table("ideal_functions", con=db.get_engine())

# Once the data is loaded, visualize it using Bokeh
ideal_data_visualizer = DataVisualizer
# Plot x against every y column of the ideal functions (y1 to y50)
p = None
for i in range(1, len(ideal_data.columns)-1): # The table has id, x and the y columns, so the y indices run 1 .. len-2
    p = ideal_data_visualizer.visulize_data(ideal_data,'y'+str(i),p)
show(p) 

In [12]:
class MergeDataFrame:
    """Joins the training data onto the ideal functions by their shared ``x`` value.

    The ideal functions are later scored against the training data, which
    requires the y values of both sets paired per ``x`` (like a SQL JOIN).
    """

    @staticmethod
    def mergeData(train_data, ideal_function):
        """Merge ``ideal_function`` with ``train_data`` on the ``x`` column.

        Declared as a static method so it can be called on the class or on an
        instance.

        Args:
            train_data: DataFrame with an ``x`` column and training columns.
            ideal_function: DataFrame with an ``x`` column and ideal columns.

        Returns:
            pandas.DataFrame: Result of ``pd.merge`` (default join type). Column
            names present in both inputs keep their name for the ideal
            function and get the suffix ``_train`` for the training data.
        """
        return pd.merge(ideal_function, train_data, on='x', suffixes=('', '_train'))
'''
The training data and the ideal functions are stored in the database.
Load both as DataFrames and merge them into a single DataFrame for further computation.
For this assignment the x values of the training data and the ideal functions match
row by row, so a merge is not strictly necessary. It is done anyway so that rows are
paired by their x value rather than by position, which rules out alignment errors.
'''
db = DatabaseConnection(DB_URL)
training_data =pd.read_sql_table("training", con=db.get_engine())  # Load the training data from the DB
ideal_function = pd.read_sql_table('ideal_functions', con=db.get_engine()) # Load the ideal functions from the DB
merge_data = MergeDataFrame # The class itself is used; no instance is created
# NOTE(review): the merge below uses ``train_data`` (the frame read from the CSV
# earlier), not the ``training_data`` frame just loaded from the DB - confirm this is intended.
merged_df = merge_data.mergeData(train_data, ideal_function) # Merge the two DataFrames on x
merged_df.head()

Unnamed: 0,id,x,y1,y2,y3,y4,y5,y6,y7,y8,...,y45,y46,y47,y48,y49,y50,y1_train,y2_train,y3_train,y4_train
0,1,-20.0,-0.912945,0.408082,9.087055,5.408082,-9.087055,0.912945,-0.839071,-0.850919,...,12.995732,5.298317,-5.298317,-0.186278,0.912945,0.39685,39.778572,-40.07859,-20.214268,-0.324914
1,2,-19.9,-0.867644,0.497186,9.132356,5.497186,-9.132356,0.867644,-0.865213,0.168518,...,12.99072,5.293305,-5.293305,-0.21569,0.867644,0.476954,39.604813,-39.784,-20.07095,-0.05882
2,3,-19.8,-0.813674,0.581322,9.186326,5.581322,-9.186326,0.813674,-0.889191,0.612391,...,12.985682,5.288267,-5.288267,-0.236503,0.813674,0.549129,40.09907,-40.018845,-19.906782,-0.45183
3,4,-19.7,-0.751573,0.659649,9.248426,5.659649,-9.248426,0.751573,-0.910947,-0.994669,...,12.980619,5.283204,-5.283204,-0.247887,0.751573,0.61284,40.1511,-39.518402,-19.389118,-0.612044
4,5,-19.6,-0.681964,0.731386,9.318036,5.731386,-9.318036,0.681964,-0.930426,0.774356,...,12.97553,5.278115,-5.278115,-0.249389,0.681964,0.667902,39.795662,-39.360065,-19.81589,-0.306076


In [13]:
from sklearn.metrics import mean_squared_error  # For easier computation, calculating MSE
class FindBestFitFunctions:
    """Selects, for every training column, the ideal function with the lowest MSE.

    Each training pair (x, y1), (x, y2), ... is compared against every ideal
    function using the mean squared error (least-squares criterion); the ideal
    function with the smallest error is the best fit for that training column.
    """

    @staticmethod
    def find_best_fit_ideal_function(train, ideal, merged_df):
        """Return the best-fitting ideal column for each training column.

        Declared as a static method so it can be called on the class or on an
        instance.

        Args:
            train: Exclusive upper bound of the training index; the columns
                ``y1_train`` ... ``y{train-1}_train`` are evaluated.
            ideal: Exclusive upper bound of the ideal index; the columns
                ``y1`` ... ``y{ideal-1}`` are candidates.
            merged_df: DataFrame holding both the ideal columns and the
                ``_train``-suffixed training columns.

        Returns:
            dict: Maps each training column name to the name of the ideal
            column with the lowest MSE, or to ``''`` when no candidate column
            was usable.
        """
        best_fit_function = {}  # training column name -> best ideal column name
        # Outer loop: training columns. Inner loop: candidate ideal functions.
        for i in range(1, train):
            y_train = f"y{i}_train"   # Name of the training column in merged_df
            best_fit = ''             # Stays empty when no candidate is usable
            best_mse = float('inf')   # Any real MSE is smaller than infinity
            for j in range(1, ideal):
                y_func = f"y{j}"      # Name of the ideal-function column in merged_df
                # Skip candidates that are missing or have no rows
                if y_func not in merged_df.columns or merged_df[y_func].empty:
                    continue
                # The lowest MSE against the training column wins
                mean_error = mean_squared_error(merged_df[y_train], merged_df[y_func])
                if mean_error < best_mse:
                    best_mse = mean_error
                    best_fit = y_func
            best_fit_function[y_train] = best_fit
        return best_fit_function

find_best_fit_func = FindBestFitFunctions  # The class itself is used; no instance is created
# Both bounds are "column count - 1" of the frames loaded from the DB and are
# used as exclusive upper limits for the y-column indices.
best_fit_function = find_best_fit_func.find_best_fit_ideal_function(len(training_data.columns)-1, len(ideal_function.columns)-1, merged_df)
best_fit_function

{'y1_train': 'y42', 'y2_train': 'y41', 'y3_train': 'y11', 'y4_train': 'y48'}

In [14]:
# Optional sanity check: overlay every training column and the ideal function
# chosen for it, to confirm visually that the selection so far looks right.

# Create the DB connection object and load both tables
db = DatabaseConnection(DB_URL)
training_data = pd.read_sql_table("training", con=db.get_engine())
ideal_function = pd.read_sql_table("ideal_functions", con=db.get_engine())

data_visualize = DataVisualizer  # The class itself is used; no instance is created

# Training data: x against every y column
p = None
for i in range(1, len(training_data.columns)-1):
    p = data_visualize.visulize_data(training_data,'y'+str(i),p,2.5,'star', title='Training Data')

# Ideal functions: plot the columns selected in ``best_fit_function`` instead of
# hard-coding their names, so the figure stays correct if the selection changes.
# Empty selections are skipped and each column is drawn only once.
ideal_colors = ['red', 'green', 'black', 'blue']
chosen_columns = [col for col in dict.fromkeys(best_fit_function.values()) if col]
for idx, ideal_column in enumerate(chosen_columns):
    data_visualize.visulize_data(ideal_function, ideal_column, p, title='Ideal func',
                                 color=ideal_colors[idx % len(ideal_colors)])

# visulize_data serves both data sets through its optional parameters
# (default arguments; Python has no method overloading).

# Display the plot
show(p)

In [15]:
# Handler used to load the test data from its CSV file
class TestDataHandler(DataHandler):
    """DataHandler specialisation used for the test CSV."""

    def __init__(self, file_path):
        # No extra behaviour: everything is delegated to the parent constructor.
        DataHandler.__init__(self, file_path)

# The constructor already loads the CSV into ``.data``; calling load_data()
# again would read the same file a second time.
test_data_points = TestDataHandler('test.csv').data
test_data_points.head()

Unnamed: 0,x,y
0,17.5,34.16104
1,0.3,1.215102
2,-8.7,-16.843908
3,-19.2,-37.17087
4,-11.0,-20.263054


In [16]:
# Visualize the test data: y against x as green circles of size 7 on a new figure
test_data_visualizer = DataVisualizer
p = None
p = test_data_visualizer.visulize_data(test_data_points,'y',p,7,'circle','green',title='Test Data ')
show(p)

In [17]:
import numpy as np

class MaxDeviation:
    """Largest absolute gap between each training column and its chosen ideal function."""

    @staticmethod
    def find_max_deviations(best_fit_functions, train_df=None, ideal_df=None):
        """Compute the maximum absolute deviation per training column.

        Args:
            best_fit_functions: Dict mapping ``'<column>_train'`` to the name
                of the ideal column chosen for it.
            train_df: DataFrame holding the training columns. Defaults to the
                module-level ``training_data``.
            ideal_df: DataFrame holding the ideal columns. Defaults to the
                module-level ``ideal_function``.

        Returns:
            dict: Maps each training column name (without the ``_train``
            suffix) to ``max(|training - ideal|)``.
        """
        # Fall back to the notebook-level frames so existing one-argument calls keep working.
        if train_df is None:
            train_df = training_data
        if ideal_df is None:
            ideal_df = ideal_function

        max_deviation = {}
        for y_column, ideal_column in best_fit_functions.items():
            y_column = y_column.replace('_train', '')  # Match the column name in the training frame
            max_deviation[y_column] = np.max(np.abs(train_df[y_column] - ideal_df[ideal_column]))
        return max_deviation

# Maximum deviation per training column for the selected ideal functions;
# the function reads the notebook-level ``training_data`` and ``ideal_function`` frames.
max_deviation = MaxDeviation.find_max_deviations(best_fit_function)
max_deviation

{'y1': np.float64(0.4959680000000013),
 'y2': np.float64(0.49770300000000134),
 'y3': np.float64(0.4989360000000005),
 'y4': np.float64(0.49974158999999996)}

In [18]:
# Rebind the connection wrapper (and with it a new declarative base) for the model defined next.
DB_URL = 'sqlite:///./my_db.db'
database = DatabaseConnection(DB_URL)

In [19]:
# ORM model for the mapped test data, registered on this connection's declarative base.
class TestDataModel(database.get_Base()):
    """Row of the ``test_data_mapping`` table.

    Besides the auto-generated ``id``, the table stores four values per test
    point: ``x``, ``y``, ``delta_y`` and ``assigned_ideal_function``.
    """
    __tablename__='test_data_mapping'
    id = Column(Integer, primary_key=True, autoincrement=True)  # Auto-generated primary key
    x = Column(Float)  # x value of the test point (x_test)
    y = Column(Float)  # y value of the test point (y_test)
    delta_y = Column(Float)
    assigned_ideal_function = Column(String)

In [20]:
class MapTestData:
    """Assigns each test point to the closest admissible ideal function and stores the result.

    Attributes:
        test_data_points: DataFrame with the columns ``x`` and ``y``.
        ideal_function: DataFrame with an ``x`` column and the ideal columns.
        best_fit_function: Dict mapping ``'<column>_train'`` to an ideal column name.
        max_deviation: Dict mapping a training column name to its maximum deviation.
        record: Object whose ``create_tables(obj)`` persists one row.
    """

    def __init__(self, test_data_points, ideal_function, best_fit_function, max_deviation, record):
        self.test_data_points = test_data_points
        self.ideal_function = ideal_function
        self.best_fit_function = best_fit_function
        self.max_deviation = max_deviation
        self.record = record

    def _closest_ideal_function(self, x_test, y_test):
        """Return ``(ideal_column, deviation)`` of the closest admissible function, else ``(None, None)``."""
        best_fit = None
        min_deviation = None
        for y_column, ideal_column in self.best_fit_function.items():
            ideal_value = self.ideal_function.loc[self.ideal_function['x'] == x_test, ideal_column].values
            if len(ideal_value) == 0:
                continue  # The ideal function has no value at this x
            y_column_clean = y_column.replace('_train', '')  # Key used in max_deviation
            if y_column_clean not in self.max_deviation:
                continue
            deviation = np.abs(y_test - ideal_value[0])
            # Admissible only within max training deviation * sqrt(2)
            if not (deviation <= self.max_deviation[y_column_clean] * np.sqrt(2)):
                continue
            # Keep the smallest admissible deviation; previously the last
            # admissible function overwrote a better earlier match.
            if min_deviation is None or deviation < min_deviation:
                min_deviation = deviation
                best_fit = ideal_column
        return best_fit, min_deviation

    def map_test_data_with_ideal_function(self):
        """Map every test point and persist one ``TestDataModel`` row per point.

        Points without an admissible ideal function are stored with
        ``delta_y`` and ``assigned_ideal_function`` set to ``None``.
        """
        for _, row in self.test_data_points.iterrows():
            x_test = row['x']  # x value of the test point
            y_test = row['y']  # y value of the test point
            best_fit, min_deviation = self._closest_ideal_function(x_test, y_test)
            self.record.create_tables(TestDataModel(x=x_test, y=y_test, delta_y=min_deviation, assigned_ideal_function=best_fit))

record = InsertRecords  # The class itself is used as the inserter
# Start from an empty table so that re-running this cell (or the whole notebook
# against the same database file) does not store every test point a second time.
TestDataModel.__table__.drop(database.get_engine(), checkfirst=True)
data_pt = MapTestData(test_data_points,ideal_function,best_fit_function, max_deviation, record)
data_pt.map_test_data_with_ideal_function()

In [21]:
'''
Finally, let's visualize the test data points and the ideal functions together.
Some test data points cannot be assigned to any of the chosen ideal functions;
those points are treated as outliers.
'''
# Create the DB connection object
db = DatabaseConnection(DB_URL)
# Load the mapped test data from the DB into a pandas DataFrame and preview it
test_data = pd.read_sql_table("test_data_mapping", con=db.get_engine())
test_data.head()

Unnamed: 0,id,x,y,delta_y,assigned_ideal_function
0,1,17.5,34.16104,0.351148,y41
1,2,0.3,1.215102,0.467342,y41
2,3,-8.7,-16.843908,,
3,4,-19.2,-37.17087,,
4,5,-11.0,-20.263054,,


In [22]:
# Test points that were assigned an ideal function (non-null assignment column)
fitted_test_data = test_data[test_data['assigned_ideal_function'].notna()]
fitted_test_data.head()

Unnamed: 0,id,x,y,delta_y,assigned_ideal_function
0,1,17.5,34.16104,0.351148,y41
1,2,0.3,1.215102,0.467342,y41
5,6,0.8,1.426456,0.626456,y11
6,7,14.0,-0.066506,0.134233,y48
8,9,-15.0,-0.205363,0.452371,y48


In [23]:
# Test points without an assigned ideal function (null assignment column): the outliers
outlier_test_data = test_data[test_data['assigned_ideal_function'].isna()]
outlier_test_data.head()

Unnamed: 0,id,x,y,delta_y,assigned_ideal_function
2,3,-8.7,-16.843908,,
3,4,-19.2,-37.17087,,
4,5,-11.0,-20.263054,,
7,8,-10.4,-2.007094,,
10,11,-7.6,-39.4954,,


In [24]:
# Column dtypes and non-null counts of the mapped test data
test_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 5 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   id                       100 non-null    int64  
 1   x                        100 non-null    float64
 2   y                        100 non-null    float64
 3   delta_y                  48 non-null     float64
 4   assigned_ideal_function  48 non-null     object 
dtypes: float64(3), int64(1), object(1)
memory usage: 4.0+ KB


In [25]:
# Visualize the assigned test data points and the outliers together.
data_visualizer = DataVisualizer  # The class itself is used; no instance is created
p = None
p = data_visualizer.visulize_data(fitted_test_data,'y',p,5,'circle',title='Fitted Test Data', color='black')  # Fitted test data as black circles
data_visualizer.visulize_data(outlier_test_data,'y',p,5,'star',title='Outliers Test Data', color='red')  # Outliers as red stars

# Overlay the chosen ideal functions. The columns come from ``best_fit_function``
# instead of being hard-coded, so the figure stays correct if the selection
# changes. Empty selections are skipped and each column is drawn only once.
ideal_colors = ['brown', 'orange', 'green', 'blue']
chosen_columns = [col for col in dict.fromkeys(best_fit_function.values()) if col]
for idx, ideal_column in enumerate(chosen_columns):
    data_visualizer.visulize_data(ideal_function, ideal_column, p, title='Ideal func',
                                  color=ideal_colors[idx % len(ideal_colors)])

show(p)

In [26]:
import unittest
import pandas as pd

class TestFindBestFitFunctions(unittest.TestCase):
    """Unit tests for ``FindBestFitFunctions.find_best_fit_ideal_function``."""

    def test_find_best_fit_ideal_function(self):
        """Test normal operation with valid data."""
        # Sample training data
        training_data_ = pd.DataFrame({
            'id':[1,2,3],
            'x': [1, 2, 3],
            'y1': [1.1, 2.2, 3.3],
            'y2': [5.2, 5.3, 6.4]
        })

        # Sample ideal function data
        ideal_function_ = pd.DataFrame({
            'id':[1,2,3],
            'x': [1, 2, 3],
            'y1': [1.0, 2.0, 3.0],
            'y2': [2.1, 2.1, 3.1],
            'y3': [5.2, 5.2, 5.2]
        })

        # Merged DataFrame (training data + ideal functions)
        merged_df_ = pd.merge(ideal_function_, training_data_, on='x', suffixes=('', '_train'))

        # Expected output
        expected_best_fit = {
            'y1_train': 'y1',  # y1_train should match y1 (lowest MSE)
            'y2_train': 'y3'   # y2_train should match y3 (lowest MSE)
        }

        # Both bounds are "column count - 1", used as exclusive upper limits
        # for the y-column indices.
        best_fit_function_ = FindBestFitFunctions.find_best_fit_ideal_function(
            len(training_data_.columns) - 1,
            len(ideal_function_.columns) - 1,
            merged_df_
        )

        # Verify the result
        self.assertEqual(best_fit_function_, expected_best_fit)

    def test_empty_data(self):
        """With no rows, every training column maps to the empty string."""
        # Frames that have the expected columns but no rows
        training_data_ = pd.DataFrame(columns=['id', 'x', 'y1', 'y2'])
        ideal_function_ = pd.DataFrame(columns=['id', 'x', 'y1', 'y2', 'y3'])
        merged_df_ = pd.DataFrame(columns=['id', 'x', 'y1', 'y2','y3', 'y1_train', 'y2_train'])

        # Call the function under test; the previous version only compared two
        # hand-written dictionaries and never exercised it.
        best_fit_function_ = FindBestFitFunctions.find_best_fit_ideal_function(
            len(training_data_.columns) - 1,
            len(ideal_function_.columns) - 1,
            merged_df_
        )

        expected_output = {'y1_train': '', 'y2_train': ''}
        self.assertEqual(best_fit_function_, expected_output)

# Run the tests when the cell is executed as the main module.
if __name__ == '__main__':
    # Load only this TestCase and run it with verbose output
    suite = unittest.TestLoader().loadTestsFromTestCase(TestFindBestFitFunctions)
    unittest.TextTestRunner(verbosity=2).run(suite)

test_empty_data (__main__.TestFindBestFitFunctions.test_empty_data)
Test case where input data is empty. ... ok
test_find_best_fit_ideal_function (__main__.TestFindBestFitFunctions.test_find_best_fit_ideal_function)
Test normal operation with valid data. ... ok

----------------------------------------------------------------------
Ran 2 tests in 0.028s

OK


In [27]:
import unittest
from sqlalchemy import Column, Integer, Float, String
from sqlalchemy.orm import declarative_base
from sqlalchemy.exc import IntegrityError

# Separate SQLite file used by the database tests below
DATABASE_URL = "sqlite:///testx.db"  # Using SQLite for testing

# NOTE(review): this rebinds the notebook-level name ``database``, which
# InsertRecords.create_tables reads - any cell run after this one writes to testx.db.
database = DatabaseConnection(DATABASE_URL)
Base = database.get_Base()
engine = database.get_engine()
Session = database.get_session()  # A single session object (not a factory), despite the name

# Define a sample table model for testing
class SampleModel(Base):
    """Row of ``sample_table``: auto-generated ``id``, required floats ``x`` and
    ``y``, and an optional ``description`` string."""
    __tablename__ = "sample_table"
    id = Column(Integer, primary_key=True, autoincrement=True)  # Auto-generated primary key
    x = Column(Float, nullable=False)  # NULL is rejected
    y = Column(Float, nullable=False)  # NULL is rejected
    description = Column(String, nullable=True)

# Recreate the tables registered on the test base
Base.metadata.drop_all(engine)  # Drop existing tables to ensure a clean test
Base.metadata.create_all(engine)  # Create them again, empty

class TestDatabaseConnection(unittest.TestCase):
    """Tests for the database connection and ``InsertRecords`` against the test database."""

    def setUp(self):
        """Open a new session before each test."""
        # A fresh session per test, as the docstring promises, instead of one
        # module-level session shared (and closed) by every test.
        self.session = database.get_session()

    def tearDown(self):
        """Roll back changes and close the session after each test."""
        self.session.rollback()  # Discard anything a failed test left uncommitted
        self.session.close()
        engine.dispose()

    def test_database_connection(self):
        """Test that the database connection is established."""
        self.assertIsNotNone(engine)
        self.assertIsNotNone(self.session)

    def test_insert_record(self):
        """Test inserting a record into the database."""
        test_entry = SampleModel(x=1.0, y=2.0, description="Test Entry")
        self.session.add(test_entry)
        self.session.commit()

        retrieved_entry = self.session.query(SampleModel).filter_by(x=1.0).first()
        self.assertIsNotNone(retrieved_entry)
        self.assertEqual(retrieved_entry.y, 2.0)
        self.assertEqual(retrieved_entry.description, "Test Entry")

    def test_insert_null_value(self):
        """Test inserting a record with a null constraint violation."""
        test_entry = SampleModel(x=None, y=2.0, description="Invalid Entry")
        self.session.add(test_entry)

        with self.assertRaises(IntegrityError):
            self.session.commit()
        self.session.rollback()

    def test_insert_records_via_insert_records_class(self):
        """Test inserting a record using the InsertRecords class."""
        test_entry = SampleModel(x=3.0, y=4.0, description="Inserted via InsertRecords")
        # InsertRecords uses its own session obtained from the notebook-level ``database``.
        InsertRecords.create_tables(test_entry)

        retrieved_entry = self.session.query(SampleModel).filter_by(x=3.0).first()
        self.assertIsNotNone(retrieved_entry)
        self.assertEqual(retrieved_entry.y, 4.0)
        self.assertEqual(retrieved_entry.description, "Inserted via InsertRecords")

# Run the tests when the cell is executed as the main module.
if __name__ == '__main__':
    # Load only this TestCase and run it with verbose output
    suite = unittest.TestLoader().loadTestsFromTestCase(TestDatabaseConnection)
    unittest.TextTestRunner(verbosity=2).run(suite)

test_database_connection (__main__.TestDatabaseConnection.test_database_connection)
Test that the database connection is established. ... ok
test_insert_null_value (__main__.TestDatabaseConnection.test_insert_null_value)
Test inserting a record with a null constraint violation. ... ok
test_insert_record (__main__.TestDatabaseConnection.test_insert_record)
Test inserting a record into the database. ... ok
test_insert_records_via_insert_records_class (__main__.TestDatabaseConnection.test_insert_records_via_insert_records_class)
Test inserting a record using the InsertRecords class. ... ok

----------------------------------------------------------------------
Ran 4 tests in 0.040s

OK
