In [1]:
pip install pandas sqlalchemy




In [2]:
import math
from itertools import cycle

import numpy as np
import pandas as pd
from bokeh.io import output_notebook
from bokeh.palettes import Category10_10
from bokeh.plotting import figure, show
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker



In [3]:
# Step 1: Create (or open) the SQLite database and bind a SQLAlchemy session to it.
DATABASE_URL = 'sqlite:///regression_data.db'  # relative path -> file in the working directory
engine = create_engine(DATABASE_URL)
Session = sessionmaker(bind=engine)
session = Session()

In [4]:
# Step 2: Read the training data (A) from CSV and store it as the 'training_data'
# table, replacing any table of that name left over from a previous run.
# The cell's output is the value returned by `to_sql`.
training_data = pd.read_csv('train.csv')
training_data.to_sql(
    name='training_data',
    con=engine,
    index=False,
    if_exists='replace',
)

400

In [5]:
# Step 3: Read the ideal functions (C) from CSV and store them as the
# 'ideal_functions' table, replacing any table of that name from a previous run.
# The cell's output is the value returned by `to_sql`.
ideal_functions = pd.read_csv('ideal.csv')
ideal_functions.to_sql(
    name='ideal_functions',
    con=engine,
    index=False,
    if_exists='replace',
)

400

In [6]:
# Step 4: Read the test data (B) from its CSV file into a DataFrame.
# NOTE: this cell only loads the file; no per-row criteria check or database
# write happens here.
test_data = pd.read_csv(filepath_or_buffer='test.csv')

In [7]:
# Helper functions: the mean squared error between two series, and the least-squares selection of the best-fitting ideal function for each training column

def mse(y1, y2):
    """Return the mean of the squared differences between ``y1`` and ``y2``.

    The arguments only need to support subtraction and ``** 2``, and the
    result must expose a ``.mean()`` method (e.g. pandas Series, NumPy arrays
    or NumPy scalars).
    """
    residuals = y1 - y2
    squared_residuals = residuals ** 2
    return squared_residuals.mean()
    
def choose_ideal(training_data, ideal_functions):
    """Pick, for every training column, the ideal column with the smallest MSE.

    The first column of each frame is skipped (presumably the shared ``x``
    column - confirm against the CSV files). Every remaining training column
    is compared against every remaining ideal column with ``mse``.

    Returns:
        dict mapping each training column name to the name of the ideal
        column that minimises the mean squared error against it.
    """
    candidate_names = ideal_functions.columns[1:]
    best_fit = {}
    for train_name in training_data.columns[1:]:
        errors = np.array([
            mse(training_data[train_name], ideal_functions[candidate])
            for candidate in candidate_names
        ])
        # argmin returns the position of the first smallest error.
        best_fit[train_name] = candidate_names[np.argmin(errors)]
    return best_fit





In [8]:
# Helper function to calculate the deviation for training data
def train_dev(training_data, ideal_functions, mean_squared_error):
    """Return, per training row, sqrt(2) times the largest squared deviation.

    For every row, each mapped training column is compared with its chosen
    ideal column in the row at the same position of ``ideal_functions``. The
    largest squared difference of that row is scaled by ``sqrt(2)``.

    Args:
        training_data: DataFrame containing the training columns named by the
            keys of ``mean_squared_error``.
        ideal_functions: DataFrame containing the ideal columns named by the
            values of ``mean_squared_error``; it must have at least as many
            rows as ``training_data`` (rows are paired by position).
        mean_squared_error: dict mapping training column name -> chosen ideal
            column name (the result of ``choose_ideal``).

    Returns:
        list of floats, one per row of ``training_data``.

    Raises:
        ValueError: if the mapping is empty or ``ideal_functions`` has fewer
            rows than ``training_data``.

    NOTE(review): the scaled quantity is a *squared* deviation (the MSE of two
    scalars), not an absolute deviation - confirm this is the intended
    criterion.
    """
    if not mean_squared_error:
        raise ValueError("mean_squared_error mapping is empty; nothing to compare")

    n_rows = len(training_data)
    if len(ideal_functions) < n_rows:
        raise ValueError("ideal_functions has fewer rows than training_data")

    # Only the mapped columns take part; in particular the x column is left
    # out because it has no entry in the mapping.
    train_values = training_data[list(mean_squared_error.keys())].to_numpy(dtype=float)
    ideal_values = (
        ideal_functions[list(mean_squared_error.values())]
        .iloc[:n_rows]
        .to_numpy(dtype=float)
    )

    # The MSE of two scalars is simply their squared difference.
    squared_deviation = (train_values - ideal_values) ** 2
    return (np.sqrt(2) * squared_deviation.max(axis=1)).tolist()
                       
                       


In [9]:
# Helper function to calculate the deviation for test data
def test_calculations(test_data, mean_squared_error, ideal_functions, dev1):
    dev2=[]
    for row in test_data.iterrows():
        mse=[]
        for value in mean_squared_error.values():
            mse.append(mse(row[1][value], ideal_functions.iloc[row[0]][value]))
        max_mse=max(mse)    
        dev2.append(max_mse)
    return(dev2) 

In [10]:
# Function to compare deviations
def comparison(dev1, dev2):
    """Compare two deviation sequences position by position.

    For every position ``i`` of ``dev1`` the result holds ``0`` when
    ``dev1[i] > dev2[i]`` and the position ``i`` itself otherwise.

    Args:
        dev1: sequence of deviations; its length determines the result length.
        dev2: sequence of deviations with at least ``len(dev1)`` entries.

    Returns:
        list of ints with one entry per element of ``dev1``.

    Raises:
        IndexError: if ``dev2`` is shorter than ``dev1``.

    NOTE(review): position 0 is recorded as ``0`` in both branches, so the
    first entry cannot tell the two outcomes apart - confirm the encoding.
    """
    result = []
    for i, first in enumerate(dev1):
        result.append(0 if first > dev2[i] else i)
    # The original built the list but never returned it.
    return result


In [11]:
# Helper function to plot a line chart
def plot_line_chart(x, y, title):
    """Render one Bokeh line per column of ``y`` against ``x`` in the notebook.

    Args:
        x: x values shared by all lines.
        y: DataFrame with one column per line to draw; a single Series is
            also accepted and treated as a one-column frame.
        title: title shown above the figure.

    The y-axis range is widened so that it always includes 0.
    """
    # Accept a single Series as well as a DataFrame.
    if isinstance(y, pd.Series):
        y = y.to_frame()

    output_notebook()
    p = figure(title=title, x_axis_label='x', y_axis_label='y')
    # Give each series its own colour (cycling if there are more than ten) so
    # the legend entries can be told apart.
    for col, color in zip(y.columns, cycle(Category10_10)):
        # legend_label must be a string, so non-string column labels are converted.
        p.line(x, y[col], legend_label=str(col), line_width=2, color=color)
    p.y_range.start = min(y.min().min(), 0)  # Adjust the y-axis range
    p.y_range.end = max(y.max().max(), 0)
    show(p)


In [15]:
# Draw the four training series against x in a single figure
plot_line_chart(
    x=training_data['x'],
    y=training_data[['y1', 'y2', 'y3', 'y4']],
    title='Training Data',
)


In [16]:
# Draw the test series against x
plot_line_chart(
    x=test_data['x'],
    y=test_data[['y']],
    title='Test Data',
)


In [17]:
# Plot chosen ideal functions
mean_squared_error = choose_ideal(training_data, ideal_functions)

# Convert the mean_squared_error dictionary to a DataFrame (one row per training column)
mse_df = pd.DataFrame(list(mean_squared_error.items()), columns=['Column', 'Ideal Function'])

# Plot each chosen ideal function in its own figure.
# Iterate over the (training column, ideal column) pairs of the mapping:
# DataFrame.items() would yield whole columns of mse_df, not its rows.
for column, ideal_function in mean_squared_error.items():
    # Double brackets keep a one-column DataFrame, which plot_line_chart iterates over.
    plot_line_chart(training_data['x'], ideal_functions[[ideal_function]], f'Chosen Ideal Function for {column}')
