In [82]:
import os
import re
from typing import List, Tuple, Callable

import numpy as np
import pandas as pd

pd.set_option('display.precision', 12)  # Show 12 decimal places in DataFrame output
pd.set_option('display.width', 300)     # Allow up to 300 characters per line before wrapping
pd.set_option('display.max_columns', None)  # Show all columns (no truncation)

# Least Squares (Minimum Sum of Squared Errors)

## Algorithm

* Input:
    * A set of $n$ data points $D = \{(x_i, y_i)\}_{i=1}^n$.
    * A set of $m$ basis functions $G = \{\varphi_j(x)\}_{j=1}^m$.

* Output: A coefficient vector $a = [a_1, \dots, a_m]^t$ that minimizes the sum of squared errors $S = \sum_{i=1}^{n} \left(\sum_{j=1}^{m} a_j \varphi_j(x_i) - y_i\right)^2$.

* Steps:

1. Construct the Design Matrix $\Phi$:

    Create an $n \times m$ matrix $\Phi$ where the entry at row $i$, column $j$ is $\Phi_{ij} = \varphi_j(x_i)$.
        $$
        \Phi = \begin{bmatrix}
        \varphi_1(x_1) & \varphi_2(x_1) & \dots & \varphi_m(x_1) \\
        \varphi_1(x_2) & \varphi_2(x_2) & \dots & \varphi_m(x_2) \\
        \vdots & \vdots & \ddots & \vdots \\
        \varphi_1(x_n) & \varphi_2(x_n) & \dots & \varphi_m(x_n)
        \end{bmatrix}
        $$

2. Construct the Target Vector $y$:

    Create an $n \times 1$ column vector $y = [y_1, y_2, \dots, y_n]^t$.

3. Form the Gram Matrix $M$:
    
    Calculate the $m \times m$ matrix $M$ by multiplying the transpose of $\Phi$ by $\Phi$.
        $$M = \Phi^t \Phi$$

4. Form the Right-Hand Side Vector $b'$:

    Calculate the $m \times 1$ vector $b'$ by multiplying the transpose of $\Phi$ by $y$.
        $$b' = \Phi^t y$$


5. Solve the Normal Equations:
    Solve the $m \times m$ system of linear equations $Ma = b'$ for the unknown $m \times 1$ coefficient vector $a$.

* Output $a$:
    * $a = M^{-1} b'$
    * The final approximating function is $g(x) = a_1\varphi_1(x) + a_2\varphi_2(x) + \dots + a_m\varphi_m(x)$.

In [83]:
def calculate_least_squares_error(
    y_vals: np.ndarray,
    b_prime: np.ndarray,
    a_coeffs: np.ndarray
) -> Tuple[float, float, float]:
    """
    Calculates the SSE, MSE, and RMSE of a normal-equations fit.

    The sum of squared errors is obtained from the identity
    SSE = y^T y - a^T b', which holds when `a_coeffs` solves the normal
    equations (Phi^T Phi) a = b' with b' = Phi^T y.

    Args:
        y_vals (np.ndarray): The n x 1 target vector y.
        b_prime (np.ndarray): The m x 1 vector (Phi^T * y).
        a_coeffs (np.ndarray): The m x 1 solution vector a.

    Returns:
        (SSE, MSE, RMSE), where MSE = SSE / n and RMSE = sqrt(MSE).
    """
    n = len(y_vals)

    # 1. [y, y] = y^T * y
    y_t_y = y_vals.T.dot(y_vals).item()

    # 2. sum(a_j * [y, phi_j]) = a^T * b'
    a_t_b_prime = a_coeffs.T.dot(b_prime).item()

    # 3. SSE = [y, y] - sum(a_j * [y, phi_j])
    # The two terms are nearly equal for a (near-)exact fit, so round-off can
    # push the difference slightly below zero. A sum of squares can never be
    # negative, so clamp it; otherwise the square root below would yield NaN.
    sse = max(y_t_y - a_t_b_prime, 0.0)

    # 4. MSE = SSE / n
    mse = sse / n

    # 5. RMSE = sqrt(MSE)
    rmse = np.sqrt(mse)

    return sse, mse, rmse

In [84]:
def least_squares_fit(
    points: List[Tuple[float, float]],
    basis_funcs: List[Callable[[float], float]]
) -> Tuple:
    """
    Finds the coefficients for a set of basis functions that best fit
    the given (x, y) data points using the method of Normal Equations.

    Args:
        points: A list of (x, y) data tuples.
        basis_funcs: A list of callables representing the basis [g1, g2, ..., gm].
            Each callable is applied to the whole array of x values at once; a
            constant basis function may simply return a scalar.

    Returns:
        An 8-tuple (df_phi, df_M, df_b_prime, df_a, a_coeffs, sse, mse, rmse):
            df_phi:     n x m design matrix Phi as a DataFrame.
            df_M:       m x m Gram matrix M = Phi^T Phi as a DataFrame.
            df_b_prime: m x 1 vector b' = Phi^T y as a DataFrame.
            df_a:       m x 1 coefficient table, indexed a_1..a_m.
            a_coeffs:   m x 1 np.ndarray of optimal coefficients.
            sse, mse, rmse: error metrics of the fit.
        If M is singular, an error message is printed and the last five
        entries (df_a, a_coeffs, sse, mse, rmse) are all None, so callers that
        unpack the tuple can test `a_coeffs is None`.
    """

    # --- 1. Construct Input Matrices and Vectors ---
    n = len(points)
    m = len(basis_funcs)

    if n < m:
        print("Warning: More basis functions (m) than data points (n). "
              "The system is underdetermined.")

    x_vals = np.array([p[0] for p in points])
    y_vals = np.array([p[1] for p in points]).reshape(-1, 1) # y must be n x 1

    # --- 2. Construct Design Matrix Phi (n x m) ---
    # Phi_ij = phi_j(x_i); a scalar result is broadcast down the whole column.
    phi_matrix = np.zeros((n, m))
    for j, func in enumerate(basis_funcs):
        phi_matrix[:, j] = func(x_vals)

    df_phi = pd.DataFrame(phi_matrix, columns=[f"\u03C6_{j+1}(x)" for j in range(m)])

    # --- 3. Form Gram Matrix M = Phi^T * Phi (m x m) ---
    phi_t = phi_matrix.T
    M = phi_t.dot(phi_matrix)

    df_M = pd.DataFrame(M)

    # --- 4. Form Right-Hand Side b' = Phi^T * y (m x 1) ---
    b_prime = phi_t.dot(y_vals)

    df_b_prime = pd.DataFrame(b_prime)

    # --- 5. Solve the Normal Equations Ma = b' ---
    try:
        a_coeffs = np.linalg.solve(M, b_prime)
    except np.linalg.LinAlgError:
        print("Error: The matrix M is singular. "
              "The basis functions may be linearly dependent.")
        # Keep the 8-tuple shape so that callers' tuple-unpacking still works;
        # the intermediate matrices remain available for inspection.
        return df_phi, df_M, df_b_prime, None, None, None, None, None

    df_a = pd.DataFrame(a_coeffs, columns=['a_j'])
    df_a.index = [f"a_{j+1}" for j in range(m)]

    # --- 6. Calculate Error Metrics ---
    sse, mse, rmse = calculate_least_squares_error(y_vals, b_prime, a_coeffs)

    return df_phi, df_M, df_b_prime, df_a, a_coeffs, sse, mse, rmse

## Result

In [85]:
# --- Example Usage ---

# Twelve samples, one every 30 degrees from 30 to 360; angles are converted
# to radians because the basis functions below use np.cos / np.sin.
points_data = [
    (np.deg2rad(angle_deg), y_measured)
    for angle_deg, y_measured in zip(
        range(30, 361, 30),
        (
            2.611, 3.102, 2.912, 2.105, 0.612, -1.321,
            -1.906, -2.412, -2.802, -2.703, -1.610, -1.500,
        ),
    )
]

# Basis: {1, cos x, cos 2x, sin x, sin 2x}
basis_set = [
    lambda t: 1,
    lambda t: np.cos(t),
    lambda t: np.cos(2*t),
    lambda t: np.sin(t),
    lambda t: np.sin(2*t),
]

(
    df_phi, df_M, df_b_prime, df_a,
    a_coeffs, sse, mse, rmse,
) = least_squares_fit(points_data, basis_set)

In [86]:
# Display the design matrix Phi (n x m) returned by least_squares_fit
df_phi.style

Unnamed: 0,φ_1(x),φ_2(x),φ_3(x),φ_4(x),φ_5(x)
0,1.0,0.866025,0.5,0.5,0.866025
1,1.0,0.5,-0.5,0.866025,0.866025
2,1.0,0.0,-1.0,1.0,0.0
3,1.0,-0.5,-0.5,0.866025,-0.866025
4,1.0,-0.866025,0.5,0.5,-0.866025
5,1.0,-1.0,1.0,0.0,-0.0
6,1.0,-0.866025,0.5,-0.5,0.866025
7,1.0,-0.5,-0.5,-0.866025,0.866025
8,1.0,-0.0,-1.0,-1.0,0.0
9,1.0,0.5,-0.5,-0.866025,-0.866025


In [87]:
# Display the Gram matrix M = Phi^T * Phi (m x m)
df_M.style

Unnamed: 0,0,1,2,3,4
0,12.0,-0.0,-0.0,-0.0,0.0
1,-0.0,6.0,-0.0,0.0,-0.0
2,-0.0,-0.0,6.0,-0.0,-0.0
3,-0.0,0.0,-0.0,6.0,-0.0
4,0.0,-0.0,-0.0,-0.0,6.0


In [88]:
# Display the right-hand side vector b' = Phi^T * y (m x 1)
df_b_prime.style

Unnamed: 0,0
0,-2.912
1,2.161528
2,-3.1235
3,18.022614
4,2.590282


In [89]:
# Display the coefficients a obtained by solving the normal equations Ma = b'
df_a.style

Unnamed: 0,a_j
a_1,-0.242667
a_2,0.360255
a_3,-0.520583
a_4,3.003769
a_5,0.431714


In [90]:
# Report the error metrics (SSE, MSE, RMSE) returned by least_squares_fit
print(f"Sum of Squared Errors (S_min): {sse: .12f}")
print(f"Mean Squared Error (MSE):     {mse: .12f}")
print(f"Root Mean Square Error (RMSE):  {rmse: .12f}")

Sum of Squared Errors (S_min):  2.554753192516
Mean Squared Error (MSE):      0.212896099376
Root Mean Square Error (RMSE):   0.461406652939


# Non-linear 

## Reading data

In [91]:
#Reading file

def _sniff_xy_delimiter(first_line):
    """Guess the column delimiter from the first data line of an x/y file.

    Preference order: ';', then whitespace (only if the line splits into
    exactly two parseable numbers), then ','. The comma comes last because it
    is ambiguous with the decimal comma. Falls back to ';' with a warning.
    """
    if ';' in first_line:
        return ';'

    # Any whitespace counts (spaces or tabs); ' ' is the marker the parser
    # uses for "split on runs of whitespace".
    if re.search(r'\s', first_line):
        parts = re.split(r'\s+', first_line)
        if len(parts) == 2:
            try:
                float(parts[0].replace(',', '.'))
                float(parts[1].replace(',', '.'))
                return ' '
            except ValueError:
                # Not a valid 2-column whitespace-delimited float line
                pass

    if ',' in first_line:
        return ','

    print("Warning: Could not auto-detect delimiter. Falling back to ';'.")
    return ';'


def parse_xy_data(filepath, delimiter=None):
    """
    Reads a CSV-like file with x, y data and returns a list of (x, y) tuples.

    Blank lines and lines starting with '#' are ignored. Commas inside a
    field are treated as decimal separators (e.g. "1,5" -> 1.5). Lines that
    do not have exactly two columns, or whose columns are not numbers, are
    skipped with a printed warning that includes the line number.

    Args:
        filepath (str): The path to the data file.
        delimiter (str, optional): The column delimiter (e.g., ';', ' ').
                                   ' ' means "any run of whitespace".
                                   If None (or empty), the delimiter is
                                   auto-detected from the first data line.

    Returns:
        list: A list of (x, y) float tuples.
              Returns an empty list if the file cannot be read or is empty.
    """
    # --- 1. Read the file once ---
    # Only the I/O is guarded, so genuine programming errors in the parsing
    # logic below are not silently converted into an empty result.
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            numbered_lines = [
                (line_number, raw_line.strip())
                for line_number, raw_line in enumerate(f, 1)
            ]
    except FileNotFoundError:
        print(f"Error: File not found at '{filepath}'")
        return []
    except Exception as e:
        print(f"An error occurred while reading the file: {e}")
        return []

    # Keep only data lines, remembering their original line numbers
    content_lines = [
        (line_number, text)
        for line_number, text in numbered_lines
        if text and not text.startswith('#')
    ]

    # --- 2. Delimiter Sniffing (if not provided) ---
    if delimiter:
        detected_delimiter = delimiter
    else:
        # Sniff on the first *data* line so a leading comment cannot mislead it
        first_line = content_lines[0][1] if content_lines else ""
        detected_delimiter = _sniff_xy_delimiter(first_line)

    print(f"Using delimiter: '{detected_delimiter}'")

    # --- 3. Line Parsing ---
    data_points = []
    for line_number, line in content_lines:
        if detected_delimiter == ' ':
            # Regex split handles multiple spaces and tabs
            parts = re.split(r'\s+', line)
        else:
            parts = line.split(detected_delimiter)

        # Ensure we have exactly two columns
        if len(parts) != 2:
            print(f"Warning: Skipping malformed line {line_number}: '{line}'. Expected 2 columns, found {len(parts)}")
            continue

        x_str, y_str = parts
        try:
            # KEY STEP: Replace comma with dot for float conversion
            x_val = float(x_str.strip().replace(',', '.'))
            y_val = float(y_str.strip().replace(',', '.'))
        except ValueError as e:
            print(f"Warning: Could not parse numbers on line {line_number}: '{line}'. Error: {e}")
            continue

        data_points.append((x_val, y_val))

    return data_points

In [92]:
# Path of the data file to load; change it to point to your actual file
file_to_read = '0.csv'

# The banner reports the file that is actually read
print(f"--- Example: Reading '{file_to_read}' ---")

# Check if the file exists before trying to read it
if os.path.exists(file_to_read):
    # Call the function to parse the file.
    # It will try to auto-detect the delimiter.
    all_points = parse_xy_data(file_to_read)

    if all_points:
        print(f"Successfully parsed {len(all_points)} data points:")
        df_input = pd.DataFrame(all_points, columns=['x', 'y'])
        print(df_input.to_string(index=False))
    else:
        print(f"Could not parse any data points from '{file_to_read}'.")
        print("Please check the file format and warnings above.")

else:
    print(f"Error: The file '{file_to_read}' was not found.")
    print("Please create this file or change 'file_to_read' variable")
    print("to point to your existing data file.")

--- Example: Reading '19data.csv' ---
Using delimiter: ','
Successfully parsed 141 data points:
   x          y
 1.0  2.2621524
 1.1  2.4367657
 1.2  2.5980224
 1.3  2.7453105
 1.4  2.8781576
 1.5  2.9962354
 1.6  3.0993633
 1.7  3.1875101
 1.8  3.2607942
 1.9  3.3194825
 2.0  3.3639878
 2.1  3.3948645
 2.2  3.4128035
 2.3  3.4186244
 2.4  3.4132685
 2.5  3.3977883
 2.6  3.3733377
 2.7  3.3411602
 2.8  3.3025764
 2.9  3.2589711
 3.0  3.2117790
 3.1  3.1624709
 3.2  3.1125386
 3.3  3.0634802
 3.4  3.0167850
 3.5  2.9739188
 3.6  2.9363090
 3.7  2.9053306
 3.8  2.8822923
 3.9  2.8684234
 4.0  2.8648616
 4.1  2.8726418
 4.2  2.8926854
 4.3  2.9257912
 4.4  2.9726276
 4.5  3.0337259
 4.6  3.1094747
 4.7  3.2001164
 4.8  3.3057443
 4.9  3.4263024
 5.0  3.5615852
 5.1  3.7112402
 5.2  3.8747712
 5.3  4.0515434
 5.4  4.2407898
 5.5  4.4416186
 5.6  4.6530225
 5.7  4.8738882
 5.8  5.1030082
 5.9  5.3390923
 6.0  5.5807807
 6.1  5.8266579
 6.2  6.0752662
 6.3  6.3251209
 6.4  6.5747245
 6.5  6.

## Modifying the data

In [93]:
def modify_value(original_points: List[Tuple[float, float]],
                 transform: Callable[[float, float], Tuple[float, float]] = lambda x, y: (x, y)
) -> List[Tuple[float, float]]:
    """Apply a user-provided transform to each (x, y) and return the transformed list.

    Args:
        original_points: list of (x, y) tuples.
        transform: callable that takes (x, y) and returns (X, Y).
                   Defaults to the identity transform.
                   Example: lambda x, y: (x, np.log(y))

    Returns:
        List of transformed (X, Y) tuples, in the same order as the input.

    Raises:
        ValueError: if `transform` raises, or does not return exactly two
            values, for some point. The original exception is attached as
            `__cause__`.
    """
    transformed_points: List[Tuple[float, float]] = []
    for x, y in original_points:
        try:
            X, Y = transform(x, y)
        except Exception as e:
            # Chain explicitly so the underlying failure stays visible
            raise ValueError(f"Error applying transform to point (x={x}, y={y}): {e}") from e
        transformed_points.append((X, Y))

    return transformed_points

## Linear combination: y = ax + b sin(x)

In [94]:
def fit_linear_model(
    transformed_points: List[Tuple[float, float]]
) -> Tuple[float, float, float, float]:
    """
    Fits data to Y = A*X + B*sin(X) by linear least squares.

    The model is already linear in its coefficients, so no
    back-transformation is needed: a = A and b = B.

    Args:
        transformed_points: List of (X, Y) data tuples.

    Returns:
        (A, B, a, b): the solved coefficients, given twice so that the return
        shape matches the other fit_* helpers. All four are None if the
        normal equations could not be solved.
    """

    # --- 1. Define the Linear Problem ---
    # We are fitting Y = A*X + B*SINX
    # Basis function for A is phi_1(X) = X
    # Basis function for B is phi_2(X) = SINX
    linear_basis = [
        lambda x: x,  # For A
        lambda x: np.sin(x)   # For B
    ]

    # --- 2. Solve for Linear Coefficients (A, B) ---
    fit_result = least_squares_fit(
        transformed_points,
        linear_basis
    )

    # A failed solve is reported either as a bare None or as a result tuple
    # whose coefficient entry (index 4) is None; check before indexing.
    if fit_result is None or fit_result[4] is None:
        return None, None, None, None

    # linear_coeffs is [ [A], [B] ]
    linear_coeffs = fit_result[4]

    # --- 3. Transform Coefficients Back ---
    A = linear_coeffs[0][0]  # A = a
    B = linear_coeffs[1][0]  # B = b

    a = A
    b = B

    return A, B, a, b

In [95]:
# Identity transform: the points parsed from the file are passed through unchanged
print("Original (x, y) converted to (X, Y) where X=x, Y=y")
transformed_points = modify_value(all_points, lambda x, y: (x,y))

df_transformed = pd.DataFrame(transformed_points, columns=['X_i', 'Y_i'])

Original (x, y) converted to (X, Y) where X=x, Y=y


In [96]:
# --- Run the Full Process ---
# Fit Y = A*X + B*sin(X) to the points prepared in the previous cell
A, B, a,b = fit_linear_model(transformed_points)

In [97]:
# Report the solved coefficients; for this model a = A and b = B
print(f"Solved Linear A (a) = {A: .12f}")
print(f"Solved Linear B (b)     = {B: .12f}")
print(f"-> Non-Linear a = A = {a: .12f}")
print(f"-> Non-Linear b = B   = {b: .12f}")
print("-" * 30)
# Balanced parentheses: the sine term is written as sin(x)
print(f"Final Equation: y = {a:.6f} * x + {b:.6f} * sin(x)")

Solved Linear A (a) =  0.999999346380
Solved Linear B (b)     =  1.500002978916
-> Non-Linear a = A =  0.999999346380
-> Non-Linear b = B   =  1.500002978916
------------------------------
Final Equation: y = 0.999999 * x  + 1.500003 * sinx)


## y = ae^(bx) (a > 0)

In [98]:
def fit_exponential_model(
    transformed_points: List[Tuple[float, float]]
) -> Tuple[float, float, float, float]:
    """
    Fits data to y = a * exp(b*x) via the linearization ln(y) = ln(a) + b*x.

    The points must already be linearized by the caller, i.e. each tuple is
    (X, Y) = (x, ln(y)). This function fits Y = A + B*X and maps the
    result back with a = exp(A), b = B.

    Args:
        transformed_points: List of (X, Y) = (x, ln(y)) data tuples.

    Returns:
        (A, B, a, b): the linear coefficients A = ln(a), B = b followed by
        the non-linear coefficients a, b. All four are None if the normal
        equations could not be solved.
    """

    # --- 1. Define the Linear Problem ---
    # We are fitting Y = A + B*X
    # Basis function for A is phi_1(X) = 1
    # Basis function for B is phi_2(X) = X
    linear_basis = [
        lambda x: 1,  # For A
        lambda x: x   # For B
    ]

    # --- 2. Solve for Linear Coefficients (A, B) ---
    fit_result = least_squares_fit(
        transformed_points,
        linear_basis
    )

    # A failed solve is reported either as a bare None or as a result tuple
    # whose coefficient entry (index 4) is None; check before indexing.
    if fit_result is None or fit_result[4] is None:
        return None, None, None, None

    # linear_coeffs is [ [A], [B] ]
    linear_coeffs = fit_result[4]

    # --- 3. Transform Coefficients Back ---
    A = linear_coeffs[0][0]  # A = ln(a)
    B = linear_coeffs[1][0]  # B = b

    a = np.exp(A)
    b = B

    return A, B, a, b

In [99]:
# --- Example Data: (x, y) samples used for the exponential fit ---
example_points = [
    (1, 7.1),
    (2, 27.8),
    (3, 63.1),
    (4, 110),
    (5, 161)
]

In [100]:
# Linearize for y = a*e^(bx): keep x, replace y by ln(y)
print("Original (x, y) converted to (X, Y) where X=x, Y=ln(y)")
transformed_points = modify_value(example_points, lambda x, y: (x, np.log(y)))

df_transformed = pd.DataFrame(transformed_points, columns=['X_i', 'Y_i'])

# Display the transformed points
df_transformed.style

Original (x, y) converted to (X, Y) where X=x, Y=ln(y)


Unnamed: 0,X_i,Y_i
0,1,1.960095
1,2,3.325036
2,3,4.144721
3,4,4.70048
4,5,5.081404


In [101]:
# --- Run the Full Process ---
# Fit Y = A + B*X to the (x, ln(y)) points, then recover a = e^A, b = B
A, B, a,b = fit_exponential_model(transformed_points)

In [102]:
# Report the linear coefficients and the recovered exponential-model parameters
print(f"Solved Linear A (ln(a)) = {A: .12f}")
print(f"Solved Linear B (b)     = {B: .12f}")
print(f"-> Non-Linear a = e^A = {a: .12f}")
print(f"-> Non-Linear b = B   = {b: .12f}")
print("-" * 30)
print(f"Final Equation: y = {a:.6f} * e^({b:.6f} * x)")

Solved Linear A (ln(a)) =  1.556928208923
Solved Linear B (b)     =  0.761806350697
-> Non-Linear a = e^A =  4.744225569408
-> Non-Linear b = B   =  0.761806350697
------------------------------
Final Equation: y = 4.744226 * e^(0.761806 * x)


## y = ax^b (a>0, x>=0)

In [103]:
def fit_power_law_model(
    transformed_points: List[Tuple[float, float]]
) -> Tuple[float, float, float, float]:
    """
    Fits data to y = a * x^b via the linearization ln(y) = ln(a) + b*ln(x).

    The points must already be linearized by the caller, i.e. each tuple is
    (X, Y) = (ln(x), ln(y)). This function fits Y = A + B*X and maps the
    result back with a = exp(A), b = B.

    Args:
        transformed_points: List of (X, Y) = (ln(x), ln(y)) data tuples.

    Returns:
        (A, B, a, b): the linear coefficients A = ln(a), B = b followed by
        the non-linear coefficients a, b. All four are None if the normal
        equations could not be solved.
    """

    # --- 1. Define the Linear Problem ---
    # We are fitting Y = A + B*X
    # Basis function for A is phi_1(X) = 1
    # Basis function for B is phi_2(X) = X
    linear_basis = [
        lambda x: 1,  # For A
        lambda x: x   # For B
    ]

    # --- 2. Solve for Linear Coefficients (A, B) ---
    fit_result = least_squares_fit(
        transformed_points,
        linear_basis,
    )

    # A failed solve is reported either as a bare None or as a result tuple
    # whose coefficient entry (index 4) is None; check before indexing.
    if fit_result is None or fit_result[4] is None:
        return None, None, None, None

    # linear_coeffs is [ [A], [B] ]
    linear_coeffs = fit_result[4]

    # --- 3. Transform Coefficients Back ---
    A = linear_coeffs[0][0]  # A = ln(a)
    B = linear_coeffs[1][0]  # B = b

    a = np.exp(A)
    b = B

    return A, B, a, b

In [104]:
# --- Example Data: same (x, y) samples as the exponential-fit example above ---
example_points = [
    (1, 7.1),
    (2, 27.8),
    (3, 63.1),
    (4, 110),
    (5, 161)
]


In [105]:
# Linearize for y = a*x^b: replace x by ln(x) and y by ln(y)
print("Original (x, y) converted to (X, Y) where X=ln(x), Y=ln(y)")
transformed_points = modify_value(example_points, lambda x, y: (np.log(x), np.log(y)))

df_transformed = pd.DataFrame(transformed_points, columns=['X_i', 'Y_i'])

# Display the transformed points
df_transformed.style

Original (x, y) converted to (X, Y) where X=ln(x), Y=ln(y)


Unnamed: 0,X_i,Y_i
0,0.0,1.960095
1,0.693147,3.325036
2,1.098612,4.144721
3,1.386294,4.70048
4,1.609438,5.081404


In [106]:
# --- Run the Full Process ---
# Fit Y = A + B*X to the (ln(x), ln(y)) points, then recover a = e^A, b = B
A, B, a, b = fit_power_law_model(transformed_points)

In [107]:
# Report the linear coefficients and the recovered power-law parameters
print(f"Solved Linear A (ln(a)) = {A: .12f}")
print(f"Solved Linear B (b)     = {B: .12f}")
print(f"-> Non-Linear a = e^A = {a: .12f}")
print(f"-> Non-Linear b = B   = {b: .12f}")
print("-" * 30)
print(f"Final Equation: y = {a:.6f} * x^({b:.6f})")

Solved Linear A (ln(a)) =  1.970942958206
Solved Linear B (b)     =  1.954472616720
-> Non-Linear a = e^A =  7.177441325559
-> Non-Linear b = B   =  1.954472616720
------------------------------
Final Equation: y = 7.177441 * x^(1.954473)
