In [4]:
import numpy as np
from scipy.optimize import curve_fit
from scipy.signal import savgol_filter
import pandas as pd
from sklearn.linear_model import LinearRegression

df = pd.read_csv("cosas_finales/tiempos copy.csv")
df

def func_1(x, a, b):
    return a + b

def func_log(x, a, b):
    return a + b * np.log(x)

def func_n(x, a, b):
    return a + b * x

def func_nlogn(x, a, b):
    return a + b * x * np.log(x)

def func_n2(x, a, b):
    return a + b * x**2

def func_n3(x, a, b):
    return a + b * x**3

functions = [func_1, func_log, func_n, func_nlogn, func_n2, func_n3]
function_names = ["CTE", "LOG", "N", "NLOGN", "N2", "N3"]

errors = 0
errors_per_order = [0] * len(functions)
cases_per_order = [0] * len(functions)

for i in range(0, len(df)):
    # Read current test case
    row = df.iloc[i]
    x_data = np.array([row['tamano'], row['tamano.1'], row['tamano.2']])
    y_data = np.array([row['tiempo'], row['tiempo.1'], row['tiempo.2']])
    index = int(row['tipo'])
    problem_name = row['name']

    # Smooth the data for better fitting?
    #y_data = savgol_filter(y_data, window_length=3, polyorder=2)

    #Detect constant order through linear regression with small slope
    model = LinearRegression()
    x = np.log2(x_data).reshape(-1, 1)
    y = np.log2(y_data)
    model.fit(x, y)
    if(model.coef_[0] < 0.1):
        cases_per_order[0] += 1
        if index != 0:
            print("Problem:", problem_name,"Real order:", function_names[index],"vs Predicted order: CTE")
            errors += 1
            errors_per_order[index] += 1
        continue

    #find best_params
    best_func_id = -1
    best_ssr = np.Infinity

    for j in range(1, len(functions)):
        func_j = functions[j]
        params_j, cov_j = curve_fit(functions[j], x_data, y_data)
        # Calculate the residual sum of squares
        residuals_j = y_data - func_j(x_data, *params_j)
        # Calculate the sum of squares of the residuals
        ssr_j = np.sum(residuals_j**2)
        # Choose the function with the lowest sum of squares of the residuals
        if ssr_j < best_ssr:
            best_ssr = ssr_j
            best_func_id = j
            #best_params = params_j

    cases_per_order[index] += 1
    if best_func_id != index:
        #show only wrong results
        print("Problem:", problem_name,"Real order:", function_names[index], "vs Predicted order:", function_names[best_func_id])
        errors += 1
        errors_per_order[index] += 1


print("Total errors:", errors, "/", len(df), "Error percent:" , (errors/len(df)*100), "%")
for i in range(0, len(errors_per_order)):
    if cases_per_order[i] > 0:
        print("Order", function_names[i], "Errors:", errors_per_order[i], "/", cases_per_order[i], "Error percent:", (errors_per_order[i]/cases_per_order[i]*100), "%")

Problem: 140DanielVallejo Real order: N vs Predicted order: CTE
Problem: 140hnko Real order: N vs Predicted order: CTE
Problem: 140Orgalorexe Real order: N vs Predicted order: CTE
Total errors: 3 / 39 Error percent: 7.6923076923076925 %
Order CTE Errors: 0 / 15 Error percent: 0.0 %
Order N Errors: 3 / 18 Error percent: 16.666666666666664 %
Order N2 Errors: 0 / 6 Error percent: 0.0 %
