In [None]:
import pandas as pd
import re
from io import StringIO
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import curve_fit
from scipy.optimize import minimize
import math

In [None]:
# load the overhead measurements from the text file into a dataframe:
# columns are separated by runs of whitespace, anything after '#' is ignored,
# and the file carries no header row (column names are supplied here)
df_overhead = pd.read_csv(
    'overhead_data.txt',
    sep=r'\s+',
    comment='#',
    header=None,
    names=['month', 'y_overhead_pct'],
)

In [None]:
# cast the month column to integers, then echo it to check the conversion
df_overhead = df_overhead.astype({'month': int})
print(df_overhead['month'])

In [None]:
# pull the fit inputs out of the dataframe as arrays:
# x is the month column, y is the overhead percentage column
x_overhead, y_overhead = (
    df_overhead[column].values for column in ('month', 'y_overhead_pct')
)

In [None]:
# define polynomial function
def polynomial(theta, x):
    """Evaluate a polynomial with coefficients ``theta`` at ``x``.

    Coefficients are ordered from the highest power down to the constant
    term, so ``theta = (a, b, c)`` gives ``a * x**2 + b * x + c``. Any number
    of coefficients is accepted; the degree is ``len(theta) - 1``.

    Parameters
    ----------
    theta : sequence of numbers
        Polynomial coefficients, highest power first.
    x : number or array-like
        Point(s) at which to evaluate the polynomial.

    Returns
    -------
    Value(s) of the polynomial at ``x``, as produced by ``np.polyval``.
    """
    # np.polyval uses the same highest-power-first ordering as the original
    # (a, b, c) convention, so three coefficients still mean a quadratic.
    return np.polyval(theta, x)

In [None]:
# mean squared error between the polynomial model and the observations
def loss_mse(theta, x, y):
    """Return the mean of the squared residuals ``polynomial(theta, x) - y``.

    ``theta`` is passed straight through to ``polynomial``; ``x`` are the
    inputs the model is evaluated at and ``y`` the values it is compared to.
    """
    residuals = polynomial(theta, x) - y
    return np.mean(residuals ** 2)

In [None]:
# starting point for the optimiser: all three coefficients begin at 0.1
theta0_overhead = np.full(3, 0.1)

In [None]:
# fit the polynomial model: search for the theta that minimises the MSE on the
# overhead data, starting from theta0_overhead, then show the optimiser report
result_overhead = minimize(
    fun=loss_mse,
    x0=theta0_overhead,
    args=(x_overhead, y_overhead),
)
print(result_overhead)

In [None]:
# take the optimiser's solution vector and unpack its three coefficients
theta_fit_overhead = result_overhead.x
a, b, c = theta_fit_overhead

# show the fitted coefficients in order a, b, c
print(*theta_fit_overhead)

In [None]:
# plot the data and the fitted polynomial curve
# NOTE: the figure is created and drawn in one cell on purpose. With the inline
# backend, figures are rendered and closed when a cell finishes, so a figure
# created in one cell cannot be drawn on from the next cell.

# evaluate the fitted polynomial on 100 evenly spaced points across the data range
xx = np.linspace(x_overhead.min(), x_overhead.max(), 100)
y_fit = polynomial(theta_fit_overhead, xx)

# plot data points and fitted curve
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(x_overhead, y_overhead, label = "overhead")
ax.plot(xx, y_fit, label = "test line", color = "red")
ax.set_xlabel("month")
ax.set_ylabel("overhead (%)")
ax.legend()  # the label= values above are only displayed once a legend is requested
plt.show()