In [1]:
### Find breakpoints

In [None]:
import pandas as pd
import pwlf
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the complexity / accumulation table; the path is relative to the
# notebook's working directory.
# NOTE(review): the file name mentions cakephp while the frame is named
# df_composer, and the name carries no .csv extension — confirm this is the
# intended input file.
csv_path = "complexity and accumulation for cakephp_15segments_RMSE"
df_composer = pd.read_csv(csv_path)

In [None]:
# Prepare the data for pwlf: pull the two columns out of the frame as NumPy arrays.
#   x -> 'accumulative_raw' (first argument handed to pwlf.PiecewiseLinFit)
#   y -> 'complexity_raw'   (second argument handed to pwlf.PiecewiseLinFit)
# Series.to_numpy() is the accessor pandas recommends over the legacy .values
# attribute; for plain NumPy-backed columns it yields the same ndarray.
x = df_composer['accumulative_raw'].to_numpy()
y = df_composer['complexity_raw'].to_numpy()

In [None]:
# Fit piecewise linear model with a chosen number of segments.
# fitfast() runs a multi-start optimisation from randomly sampled starting
# breakpoints, so without a fixed seed the breakpoints (and every plot built on
# them) change between runs. Passing `seed` to PiecewiseLinFit makes
# "Restart Kernel -> Run All" reproduce the same result.
pwlf_seed = 42
num_segments = 30
my_pwlf = pwlf.PiecewiseLinFit(x, y, seed=pwlf_seed)
breakpoints = my_pwlf.fitfast(num_segments)

# Display the breakpoint locations returned by fitfast
breakpoints

In [None]:
# Refit the model at the breakpoint locations found above and show the value
# the call returns (per the pwlf documentation, the sum of squared residuals
# of the fit).
fit_result = my_pwlf.fit_with_breaks(breakpoints)
fit_result

In [None]:
### Plot the graph

In [None]:
# Plot raw data
fig, ax = plt.subplots(figsize=(12, 5))
ax.scatter(x, y, s=8, label='raw')

# For every interval between two consecutive breakpoints, fit an independent
# least-squares line to the points that fall inside it and draw that line.
# Each interval is fitted on its own, so neighbouring lines are not forced to
# meet at the breakpoint they share.
for i, (x0, x1) in enumerate(zip(breakpoints[:-1], breakpoints[1:])):
    mask = (x >= x0) & (x <= x1)
    if mask.sum() < 2:
        # a line needs at least two points; nothing is drawn for this interval
        continue
    Xi, Yi = x[mask], y[mask]
    # design matrix [1, x] -> least-squares coefficients (intercept a, slope b)
    design = np.column_stack([np.ones_like(Xi), Xi])
    (a, b), *_ = np.linalg.lstsq(design, Yi, rcond=None)
    xs = np.linspace(Xi.min(), Xi.max(), 200)
    # only the first interval gets a legend entry, keeping the legend short
    ax.plot(xs, a + b * xs, linewidth=2, label=f'segment {i+1}' if i == 0 else None)

# Draw vertical lines at the interior breakpoints (the first and last entries
# are skipped)
for bp in breakpoints[1:-1]:
    ax.axvline(bp, linestyle='--', alpha=0.3)

# The original title had a closing parenthesis with no opening one; balanced here.
ax.set_title(f'Piecewise linear fit using your pwlf breakpoints ({num_segments} segments)')
ax.set_xlabel('accumulative complexity')
ax.set_ylabel('complexity')
ax.grid(True, linestyle='--', alpha=0.5)
ax.legend()
plt.show()