# Cycloid Fitting

### Simplified to reduce notebook size

## Load Cycloid Data Points and Bezier Curves

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import curves.bezier as bezier
import curves.fitCurves as fit
import StressTools as tools
import utils
import fitting
import cycloidData
from scipy import stats
from os import path
from datetime import datetime

# Interior model, loaded by name through the project-local `utils` module.
interior = utils.import_interior('interior1')

# NOTE(review): not referenced anywhere else in the visible notebook — confirm it is still needed.
TOLERANCE = 1

# Cycloid data sets keyed by name; each entry exposes a `.curve`.
# The second load requests 1000 points per curve and is used as the analysis curve.
cycloids = cycloidData.load_all_cycloids()
highResCycloids = cycloidData.load_all_cycloids(points_per_curve=1000)

# Bounds used to denormalize optimizer parameters: value = p * (max - min) + min.
# Entries are ordered (phase, obliquity, longitude).
min_vals = np.array([0, 0.1, 0])
max_vals = np.array([360, 1, 360])
# One constraint dict per parameter, in the same order as the bounds above.
# The dicts are handed to the optimizer in `fitting`; the exact meaning of
# `wrapValue` / `unstick` is defined there — TODO confirm against that module.
constraints = [
    dict(wrapValue=True, minValue=1e-8, maxValue=1),
    dict(minValue=1e-8, maxValue=1, unstick=True),
    dict(wrapValue=True, minValue=0, maxValue=1)
]

# Same layout as `constraints`, but the second parameter is limited to 0.62-0.82.
tight_obliquity_contstraints = [
        dict(wrapValue=True, minValue=1e-8, maxValue=1),
        dict(minValue=0.62, maxValue=0.82, unstick=True),
        dict(wrapValue=True, minValue=0, maxValue=1)
    ]

def getConstraints(obliquity, longitude_max=1):
    """Build a constraint list whose second parameter is pinned to *obliquity*.

    The first and third entries keep the wrapping behaviour of the default
    constraints; the third entry's upper bound is *longitude_max*.

    Returns:
        A list of three constraint dicts (one per optimizer parameter).
    """
    phase_rule = {'wrapValue': True, 'minValue': 1e-8, 'maxValue': 1}
    # Equal min and max leave the optimizer no room to move this parameter.
    obliquity_rule = {'minValue': obliquity, 'maxValue': obliquity, 'unstick': False}
    longitude_rule = {'wrapValue': True, 'minValue': 0, 'maxValue': longitude_max}
    return [phase_rule, obliquity_rule, longitude_rule]

def get_longitude_only_constraints(obliquity, phase):
    """Build a constraint list that pins the first two parameters.

    The first entry is fixed at *phase* and the second at *obliquity*; only
    the third entry keeps a range (0 to 1, with wrapping).

    Returns:
        A list of three constraint dicts (one per optimizer parameter).
    """
    def _pinned(value):
        # Equal min and max fix the parameter at a single value.
        return {'minValue': value, 'maxValue': value, 'unstick': False}

    free_rule = {'wrapValue': True, 'minValue': 0, 'maxValue': 1}
    return [_pinned(phase), _pinned(obliquity), free_rule]



### Helper Functions

In [2]:
def logmessage(msg):
    """Print *msg* prefixed with the current local time as ``[HH:MM:SS]``."""
    stamp = datetime.now().strftime("%H:%M:%S")
    print(f'[{stamp}] {msg}')
    

def translate_params(params, minVals, maxVals):
    """Denormalize optimizer parameters into their physical ranges.

    Each value is mapped with ``p * (max - min) + min``.

    Args:
        params: Sequence of normalized parameter values.
        minVals: Lower bounds, one per possible parameter.
        maxVals: Upper bounds, one per possible parameter.

    Returns:
        A numpy array of denormalized values.
    """
    # Use the bounds that were passed in; the previous version silently
    # ignored them and read the module-level min_vals/max_vals instead.
    lower = np.asarray(minVals)
    upper = np.asarray(maxVals)

    if len(params) != 3:
        # Fits without a third parameter only use the first two bounds.
        lower, upper = lower[:2], upper[:2]

    return np.asarray(params) * (upper - lower) + lower

def setChartXLimit(points, plt):
    """Set the x-axis limits of *plt* from the ``'lon'`` column of *points*.

    The limits are padded by 2.5% of the longitude span on each side, and the
    larger bound is passed first.
    """
    BUFFER_PERCENT = 0.025

    lon = points['lon']
    high = lon.max()
    low = lon.min()
    pad = (high - low) * BUFFER_PERCENT

    # Larger value first, smaller value second.
    plt.xlim(high + pad, low - pad)


def check_fit(params, minVals, maxVals, curve, interior, tolerance=0.25, title='',
              verbose=True, path='./output/stressfield.csv.gz'):
    """Evaluate a parameter set against a curve and optionally plot the match.

    Args:
        params: Normalized optimizer parameters (two or three values).
        minVals: Lower bounds used to denormalize *params*.
        maxVals: Upper bounds used to denormalize *params*.
        curve: Curve data with ``'lon'`` and ``'lat'`` columns.
        interior: Interior model forwarded to ``fitting.match_stresses``.
        tolerance: Rows with ``deltaHeading`` below this value are plotted.
        title: Prefix for the plot titles.
        verbose: When true, draw the plots and print the denormalized values.
        path: Output path forwarded to ``fitting.match_stresses``. Inside this
            function the name hides the module-level ``os.path`` import, which
            is not used here.

    Returns:
        The data object returned by ``fitting.match_stresses``.
    """
    # Denormalize with the bounds that were passed in; the previous version
    # ignored them and read the module-level min_vals/max_vals instead.
    lower = np.asarray(minVals)
    upper = np.asarray(maxVals)
    if len(params) != 3:
        # Fits without a third parameter only use the first two bounds.
        lower, upper = lower[:2], upper[:2]
    variables = params * (upper - lower) + lower

    # The loss value is not needed here; only the per-point data is returned.
    data, _loss = fitting.match_stresses(curve,
                                         variables,
                                         interior,
                                         save_stress_field=True,
                                         path=path)
    if verbose:
        plt.figure()
        plt.title(f'{title} - Orientation Match')
        fit_points = data.loc[data['deltaHeading'] < tolerance].copy()

        if len(variables) >= 3:
            # Shift the matched points by the fitted third parameter before
            # overlaying them on the curve.
            fit_points['lon'] = fit_points['lon'] - variables[2]

        plt.plot(curve['lon'], curve['lat'])
        setChartXLimit(curve, plt)

        plt.scatter(fit_points['lon'], fit_points['lat'], alpha=0.3, color='green')

        plt.figure()
        plt.title(f'{title} - Stress Magnitude')
        plt.scatter(fit_points['pointNumber'], fit_points['stress'])

        print(np.array(variables))

    return data

def plot_time(data):
    """Scatter-plot the ``'time'`` column against ``'pointNumber'``.

    Time values below 180 are shifted up by 360 before plotting. The input
    frame is not modified; the shift is applied to a copy.
    """
    shifted = data.copy()
    times = np.array(shifted['time'])
    early = times < 180
    times[early] = times[early] + 360
    shifted['time'] = times

    plt.figure()
    plt.scatter(shifted['pointNumber'], shifted['time'], s=1)
    plt.title("Time")

def analyze_fit(opt, curve, tolerance, name=''):
    """Plot the optimizer loss history and check the best and final fits.

    Args:
        opt: Optimizer result; ``opt[0]`` is plotted as the loss history and
            ``opt[1]`` / ``opt[2]`` each carry a ``'parameters'`` entry.
        curve: Curve to evaluate the parameters against.
        tolerance: Heading tolerance forwarded to ``check_fit``.
        name: Prefix for the plot titles.

    Returns:
        A ``(bestCase, finalCase)`` tuple of ``check_fit`` results.
    """
    plt.figure()
    plt.title('Optimizer Loss Values')
    plt.plot(opt[0])

    best_entry = opt[1]
    final_entry = opt[2]
    display(best_entry)
    display(final_entry)

    best_case = check_fit(best_entry['parameters'], min_vals, max_vals, curve, interior,
                          tolerance=tolerance, title=f'{name} Best Fit')
    final_case = check_fit(final_entry['parameters'], min_vals, max_vals, curve, interior,
                           tolerance=tolerance, title=f'{name} Final Fit')

    # Only the best case gets a time-progression plot.
    plot_time(best_case)

    return best_case, final_case

def analyze_params(params, curve, tolerance, name='', verbose=True, folder='./output/', suffix=''):
    """Run ``check_fit`` for *params*, writing the stress field into *folder*.

    The stress field file is named ``{name}{suffix}StressField.csv.gz``. When
    *verbose* is true the time progression is plotted as well.

    Returns:
        The result of ``check_fit``.
    """
    stress_file = f'{name}{suffix}StressField.csv.gz'
    results = check_fit(params,
                        min_vals,
                        max_vals,
                        curve,
                        interior,
                        tolerance=tolerance,
                        title=f'{name} Highest Probability Fit',
                        verbose=verbose,
                        path=path.join(folder, stress_file))

    if not verbose:
        return results

    plot_time(results)
    return results

def direct_fit(curve, tolerance, show_plots=True, params=None):
    """Match stresses for *curve* using *params* as given.

    Unlike ``check_fit``, the parameters are handed to
    ``fitting.match_stresses`` without denormalization.

    Args:
        curve: Curve data with ``'lon'`` and ``'lat'`` columns.
        tolerance: Rows with ``deltaHeading`` below this value are plotted.
        show_plots: When true, draw the orientation, stress and time plots.
        params: Parameter list for ``fitting.match_stresses``; defaults to
            ``[0, 0, 0]``.

    Returns:
        The ``(data, loss)`` pair returned by ``fitting.match_stresses``.
    """
    if params is None:
        # Fresh list per call; a list literal as the default would be shared
        # between calls and could be mutated by the callee.
        params = [0, 0, 0]

    data, loss = fitting.match_stresses(curve, params, interior)
    fit_points = data.loc[data['deltaHeading'] < tolerance].copy()

    if show_plots:
        plt.figure()
        plt.plot(curve['lon'], curve['lat'])
        setChartXLimit(curve, plt)

        plt.scatter(fit_points['lon'], fit_points['lat'], alpha=0.3, color='green')
        plt.figure()
        plt.scatter(fit_points['pointNumber'], fit_points['stress'])

        plot_time(data)

    return data, loss

def process_cycloid(curve,
                    name,
                    paramCount = 2,
                    analysisCurve=None,
                    folder='./output/',
                    iterations=500,
                    constraints=tight_obliquity_contstraints,
                    verbose=True):
    """Optimize stress parameters for one cycloid and save the results.

    Writes ``{folder}{name}Fits.csv.gz`` (the optimizer history with
    denormalized parameters) and ``{folder}{name}BestFit.csv.gz`` (the
    per-point data for the selected parameters).

    Args:
        curve: Curve used during optimization.
        name: Cycloid name, used in log messages and file names.
        paramCount: Number of optimizer parameters (2, or 3 to include
            longitude).
        analysisCurve: Optional curve used for the final analysis instead of
            *curve*.
        folder: Output folder prefix; concatenated directly with the file
            name, so it should end with a path separator.
        iterations: Maximum optimizer iterations.
        constraints: Constraint list for the optimizer. The default is the
            tight obliquity list as it existed when this function was defined.
        verbose: Forwarded to the optimizer and to ``analyze_params``.
    """
    logmessage(f'Processing Cycloid: {name}')
    plt.close('all')

    # Random start for every parameter; the second one is then overridden
    # with a fixed starting value.
    start_params = [np.random.rand() for _ in range(paramCount)]
    start_params[1] = 0.67

    optimizer = fitting.Adam(alpha=0.05)

    opt = optimizer.minimize(
        fitting.test_stress_parameters,
        curve,
        start_params,
        interior,
        constraints=constraints,
        max_iterations=iterations,
        verbose=verbose,
        batch_size=16
    )

    params = fitting.find_best_parameters(opt)
    fullCurve = analysisCurve if analysisCurve is not None else curve

    logmessage(f'Analyzing optimization parameters for {name}')
    bestFit = analyze_params(np.array(params), fullCurve, 0.25, name, verbose, folder)

    # TODO: Handle dynamic parameter list
    cols = ['loss', 'phase', 'obliquity']
    has_longitude = len(params) == 3
    if has_longitude:
        cols.append('longitude')

    df = pd.DataFrame(opt[3], columns=cols)

    df['phase'] = df['phase'] * (max_vals[0] - min_vals[0]) + min_vals[0]
    df['obliquity'] = df['obliquity'] * (max_vals[1] - min_vals[1]) + min_vals[1]
    if has_longitude:
        # Denormalize longitude as well, so all parameter columns in the saved
        # history use physical units (matches process_cycloid_top_fits).
        df['longitude'] = df['longitude'] * (max_vals[2] - min_vals[2]) + min_vals[2]

    df.to_csv(f'{folder}{name}Fits.csv.gz', index=False, encoding='utf-8', compression='gzip')
    bestFit.to_csv(f'{folder}{name}BestFit.csv.gz', index=False, encoding='utf-8', compression='gzip')
    

def process_cycloid_top_fits(curve,
                    name,
                    paramCount = 2,
                    analysisCurve=None,
                    folder='./output/',
                    iterations=500,
                    constraints=constraints,
                    verbose=True,
                    number_of_fits=5):
    """Optimize stress parameters and save the lowest-loss fits.

    Writes one ``{folder}{name}BestFit{k}.csv.gz`` file per selected fit
    (``k`` starting at 1, lowest loss first) and ``{folder}{name}Fits.csv.gz``
    with the full optimizer history in denormalized units.

    Args:
        curve: Curve used during optimization.
        name: Cycloid name, used in file names.
        paramCount: Number of optimizer parameters (2, or 3 to include
            longitude).
        analysisCurve: Optional curve used for the final analysis instead of
            *curve*.
        folder: Output folder prefix; concatenated directly with the file
            name, so it should end with a path separator.
        iterations: Maximum optimizer iterations.
        constraints: Constraint list for the optimizer. The default is the
            module-level ``constraints`` list as it existed when this function
            was defined.
        verbose: Forwarded to the optimizer and to ``analyze_params``.
        number_of_fits: Maximum number of lowest-loss fits to analyze.
    """
    # Random start for every parameter; the second one is then overridden
    # with a fixed starting value.
    start_params = [np.random.rand() for _ in range(paramCount)]
    start_params[1] = 0.67

    optimizer = fitting.Adam(alpha=0.05)

    opt = optimizer.minimize(
        fitting.test_stress_parameters,
        curve,
        start_params,
        interior,
        constraints=constraints,
        max_iterations=iterations,
        verbose=verbose,
        batch_size=16
    )

    cols = ['loss', 'phase', 'obliquity']
    has_longitude = paramCount == 3
    if has_longitude:
        cols.append('longitude')
    history = pd.DataFrame(opt[3], columns=cols)

    # Take an explicit copy so the new column is not assigned on a slice, and
    # number by the rows actually present: the history can be shorter than
    # number_of_fits, which previously raised a length-mismatch error.
    fitFrame = history.sort_values('loss').head(number_of_fits).copy()
    fitFrame['FitNumber'] = range(1, len(fitFrame) + 1)

    fullCurve = analysisCurve if analysisCurve is not None else curve

    # `row` rather than `fit`, so the `fit` module alias is not hidden here.
    for row in fitFrame.itertuples():
        params = [row.phase, row.obliquity]
        if has_longitude:
            params.append(row.longitude)
        bestFit = analyze_params(np.array(params),
                                 fullCurve,
                                 0.25,
                                 name,
                                 verbose,
                                 folder,
                                 suffix=row.FitNumber)
        bestFit.to_csv(f'{folder}{name}BestFit{row.FitNumber}.csv.gz',
                       index=False, encoding='utf-8', compression='gzip')

    # Denormalize a copy of the history for export.
    df = history.copy()

    df['phase'] = df['phase'] * (max_vals[0] - min_vals[0]) + min_vals[0]
    df['obliquity'] = df['obliquity'] * (max_vals[1] - min_vals[1]) + min_vals[1]
    if has_longitude:
        df['longitude'] = df['longitude'] * (max_vals[2] - min_vals[2]) + min_vals[2]

    df.to_csv(f'{folder}{name}Fits.csv.gz', index=False, encoding='utf-8', compression='gzip')

    
def process_cycloid_by_name(name, folder='./output/', iterations=3000, constraints=constraints, verbose=False, paramCount=2):
    """Run ``process_cycloid`` for the loaded cycloid called *name*.

    The standard-resolution curve is used for optimization and the
    high-resolution curve for the final analysis. The default *constraints*
    is the module-level ``constraints`` list as it existed when this function
    was defined, and it is always passed on explicitly.
    """
    fit_curve = cycloids[name].curve
    detail_curve = highResCycloids[name].curve

    process_cycloid(fit_curve,
                    name,
                    paramCount=paramCount,
                    analysisCurve=detail_curve,
                    folder=folder,
                    iterations=iterations,
                    constraints=constraints,
                    verbose=verbose)

## Generate Cycloid Data in Full Obliquity Range

In [3]:
# Fit every loaded cycloid using process_cycloid_by_name's default constraints
# and write the results under ./output/fullObliquityRange/.
for key in cycloids:
    process_cycloid_by_name(key, 
                            iterations=2000,
                            folder='./output/fullObliquityRange/',
                            verbose=False)

[15:05:10] Processing Cycloid: alex
Iteration 150/2000 -- Loss Output: 1.4412968280813365 -- Moving Avg Loss: 3.187500242991867
	Parameters used: [0.64034691 0.48889579]
Iteration 300/2000 -- Loss Output: 0.06872068050246764 -- Moving Avg Loss: 0.07524279631654662
	Parameters used: [0.71643325 0.56122124]
Iteration 450/2000 -- Loss Output: 0.06574918896083055 -- Moving Avg Loss: 0.062303407176191684
	Parameters used: [0.75027096 0.59339873]
Iteration 600/2000 -- Loss Output: 11.010938307460401 -- Moving Avg Loss: 1.9546183074555403
	Parameters used: [0.01947357 0.8509783 ]
Iteration 750/2000 -- Loss Output: 0.06192930258936101 -- Moving Avg Loss: 0.059950053225756424
	Parameters used: [0.78400875 0.88523679]
Iteration 900/2000 -- Loss Output: 15.01961276388642 -- Moving Avg Loss: 7.748228653643021
	Parameters used: [0.37353927 0.50402265]
Iteration 1050/2000 -- Loss Output: 0.06493183854299693 -- Moving Avg Loss: 0.06120119246017501
	Parameters used: [0.79175633 0.90334334]
Iteration 1

## Non-Optimized Fits 

* 0.25 Deg Obliquity
* Phases 0, 60, 120, 180, 240 and 300 degrees

In [4]:
# Evaluate each cycloid at a fixed obliquity and a set of fixed phases,
# without running the optimizer.
losses = []
folder = './output/lockedFits/'

OBLIQUITY = 0.25
phases = [0, 60, 120, 180, 240, 300]

for current in cycloids:
    for phase in phases:
        print(f'Processing phase {phase}')
        # Parameters are passed as-is (direct_fit does not denormalize them);
        # the third value is left at 0.
        data, loss = direct_fit(cycloids[current].curve, 
                                0.25, 
                                show_plots=False, 
                                params=[phase, OBLIQUITY, 0])

        losses.append(dict(cycloid=current, loss=loss, phase=phase))
        print(current,'\t-', loss)

        # One gzip-compressed CSV of per-point data per (cycloid, phase) pair.
        filename = f'{folder}{current}-phase{phase}.csv.gz'
        data.to_csv(filename, index=False, compression='gzip')

# Summary table with one row per (cycloid, phase) pair.
lossFrame = pd.DataFrame(losses)
filename = f'{folder}Losses.csv'
lossFrame.to_csv(filename, index=False)

Processing phase 0
alex 	- 1.101083245680132
Processing phase 60
alex 	- 1.4971956846431838
Processing phase 120
alex 	- 0.6598343140300722
Processing phase 180
alex 	- 0.7702316207406991
Processing phase 240
alex 	- 0.43309652602466875
Processing phase 300
alex 	- 0.5071788920359364
Processing phase 0
carly 	- 0.35328613497182065
Processing phase 60
carly 	- 0.21267623114570441
Processing phase 120
carly 	- 0.24922552695109376
Processing phase 180
carly 	- 0.538335195440918
Processing phase 240
carly 	- 0.8777948253803868
Processing phase 300
carly 	- 1.8209416009303054
Processing phase 0
cilicia 	- 0.012293092595647051
Processing phase 60
cilicia 	- 0.01239612808496264
Processing phase 120
cilicia 	- 0.012877744702335128
Processing phase 180
cilicia 	- 0.013864483392336313
Processing phase 240
cilicia 	- 0.011287991762470551
Processing phase 300
cilicia 	- 0.011243036088465809
Processing phase 0
delphi 	- 0.013205383710725268
Processing phase 60
delphi 	- 0.013160345707482517
Process

## Direct Fits (no phase or obliquity) for all cycloids

In [5]:
# Evaluate each cycloid with direct_fit's default parameters ([0, 0, 0]),
# without running the optimizer.
losses = []
folder = './output/directFits/'

for current in cycloids:
    data, loss = direct_fit(cycloids[current].curve, 0.25, show_plots=False)

    losses.append(dict(cycloid=current, loss=loss))
    print(current,'\t-', loss)

    # One gzip-compressed CSV of per-point data per cycloid.
    filename = f'{folder}{current}.csv.gz'
    data.to_csv(filename, index=False, compression='gzip')

# Summary table with one row per cycloid.
lossFrame = pd.DataFrame(losses)
filename = f'{folder}directLosses.csv'
lossFrame.to_csv(filename, index=False)

alex 	- 0.90675499538864
carly 	- 0.4679169956204177
cilicia 	- 0.011850136742198897
delphi 	- 0.01290892547457404
dirk 	- 2.1386070161654858
mira 	- 2.261342358536797
odessa 	- 0.6678619449895173
sidon 	- 0.010174794559418736
tyrrel 	- 0.8524575485454637
yaphet 	- 0.21000699087411415


## Fits with Tight Obliquity

In [6]:
# Two-parameter fits for every cycloid with the tight obliquity constraints,
# keeping the five lowest-loss fits per cycloid.
# Only the standard-resolution curve is passed, so it is also used for the
# final analysis (no analysisCurve is given).
for cycloid in cycloids:
    process_cycloid_top_fits(cycloids[cycloid].curve,
                             cycloid,
                             folder='./output/tightObliquity/',
                             iterations=2000,
                             constraints=tight_obliquity_contstraints,
                             number_of_fits=5,
                             paramCount=2,
                             verbose=False
                            )

Iteration 150/2000 -- Loss Output: 8.028798871030212 -- Moving Avg Loss: 7.161403804839364
	Parameters used: [0.03865885 0.64632175]
Iteration 300/2000 -- Loss Output: 0.0849318188792649 -- Moving Avg Loss: 0.2742772959311655
	Parameters used: [0.64213435 0.6557327 ]
Iteration 450/2000 -- Loss Output: 0.07604066554203465 -- Moving Avg Loss: 0.2056619535944901
	Parameters used: [0.63431565 0.75516557]
Iteration 600/2000 -- Loss Output: 0.06912360607798695 -- Moving Avg Loss: 0.2669931409451784
	Parameters used: [0.64196153 0.67627278]
Iteration 750/2000 -- Loss Output: 0.0702255251940562 -- Moving Avg Loss: 0.1583508998035677
	Parameters used: [0.63486726 0.73519788]
Iteration 900/2000 -- Loss Output: 0.09585557062183099 -- Moving Avg Loss: 6.030047399311965
	Parameters used: [0.80287311 0.67053078]
Iteration 1050/2000 -- Loss Output: 0.11422449818758477 -- Moving Avg Loss: 0.24389000835591115
	Parameters used: [0.63916597 0.64868204]
Iteration 1200/2000 -- Loss Output: 0.05028402339152

## Fits with Specific Obliquity

In [7]:
# Lock the obliquity parameter by giving it identical min and max constraints.
# NOTE(review): rebinding the global `constraints` does not alter the defaults
# of the process_* functions (defaults are bound at definition time), but it
# does replace the full-range list for any later cell that reads `constraints`.
# NOTE(review): the constraint appears to act on the optimizer's normalized
# parameter (check_fit denormalizes with min_vals/max_vals); if so, 0.25 maps
# to 0.25 * (1 - 0.1) + 0.1 = 0.325 rather than 0.25 — confirm the intent.
OBLIQUITY = 0.25
constraints = getConstraints(OBLIQUITY, longitude_max=0)

# Two-parameter fits for every cycloid, keeping the five lowest-loss fits.
for cycloid in cycloids:
    process_cycloid_top_fits(cycloids[cycloid].curve,
                             cycloid,
                             folder='./output/lockedObliquity025/',
                             iterations=2000,
                             constraints=constraints,
                             number_of_fits=5,
                             paramCount=2,
                             verbose=False
                            )
    
# process_cycloid_top_fits(cycloids['alex'].curve,
#                          cycloid,
#                          folder='./output/lockedObliquity025/',
#                          iterations=2000,
#                          constraints=constraints,
#                          number_of_fits=5,
#                          paramCount=2,
#                          verbose=False
#                         )

Iteration 150/2000 -- Loss Output: 1.4248075384285983 -- Moving Avg Loss: 1.1883207512159861
	Parameters used: [0.74257724 0.25      ]
Iteration 300/2000 -- Loss Output: 2.2319747593251553 -- Moving Avg Loss: 0.8793087615137398
	Parameters used: [0.67137518 0.25      ]
Iteration 450/2000 -- Loss Output: 1.8551155894105715 -- Moving Avg Loss: 1.1156791536528265
	Parameters used: [0.7431446 0.25     ]
Iteration 600/2000 -- Loss Output: 2.2753502336085654 -- Moving Avg Loss: 1.6835541664231528
	Parameters used: [0.54834402 0.25      ]
Iteration 750/2000 -- Loss Output: 2.1466281134023473 -- Moving Avg Loss: 2.8258506444263882
	Parameters used: [0.58251043 0.25      ]
Iteration 900/2000 -- Loss Output: 1.8328936090408587 -- Moving Avg Loss: 1.0576207875360288
	Parameters used: [0.71936988 0.25      ]
Iteration 1050/2000 -- Loss Output: 5.922528065039167 -- Moving Avg Loss: 9.893401589187995
	Parameters used: [0.18281204 0.25      ]
Iteration 1200/2000 -- Loss Output: 4.039426328330896 -- M

## Testing with Longitude Translation

In [8]:
# Three-parameter fits (phase, obliquity, longitude) for every cycloid, using
# process_cycloid_by_name's default constraints; results go to ./output/nsr/.
for key in cycloids:
    process_cycloid_by_name(key, 
                            iterations=3000,
                            folder='./output/nsr/',
                            paramCount=3,
                            verbose=False)

[16:49:14] Processing Cycloid: alex
Iteration 150/3000 -- Loss Output: 3.993348575793139 -- Moving Avg Loss: 6.066777333419317
	Parameters used: [0.46012209 0.81010718 0.07752626]
Iteration 300/3000 -- Loss Output: 3.4830213137547266 -- Moving Avg Loss: 8.58269274451601
	Parameters used: [0.49358162 0.27003794 0.10659239]
Iteration 450/3000 -- Loss Output: 4.438351592889975 -- Moving Avg Loss: 7.21941105413801
	Parameters used: [0.4875027  0.55898334 0.17224785]
Iteration 600/3000 -- Loss Output: 6.5211076353545 -- Moving Avg Loss: 7.1587789803364394
	Parameters used: [0.46812296 0.12196166 0.11444358]
Iteration 750/3000 -- Loss Output: 0.2709421453561012 -- Moving Avg Loss: 0.24088192288309382
	Parameters used: [0.0180393  0.07847188 0.66818887]
Iteration 900/3000 -- Loss Output: 1.4457301532433176 -- Moving Avg Loss: 4.56963180786252
	Parameters used: [0.04175198 0.12000308 0.69987714]
Iteration 1050/3000 -- Loss Output: 0.6666224419993874 -- Moving Avg Loss: 5.516989568703891
	Param

## Experiments

In [None]:
# Build a history table from an optimizer result.
# NOTE(review): `output` is not defined anywhere in the visible notebook —
# presumably the return value of optimizer.minimize from a removed cell.
opt = pd.DataFrame(output[3], columns=['loss', 'phase', 'obliquity', 'longitude'])

# Ignore runs with a loss of 1 or more.
df = opt.loc[opt['loss'] < 1]

# Find loss threshold
# The threshold is the upper edge of the most populated of 100 loss bins.
loss_hist = np.histogram(df['loss'], bins=100, density=True)
index = np.argmax(loss_hist[0])
threshold = loss_hist[1][index + 1]

In [None]:
# Keep runs at or below the loss threshold and bin them over all three
# parameters (50 bins per axis).
best_fits = df.loc[df['loss'] <= threshold]
param_hist = np.histogramdd((best_fits['phase'], best_fits['obliquity'], best_fits['longitude']), bins=50, density=True)

# Two-parameter alternative, kept for reference:
# param_hist = param_hist = np.histogram2d(best_fits['phase'], best_fits['obliquity'], bins=50, density=True)

In [None]:
# Locate the densest bin and take the midpoint of its two edges along each
# axis as the parameter estimate.
index = np.unravel_index(np.argmax(param_hist[0]), param_hist[0].shape)
phase = np.average(param_hist[1][0][index[0]:index[0] + 2])
obliquity = np.average(param_hist[1][1][index[1]:index[1] + 2])
longitude = np.average(param_hist[1][2][index[2]:index[2] + 2])

In [None]:
# Display the estimated parameters.
phase, obliquity, longitude

In [None]:
# NOTE(review): `param_hist_3d` is not defined in the visible notebook — this
# may be a stale name for `param_hist`; confirm before running.
# The value of this first expression is not displayed (it is not the last
# expression in the cell).
param_hist_3d[1][2][index[2]:index[2] + 2]

# Per-bin densities of the loss histogram.
loss_hist[0]

In [None]:
# Index of the most populated loss bin.
np.argmax(loss_hist[0])

Compare the 2-D (`histogram2d`) and N-D (`histogramdd`) histogram estimates

In [None]:
# 2D
param_hist = np.histogram2d(best_fits['phase'], best_fits['obliquity'], bins=50, density=True)
index = np.unravel_index(np.argmax(param_hist[0]), param_hist[0].shape)
phase = np.average(param_hist[1][index[0]:index[0] + 2])
obliquity = np.average(param_hist[2][index[1]:index[1] + 2])

print('2D')
display((phase, obliquity))

#3d
param_hist = np.histogramdd((best_fits['phase'], best_fits['obliquity'], best_fits['longitude']), bins=50, density=True)
index = np.unravel_index(np.argmax(param_hist[0]), param_hist[0].shape)
phase = np.average(param_hist[1][0][index[0]:index[0] + 2])
obliquity = np.average(param_hist[1][1][index[1]:index[1] + 2])
longitude = np.average(param_hist[1][2][index[2]:index[2] + 2])

print('3D')
display((phase, obliquity, longitude))

### K-Means Clustering

In [None]:
from sklearn.cluster import KMeans

def find_best_parameters_kmeans(optimize_history, n_clusters=5):
    """Estimate phase and obliquity from the densest cluster of good fits.

    The lowest-loss 10% of *optimize_history* is clustered on the ``'phase'``
    and ``'obliquity'`` columns, and the centre of the cluster with the most
    members is returned.

    Args:
        optimize_history: DataFrame with ``'loss'``, ``'phase'`` and
            ``'obliquity'`` columns.
        n_clusters: Number of clusters to request; reduced automatically when
            fewer rows than this survive the loss filter.

    Returns:
        A ``(phase, obliquity)`` pair taken from the winning cluster centre.

    Raises:
        ValueError: If no rows survive the loss filter.
    """
    # Filter by low loss
    threshold = optimize_history['loss'].quantile(0.10)
    best_fits = optimize_history.loc[optimize_history['loss'] <= threshold]
    if best_fits.empty:
        raise ValueError('optimize_history contains no rows to cluster')

    features = best_fits[['phase', 'obliquity']]

    # KMeans rejects a cluster count larger than the number of samples.
    cluster_count = min(n_clusters, len(features))

    # Applying K-Means clustering; n_init='auto' matches the other KMeans call
    # in this notebook.
    kmeans = KMeans(n_clusters=cluster_count, n_init='auto').fit(features)

    # Finding the cluster with the highest frequency
    labels, counts = np.unique(kmeans.labels_, return_counts=True)
    most_common_cluster = labels[np.argmax(counts)]

    # The centroid of the most common cluster is the estimate
    phase, obliquity = kmeans.cluster_centers_[most_common_cluster]

    return phase, obliquity


In [None]:
# Estimate (phase, obliquity) from the optimizer history table.
find_best_parameters_kmeans(opt)

In [None]:
# Keep the lowest-loss 10% of runs. The cutoff is computed from the data
# rather than pasted in as a literal, so it stays correct when `opt` changes.
# A separate name is used so the `threshold` from the histogram cells is not
# overwritten.
loss_cutoff = opt['loss'].quantile(0.10)
df = opt[opt['loss'] <= loss_cutoff]
print(len(df))

# Cluster the surviving runs on all three parameters.
kmeans = KMeans(n_clusters=6, n_init='auto').fit(df[['phase', 'obliquity', 'longitude']])

In [None]:
# The first call's result is discarded; the second call repeats it and keeps
# the labels and their member counts.
np.unique(kmeans.labels_, return_counts=True)
labels, counts = np.unique(kmeans.labels_, return_counts=True)
# Label of the cluster with the most members.
most_common_cluster = labels[np.argmax(counts)]

In [None]:
# Centre of the most populated cluster.
kmeans.cluster_centers_[most_common_cluster]

In [None]:
# Inertia reported by the fitted KMeans model.
kmeans.inertia_

In [None]:
# Keep the lowest-loss 10% of runs. This overwrites the `threshold` and
# `best_fits` globals set by the histogram cells above.
threshold = opt['loss'].quantile(0.10)
best_fits = opt.loc[opt['loss'] <= threshold]

In [None]:
# Number of runs that passed the loss filter.
len(best_fits)

In [None]:
# Scratch check of floor division (evaluates to 2).
23 // 10