In [12]:
import pandas as pd
import numpy as np
from utilits import read_initial_csv

In [13]:
def plot_n_random_samples_interpolate_vs_initial_data(
        path_to_file,
        n_grid,
        interpolator_names,
        n_samples,
        seed=42
    ):
    """Plot n random samples from the initial dataframe VS interpolated data.

    For every sampled row, and for each of the 'mag' and 'magerr' columns,
    every requested interpolator is fitted on the row's ('mjd', value) pair,
    evaluated on an evenly spaced grid, and plotted next to the original
    points via ``plot_real_VS_interpolate``.

    Parameters
    ----------
    path_to_file : str
        Path forwarded to ``read_initial_csv``.
    n_grid : int
        Number of evenly spaced points between ``x.min()`` and ``x.max()``
        on which each fitted interpolator is evaluated.
    interpolator_names : iterable of str
        Names understood by ``fit_interpolator``.
    n_samples : int
        Number of rows to draw (without replacement).
    seed : int, optional
        Seed of the random generator used to pick the rows, so the same
        rows are selected on every run.

    Raises
    ------
    ValueError
        From ``random.sample`` when ``n_samples`` exceeds the number of
        rows, or from ``fit_interpolator`` for an unknown interpolator name.
    """
    import random

    # NOTE(review): 'TIC' and 'cadence_id' are read below although they are
    # not listed in columns_list; presumably read_initial_csv always keeps
    # them -- confirm against utilits.read_initial_csv.
    initial_data = read_initial_csv(
        path_to_file=path_to_file,
        columns_list=['mjd', 'mag', 'magerr']
    )

    # A dedicated generator makes `seed` effective without touching the
    # global `random` state.
    rng = random.Random(seed)
    indexs = rng.sample(range(initial_data.shape[0]), n_samples)

    for i in indexs:
        # The sampled values are row positions (drawn from range(n_rows)),
        # so they are resolved positionally rather than by index label.
        x = initial_data['mjd'].iloc[i]
        tic = initial_data['TIC'].iloc[i]
        cadence_id = initial_data['cadence_id'].iloc[i]
        # The evaluation grid depends only on x, so build it once per row.
        xnew = np.linspace(x.min(), x.max(), n_grid)

        for y_label in ['mag', 'magerr']:
            y = initial_data[y_label].iloc[i]

            for interpolator_name in interpolator_names:
                interpolator = fit_interpolator(interpolator_name, x, y)
                ynew = interpolator(xnew)
                # One row per curve: the original points and the fitted one.
                lst = [[x, y, 'real'],
                        [xnew, ynew, 'iterpolate']]
                df = pd.DataFrame(lst, columns=['x', 'y', 'label'])
                plot_real_VS_interpolate(
                    df,
                    y_label=y_label,
                    title=(
                        f"Variable {y_label} TIC {tic}"
                        f" cadence_id {cadence_id}"
                        f" interpolator {interpolator_name}"
                    )
                )

def fit_interpolator(interpolator, x, y):
    """Fit interpolator for initial data"""
    from scipy.interpolate import CubicSpline, UnivariateSpline, Akima1DInterpolator

    interpolator_object = None
    if interpolator == "CubicSpline":
        interpolator_object = CubicSpline(x, y)
    elif interpolator == "Akima1DInterpolator":
        interpolator_object = Akima1DInterpolator(x, y)
    elif interpolator == "UnivariateSpline":
        interpolator_object = UnivariateSpline(x, y, s=0.01)
    else:
        raise ValueError("Undefined interpolator name")
    return interpolator_object


def plot_real_VS_interpolate(df, y_label, title):
    """Plot data via plotly library, save it as HTML and display it.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per curve with the columns 'x' and 'y' (coordinates passed
        to ``go.Scatter``) and 'label' (the trace name).
    y_label : str
        Title of the Y axis.
    title : str
        Figure title. It is also used as the name of the HTML file written
        to the ``images`` directory; characters that are path separators or
        reserved in file names are replaced by ``_`` in the file name only.
    """
    import os
    import re

    import plotly.graph_objects as go

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs("images", exist_ok=True)

    fig = go.Figure()
    for _, row in df.iterrows():
        fig.add_trace(
            go.Scatter(
                x=row['x'],
                y=row['y'],
                mode='lines',
                name=row['label']
            )
        )

    fig.update_layout(
        title=title,
        xaxis_title='MJD',
        yaxis_title=y_label)

    # A title holding '/' (or another reserved character) would otherwise
    # point into a non-existent sub-directory or be rejected by the OS.
    # Titles without such characters keep exactly the same file name.
    file_name = re.sub(r'[\\/:*?"<>|]', '_', title)
    fig.write_html(os.path.join("images", f"{file_name}.html"))
    fig.show()

In [14]:
# Draw 10 random rows from the CSV and, for each of them, compare the three
# interpolators on a 100-point grid (one figure per row/variable/interpolator).
plot_n_random_samples_interpolate_vs_initial_data(
    n_samples=10,
    n_grid=100,
    interpolator_names=[
        'UnivariateSpline',
        'Akima1DInterpolator',
        'CubicSpline',
    ],
    path_to_file='generated_250k.csv',
)