In [None]:
from pathlib import Path
import datetime
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px


In [None]:
# Relative path to the CSV file holding the extracted features.
DATA_PATH = '../01_extracted_features/internal_datasets/all_features_truncated.csv'
# pathlib wrapper around DATA_PATH (the loading cell rebuilds the same Path before reading the CSV).
data_path = Path(DATA_PATH)

In [None]:
def save_figure(fig, path, dpi=300):
    """
    Save a figure to a file, creating the destination directory if needed.

    Parameters
    ----------
    fig : object
        Any object exposing ``savefig(path, dpi=...)``
        (presumably a matplotlib Figure -- confirm against callers).
    path : str or os.PathLike
        Destination file. Plain strings are accepted as well as ``Path``
        objects.
    dpi : int, optional
        Resolution forwarded to ``fig.savefig`` (default 300).
    """
    # Normalise to a Path so that string inputs work too; the original code
    # called ``path.parent`` directly and failed on a plain str.
    path = Path(path)
    # Ensure parent directory exists or create it
    path.parent.mkdir(parents=True, exist_ok=True)
    # Save the figure
    fig.savefig(path, dpi=dpi)


# Read the feature table from disk
data_path = Path(DATA_PATH)
data = pd.read_csv(data_path)

# For every filename keep the text before the first '-', split it on '_'
# and drop the first and last pieces
filenames = [
    entry.partition('-')[0].split('_')[1:-1]
    for entry in data['filename'].values
]

# Read the first three remaining pieces as year, month and day
# (the time of day is left at midnight)
datetimes = [
    datetime.datetime(int(parts[0]), int(parts[1]), int(parts[2]))
    for parts in filenames
]

# Attach the dates as a new column, order rows chronologically and renumber the index
data = (
    data
    .assign(datetimes=datetimes)
    .sort_values(by='datetimes')
    .reset_index(drop=True)
)

# Plain-string version of each date, used as the x values in the plots
data['datetimes_str'] = data['datetimes'].map(str)

In [None]:
def plot_timeseries_sns(data, feature, level="replicate", figsize=(20, 10), dpi=300):
    """Plot a timeseries of a feature.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a 'datetimes_str' column plus the ``feature`` and
        ``level`` columns.
    feature : str
        Column plotted on the y axis; also used for the y label and title.
    level : str, optional
        Column used as the seaborn ``hue`` (default "replicate").
    figsize : tuple, optional
        Figure size forwarded to ``plt.subplots`` (default (20, 10)).
    dpi : int, optional
        Figure resolution forwarded to ``plt.subplots`` (default 300).

    Returns
    -------
    The figure object created by ``plt.subplots``.
    """
    fig, ax = plt.subplots(figsize=figsize, dpi=dpi)
    # Draw on the axes created above instead of relying on pyplot's
    # implicit "current axes".
    sns.lineplot(data=data, x='datetimes_str', y=feature, hue=level, ax=ax)

    # Tick labels keep only the part before the first space of each unique
    # 'datetimes_str' value.
    # NOTE(review): tick positions 0..n-1 assume one categorical x position
    # per unique string, in order of appearance -- confirm for unsorted input.
    date_labels = [stamp.split(' ')[0] for stamp in data['datetimes_str'].unique()]
    ax.xaxis.set_ticks(range(len(date_labels)))
    ax.set_xticklabels(date_labels, rotation=45)

    ax.set_xlabel('Date', fontsize=14)
    ax.set_ylabel(feature, fontsize=14)
    ax.set_title(f"{feature} over time", fontsize=18, fontweight='bold')

    ax.grid()
    # Lay out this specific figure rather than whatever pyplot considers current.
    fig.tight_layout()
    return fig

In [None]:
# Show the distinct values found in the 'dataset' column.
data['dataset'].unique()

In [None]:
# Dataset to inspect
dataset = 'accessions_dataset1'
# Keep only the rows whose 'dataset' column matches it
data_dataset = data.loc[data['dataset'].eq(dataset)]

# Show the distinct 'class' values present in that subset
data_dataset['class'].unique()

In [None]:
# Class to inspect within the selected dataset
class_ = 'Ws-2'

# Keep only the rows of that class
data_class = data_dataset.loc[data_dataset['class'].eq(class_)]
# List every column name from the fifth column onwards
data_class.columns[4:].tolist()

In [None]:
# Name of the feature column to plot.
feature = 'HoG'

# Static seaborn time series for the selected class, coloured by the 'replicate' column.
sns_fig = plot_timeseries_sns(data_class, feature, level="replicate")

In [None]:

# Interactive Plotly line chart of the same data, one colour per 'replicate'.
# Uses the `feature` variable set in the previous cell instead of repeating the
# 'HoG' literal, so the seaborn and Plotly charts always show the same column.
px.line(data_class, x='datetimes_str', y=feature, color='replicate')