In [None]:
# Scrape the CoinMarketCap landing page: locate the "Dominance" label and take
# the NFKD-normalized text of the element two levels above it.
soup = BeautifulSoup(requests.get("https://coinmarketcap.com/").text, "lxml")
dom = unicodedata.normalize("NFKD", soup.find(string="Dominance").parent.parent.text)

# Every "<NAME>: <number>%" fragment yields one (name, number) pair.
matches = re.findall(r"(\w+): (\d+\.\d+)%", dom)

# Map each matched name to its percentage as a float.
percentage_dict = {symbol: float(share) for symbol, share in matches}

print(percentage_dict)

In [None]:
# %%


def smooth_and_detect_peaks_multi(
    df: pd.DataFrame,
    window_daily: int = 12,  # 2 weeks for daily data
    polyorder: int = 2,
    peak_threshold: float = 0.15,
) -> pd.DataFrame:
    """
    Smooth every ``*_price`` column and flag local peaks/troughs.

    Each price column is split into an "hourly" part (rows whose gap to the
    previous row is at most one hour) and a "daily" part (everything else).
    Both parts are smoothed with a backward-looking polynomial fit and scanned
    for turning points; the results are recombined and written to
    ``{token}_smooth`` and ``{token}_peaks`` (1 = peak, -1 = trough, 0 = none).

    Parameters:
    - df: DataFrame containing one or more ``{token}_price`` columns.
    - window_daily: Window length (in rows) for the daily part; the hourly
      part uses ``window_daily * 24``.
    - polyorder: Degree of the polynomial fitted inside each window.
    - peak_threshold: Accepted for interface compatibility; not used by the
      current implementation.

    Returns:
    A de-duplicated copy of ``df`` (last row kept per index label) with the
    two extra columns per token. The input frame is not modified.
    """

    def rolling_savgol(series: pd.Series, window: int) -> pd.Series:
        """
        Backward-looking rolling polynomial smoother.

        For each position i the polynomial is fitted to the rows
        [i - window + 1, i] and evaluated at i, so no later row is used.
        The polynomial degree comes from the enclosing ``polyorder``.
        """
        # Work in float so the NaN placeholder is valid for integer input too.
        values = np.asarray(series.values, dtype=float)
        result = np.full(len(values), np.nan)

        # Too few points for a fit at the very start: keep the raw values.
        warmup = polyorder + 2
        result[:warmup] = values[:warmup]

        for i in tqdm(range(warmup, len(values))):
            start_idx = max(0, i - window + 1)
            window_data = values[start_idx : i + 1]

            if len(window_data) <= polyorder + 1:
                result[i] = values[i]
                continue

            try:
                x = np.arange(len(window_data))
                coeffs = np.polyfit(x, window_data, polyorder)
                # Smoothed value = fitted polynomial at the window's last point.
                result[i] = np.polyval(coeffs, len(window_data) - 1)
            except (np.linalg.LinAlgError, ValueError, TypeError):
                # Best effort: a failed fit falls back to the raw value, but
                # KeyboardInterrupt and friends are no longer swallowed.
                result[i] = values[i]

        return pd.Series(result, index=series.index)

    def detect_peaks(smooth_prices: pd.Series, raw_prices: pd.Series, window: int) -> pd.Series:
        """
        Flag peaks (1) and troughs (-1) on the smoothed series.

        ``raw_prices`` is accepted for interface compatibility and not used.
        A point is flagged only if ``find_peaks`` reports it inside its local
        window AND it is strictly above (below) its neighbours within
        ``lookahead`` rows on both sides.
        """
        prices = smooth_prices.values
        signals = np.zeros(len(prices), dtype=np.int64)

        lookahead = 1  # Maximum forward-looking window
        # Minimum samples between peaks; wider for hourly-sized windows.
        distance = 4 if window >= 480 else 3
        prominence = 0.0005  # Minimum prominence relative to neighbors
        offsets = range(1, lookahead + 1)

        for i in range(window, len(prices) - lookahead):
            # i >= window, so the window is [i - window, i + lookahead] and
            # the point under test sits at position `window` inside it.
            local_window = prices[i - window : i + lookahead + 1]
            center_idx = window

            # Scale the prominence with the local coefficient of variation.
            local_std = np.std(local_window) / np.mean(local_window)
            adaptive_prominence = max(prominence, local_std * 0.5)

            peak_indices, _ = find_peaks(local_window, distance=distance, prominence=adaptive_prominence)
            trough_indices, _ = find_peaks(-local_window, distance=distance, prominence=adaptive_prominence)

            if center_idx in peak_indices:
                # Slope confirmation. The previous range(1, lookahead) was
                # empty for lookahead == 1, so nothing was ever checked.
                if all(prices[i] > prices[i - a] and prices[i] > prices[i + a] for a in offsets):
                    signals[i] = 1
            elif center_idx in trough_indices:
                if all(prices[i] < prices[i - a] and prices[i] < prices[i + a] for a in offsets):
                    signals[i] = -1

        return pd.Series(signals, index=smooth_prices.index)

    def process_token(price_series: pd.Series) -> tuple[pd.Series, pd.Series]:
        """Smooth and scan one price column, handling its hourly and daily rows separately."""
        time_deltas = price_series.index.to_series().diff()
        is_hourly = time_deltas <= pd.Timedelta(hours=1)
        is_daily = ~is_hourly

        window_hourly = window_daily * 24

        hourly_prices = price_series[is_hourly]
        daily_prices = price_series[is_daily]

        # Report the covered range of each part; an empty part (e.g. a series
        # with no hourly rows) is skipped instead of raising IndexError.
        for segment in (hourly_prices, daily_prices):
            if not segment.empty:
                print(segment.index[0], segment.index[-1])

        smooth_hourly = rolling_savgol(hourly_prices, window_hourly)
        smooth_daily = rolling_savgol(daily_prices, window_daily)

        peaks_hourly = detect_peaks(smooth_hourly, hourly_prices, window_hourly)
        peaks_daily = detect_peaks(smooth_daily, daily_prices, window_daily)

        smooth_combined = pd.concat([smooth_daily, smooth_hourly]).sort_index()
        peaks_combined = pd.concat([peaks_daily, peaks_hourly]).sort_index()

        # Drop duplicate indices
        smooth_combined = smooth_combined[~smooth_combined.index.duplicated(keep="last")]
        peaks_combined = peaks_combined[~peaks_combined.index.duplicated(keep="last")]

        return smooth_combined, peaks_combined

    token_columns = [col for col in df.columns if col.endswith("_price")]

    # Work on a copy with duplicate index labels removed (last one wins).
    df = df.copy()
    df = df[~df.index.duplicated(keep="last")]

    for price_col in token_columns:
        token = price_col.replace("_price", "")

        print(f"Processing {token}...")

        smooth_series, peaks_series = process_token(df[price_col])

        # Align indices before assignment
        df[f"{token}_smooth"] = smooth_series.reindex(df.index)
        df[f"{token}_peaks"] = peaks_series.reindex(df.index)

    return df


# Run the smoothing / peak-detection pass over the indicator table.
# `new_master_df` is assigned by the add_token_indicators cell further down in
# this file, so that cell has to be executed before this line.
savgol_df = smooth_and_detect_peaks_multi(new_master_df)

In [None]:
# %%

from collections import OrderedDict


def add_token_indicators(
    df: pd.DataFrame,
    base_metrics=["price", "total_volume"],
    market_cols=[
        "total_market_cap",
        "total_volume",
        "alt_market_cap",
        "alt_dominance",
        "alt_fgindex",
    ],
) -> pd.DataFrame:
    """
    Adds technical indicators for each token, ensuring no forward-looking bias.

    Args:
            df: DataFrame with columns in format {token}_{metric}
            base_metrics: List of base metrics to process
    Returns:
            DataFrame with ordered columns and indicators
    """

    def expanding_min_max_scale(
        series: pd.Series,
        method: str = "min_max",
        # clip_quantile: bool = False,
        clip_on_min: bool = False,
        clip_value: float = None,
    ) -> pd.Series:
        """
        Rolling scaling using only past data
        """
        if method == "min_max":
            expanding_max = series.expanding().max()
            expanding_min = series.expanding().min()
            if clip_value and not clip_on_min:
                quantile = series.quantile(clip_value)
                expanding_max = expanding_max.clip(upper=quantile)
            if clip_value and clip_on_min:
                quantile = series.quantile(clip_value)
                expanding_min = expanding_min.clip(lower=quantile)

            denominator = expanding_max - expanding_min

            scaled_col = np.where(
                denominator != 0,
                np.clip((series - expanding_min) * 100 / denominator, 0, 100),
                0,
            )
        elif method == "percentile":
            expanding_percentile = series.expanding().apply(
                lambda x: pd.Series(x).rank(pct=True).iloc[-1] * 100, raw=False
            )
            scaled_col = expanding_percentile
        else:
            raise ValueError("Invalid method. Choose 'min_max' or 'percentile'.")

        return scaled_col

    def get_first_hourly_index(series: pd.Series) -> pd.Timestamp:
        """Find the first hourly index in the Series"""
        time_diffs = series.index.to_series().diff()
        hour_index = time_diffs[time_diffs < pd.Timedelta(days=1)].index[0]
        hour_idx = series.index.get_loc(hour_index)
        return hour_idx

    def calculate_roc(series: pd.Series, window: int) -> pd.Series:
        """
        Vectorized ROC calculation handling both daily and hourly data

        Args:
                series: Input time series
                window: Window size in days
        Returns:
                Rate of change series
        """
        result = pd.Series(index=series.index, dtype=float)
        hour_loc = get_first_hourly_index(series)

        # Daily data calculation
        daily_slice = slice(None, hour_loc)
        result.iloc[daily_slice] = series.iloc[daily_slice] / series.iloc[daily_slice].shift(window) - 1

        # Hourly data calculation
        hourly_slice = slice(hour_loc, None)
        hourly_window = window * 24
        result.iloc[hourly_slice] = series.iloc[hourly_slice] / series.iloc[hourly_slice].shift(hourly_window) - 1

        return result.fillna(0)

        # print(get_first_hourly_index(series))
        # dp(series.iloc[:get_first_hourly_index(series)], series.iloc[get_first_hourly_index(series):])

        # return (series / series.shift(window) - 1).fillna(0)

    def calculate_acceleration(series: pd.Series, short_window: int, long_window: int) -> pd.Series:
        """Vectorized acceleration calculation"""
        return (calculate_roc(series, short_window) - calculate_roc(series, long_window)).fillna(0)

    # Extract unique tokens
    tokens: Set[str] = {col.split("_")[0] for col in df.columns if "price" in col}

    # Initialize new DataFrame to maintain column order
    result_df = pd.DataFrame(index=df.index)

    # Process each token
    for token in tokens:
        token_cols = []  # Track columns for this token

        # Base metrics
        for metric in base_metrics:
            col_name = f"{token}_{metric}"
            if col_name in df.columns:
                result_df[col_name] = df[col_name]
                token_cols.append(col_name)

        # Price indicators
        if f"{token}_price" in df.columns:
            price = df[f"{token}_price"]

            # Core price indicators
            indicators = {
                "price_scaled": expanding_min_max_scale(price),
                # "price_roc_14d": calculate_roc(price, 14),
                # "price_roc_30d": calculate_roc(price, 30),
                # "price_accel": calculate_acceleration(price, 14, 30),
            }

            # Add indicators to result
            for suffix, values in indicators.items():
                col_name = f"{token}_{suffix}"
                result_df[col_name] = values
                token_cols.append(col_name)

        # Volume indicators
        if f"{token}_total_volume" in df.columns:
            volume = df[f"{token}_total_volume"]

            # Core volume indicators
            indicators = {
                "volume_scaled": expanding_min_max_scale(volume, clip_value=0.995, clip_on_min=False),
                # "volume_roc_14d": calculate_roc(volume, 14),
                # "price_roc_30d": calculate_roc(volume, 30),
                # "price_accel": calculate_acceleration(price, 14, 30),
            }

            # Add indicators to result
            for suffix, values in indicators.items():
                col_name = f"{token}_{suffix}"
                result_df[col_name] = values
                token_cols.append(col_name)

        # Reorder columns for this token
        result_df = result_df.reindex(
            columns=[col for col in result_df.columns if col not in token_cols] + sorted(token_cols)
        )

    # Market-wide indicators
    # if len(tokens) > 1:
    # market_cols = []

    # Total market volume
    # total_volume = sum(
    #     df[f"{token}_total_volume"]
    #     for token in tokens
    #     if f"{token}_total_volume" in df.columns
    # )

    for col in market_cols:
        result_df[col] = df[col]
        if col == "alt_dominance":
            result_df[f"{col}_scaled"] = expanding_min_max_scale(df[col], clip_value=0.01, clip_on_min=True)
        else:
            result_df[f"{col}_scaled"] = expanding_min_max_scale(df[col])

    result_df["alt_dominance_roc_30d"] = calculate_roc(df["alt_dominance"], 30)

    # result_df["market_total_volume"] = total_volume
    # result_df["market_total_volume_scaled"] = expanding_min_max_scale(total_volume)
    # market_cols.extend(["market_total_volume", "market_total_volume_scaled"])

    # Volume dominance
    # for token in tokens:
    #     if f"{token}_total_volume" in df.columns:
    #         col_name = f"{token}_volume_dominance"
    #         result_df[col_name] = (
    #             df[f"{token}_total_volume"] / total_volume
    #         ).fillna(0)
    #         market_cols.append(col_name)

    # # Move market columns to the end
    # non_market_cols = [col for col in result_df.columns if col not in market_cols]
    # result_df = result_df.reindex(columns=non_market_cols + sorted(market_cols))

    cols_order = []
    for token in tokens:
        col_suffixes = [
            f"{token}_{suffix}"
            for suffix in [
                "price",
                "price_scaled",
                # "price_roc_14d",
                # "price_roc_30d",
                # "price_accel",
                "total_volume",
                "volume_scaled",
                # "volume_roc_14d",
            ]
        ]
        cols_order.extend(col_suffixes)

    for col in market_cols:
        cols_order.extend([col, f"{col}_scaled"])

    cols_order.insert(cols_order.index("alt_dominance_scaled") + 1, "alt_dominance_roc_30d")

    result_df = result_df[cols_order]

    # Clean up any remaining infinities or NaNs
    # result_df = result_df.replace([np.inf, -np.inf], np.nan)
    # result_df = result_df.fillna(method="ffill").fillna(0)

    return result_df


# Build the indicator table from `master_df` (assigned by the
# align_and_merge_dataframes cell further down in this file); the bare name on
# the last line is there so the notebook displays the result.
new_master_df = add_token_indicators(master_df)
new_master_df


In [None]:
# %%
# Plot a 1/0 availability mask over time for a column:
# 1 where the column holds a value, 0 where it is null/NaN.
def plot_data_availability(df, col_name):
    """
    Draw a 0/1 line showing where a column has data.

    Parameters:
    df (pd.DataFrame): Frame holding the column; its index is used as x-axis
    col_name (str): Name of the column whose missing values are inspected
    """
    # 1 where the column holds a value, 0 where it is missing.
    availability = df[col_name].notna().astype(int)

    fig = px.line(
        x=df.index,
        y=availability,
        title=f"{col_name} Data Availability (1=Valid, 0=NA)",
    )

    # Integer ticks only: the mask never takes values other than 0 and 1.
    y_axis = {"tickmode": "linear", "tick0": 0, "dtick": 1}
    fig.update_layout(xaxis_title="Time", yaxis_title="Data Available", yaxis=y_axis)
    fig.show()


# Example usage:
# A token name is whatever precedes the first underscore of a column name.
token_names = list({col.split("_")[0] for col in df.columns})

# One availability chart per token, based on its price column.
for token in token_names:
    plot_data_availability(df, f"{token}_price")

In [None]:
# %%
# -- Align and merge dataframes --


def align_and_merge_dataframes(
    primary_df: pd.DataFrame,
    secondary_dfs: list[tuple[pd.DataFrame, list[str], dict[str, str]]],
    ffill_limit: int = None,
) -> pd.DataFrame:
    """
    Align and merge multiple dataframes with a primary dataframe based on datetime index.

    Every secondary frame is restricted to the requested columns, renamed,
    reindexed onto the (sorted) primary index and forward filled, then
    appended column-wise. Rows of a secondary frame whose timestamp does not
    occur in the primary index are dropped by the reindex.

    Parameters:
    -----------
    primary_df : pd.DataFrame
            The primary dataframe with datetime index to which others will be aligned
    secondary_dfs : list of tuples
            List of (dataframe, columns_to_include, rename_dict) tuples where:
            - dataframe: DataFrame to be merged
            - columns_to_include: List of column names to include
            - rename_dict: Dictionary mapping original column names to new names;
              columns it does not mention keep their name
    ffill_limit : int, optional
            Maximum number of consecutive NaN values to forward fill
            (None fills without limit)

    Returns:
    --------
    pd.DataFrame
            Merged dataframe with aligned datetime index

    Raises:
    -------
    ValueError
            If the primary index is not a DatetimeIndex or a requested column
            is missing from a secondary frame
    """
    if not isinstance(primary_df.index, pd.DatetimeIndex):
        raise ValueError("Primary DataFrame index must be DatetimeIndex")

    result_df = primary_df.copy().sort_index()

    for df, cols, rename_dict in secondary_dfs:
        if missing_cols := set(cols) - set(df.columns):
            raise ValueError(f"Columns {missing_cols} not found in secondary DataFrame")

        selected = df[cols]
        # reindex() refuses duplicate source labels; keep the last row per timestamp.
        selected = selected[~selected.index.duplicated(keep="last")]

        # Fill the aligned block itself, so every selected column is filled,
        # including those that rename_dict leaves under their original name.
        processed_df = (
            selected.rename(columns=rename_dict)
            .sort_index()
            .reindex(result_df.index)
            .ffill(limit=ffill_limit)
        )

        result_df = pd.concat([result_df, processed_df], axis=1)

    return result_df


# Load the per-token table, then attach the market-wide series to its index.
df = pd.read_csv("token_data.csv", index_col=0, parse_dates=True)
dp(df)
master_df = align_and_merge_dataframes(
    primary_df=df,
    secondary_dfs=[
        # (fg_df, ["score"], {"score": "fgindex"}),
        (alt_fgindex, ["value"], dict(value="alt_fgindex")),
        (
            mcap_df,
            ["total_market_cap", "total_volume", "alt_market_cap", "alt_dominance"],
            # Identity mapping: these columns keep their names.
            dict(
                total_market_cap="total_market_cap",
                total_volume="total_volume",
                alt_market_cap="alt_market_cap",
                alt_dominance="alt_dominance",
            ),
        ),
    ],
)

master_df


In [None]:
# %%

def smooth_and_detect_peaks_multi(
    df: pd.DataFrame,
    window_daily: int = 20,
    polyorder: int = 3,
    lookahead: int = 3,
) -> pd.DataFrame:
    """
    Combined smoothing and peak detection using forward-looking Savitzky-Golay

    For every ``*_price`` column the hourly rows (gap to the previous row of
    at most one hour) and the remaining rows are processed separately and
    recombined into ``{token}_smooth`` and ``{token}_peaks`` (1 = peak,
    -1 = trough, 0 = none).

    Parameters:
    - df: DataFrame containing one or more ``{token}_price`` columns.
    - window_daily: Base window in rows for daily data; hourly data uses
      ``window_daily * 24``.
    - polyorder: Degree of the fitted polynomial.
    - lookahead: Number of FUTURE rows included in each fit window. The last
      ``lookahead`` rows of each part therefore get no smoothed value (NaN).

    Returns:
    A de-duplicated copy of ``df`` (last row kept per index label) with the
    two extra columns per token.
    """

    def rolling_savgol_with_peaks(series: pd.Series, window: int) -> tuple[pd.Series, pd.Series]:
        """
        Rolling Savitzky-Golay with peak detection in single pass
        Returns (smooth_series, peaks_series)
        """
        # Work in float so the NaN placeholder is valid for integer input too.
        values = np.asarray(series.values, dtype=float)
        smooth_result = np.full(len(values), np.nan)
        peaks_result = np.zeros(len(values))

        # Sampling rate of this part, judged by the median gap between rows.
        time_deltas = series.index.to_series().diff()
        is_hourly = time_deltas.median() <= pd.Timedelta(hours=1)

        # For hourly data, use larger lookbehind to capture daily patterns
        # For daily data, use smaller lookbehind to maintain responsiveness
        lookbehind = window if is_hourly else window // 2
        prominence = 0.00001 if is_hourly else 0.0001

        # Too few points for a fit at the very start: keep the raw values.
        min_points = polyorder + 2
        smooth_result[:min_points] = values[:min_points]

        for i in tqdm(range(min_points, len(values) - lookahead)):
            # Window = up to `lookbehind` past rows, row i, `lookahead` future rows.
            start_idx = max(0, i - lookbehind)
            window_data = values[start_idx : i + lookahead + 1]

            if len(window_data) <= polyorder + 1:
                smooth_result[i] = values[i]
                continue

            x = np.arange(len(window_data))
            try:
                coeffs = np.polyfit(x, window_data, polyorder)
            except (np.linalg.LinAlgError, ValueError, TypeError):
                # Best effort: a failed fit falls back to the raw value.
                smooth_result[i] = values[i]
                continue

            smoothed_window = np.polyval(coeffs, x)

            # Row i sits at this offset inside the fitted window.
            current_pos = i - start_idx
            smooth_result[i] = smoothed_window[current_pos]

            # The turning-point test needs two fitted neighbours on each side.
            if not (2 <= current_pos < len(smoothed_window) - 2):
                continue

            local_std = np.std(smoothed_window) / np.mean(smoothed_window)
            adaptive_prominence = max(prominence, local_std * 0.1)

            centre = smoothed_window[current_pos]
            before = smoothed_window[current_pos - 1]
            after = smoothed_window[current_pos + 1]
            neighbourhood = smoothed_window[current_pos - 2 : current_pos + 3]
            # NOTE(review): this is an absolute price step compared with a
            # relative (std / mean) threshold - confirm that is intended.
            step = abs(centre - before)

            if centre > before and centre > after and centre == max(neighbourhood):
                if step > adaptive_prominence:
                    peaks_result[i] = 1
            elif centre < before and centre < after and centre == min(neighbourhood):
                if step > adaptive_prominence:
                    peaks_result[i] = -1

        return (
            pd.Series(smooth_result, index=series.index),
            pd.Series(peaks_result, index=series.index),
        )

    def process_segment(segment: pd.Series, window: int) -> tuple[pd.Series, pd.Series]:
        """Smooth one part; an empty part yields empty float series on the same index type."""
        if segment.empty:
            # Keeping the segment's own (datetime) index and a float dtype
            # stops the later concat from degrading dtypes.
            blank = pd.Series(np.array([], dtype=float), index=segment.index)
            return blank, blank.copy()
        return rolling_savgol_with_peaks(segment, window)

    def process_token(price_series: pd.Series) -> tuple[pd.Series, pd.Series]:
        """Process single token"""
        time_deltas = price_series.index.to_series().diff()
        is_hourly = time_deltas <= pd.Timedelta(hours=1)
        is_daily = ~is_hourly

        window_hourly = window_daily * 24

        smooth_hourly, peaks_hourly = process_segment(price_series[is_hourly], window_hourly)
        smooth_daily, peaks_daily = process_segment(price_series[is_daily], window_daily)

        # Combine results
        smooth_combined = pd.concat([smooth_daily, smooth_hourly]).sort_index()
        peaks_combined = pd.concat([peaks_daily, peaks_hourly]).sort_index()

        return (
            smooth_combined[~smooth_combined.index.duplicated(keep="last")],
            peaks_combined[~peaks_combined.index.duplicated(keep="last")],
        )

    # Work on a copy with duplicate index labels removed (last one wins).
    df = df.copy()[~df.index.duplicated(keep="last")]

    for price_col in [col for col in df.columns if col.endswith("_price")]:
        token = price_col.replace("_price", "")
        smooth_series, peaks_series = process_token(df[price_col])

        df[f"{token}_smooth"] = smooth_series.reindex(df.index)
        df[f"{token}_peaks"] = peaks_series.reindex(df.index)

    return df

# Run the combined smoother only on the columns whose name contains "swap"
# (DataFrame.filter(like=...) is a substring match on the column labels).
savgol_df = smooth_and_detect_peaks_multi(new_master_df.filter(like = 'swap'))

In [None]:
# %%

# df = pd.read_csv("token_data.csv", index_col=0, parse_dates=True)


# def reindex_to_daily(df, cutoff_datetime):
#     """
#     Reindex the DataFrame to daily frequency for all dates prior to the given datetime.

#     Parameters:
#     df (pd.DataFrame): The DataFrame to reindex
#     cutoff_datetime (datetime): The datetime value to use as the cutoff

#     Returns:
#     pd.DataFrame: The reindexed DataFrame
#     """
#     # Ensure the index is in datetime format
#     #df.index = pd.to_datetime(df.index)

#     # Split the DataFrame into two parts: before and after the cutoff datetime
#     before_cutoff = df[df.index < cutoff_datetime]
#     after_cutoff = df[df.index >= cutoff_datetime]

#     # Reindex the 'before_cutoff' part to daily frequency
#     before_cutoff = before_cutoff[before_cutoff.index.time == pd.Timestamp("00:00:00").time()]

#     # Concatenate the two parts back together
#     reindexed_df = pd.concat([before_cutoff, after_cutoff]).sort_index()

#     return reindexed_df

# # Example usage
# df_ = reindex_to_daily(df, '2023-11-20 08:00:00')

# df_.to_csv("token_data.csv")

In [None]:
# %%


def plot_token_signals(df: pd.DataFrame, tokens: list[str] = None, days_back: int = 180) -> go.Figure:
    """
    Plot price, smoothed price, and peaks/troughs for multiple tokens

    One subplot per token, stacked vertically, all on a log y-axis. The frame
    must contain ``{token}_price``, ``{token}_smooth`` and ``{token}_peaks``
    for every plotted token.

    Parameters:
    - df: DataFrame with token data
    - tokens: List of token names (without _price suffix); defaults to every
      ``*_price`` column, in alphabetical order
    - days_back: Accepted for interface compatibility; currently NOT applied,
      the full index of ``df`` is plotted

    Returns:
    - The assembled plotly figure (not shown by this function)

    Raises:
    - ValueError: if there is no token to plot
    """
    price_suffix = "_price"
    if tokens is None:
        # Strip only the trailing suffix and sort for a reproducible layout.
        tokens = sorted({col[: -len(price_suffix)] for col in df.columns if col.endswith(price_suffix)})

    if not tokens:
        raise ValueError("No tokens to plot: pass `tokens` or provide '*_price' columns")

    fig = make_subplots(
        rows=len(tokens),
        cols=1,
        subplot_titles=[t.upper() for t in tokens],
        vertical_spacing=0.05,
    )

    # (flag value in the peaks column, legend name, marker symbol, marker color)
    marker_specs = (
        (1, "Peaks", "triangle-down", "red"),
        (-1, "Troughs", "triangle-up", "green"),
    )

    for idx, token in enumerate(tokens, 1):
        # Legend entries are only emitted for the first subplot.
        in_legend = idx == 1
        price_col = f"{token}{price_suffix}"

        fig.add_trace(
            go.Scatter(
                x=df.index,
                y=df[price_col],
                name=f"{token} Price",
                line=dict(color="lightgray"),
                showlegend=in_legend,
            ),
            row=idx,
            col=1,
        )

        fig.add_trace(
            go.Scatter(
                x=df.index,
                y=df[f"{token}_smooth"],
                name="Smoothed Price",
                line=dict(color="blue"),
                showlegend=in_legend,
            ),
            row=idx,
            col=1,
        )

        # Peaks and troughs are drawn at the raw price of the flagged rows.
        for flag, label, symbol, color in marker_specs:
            flagged = df[df[f"{token}_peaks"] == flag]
            if len(flagged) > 0:
                fig.add_trace(
                    go.Scatter(
                        x=flagged.index,
                        y=flagged[price_col],
                        mode="markers",
                        name=label,
                        marker=dict(symbol=symbol, size=10, color=color),
                        showlegend=in_legend,
                    ),
                    row=idx,
                    col=1,
                )

    fig.update_layout(
        height=300 * len(tokens),
        title_text="Token Prices with Peak Detection",
        showlegend=True,
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    )

    # Update y-axes to log scale
    fig.update_yaxes(type="log")

    return fig


# Render the price / smoothed price / turning-point chart for every token in
# `savgol_df`; as the cell's last expression the returned figure is displayed.
plot_token_signals(savgol_df)

In [None]:
# %%

# -- Plotting the indicators --


# Code to plot a list of columns in plotly and indicating whether they are a primary or secondary y axis
def plot_columns(df, columns, secondary_y_cols=None, marker_col=None, price_col=None):
    """
    Plot specified columns with primary and secondary y-axes, and add markers for peaks and troughs.

    Parameters:
    df (pd.DataFrame): The DataFrame containing the data
    columns (list): List of column names to plot
    secondary_y_cols (list): List of column names to plot on secondary y-axis
    marker_col (str): Column name for markers (+1 for peaks, -1 for troughs);
        ignored when empty or not a column of ``df``
    price_col (str): Column whose values position the markers. Defaults to the
        price column matching ``marker_col`` ("<token>_peaks" -> "<token>_price").

    Raises:
    ValueError: if markers are requested, ``price_col`` is not given and
        ``marker_col`` does not end in "_peaks"
    """
    if secondary_y_cols is None:
        secondary_y_cols = []

    fig = make_subplots(specs=[[{"secondary_y": True}]])

    for col in columns:
        fig.add_trace(
            go.Scatter(x=df.index, y=df[col], name=col, mode="lines"),
            secondary_y=col in secondary_y_cols,
        )

    if marker_col and marker_col in df.columns:
        if price_col is None:
            # Previously this read a module-level `token` variable left over
            # from another cell; derive the price column explicitly instead.
            marker_suffix = "_peaks"
            if not marker_col.endswith(marker_suffix):
                raise ValueError(
                    f"Cannot derive the price column from marker_col={marker_col!r}; pass price_col explicitly"
                )
            price_col = marker_col[: -len(marker_suffix)] + "_price"

        # Markers share the axis on which the price column is drawn.
        markers_on_secondary = price_col in secondary_y_cols

        # (flag value, legend name, marker symbol, marker color)
        marker_specs = (
            (1, "Peaks", "triangle-down", "red"),
            (-1, "Troughs", "triangle-up", "green"),
        )
        for flag, label, symbol, color in marker_specs:
            # Rows with a zero price are skipped.
            flagged = df[(df[marker_col] == flag) & (df[price_col] != 0)]
            fig.add_trace(
                go.Scatter(
                    x=flagged.index,
                    y=flagged[price_col],
                    mode="markers",
                    name=label,
                    marker=dict(symbol=symbol, size=10, color=color),
                ),
                secondary_y=markers_on_secondary,
            )

    fig.update_layout(
        title="Plot of Specified Columns",
        xaxis_title="Date",
        hovermode="x unified",
        showlegend=True,
    )

    fig.update_yaxes(title_text="Primary Y-Axis", secondary_y=False)
    fig.update_yaxes(title_text="Secondary Y-Axis", secondary_y=True)

    fig.show()


# Example usage:
# columns_to_plot = ["alt_dominance", "alt_dominance_scaled", "alt_fgindex", "alt_fgindex_scaled"]
# Columns drawn in the chart; commented-out entries are alternatives that are
# currently switched off.
columns_to_plot = [
    "alt_dominance",
    "alt_dominance_scaled",
    # "alt_fgindex_scaled",
    "alt_market_cap",
    # "total_market_cap_scaled",
    "alt_market_cap_scaled",
    # "alt_dominance_roc_30d",
]
# columns_to_plot = []
# Columns listed here go on the secondary y-axis; a name that is not in
# `columns_to_plot` (here "alt_dominance_roc_30d") has no effect because
# plot_columns only iterates over `columns_to_plot`.
secondary_y_columns = ["alt_market_cap", "alt_dominance_roc_30d"]
plot_columns(new_master_df, columns_to_plot, secondary_y_columns)


In [None]:

# %%
# Download the Fear & Greed chart data from the fixed start timestamp up to
# "now" and turn the returned list of records into a DataFrame.
cmc_fgindex = pd.DataFrame(
    requests.get(
        f"https://api.coinmarketcap.com/data-api/v3/fear-greed/chart?start=1356978600&end={int(datetime.now().timestamp())}"
    ).json()["data"]["dataList"]
)

# Epoch seconds -> timestamps, then use them as a sorted index.
cmc_fgindex = (
    cmc_fgindex.assign(timestamp=pd.to_datetime(cmc_fgindex["timestamp"], unit="s"))
    .set_index("timestamp")
    .sort_index()
)

cmc_fgindex

# %%
from pathlib import Path


def get_fear_greed_data(save_path="fear_greed_history.csv"):
    """
    Fetch and maintain Fear & Greed Index data with proper handling of
    daily historical and hourly recent data.

    On the first call (no file at ``save_path``) the full history is
    downloaded, the last row is dropped, timestamps are rounded to the day and
    the result is written to ``save_path``. On later calls the stored history
    is merged with a fresh download and the file is rewritten if anything
    changed.

    Parameters:
    save_path (str): Path to save/load the csv file

    Returns:
    pd.DataFrame: Complete Fear & Greed Index history

    Raises:
    requests.RequestException: if the download fails, times out or returns an
        HTTP error status
    """

    def fetch_raw_data():
        """Fetch raw Fear & Greed Index data, indexed by timestamp and sorted."""
        response = requests.get(
            f"https://api.coinmarketcap.com/data-api/v3/fear-greed/chart?start=1356978600&end={int(datetime.now().timestamp())}",
            timeout=30,  # never hang forever on a stalled connection
        )
        # Fail with a clear HTTP error instead of a confusing JSON/KeyError.
        response.raise_for_status()

        df = pd.DataFrame(response.json()["data"]["dataList"])
        df["timestamp"] = pd.to_datetime(df["timestamp"], unit="s")
        dp(df)
        return df.set_index("timestamp").sort_index()

    def find_hourly_entry(df):
        """Return the last timestamp that follows its predecessor by less than a day, or None."""
        time_diffs = df.index.to_series().diff()
        hourly_entries = time_diffs[time_diffs < pd.Timedelta(days=1)]
        return hourly_entries.index[-1] if not hourly_entries.empty else None

    def create_initial_file():
        """Create initial file with historical daily data (excluding latest hourly)"""
        df = fetch_raw_data()

        # Exclude the last row, which is assumed to be the intraday entry.
        df = df[:-1]

        # Round to daily for historical data
        df.index = df.index.round("D")

        # Remove any duplicates after rounding
        df = df[~df.index.duplicated(keep="last")].sort_index()
        return df

    def update_data(existing_df):
        """Update with latest data, maintaining daily history and adding new days"""
        new_df = fetch_raw_data()

        try:
            hourly_entry = find_hourly_entry(new_df)

            if hourly_entry is None:
                print("No hourly data found in new data")
                return existing_df

            # Split strictly before / from the intraday entry. A label slice
            # (new_df[:hourly_entry]) includes its end point, which used to
            # put the intraday row into the history as well, where rounding
            # turned it into a wrong daily value.
            historical_data = new_df[new_df.index < hourly_entry].copy()
            latest_entry = new_df[new_df.index >= hourly_entry].copy()

            # Round historical to daily
            historical_data.index = historical_data.index.round("D")
            historical_data = historical_data[~historical_data.index.duplicated(keep="last")]

            # Freshly downloaded values win over stored ones for the same day.
            combined_historical = pd.concat([existing_df, historical_data])
            combined_historical = combined_historical[~combined_historical.index.duplicated(keep="last")]

            # Add the latest hourly entry
            updated_df = pd.concat([combined_historical, latest_entry])
            updated_df = updated_df[~updated_df.index.duplicated(keep="last")].sort_index()

            return updated_df

        except Exception as e:
            # Best effort: a failed merge keeps the stored history untouched.
            print(f"Error updating data: {e}")
            return existing_df

    file_path = Path(save_path)

    if not file_path.exists():
        # Initial file creation
        print("Creating new Fear & Greed Index database...")
        df = create_initial_file()
        df.to_csv(file_path)
        return df

    # Update existing file
    print("Updating Fear & Greed Index data...")
    existing_df = pd.read_csv(file_path, index_col=0, parse_dates=True)
    updated_df = update_data(existing_df)

    # Save if there are changes
    if not updated_df.equals(existing_df):
        print("New data found, saving updates...")
        updated_df.to_csv(file_path)
    else:
        print("No new data to update")

    return updated_df

    # Example usage:


# Create or refresh the local Fear & Greed history (default file
# "fear_greed_history.csv") and display the resulting frame.
fg_df = get_fear_greed_data()
fg_df

In [None]:
def first_two_consecutive_nonnan(column):
    """
    Return the positions where a value equals the value right after it.

    The result is a boolean Series containing only True entries, indexed by
    the labels of ``column`` at which ``column[i] == column[i + 1]``. NaN
    never compares equal, so runs of NaN are not reported.
    """
    # Compare every element with its successor.
    equals_next = column.eq(column.shift(-1))

    # Keep just the labels where the comparison holds.
    return equals_next[equals_next]

    # # Find indices where both current and next values are not NaN
    # non_nan_indices = column.notna() & shifted_column.notna()

    # # Get the first index where the condition is True
    # first_valid_index = non_nan_indices.idxmax() if non_nan_indices.any() else None

    # return first_valid_index


# For every column of `token_data_df`, show the column name together with the
# positions where a value repeats in the next row.
# NOTE(review): `dis` is not defined in this file - presumably a display helper; confirm.
for token in token_data_df.columns:
    dis(token, first_two_consecutive_nonnan(token_data_df[token]))