## Segmentation Notebook

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
## To check
import sovai as sov
sov.token_auth(token="your_token_here")

#### Processed Dataset

In [392]:
df_accounting = sov.data("accounting/weekly").select_stocks("mega")

In [389]:

import numpy as np
import pandas as pd
from numba import jit
from typing import Tuple
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import dash
from dash import dcc, html
from dash.dependencies import Input, Output



class ImprovedCusumDetector:
    def __init__(self, window_size_ratio: float = 0.05,
                 threshold_factor: float = 2.5,
                 drift_factor: float = 0.05,
                 min_distance: int = 30,
                 sideways_threshold: float = 0.03,
                 warm_up_period_ratio: float = 0.1):
        self.window_size_ratio = window_size_ratio
        self.threshold_factor = threshold_factor
        self.drift_factor = drift_factor
        self.min_distance = min_distance
        self.sideways_threshold = sideways_threshold
        self.warm_up_period_ratio = warm_up_period_ratio

    def detect_changepoints(self, signal: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray, int]:
        n = len(signal)
        window_size = max(int(n * self.window_size_ratio), 8)
        warm_up_period = int(n * self.warm_up_period_ratio)
        
        ewma = self._calculate_ewma(signal, window_size)
        z_scores = self._calculate_rolling_z_scores(signal, ewma, window_size)
        scores, changepoints, trend_codes = self._calculate_two_sided_cusum(
            signal, z_scores, self.drift_factor, self.threshold_factor, self.min_distance, self.sideways_threshold, warm_up_period)
        
        trends = np.where(trend_codes == 1, 'Increasing', 
                 np.where(trend_codes == -1, 'Decreasing', 'Sideways'))
        
        return changepoints, trends, scores, window_size

    @staticmethod
    @jit(nopython=True)
    def _calculate_ewma(signal, window_size):
        alpha = 2 / (window_size + 1)
        ewma = np.zeros_like(signal)
        ewma[0] = signal[0]
        for i in range(1, len(signal)):
            ewma[i] = alpha * signal[i] + (1 - alpha) * ewma[i-1]
        return ewma

    @staticmethod
    @jit(nopython=True)
    def _calculate_rolling_z_scores(signal, ewma, window_size):
        rolling_var = np.zeros_like(signal)
        for i in range(window_size, len(signal)):
            rolling_var[i] = np.var(signal[i-window_size+1:i+1])
        rolling_std = np.sqrt(rolling_var)
        z_scores = (signal - ewma) / (rolling_std + 1e-8)
        return z_scores

    @staticmethod
    @jit(nopython=True)
    def _calculate_two_sided_cusum(signal, z, drift, threshold_factor, min_distance, sideways_threshold, warm_up_period):
        cs_upper = np.zeros_like(z)
        cs_lower = np.zeros_like(z)
        changepoints = []
        trend_codes = []
        last_cp = -min_distance

        for i in range(1, len(z)):
            if i < warm_up_period:
                continue

            cs_upper[i] = max(0, cs_upper[i-1] + z[i] - drift)
            cs_lower[i] = min(0, cs_lower[i-1] + z[i] + drift)
            
            if i - last_cp >= min_distance:
                if cs_upper[i] > threshold_factor * np.sqrt(i):
                    changepoints.append(i)
                    trend_codes.append(1)
                    cs_upper[i] = 0
                    cs_lower[i] = 0
                    last_cp = i
                elif cs_lower[i] < -threshold_factor * np.sqrt(i):
                    changepoints.append(i)
                    trend_codes.append(-1)
                    cs_upper[i] = 0
                    cs_lower[i] = 0
                    last_cp = i
                elif i > 0 and abs((signal[i] - signal[last_cp]) / signal[last_cp]) < sideways_threshold:
                    changepoints.append(i)
                    trend_codes.append(0)
                    cs_upper[i] = 0
                    cs_lower[i] = 0
                    last_cp = i

        return np.maximum(cs_upper, -cs_lower), np.array(changepoints), np.array(trend_codes)

def plot_cusum_results(df_signal, signal_array, changepoints, trends, scores, ticker, feature):
    fig = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.1, 
                        subplot_titles=(f"{feature} Over Time for {ticker}", "CUSUM Score"))

    fig.add_trace(go.Scatter(
        x=df_signal.index,
        y=signal_array,
        mode='lines',
        line=dict(color='gold', width=2),
        name=feature
    ), row=1, col=1)

    fig.add_trace(go.Scatter(
        x=df_signal.index,
        y=scores,
        mode='lines',
        line=dict(color='rgba(30, 144, 255, 0.7)', width=1.5),
        name='CUSUM Score'
    ), row=2, col=1)

    colors = {'Increasing': 'lime', 'Decreasing': 'red', 'Sideways': 'yellow'}
    symbols = {'Increasing': 'triangle-up', 'Decreasing': 'triangle-down', 'Sideways': 'circle'}

    for trend in set(trends):
        mask = trends == trend
        fig.add_trace(go.Scatter(
            x=df_signal.index[changepoints[mask]],
            y=signal_array[changepoints[mask]],
            mode='markers',
            marker=dict(color=colors[trend], size=10, symbol=symbols[trend]),
            name=f'{trend}'
        ), row=1, col=1)

    fig.update_layout(
        title=f"{feature} Change Point Detection for {ticker}",
        plot_bgcolor='rgba(0,0,0,0.95)',
        paper_bgcolor='rgba(0,0,0,0.95)',
        font=dict(color='white', size=12),
        autosize=True,
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.06,
            xanchor="right",
            x=1,
            bgcolor='rgba(0,0,0,0.5)',
            bordercolor='rgba(255,255,255,0.5)',
        ),
        showlegend=True,
    )

    fig.update_xaxes(title_text="Date", row=2, col=1, gridcolor='rgba(255,255,255,0.2)')
    fig.update_yaxes(title_text=feature, type='log', row=1, col=1, gridcolor='rgba(255,255,255,0.2)')
    fig.update_yaxes(title_text="CUSUM Score", row=2, col=1, gridcolor='rgba(255,255,255,0.2)')

    for i, (cp, trend) in enumerate(zip(changepoints, trends)):
        fig.add_annotation(
            x=df_signal.index[cp],
            y=signal_array[cp],
            text=f"{trend[0]} {i+1}",
            showarrow=False,
            yshift=10 if trend == 'Increasing' else -10,
            font=dict(color=colors[trend]),
            row=1, col=1
        )

    return fig

# Assuming df_accounting is already loaded
app = dash.Dash(__name__)

app.layout = html.Div([
    
    
    html.Div([
        html.H3("Financial Metric Change Point Detection", style={'textAlign': 'center', 'marginBottom': '20px'}),
        html.Div([
            html.Label("Select Ticker:"),
            dcc.Dropdown(
                id='ticker-dropdown',
                options=[{'label': ticker, 'value': ticker} for ticker in df_accounting.index.get_level_values('ticker').unique()],
                value='AAPL'
            ),
        ], style={'width': '48%', 'display': 'inline-block'}),
        
        html.Div([
            html.Label("Select Feature:"),
            dcc.Dropdown(
                id='feature-dropdown',
                options=[{'label': col, 'value': col} for col in df_accounting.columns],
                value='total_revenue'
            ),
        ], style={'width': '48%', 'float': 'right', 'display': 'inline-block'})
    ], style={'padding': '20px 0px'}),
    
    dcc.Graph(id='cusum-plot', style={'height': '80vh'})  # Use viewport height
], style={'padding': '0 20px', 'margin': '0 auto'})

@app.callback(
    Output('cusum-plot', 'figure'),
    [Input('ticker-dropdown', 'value'),
     Input('feature-dropdown', 'value')]
)
def update_graph(selected_ticker, selected_feature):
    df_signal = df_accounting.query(f"ticker == '{selected_ticker}'").reset_index().set_index("date")[selected_feature]
    signal_array = df_signal.values

    detector = ImprovedCusumDetector()
    changepoints, trends, scores, window_size = detector.detect_changepoints(signal_array)

    return plot_cusum_results(df_signal, signal_array, changepoints, trends, scores, selected_ticker, selected_feature)

app.run_server(debug=False)

def perform_cusum_analysis(df_signal, ticker, feature):
    detector = ImprovedCusumDetector()
    signal_array = df_signal.values
    changepoints, trends, scores, window_size = detector.detect_changepoints(signal_array)

    # Create main analysis DataFrame
    analysis_df = pd.DataFrame({
        'Date': df_signal.index[changepoints],
        'Trend': trends,
        'Signal_Value': signal_array[changepoints],
        'CUSUM_Score': scores[changepoints - 1]  # Take the score from just before the change point
    })

    # Calculate duration between change points
    analysis_df['Duration'] = analysis_df['Date'].diff().shift(-1).fillna(pd.Timedelta(days=0))
    analysis_df['Duration_Days'] = analysis_df['Duration'].dt.days

    # Calculate percentage change between change points
    analysis_df['Percent_Change'] = (analysis_df['Signal_Value'].pct_change().shift(-1) * 100).fillna(0)

    # Overall statistics
    overall_stats = pd.DataFrame({
        'Statistic': [
            'Ticker', 'Feature', 'Total_Change_Points', 'Average_Duration_Days',
            'Average_Percent_Change', 'Increasing_Trends', 'Decreasing_Trends',
            'Sideways_Trends', 'Max_CUSUM_Score', 'Min_CUSUM_Score', 'Window_Size'
        ],
        'Value': [
            ticker, feature, len(changepoints), analysis_df['Duration_Days'].mean(),
            analysis_df['Percent_Change'].mean(), sum(trends == 'Increasing'),
            sum(trends == 'Decreasing'), sum(trends == 'Sideways'),
            scores.max(), scores[scores > 0].min() if any(scores > 0) else 0, window_size
        ]
    })

    analysis_df.stats = overall_stats

    return analysis_df
    
# Example usage
selected_ticker = 'AAPL'
selected_feature = 'total_revenue'

df_signal = df_accounting.query(f"ticker == '{selected_ticker}'").reset_index().set_index("date")[selected_feature]

analysis_df  = perform_cusum_analysis(df_signal, selected_ticker, selected_feature)




Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access



In [390]:
analysis_df.stats

Unnamed: 0,Statistic,Value
0,Ticker,AAPL
1,Feature,total_revenue
2,Total_Change_Points,25
3,Average_Duration_Days,319.200
4,Average_Percent_Change,18.194
5,Increasing_Trends,10
6,Decreasing_Trends,4
7,Sideways_Trends,11
8,Max_CUSUM_Score,97.349
9,Min_CUSUM_Score,0.010


In [None]:
df_change = df_accounting.segment(method="change_point", type="dataframe", ticker="AAPL", feature="total_revenue")
stats_change = df_accounting.segment(method="change_point", type="stats", ticker="AAPL", feature="total_revenue")
df_accounting.segment(method="change_point", type="plot", ticker="AAPL", feature="total_revenue")

In [None]:
change_point

In [358]:
overall_stats

Unnamed: 0,Statistic,Value
0,Ticker,AAPL
1,Feature,total_revenue
2,Total_Change_Points,25
3,Average_Duration_Days,319.200
4,Average_Percent_Change,18.194
5,Increasing_Trends,10
6,Decreasing_Trends,4
7,Sideways_Trends,11
8,Max_CUSUM_Score,97.349
9,Min_CUSUM_Score,0.010


In [357]:
analysis_df

Unnamed: 0,Date,Trend,Signal_Value,CUSUM_Score,Duration,Duration_Days,Percent_Change
0,1997-09-05,Decreasing,1713937536.0,33.601,231 days,231,-15.696
1,1998-04-24,Decreasing,1444923136.0,36.299,245 days,245,7.687
2,1998-12-25,Decreasing,1556000000.0,39.855,210 days,210,-0.171
3,1999-07-23,Sideways,1553333376.0,7.621,301 days,301,24.571
4,2000-05-19,Increasing,1935000064.0,44.951,952 days,952,-25.239
5,2002-12-27,Decreasing,1446625024.0,53.811,329 days,329,15.94
6,2003-11-21,Increasing,1677222272.0,55.883,210 days,210,16.709
7,2004-06-18,Increasing,1957461504.0,57.142,238 days,238,77.321
8,2005-02-11,Increasing,3471000064.0,59.76,210 days,210,2.751
9,2005-09-09,Sideways,3566470656.0,42.133,385 days,385,30.5


In [336]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import ruptures as rpt
import matplotlib.colors as mcolors
import dash
from dash import dcc, html
from dash.dependencies import Input, Output

# Assuming df_accounting is already loaded

app = dash.Dash(__name__)

app.layout = html.Div([
    html.H3("Financial Metric Regime Change Detection", style={
    'textAlign': 'center', 
    'marginBottom': '20px', 
    'color': '#FFFFFF',  # White text color for visibility on dark backgrounds
    'backgroundColor': '#333333',  # Dark background color
}),
    
    html.Div([
        html.Div([
            html.Label("Select Ticker:"),
            dcc.Dropdown(
                id='ticker-dropdown',
                options=[{'label': ticker, 'value': ticker} for ticker in df_accounting.index.get_level_values('ticker').unique()],
                value='TSLA'
            ),
        ], style={'width': '48%', 'display': 'inline-block'}),
        
        html.Div([
            html.Label("Select Feature:"),
            dcc.Dropdown(
                id='feature-dropdown',
                options=[{'label': col, 'value': col} for col in df_accounting.columns],
                value='total_revenue'
            ),
        ], style={'width': '48%', 'float': 'right', 'display': 'inline-block'})
    ], style={'padding': '20px 0px'}),
    
    dcc.Graph(id='regime-change-plot', style={'height': '80vh'})
], style={'padding': '0 20px', 'maxWidth': '1200px', 'margin': '0 auto'})

@app.callback(
    Output('regime-change-plot', 'figure'),
    [Input('ticker-dropdown', 'value'),
     Input('feature-dropdown', 'value')]
)
def update_graph(selected_ticker, selected_feature):
    df_signal = df_accounting.query(f"ticker == '{selected_ticker}'").reset_index().set_index("date")[selected_feature]
    signal_array = df_signal.values

    # Detection using ruptures
    algo = rpt.Pelt(model="rbf").fit(signal_array)
    breakpoints = algo.predict(pen=10)

    # Ensure breakpoints are within the range of df_signal's index
    dates = df_signal.index.tolist()
    max_index = len(dates) - 1  # Maximum valid index
    valid_breakpoints = [0] + [bp for bp in breakpoints if bp <= max_index]
    # Add the end of the series as the last breakpoint if not already included
    if valid_breakpoints[-1] != max_index:
        valid_breakpoints.append(max_index)

    # Create a Plotly line plot
    fig = go.Figure()

    # Plot the entire time series
    fig.add_trace(go.Scatter(x=df_signal.index, y=df_signal, mode='lines', name=selected_feature, line=dict(color='gray')))

    # Define color maps for regimes
    cmap_up = mcolors.LinearSegmentedColormap.from_list("", ["lightgreen", "green"])
    cmap_down = mcolors.LinearSegmentedColormap.from_list("", ["lightcoral", "red"])
    cmap_sideways = mcolors.LinearSegmentedColormap.from_list("", ["lightyellow", "yellow"])

    def get_shaded_color(change_percentage, duration, cmap):
        avg_daily_change = change_percentage / duration
        norm_value = np.clip(avg_daily_change * duration, 0, 1)  # Account for duration in the shading
        return mcolors.to_hex(cmap(norm_value))

    # Threshold for determining sideways trend
    sideways_threshold = 0.01

    # Add vertical lines and annotations for regime changes
    for i in range(1, len(valid_breakpoints)):
        start_idx = valid_breakpoints[i - 1]
        end_idx = valid_breakpoints[i]
        segment = df_signal.iloc[start_idx:end_idx]
        
        # Determine trend direction and calculate intensity
        start_value = segment.iloc[0]
        end_value = segment.iloc[-1]
        change_percentage = abs((end_value - start_value) / start_value)
        duration = (segment.index[-1] - segment.index[0]).days
        
        if change_percentage < sideways_threshold:
            current_color = get_shaded_color(change_percentage, duration, cmap_sideways)
        elif end_value > start_value:
            current_color = get_shaded_color(change_percentage, duration, cmap_up)
        else:
            current_color = get_shaded_color(change_percentage, duration, cmap_down)
        
        fig.add_trace(go.Scatter(x=segment.index, y=segment, mode='lines', 
                                 name=f'Segment {i}', 
                                 line=dict(color=current_color),
                                 fill='tozeroy'))
        
        # Add vertical line for regime change
        if i < len(valid_breakpoints) - 1:
            change_date = segment.index[-1]
            fig.add_shape(type="line", x0=change_date, y0=0, x1=change_date, y1=1,
                          line=dict(color="red", width=2, dash="dash"), xref='x', yref='paper')
            # Calculate duration of the regime
            fig.add_annotation(x=change_date, y=1.05, text=f'{duration} days', 
                               showarrow=False, yref='paper', align='center', font=dict(color='white'), textangle=90)

    # Calculate duration of the current regime
    current_regime_duration = (df_signal.index[-1] - df_signal.index[valid_breakpoints[-2]]).days
    current_regime_date = df_signal.index[-1]
    fig.add_annotation(x=current_regime_date + pd.Timedelta(days=100), y=1.05, text=f'Current Regime: {current_regime_duration} days', 
                       showarrow=False, yref='paper', align='center', font=dict(color='white'), textangle=90)

    # Update layout
    fig.update_layout(
        title=f'{selected_feature} with Regime Changes for {selected_ticker}',
        xaxis_title='Date',
        yaxis_title=selected_feature,
        showlegend=False,
        plot_bgcolor='rgba(0, 0, 0, 0.95)',
        paper_bgcolor='rgba(0, 0, 0, 0.95)',
        font=dict(color='white', size=12),
        autosize=True,
        margin=dict(l=50, r=50, t=100, b=50)
    )

    return fig

if __name__ == '__main__':
    app.run_server(debug=True)

In [360]:
import numpy as np
import pandas as pd
import ruptures as rpt
import matplotlib.colors as mcolors

def perform_regime_change_analysis(df_signal, ticker, feature):
    signal_array = df_signal.values

    # Detection using ruptures
    algo = rpt.Pelt(model="rbf").fit(signal_array)
    breakpoints = algo.predict(pen=10)

    # Ensure breakpoints are within the range of df_signal's index
    dates = df_signal.index.tolist()
    max_index = len(dates) - 1  # Maximum valid index
    valid_breakpoints = [0] + [bp for bp in breakpoints if bp <= max_index]
    if valid_breakpoints[-1] != max_index:
        valid_breakpoints.append(max_index)

    # Define color maps for regimes
    cmap_up = mcolors.LinearSegmentedColormap.from_list("", ["lightgreen", "green"])
    cmap_down = mcolors.LinearSegmentedColormap.from_list("", ["lightcoral", "red"])
    cmap_sideways = mcolors.LinearSegmentedColormap.from_list("", ["lightyellow", "yellow"])

    def get_shaded_color(change_percentage, duration, cmap):
        avg_daily_change = change_percentage / duration
        norm_value = np.clip(avg_daily_change * duration, 0, 1)
        return mcolors.to_hex(cmap(norm_value))

    sideways_threshold = 0.01

    # Create analysis DataFrame
    analysis_data = []
    for i in range(1, len(valid_breakpoints)):
        start_idx = valid_breakpoints[i - 1]
        end_idx = valid_breakpoints[i]
        segment = df_signal.iloc[start_idx:end_idx]
        
        start_value = segment.iloc[0]
        end_value = segment.iloc[-1]
        change_percentage = (end_value - start_value) / start_value
        duration = (segment.index[-1] - segment.index[0]).days
        
        if abs(change_percentage) < sideways_threshold:
            trend = "Sideways"
            color = get_shaded_color(abs(change_percentage), duration, cmap_sideways)
        elif change_percentage > 0:
            trend = "Increasing"
            color = get_shaded_color(change_percentage, duration, cmap_up)
        else:
            trend = "Decreasing"
            color = get_shaded_color(abs(change_percentage), duration, cmap_down)
        
        analysis_data.append({
            'Start_Date': segment.index[0],
            'End_Date': segment.index[-1],
            'Duration_Days': duration,
            'Start_Value': start_value,
            'End_Value': end_value,
            'Percent_Change': change_percentage * 100,
            'Trend': trend,
        })

    analysis_df = pd.DataFrame(analysis_data)

    # Overall statistics
    overall_stats = pd.DataFrame({
        'Statistic': [
            'Ticker', 'Feature', 'Total_Regimes', 'Average_Duration_Days',
            'Average_Percent_Change', 'Increasing_Trends', 'Decreasing_Trends',
            'Sideways_Trends'
        ],
        'Value': [
            ticker, feature, len(analysis_df), analysis_df['Duration_Days'].mean(),
            analysis_df['Percent_Change'].mean(),
            sum(analysis_df['Trend'] == 'Increasing'),
            sum(analysis_df['Trend'] == 'Decreasing'),
            sum(analysis_df['Trend'] == 'Sideways')
        ]
    })

    return analysis_df, overall_stats

# Example usage
selected_ticker = 'AAPL'
selected_feature = 'total_revenue'
df_signal = df_accounting.query(f"ticker == '{selected_ticker}'").reset_index().set_index("date")[selected_feature]
analysis_df, overall_stats = perform_regime_change_analysis(df_signal, selected_ticker, selected_feature)

# Display results
print("Regime Change Analysis:")
print(analysis_df)
print("\nOverall Statistics:")
print(overall_stats)

Regime Change Analysis:
  Start_Date   End_Date  Duration_Days     Start_Value       End_Value  \
0 1994-01-28 2009-12-18           5803  2468854016.000 13000077312.000   
1 2009-12-25 2011-11-18            693 13447230464.000 32438384640.000   
2 2011-11-25 2014-11-07           1078 33827846144.000 44621152256.000   
3 2014-11-14 2020-11-20           2198 47119306752.000 75484381184.000   
4 2020-11-27 2024-07-12           1323 79079849984.000 90752999424.000   

   Percent_Change       Trend  
0         426.563  Increasing  
1         141.227  Increasing  
2          31.907  Increasing  
3          60.198  Increasing  
4          14.761  Increasing  

Overall Statistics:
                Statistic          Value
0                  Ticker           AAPL
1                 Feature  total_revenue
2           Total_Regimes              5
3   Average_Duration_Days       2219.000
4  Average_Percent_Change        134.931
5       Increasing_Trends              5
6       Decreasing_Trends      

In [338]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import ruptures as rpt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import dash
from dash import dcc, html
from dash.dependencies import Input, Output

# Assuming df_accounting is already loaded

app = dash.Dash(__name__)

app.layout = html.Div([
    html.H3("Rolling PCA Regime (All Features)", style={'textAlign': 'center', 'marginBottom': '20px'}),
    
    html.Div([
        html.Label("Select Ticker:"),
        dcc.Dropdown(
            id='ticker-dropdown',
            options=[{'label': ticker, 'value': ticker} for ticker in df_accounting.index.get_level_values('ticker').unique()],
            value='MCD'
        ),
    ], style={'width': '30%', 'margin': '0 auto', 'padding': '20px 0px'}),
    
    dcc.Graph(id='pca-regime-plot', style={'height': '80vh'})
], style={'padding': '0 20px', 'maxWidth': '1200px', 'margin': '0 auto'})

@app.callback(
    Output('pca-regime-plot', 'figure'),
    [Input('ticker-dropdown', 'value')]
)
def update_graph(selected_ticker):
    df_signal = df_accounting.query(f"ticker == '{selected_ticker}'").reset_index().set_index("date").drop(columns=["ticker"])
    signal_array = df_signal.values

    # Detection using ruptures
    algo = rpt.Pelt(model="rbf").fit(signal_array)
    breakpoints = algo.predict(pen=10)

    # Scale the data
    scaler = StandardScaler()
    df_scaled = scaler.fit_transform(df_signal)

    # Apply rolling PCA
    window_size = 52  # Example: rolling window of 52 weeks (1 year)
    pca_values = []
    for i in range(len(df_scaled) - window_size + 1):
        window_data = df_scaled[i:i + window_size]
        pca = PCA(n_components=1)
        pca_transformed = pca.fit_transform(window_data)
        pca_values.append(pca_transformed[-1])  # Append the last PCA value of the window

    # Create a DataFrame for the PCA values
    pca_series = pd.Series(np.concatenate(pca_values), index=df_signal.index[window_size - 1:])

    # Ensure breakpoints are within the range of pca_series's index
    dates = pca_series.index.tolist()
    max_index = len(dates) - 1  # Maximum valid index
    valid_breakpoints = [0] + [bp for bp in breakpoints if bp <= max_index]
    # Add the end of the series as the last breakpoint if not already included
    if valid_breakpoints[-1] != max_index:
        valid_breakpoints.append(max_index)

    # Create a Plotly line plot
    fig = go.Figure()

    # Plot the entire time series
    fig.add_trace(go.Scatter(x=pca_series.index, y=pca_series, mode='lines', name='PCA Transformed Series', line=dict(color='gray')))

    # Define colors for regimes
    color_1 = 'blue'
    color_2 = 'orange'
    current_color = color_1

    # Add vertical lines and annotations for regime changes
    for i in range(1, len(valid_breakpoints)):
        start_idx = valid_breakpoints[i - 1]
        end_idx = valid_breakpoints[i]
        segment = pca_series.iloc[start_idx:end_idx]
        
        # Alternate colors
        current_color = color_1 if current_color == color_2 else color_2
        
        fig.add_trace(go.Scatter(x=segment.index, y=segment, mode='lines', 
                                 name=f'Segment {i}', 
                                 line=dict(color=current_color),
                                 fill='tozeroy'))
        
        # Add vertical line for regime change
        if i < len(valid_breakpoints) - 1:
            change_date = segment.index[-1]
            fig.add_shape(type="line", x0=change_date, y0=0, x1=change_date, y1=1,
                          line=dict(color="red", width=2, dash="dash"), xref='x', yref='paper')
            
            # Calculate duration of the regime
            duration = (segment.index[-1] - segment.index[0]).days
            fig.add_annotation(x=change_date, y=1.05, text=f'{duration} days', 
                               showarrow=False, yref='paper', align='center', font=dict(color='white'), textangle=90)

    # Calculate duration of the current regime
    current_regime_duration = (pca_series.index[-1] - pca_series.index[valid_breakpoints[-2]]).days
    current_regime_date = pca_series.index[-1]
    fig.add_annotation(x=current_regime_date + pd.Timedelta(days=100), y=1.05, text=f'Current Regime: {current_regime_duration} days', 
                       showarrow=False, yref='paper', align='center', font=dict(color='white'), textangle=90)

    # Update layout
    fig.update_layout(
        title=f'Rolling PCA Transformed Dataset with Regime Changes for {selected_ticker}',
        xaxis_title='Date',
        yaxis_title='PCA Transformed Dataset',
        showlegend=False,
        plot_bgcolor='rgba(0, 0, 0, 0.95)',
        paper_bgcolor='rgba(0, 0, 0, 0.95)',
        font=dict(color='white', size=12),
        autosize=True,
        margin=dict(l=50, r=50, t=100, b=50)
    )

    return fig

app.run_server(debug=True)

In [362]:
import numpy as np
import pandas as pd
import ruptures as rpt
import matplotlib.colors as mcolors
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

def perform_regime_change_analysis(df_accounting, ticker, feature, window_size=52):
    df_signal = df_accounting.query(f"ticker == '{ticker}'").reset_index().set_index("date")
    signal_array = df_signal[feature].values

    # Apply PCA to all features
    scaler = StandardScaler()
    df_scaled = scaler.fit_transform(df_signal.drop(columns=["ticker"]))

    # Apply rolling PCA
    pca_values = []
    for i in range(len(df_scaled) - window_size + 1):
        window_data = df_scaled[i:i + window_size]
        pca = PCA(n_components=1)
        pca_transformed = pca.fit_transform(window_data)
        pca_values.append(pca_transformed[-1][0])  # Append the last PCA value of the window

    # Create a Series for the PCA values
    pca_series = pd.Series(pca_values, index=df_signal.index[window_size - 1:])

    # Detection using ruptures
    algo = rpt.Pelt(model="rbf").fit(signal_array)
    breakpoints = algo.predict(pen=10)

    # Ensure breakpoints are within the range of df_signal's index
    dates = df_signal.index.tolist()
    max_index = len(dates) - 1  # Maximum valid index
    valid_breakpoints = [0] + [bp for bp in breakpoints if bp <= max_index]
    if valid_breakpoints[-1] != max_index:
        valid_breakpoints.append(max_index)

    # Define color maps for regimes
    cmap_up = mcolors.LinearSegmentedColormap.from_list("", ["lightgreen", "green"])
    cmap_down = mcolors.LinearSegmentedColormap.from_list("", ["lightcoral", "red"])
    cmap_sideways = mcolors.LinearSegmentedColormap.from_list("", ["lightyellow", "yellow"])

    def get_shaded_color(change_percentage, duration, cmap):
        avg_daily_change = change_percentage / duration
        norm_value = np.clip(avg_daily_change * duration, 0, 1)
        return mcolors.to_hex(cmap(norm_value))

    sideways_threshold = 0.01

    # Create analysis DataFrame
    analysis_data = []
    for i in range(1, len(valid_breakpoints)):
        start_idx = valid_breakpoints[i - 1]
        end_idx = valid_breakpoints[i]
        segment = df_signal[feature].iloc[start_idx:end_idx]
        
        start_value = segment.iloc[0]
        end_value = segment.iloc[-1]
        change_percentage = (end_value - start_value) / start_value
        duration = (segment.index[-1] - segment.index[0]).days
        
        if abs(change_percentage) < sideways_threshold:
            trend = "Sideways"
            color = get_shaded_color(abs(change_percentage), duration, cmap_sideways)
        elif change_percentage > 0:
            trend = "Increasing"
            color = get_shaded_color(change_percentage, duration, cmap_up)
        else:
            trend = "Decreasing"
            color = get_shaded_color(abs(change_percentage), duration, cmap_down)
        
        # Get PCA values for this segment
        pca_start = pca_series.loc[segment.index[0]] if segment.index[0] in pca_series.index else np.nan
        pca_end = pca_series.loc[segment.index[-1]] if segment.index[-1] in pca_series.index else np.nan
        
        analysis_data.append({
            'Start_Date': segment.index[0],
            'End_Date': segment.index[-1],
            'Duration_Days': duration,
            'Start_Value': start_value,
            'End_Value': end_value,
            'Percent_Change': change_percentage * 100,
            'Trend': trend,
            'Color': color,
            'PCA_Start': pca_start,
            'PCA_End': pca_end,
            'PCA_Change': pca_end - pca_start if not np.isnan(pca_start) and not np.isnan(pca_end) else np.nan
        })

    analysis_df = pd.DataFrame(analysis_data)

    # Overall statistics
    overall_stats = pd.DataFrame({
        'Statistic': [
            'Ticker', 'Feature', 'Total_Regimes', 'Average_Duration_Days',
            'Average_Percent_Change', 'Increasing_Trends', 'Decreasing_Trends',
            'Sideways_Trends', 'Average_PCA_Change', 'Window_Size'
        ],
        'Value': [
            ticker, feature, len(analysis_df), analysis_df['Duration_Days'].mean(),
            analysis_df['Percent_Change'].mean(),
            sum(analysis_df['Trend'] == 'Increasing'),
            sum(analysis_df['Trend'] == 'Decreasing'),
            sum(analysis_df['Trend'] == 'Sideways'),
            analysis_df['PCA_Change'].mean(),
            window_size
        ]
    })

    return analysis_df, overall_stats 

# Example usage
selected_ticker = 'AAPL'
selected_feature = 'total_revenue'
analysis_df, overall_stats  = perform_regime_change_analysis(df_accounting, selected_ticker, selected_feature)



Regime Change Analysis:
  Start_Date   End_Date  Duration_Days     Start_Value       End_Value  \
0 1994-01-28 2009-12-18           5803  2468854016.000 13000077312.000   
1 2009-12-25 2011-11-18            693 13447230464.000 32438384640.000   
2 2011-11-25 2014-11-07           1078 33827846144.000 44621152256.000   
3 2014-11-14 2020-11-20           2198 47119306752.000 75484381184.000   
4 2020-11-27 2024-07-12           1323 79079849984.000 90752999424.000   

   Percent_Change       Trend    Color  PCA_Start  PCA_End  PCA_Change  
0         426.563  Increasing  #008000        NaN   -0.355         NaN  
1         141.227  Increasing  #008000     -0.363   -2.219      -1.856  
2          31.907  Increasing  #62cb62     -2.158    8.234      10.392  
3          60.198  Increasing  #39ac39      7.240   -0.905      -8.145  
4          14.761  Increasing  #7bde7b     -0.360   -2.424      -2.064  

Overall Statistics:
                Statistic          Value
0                  Ticker      

In [364]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from scipy import stats
from statsforecast.models import MSTL
import dash
from dash import dcc, html
from dash.dependencies import Input, Output

# Assuming df_accounting is already loaded

def categorize_trend(segment, window_size=12, increase_threshold=0.05, decrease_threshold=-0.05):
    if len(segment) < window_size:
        return 'Insufficient Data'
    recent_segment = segment[-window_size:]
    slope, _, _, _, _ = stats.linregress(range(len(recent_segment)), recent_segment)
    if slope > increase_threshold:
        return 'Increasing'
    elif slope < decrease_threshold:
        return 'Decreasing'
    else:
        return 'Sideways'

def dynamic_warm_up_trend_analysis(df_signal, height=300):
    n = len(df_signal)
    warm_up_period_ratio = 0.1
    warm_up_period = int(n * warm_up_period_ratio)
    window_size_ratio = 0.05
    window_size = max(int(n * window_size_ratio), 12)
    std_dev = np.std(df_signal)
    increase_threshold_ratio = 0.005
    decrease_threshold_ratio = -0.005
    increase_threshold = std_dev * increase_threshold_ratio
    decrease_threshold = std_dev * decrease_threshold_ratio

    trends = ['Insufficient Data'] * warm_up_period
    for i in range(warm_up_period, len(df_signal)):
        segment = df_signal.iloc[:i+1]
        trend = categorize_trend(segment, window_size, increase_threshold, decrease_threshold)
        trends.append(trend)

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=df_signal.index, y=df_signal, mode='lines', name='Signal', line=dict(color='gray')))

    color_map = {'Increasing': 'green', 'Decreasing': 'red', 'Sideways': 'yellow'}
    current_trend = trends[warm_up_period]
    start_index = warm_up_period
    for i in range(warm_up_period + 1, len(trends)):
        if trends[i] != current_trend or i == len(trends) - 1:
            segment = df_signal.iloc[start_index:i]
            fig.add_trace(go.Scatter(x=segment.index, y=segment, mode='lines', 
                                     name=current_trend, 
                                     line=dict(color=color_map[current_trend]),
                                     fill='tozeroy'))
            current_trend = trends[i]
            start_index = i

    fig.update_layout(
        title='Reactive Time Series Trend Analysis',
        xaxis_title='Date',
        yaxis_title='Signal Value',
        showlegend=False,
        plot_bgcolor='rgba(0, 0, 0, 0.95)',
        paper_bgcolor='rgba(0, 0, 0, 0.95)',
        font=dict(color='white', size=11),
        height=height
    )

    return fig

def create_line_subplots(df, height_per_subplot=150):
    num_plots = len(df.columns)
    total_height = num_plots * height_per_subplot

    fig = make_subplots(rows=num_plots, cols=1, shared_xaxes=True, vertical_spacing=0.05)

    for i, column in enumerate(df.columns, start=1):
        fig.add_trace(go.Scatter(x=df.index, y=df[column], mode='lines', name=column), row=i, col=1)
        if i < num_plots:
            fig.add_hline(y=df[column].min(), row=i, col=1, line_dash="dash", line_color="grey")

    fig.update_layout(height=total_height, title_text="Decomposition Plots")

    return fig

app = dash.Dash(__name__)



app.layout = html.Div([
    html.H3("Financial Metric Analysis", style={'textAlign': 'center', 'marginBottom': '20px'}),
    
    html.Div([
        html.Div([
            html.Label("Select Ticker:"),
            dcc.Dropdown(
                id='ticker-dropdown',
                options=[{'label': ticker, 'value': ticker} for ticker in df_accounting.index.get_level_values('ticker').unique()],
                value='XOM'
            ),
        ], style={'width': '48%', 'display': 'inline-block'}),
        
        html.Div([
            html.Label("Select Feature:"),
            dcc.Dropdown(
                id='feature-dropdown',
                options=[{'label': col, 'value': col} for col in df_accounting.columns],
                value='total_revenue'
            ),
        ], style={'width': '48%', 'float': 'right', 'display': 'inline-block'})
    ], style={'padding': '20px 0px'}),
    
    dcc.Graph(id='combined-plot', style={'height': '1000px'})
])

@app.callback(
    Output('combined-plot', 'figure'),
    [Input('ticker-dropdown', 'value'),
     Input('feature-dropdown', 'value')]
)
def update_graph(selected_ticker, selected_feature):
    df_signal = df_accounting.query(f"ticker == '{selected_ticker}'").reset_index().set_index("date")[selected_feature]

    # Decomposition
    seasonal_periods = [13, 52]
    model = MSTL(season_length=seasonal_periods).fit(df_signal)
    decomposition = model.model_

    # Create subplots
    fig = make_subplots(rows=5, cols=1, shared_xaxes=True, vertical_spacing=0.02,
                        subplot_titles=("Observed", "Trend", "Seasonal (13)", "Seasonal (52)", "Reactive Trend Analysis"))

    # Add decomposition plots
    for i, component in enumerate(decomposition.columns, start=1):
        fig.add_trace(go.Scatter(x=decomposition.index, y=decomposition[component], mode='lines', name=component), row=i, col=1)

    # Add reactive trend analysis
    n = len(df_signal)
    warm_up_period_ratio = 0.1
    warm_up_period = int(n * warm_up_period_ratio)
    window_size_ratio = 0.05
    window_size = max(int(n * window_size_ratio), 12)
    std_dev = np.std(df_signal)
    increase_threshold_ratio = 0.005
    decrease_threshold_ratio = -0.005
    increase_threshold = std_dev * increase_threshold_ratio
    decrease_threshold = std_dev * decrease_threshold_ratio

    trends = ['Insufficient Data'] * warm_up_period
    for i in range(warm_up_period, len(df_signal)):
        segment = df_signal.iloc[:i+1]
        trend = categorize_trend(segment, window_size, increase_threshold, decrease_threshold)
        trends.append(trend)

    fig.add_trace(go.Scatter(x=df_signal.index, y=df_signal, mode='lines', name='Signal', line=dict(color='gray')), row=5, col=1)

    color_map = {'Increasing': 'green', 'Decreasing': 'red', 'Sideways': 'yellow'}
    current_trend = trends[warm_up_period]
    start_index = warm_up_period
    for i in range(warm_up_period + 1, len(trends)):
        if trends[i] != current_trend or i == len(trends) - 1:
            segment = df_signal.iloc[start_index:i]
            fig.add_trace(go.Scatter(x=segment.index, y=segment, mode='lines', 
                                     name=current_trend, 
                                     line=dict(color=color_map[current_trend]),
                                     fill='tozeroy'), row=5, col=1)
            current_trend = trends[i]
            start_index = i

    # Update layout
    fig.update_layout(
        height=1000,
        title_text=f"{selected_feature} Analysis for {selected_ticker}",
        showlegend=False,
        plot_bgcolor='rgba(0, 0, 0, 0.95)',
        paper_bgcolor='rgba(0, 0, 0, 0.95)',
        font=dict(color='white')
    )

    # Update y-axes titles
    fig.update_yaxes(title_text="Observed", row=1, col=1)
    fig.update_yaxes(title_text="Trend", row=2, col=1)
    fig.update_yaxes(title_text="Seasonal (13)", row=3, col=1)
    fig.update_yaxes(title_text="Seasonal (52)", row=4, col=1)
    fig.update_yaxes(title_text="Signal Value", row=5, col=1)

    # Update x-axis title
    fig.update_xaxes(title_text="Date", row=5, col=1)

    return fig
app.run_server(debug=True)

In [367]:
import numpy as np
import pandas as pd
from scipy import stats
from statsforecast.models import MSTL

def categorize_trend(segment, window_size=12, increase_threshold=0.05, decrease_threshold=-0.05):
    if len(segment) < window_size:
        return 'Insufficient Data'
    recent_segment = segment[-window_size:]
    slope, _, _, _, _ = stats.linregress(range(len(recent_segment)), recent_segment)
    if slope > increase_threshold:
        return 'Increasing'
    elif slope < decrease_threshold:
        return 'Decreasing'
    else:
        return 'Sideways'

def perform_decomposition_and_trend_analysis(df_accounting, ticker, feature):
    df_signal = df_accounting.query(f"ticker == '{ticker}'").reset_index().set_index("date")[feature]

    # Decomposition
    seasonal_periods = [13, 52]
    model = MSTL(season_length=seasonal_periods).fit(df_signal)
    decomposition = model.model_

    # Print column names for debugging
    print("Decomposition columns:", decomposition.columns)

    # Reactive Trend Analysis
    n = len(df_signal)
    warm_up_period_ratio = 0.1
    warm_up_period = int(n * warm_up_period_ratio)
    window_size_ratio = 0.05
    window_size = max(int(n * window_size_ratio), 12)
    std_dev = np.std(df_signal)
    increase_threshold_ratio = 0.005
    decrease_threshold_ratio = -0.005
    increase_threshold = std_dev * increase_threshold_ratio
    decrease_threshold = std_dev * decrease_threshold_ratio

    trends = ['Insufficient Data'] * warm_up_period
    for i in range(warm_up_period, len(df_signal)):
        segment = df_signal.iloc[:i+1]
        trend = categorize_trend(segment, window_size, increase_threshold, decrease_threshold)
        trends.append(trend)

    # Create analysis DataFrame
    analysis_df = pd.DataFrame({
        'Date': df_signal.index,
        'Observed': df_signal,
        'Trend': decomposition['trend']
    })

    # Add seasonal components if they exist
    for col in decomposition.columns:
        if 'season' in col.lower():
            analysis_df[f'Seasonal_{col}'] = decomposition[col]

    analysis_df['Reactive_Trend'] = trends

    # Calculate overall statistics
    trend_counts = pd.Series(trends).value_counts()
    overall_stats = pd.DataFrame({
        'Statistic': [
            'Ticker', 'Feature', 'Total_Observations', 'Average_Value',
            'Trend_Slope', 'Increasing_Trends', 'Decreasing_Trends', 'Sideways_Trends'
        ],
        'Value': [
            ticker, feature, len(df_signal), df_signal.mean(),
            stats.linregress(range(len(df_signal)), df_signal).slope,
            trend_counts.get('Increasing', 0), trend_counts.get('Decreasing', 0), trend_counts.get('Sideways', 0)
        ]
    })

    # Add seasonal amplitudes if seasonal components exist
    for col in decomposition.columns:
        if 'season' in col.lower():
            amplitude = decomposition[col].max() - decomposition[col].min()
            overall_stats = overall_stats.append({'Statistic': f'Seasonal_{col}_Amplitude', 'Value': amplitude}, ignore_index=True)

    return analysis_df, overall_stats, decomposition

# Example usage
selected_ticker = 'XOM'
selected_feature = 'total_revenue'
analysis_df, overall_stats, decomposition = perform_decomposition_and_trend_analysis(df_accounting, selected_ticker, selected_feature)

# Display results
print("Decomposition and Trend Analysis:")
print(analysis_df.head())
print("\nOverall Statistics:")
print(overall_stats)
print("\nDecomposition Components:")
print(decomposition.head())

In [385]:
import numpy as np
import pandas as pd
from scipy import stats
from statsforecast.models import MSTL

def categorize_trend(segment, window_size=12, increase_threshold=0.05, decrease_threshold=-0.05):
    if len(segment) < window_size:
        return 'Insufficient Data'
    recent_segment = segment[-window_size:]
    slope, _, _, _, _ = stats.linregress(range(len(recent_segment)), recent_segment)
    if slope > increase_threshold:
        return 'Increasing'
    elif slope < decrease_threshold:
        return 'Decreasing'
    else:
        return 'Sideways'
        
def perform_comprehensive_analysis(df_accounting, ticker, feature):
    df_signal = df_accounting.query(f"ticker == '{ticker}'").reset_index().set_index("date")[feature]
    # Decomposition
    seasonal_periods = [13, 52]
    model = MSTL(season_length=seasonal_periods).fit(df_signal)
    decomposition = model.model_
    # Reactive Trend Analysis
    n = len(df_signal)
    warm_up_period_ratio = 0.1
    warm_up_period = int(n * warm_up_period_ratio)
    window_size_ratio = 0.05
    window_size = max(int(n * window_size_ratio), 12)
    std_dev = np.std(df_signal)
    increase_threshold_ratio = 0.005
    decrease_threshold_ratio = -0.005
    increase_threshold = std_dev * increase_threshold_ratio
    decrease_threshold = std_dev * decrease_threshold_ratio
    trends = ['Insufficient Data'] * warm_up_period
    for i in range(warm_up_period, len(df_signal)):
        segment = df_signal.iloc[:i+1]
        trend = categorize_trend(segment, window_size, increase_threshold, decrease_threshold)
        trends.append(trend)
    # Create comprehensive DataFrame
    comprehensive_df = pd.DataFrame({
        'Observed': df_signal,
        'Trend': decomposition['trend']
    })
    # Add seasonal components dynamically
    for col in decomposition.columns:
        if 'season' in col.lower():
            comprehensive_df[f'Seasonal_{col}'] = decomposition[col]
    # Add Remainder and Reactive_Trend
    comprehensive_df['Remainder'] = decomposition['remainder']
    comprehensive_df['Reactive_Trend'] = trends
    
    # Calculate overall statistics
    overall_slope, _,_,_,_ = stats.linregress(range(len(df_signal)), df_signal)
    trend_counts = pd.Series(trends).value_counts()
    # Create a dictionary for overall statistics
    overall_stats = {
        'Ticker': ticker,
        'Feature': feature,
        'Total_Observations': len(df_signal),
        'Overall_Average': df_signal.mean(),
        'Overall_Trend_Slope': overall_slope,
        'Increasing_Trends_Count': trend_counts.get('Increasing', 0),
        'Decreasing_Trends_Count': trend_counts.get('Decreasing', 0),
        'Sideways_Trends_Count': trend_counts.get('Sideways', 0),
        'Remainder_Standard_Deviation': decomposition['remainder'].std()
    }
    # Add seasonal amplitudes
    for col in decomposition.columns:
        if 'season' in col.lower():
            amplitude = decomposition[col].max() - decomposition[col].min()
            overall_stats[f'Seasonal_{col}_Amplitude'] = amplitude
    return comprehensive_df, pd.DataFrame([overall_stats])
# Example usage
selected_ticker = 'XOM'
selected_feature = 'total_revenue'
comprehensive_df, overall_stats = perform_comprehensive_analysis(df_accounting, selected_ticker, selected_feature)

# # Display results
# print("Comprehensive Analysis DataFrame:")
# print(comprehensive_df.head())
# print("\nOverall Statistics:")
# print(pd.DataFrame([overall_stats]))
# print("\nDataFrame Info:")
# print(comprehensive_df.info())

In [386]:
comprehensive_df

Unnamed: 0_level_0,Observed,Trend,Seasonal_seasonal13,Seasonal_seasonal52,Remainder,Reactive_Trend
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1997-03-14,37511999488.000,35161187557.853,257864033.820,4363269406.843,-2270321510.516,Insufficient Data
1997-03-21,37076221952.000,35109728169.609,227435314.829,4411273167.058,-2672214699.496,Insufficient Data
1997-03-28,36640444416.000,35057721132.628,109515297.920,4403117337.071,-2929909351.618,Insufficient Data
1997-04-04,36204666880.000,35005175617.074,57541718.758,3578228373.103,-2436278828.935,Insufficient Data
1997-04-11,35768889344.000,34952111675.302,8811912.271,2657996728.050,-1850030971.623,Insufficient Data
...,...,...,...,...,...,...
2024-06-21,83083001856.000,84738188378.683,-31872703.473,-912416064.310,-710897754.900,Sideways
2024-06-28,83083001856.000,84691697465.100,-16080897.957,-714944794.464,-877669916.678,Sideways
2024-07-05,83083001856.000,84645076117.472,766052.413,-518674828.181,-1044165485.703,Sideways
2024-07-12,83083001856.000,84598299415.684,18652910.470,-323630842.098,-1210319628.056,Sideways


In [387]:
overall_stats

Unnamed: 0,Ticker,Feature,Total_Observations,Overall_Average,Overall_Trend_Slope,Increasing_Trends_Count,Decreasing_Trends_Count,Sideways_Trends_Count,Remainder_Standard_Deviation,Seasonal_seasonal13_Amplitude,Seasonal_seasonal52_Amplitude
0,XOM,total_revenue,1428,76952395776.0,22019699.922,546,373,367,4726969588.878,5342136799.408,9764134898.704
