In [None]:
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt

In [None]:
def calculate_earnings_persistence(data, skip_rows=10):
    """
    Estimate earnings persistence as the slope of a lag-1 OLS regression.

    Fits revenue_t = alpha + beta * revenue_{t-1} + error on the rows of
    `data` ordered by 'year' and returns beta.

    Parameters:
    data (pd.DataFrame): A DataFrame with 'year' and 'revenue' columns. Rows
        do not need to be sorted by 'year', but rows that share the same
        'year' value (e.g. several quarters of one year) must already be in
        chronological order relative to each other, because the sort keeps
        their input order. The caller's DataFrame is not modified.
    skip_rows (int): Number of leading rows, counted in the order given
        (before sorting), to discard. Defaults to 10, the value that was
        previously hard-coded.

    Returns:
    float: The persistence coefficient (beta), i.e. the fitted coefficient on
        the lagged revenue term.

    Raises:
    KeyError: If 'year' or 'revenue' is missing from `data`.
    ValueError: If `skip_rows` is negative, or if fewer than two
        (revenue, lagged revenue) pairs remain to fit the regression.
    """
    missing = [col for col in ('year', 'revenue') if col not in data.columns]
    if missing:
        raise KeyError(f"data is missing required column(s): {missing}")
    if skip_rows < 0:
        raise ValueError("skip_rows must be non-negative")

    # A stable sort is required: with several rows per 'year' the default
    # (non-stable) algorithm may shuffle them, which would pair a period with
    # the wrong predecessor when the lag is taken below.
    ordered = data.iloc[skip_rows:].sort_values(by='year', kind='mergesort')

    # assign() returns a new frame, so the input DataFrame is left untouched.
    ordered = ordered.assign(Lagged_Revenue=ordered['revenue'].shift(1))

    # Only the two regression variables decide whether a row is usable; NaNs
    # in unrelated columns must not remove observations.
    ordered = ordered.dropna(subset=['revenue', 'Lagged_Revenue'])

    if len(ordered) < 2:
        raise ValueError(
            "need at least two observations with a lagged value to estimate "
            f"persistence, got {len(ordered)}"
        )

    # Independent variable (with intercept) and dependent variable.
    X = sm.add_constant(ordered['Lagged_Revenue'])
    y = ordered['revenue']

    model = sm.OLS(y, X).fit()

    # The slope on lagged revenue is the persistence coefficient.
    return model.params['Lagged_Revenue']



In [None]:
# Load the dataset from a hard-coded CSV path.
# NOTE(review): the "/content/" prefix looks like a Google Colab working
# directory — confirm and adjust the path when running elsewhere.
df = pd.read_csv("/content/asian_paints_quarters.csv")
# Estimate the persistence coefficient from the loaded data and print it.
result = calculate_earnings_persistence(df)
print(result)

-0.17671810685949607
