<a href="https://colab.research.google.com/github/simplexityware/algodyn/blob/master/Heterogeneous_Agents.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

https://figshare.com/articles/dataset/Data_for_A_detailed_heterogeneous_agent_model_for_a_single_asset_financial_market_with_trading_via_an_order_book/4579474

## A social network model of investment behaviour in the stock market

In [6]:
pip install cellpylib

Collecting cellpylib
  Downloading cellpylib-2.4.0.tar.gz (38 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: cellpylib
  Building wheel for cellpylib (setup.py) ... [?25l[?25hdone
  Created wheel for cellpylib: filename=cellpylib-2.4.0-py3-none-any.whl size=37922 sha256=1cc1e1272517d4764f08f25881eb1016e6de6de5514050748e3c7429848c2b08
  Stored in directory: /root/.cache/pip/wheels/44/bc/c4/4730d328071b164c25d392c2d932b513e56ff0857da63d17e7
Successfully built cellpylib
Installing collected packages: cellpylib
Successfully installed cellpylib-2.4.0


In [1]:
!pip install plotly ipywidgets



In [2]:
import numpy as np
import plotly.express as px
import ipywidgets as widgets
from IPython.display import display, clear_output
import plotly.io as pio

pio.renderers.default = "colab"

In [4]:
import plotly.graph_objects as go

# Create simple data
x = [0, 1, 2, 3, 4, 5]
y = [0, 1, 0, 1, 0, 1]

# Create the figure
fig = go.Figure(data=go.Scatter(x=x, y=y, mode='lines+markers'))

# Update layout
fig.update_layout(title='Simple Plot',
                  xaxis_title='X Axis',
                  yaxis_title='Y Axis')

# Show the plot
fig.show()

In [5]:
import numpy as np
import plotly.graph_objects as go
import ipywidgets as widgets
from IPython.display import display, clear_output
import plotly.io as pio

pio.renderers.default = "colab"

In [6]:
# Create sample data
x = np.linspace(0, 10, 100)
y = np.sin(x)

# Create widgets
amplitude_slider = widgets.FloatSlider(value=1.0, min=0.1, max=2.0, step=0.1, description='Amplitude:')
frequency_slider = widgets.FloatSlider(value=1.0, min=0.1, max=2.0, step=0.1, description='Frequency:')
run_button = widgets.Button(description="Update Plot")

def update_plot(b):
    amplitude = amplitude_slider.value
    frequency = frequency_slider.value
    y = amplitude * np.sin(frequency * x)

    fig = go.Figure(data=go.Scatter(x=x, y=y, mode='lines', name='Sine Wave'))
    fig.update_layout(title='Interactive Sine Wave',
                      xaxis_title='X',
                      yaxis_title='Y')
    fig.show()

run_button.on_click(update_plot)

display(amplitude_slider, frequency_slider, run_button)

FloatSlider(value=1.0, description='Amplitude:', max=2.0, min=0.1)

FloatSlider(value=1.0, description='Frequency:', max=2.0, min=0.1)

Button(description='Update Plot', style=ButtonStyle())

In [7]:
import numpy as np
import cellpylib as cpl

# Model parameters
N = 8000  # Number of traders
timesteps = 50
sigma_epsilon = 0.33  # Standard deviation of price perception error

# Initialize parameters for each trader
A = np.ones(N)  # Normalized asset price influence
B = np.random.normal(0, 1, N)  # Perceived change in normalized price influence
C = np.random.normal(5, 2, N)  # Social influence
D = np.random.normal(0, 1, N)  # Innate tendency towards buying or selling

# Initialize trust network matrix (random sparse trust)
alpha = np.random.choice([0, 1], size=(N, N), p=[0.996, 0.004])

# Initial state (0 = hold, -1 = buy, 1 = sell)
states = np.zeros(N)

def leaning_function(p, p_prev, epsilon, alpha, states, A, B, C, D):
    return A * (p + epsilon) + B * ((p + epsilon) - (p_prev + epsilon)) + C * np.dot(alpha, states) + D

def state_function(L, b=-1, s=1):
    if L < b:
        return -1  # buy
    elif L > s:
        return 1  # sell
    else:
        return 0  # hold

In [8]:
class StockMarketCA:
    def __init__(self, n, states, alpha, A, B, C, D):
        self.n = n
        self.states = states
        self.alpha = alpha
        self.A = A
        self.B = B
        self.C = C
        self.D = D
        self.p = 0  # Initial normalized price
        self.epsilon = np.random.normal(0, sigma_epsilon, n)

    def step(self):
        p_prev = self.p
        self.epsilon = np.random.normal(0, sigma_epsilon, self.n)
        leanings = leaning_function(self.p, p_prev, self.epsilon, self.alpha, self.states, self.A, self.B, self.C, self.D)
        self.states = np.array([state_function(L) for L in leanings])
        # Update the price to ensure buyers equal sellers
        self.p = self.update_price()

    def update_price(self):
        sorted_leanings = np.sort(leaning_function(self.p, self.p, self.epsilon, self.alpha, self.states, self.A, self.B, self.C, self.D))
        mid_point = len(sorted_leanings) // 2
        return sorted_leanings[mid_point]

# Initialize and run the cellular automaton
ca_model = StockMarketCA(N, states, alpha, A, B, C, D)

for t in range(timesteps):
    ca_model.step()
    print(f"Time step {t+1}, normalized price: {ca_model.p}")

Time step 1, normalized price: -0.47736696085370744
Time step 2, normalized price: -10.530057725453792
Time step 3, normalized price: -107.67744303919608
Time step 4, normalized price: -265.011626443028
Time step 5, normalized price: -422.3058146244998
Time step 6, normalized price: -579.6232158347151
Time step 7, normalized price: -736.927073751949
Time step 8, normalized price: -894.2149824552977
Time step 9, normalized price: -1051.5474154374024
Time step 10, normalized price: -1208.9289787335322
Time step 11, normalized price: -1366.2181573720247
Time step 12, normalized price: -1523.6174447790768
Time step 13, normalized price: -1681.0651577571266
Time step 14, normalized price: -1838.523937924578
Time step 15, normalized price: -1995.95202279538
Time step 16, normalized price: -2153.1626535154037
Time step 17, normalized price: -2310.449139568545
Time step 18, normalized price: -2467.716198248245
Time step 19, normalized price: -2624.981837555468
Time step 20, normalized price: -

In [9]:
# Zero trust network
alpha_zero_trust = np.zeros((N, N))
ca_model_zero_trust = StockMarketCA(N, states, alpha_zero_trust, A, B, C, D)

# Random sparse trust network
ca_model_random_trust = StockMarketCA(N, states, alpha, A, B, C, D)

# Epinions trust network (assuming epinions_alpha is precomputed)
# epinions_alpha = ... (load or generate the Epinions trust network)
# ca_model_epinions_trust = StockMarketCA(N, states, epinions_alpha, A, B, C, D)

# Run and analyze simulations
for t in range(timesteps):
    ca_model_zero_trust.step()
    ca_model_random_trust.step()
    # ca_model_epinions_trust.step()
    print(f"Time step {t+1}")
    print(f"Zero trust normalized price: {ca_model_zero_trust.p}")
    print(f"Random trust normalized price: {ca_model_random_trust.p}")
    # print(f"Epinions trust normalized price: {ca_model_epinions_trust.p}")

Time step 1
Zero trust normalized price: -0.04200840924597424
Random trust normalized price: -0.4865655053070622
Time step 2
Zero trust normalized price: -0.08450041988463974
Random trust normalized price: -10.78700560251561
Time step 3
Zero trust normalized price: -0.12073653618752744
Random trust normalized price: -107.73576499739076
Time step 4
Zero trust normalized price: -0.14436378674944972
Random trust normalized price: -265.1587101067157
Time step 5
Zero trust normalized price: -0.1824515114934833
Random trust normalized price: -422.5238897061002
Time step 6
Zero trust normalized price: -0.21763274274703887
Random trust normalized price: -579.8663756416781
Time step 7
Zero trust normalized price: -0.2528070049735178
Random trust normalized price: -737.1642774958067
Time step 8
Zero trust normalized price: -0.28249124241486917
Random trust normalized price: -894.3921905598545
Time step 9
Zero trust normalized price: -0.3189446810052531
Random trust normalized price: -1051.635092

In [14]:
!pip install plotly ipywidgets

Collecting jedi>=0.16 (from ipython>=4.0.0->ipywidgets)
  Downloading jedi-0.19.1-py2.py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: jedi
Successfully installed jedi-0.19.1


In [20]:
import numpy as np
import plotly.graph_objects as go
import plotly.offline as pyo
import ipywidgets as widgets
from IPython.display import display, clear_output

pyo.init_notebook_mode(connected=True)

class StockMarketCA:
    def __init__(self, n, states, alpha, A, B, C, D, sigma_epsilon):
        self.n = n
        self.states = states
        self.alpha = alpha
        self.A = A
        self.B = B
        self.C = C
        self.D = D
        self.p = 0  # Initial normalized price
        self.epsilon = np.random.normal(0, sigma_epsilon, n)
        self.prices = []

    def step(self):
        p_prev = self.p
        self.epsilon = np.random.normal(0, sigma_epsilon, self.n)
        leanings = self.leaning_function(self.p, p_prev, self.epsilon, self.alpha, self.states, self.A, self.B, self.C, self.D)
        self.states = np.array([self.state_function(L) for L in leanings])
        # Update the price to ensure buyers equal sellers
        self.p = self.update_price()
        self.prices.append(self.p)

    def leaning_function(self, p, p_prev, epsilon, alpha, states, A, B, C, D):
        return A * (p + epsilon) + B * ((p + epsilon) - (p_prev + epsilon)) + C * np.dot(alpha, states) + D

    def state_function(self, L, b=-1, s=1):
        if L < b:
            return -1  # buy
        elif L > s:
            return 1  # sell
        else:
            return 0  # hold

    def update_price(self):
        sorted_leanings = np.sort(self.leaning_function(self.p, self.p, self.epsilon, self.alpha, self.states, self.A, self.B, self.C, self.D))
        mid_point = len(sorted_leanings) // 2
        return sorted_leanings[mid_point]

# Simulation parameters
sigma_epsilon = 0.33

def run_simulation(timesteps, num_traders):
    # Initialize parameters for each trader
    A = np.ones(num_traders)
    B = np.random.normal(0, 1, num_traders)
    C = np.random.normal(5, 2, num_traders)
    D = np.random.normal(0, 1, num_traders)
    alpha = np.random.choice([0, 1], size=(num_traders, num_traders), p=[0.996, 0.004])
    states = np.zeros(num_traders)

    ca_model = StockMarketCA(num_traders, states, alpha, A, B, C, D, sigma_epsilon)
    for t in range(timesteps):
        ca_model.step()
    return ca_model.prices

In [21]:
num_traders_slider = widgets.IntSlider(value=8000, min=100, max=10000, step=100, description='Num Traders:')
timesteps_slider = widgets.IntSlider(value=50, min=10, max=100, step=10, description='Timesteps:')
run_button = widgets.Button(description="Run Simulation")
output = widgets.Output()

def update_simulation(b):
    with output:
        clear_output(wait=True)
        num_traders = num_traders_slider.value
        timesteps = timesteps_slider.value
        prices = run_simulation(timesteps, num_traders)

        fig = go.Figure(data=go.Scatter(y=prices, mode='lines', name='Stock Price'))
        fig.update_layout(title='Stock Market Simulation Over Time',
                          xaxis_title='Time Steps',
                          yaxis_title='Normalized Price')
        pyo.iplot(fig)

run_button.on_click(update_simulation)

display(num_traders_slider, timesteps_slider, run_button, output)

IntSlider(value=8000, description='Num Traders:', max=10000, min=100, step=100)

IntSlider(value=50, description='Timesteps:', min=10, step=10)

Button(description='Run Simulation', style=ButtonStyle())

Output()

## MESA

In [None]:
pip install mesa


Collecting mesa
  Downloading mesa-2.3.1-py3-none-any.whl (65 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.7/65.7 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
Collecting cookiecutter (from mesa)
  Downloading cookiecutter-2.6.0-py3-none-any.whl (39 kB)
Collecting mesa-viz-tornado>=0.1.3,~=0.1.0 (from mesa)
  Downloading Mesa_Viz_Tornado-0.1.3-py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m32.9 MB/s[0m eta [36m0:00:00[0m
Collecting solara (from mesa)
  Downloading solara-1.34.1-py2.py3-none-any.whl (5.7 kB)
Collecting binaryornot>=0.4.4 (from cookiecutter->mesa)
  Downloading binaryornot-0.4.4-py2.py3-none-any.whl (9.0 kB)
Collecting arrow (from cookiecutter->mesa)
  Downloading arrow-1.3.0-py3-none-any.whl (66 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.4/66.4 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
Collecting solara-server[dev,starlette]==1.34.1 (from solara->m

In [None]:
from mesa import Agent, Model
from mesa.time import RandomActivation
from mesa.datacollection import DataCollector
import numpy as np
import pandas as pd

class TechnicalAgent(Agent):
    def __init__(self, unique_id, model, fast_window, slow_window, max_wait_time, profit_threshold):
        super().__init__(unique_id, model)
        self.fast_window = fast_window
        self.slow_window = slow_window
        self.max_wait_time = max_wait_time
        self.profit_threshold = profit_threshold
        self.last_signal_price = None
        self.wait_time = self.random.uniform(0, max_wait_time)

    def step(self):
        # Implement the technical analysis strategy
        pass

class FundamentalAgent(Agent):
    def __init__(self, unique_id, model, fundamental_price, market_threshold, opinion_threshold):
        super().__init__(unique_id, model)
        self.fundamental_price = fundamental_price
        self.market_threshold = market_threshold
        self.opinion_threshold = opinion_threshold

    def step(self):
        # Implement the fundamental analysis strategy
        pass

In [None]:
from mesa import Model
from mesa.time import RandomActivation
from mesa.datacollection import DataCollector

class FinancialMarketModel(Model):
    def __init__(self, num_technical_agents, num_fundamental_agents):
        self.schedule = RandomActivation(self)
        self.order_book = {'buy': [], 'sell': []}
        self.price_history = []

        # Create agents
        for i in range(num_technical_agents):
            agent = TechnicalAgent(i, self, fast_window=5, slow_window=10, max_wait_time=50, profit_threshold=0.01)
            self.schedule.add(agent)
        for i in range(num_fundamental_agents):
            agent = FundamentalAgent(i + num_technical_agents, self, fundamental_price=self.random.uniform(20, 25), market_threshold=0.05, opinion_threshold=0.1)
            self.schedule.add(agent)

        # Set up data collection
        self.datacollector = DataCollector(
            model_reporters={"Price": lambda m: m.price_history[-1] if m.price_history else None},
            agent_reporters={"Wealth": lambda a: a.wealth}
        )

    def step(self):
        self.schedule.step()
        self.match_orders()
        self.datacollector.collect(self)

    def place_order(self, agent_id, order_type, price):
        self.order_book[order_type].append((agent_id, price))

    def match_orders(self):
        # Implement order matching logic
        pass

In [None]:
from mesa.batchrunner import BatchRunner

model_params = {
    "num_technical_agents": 1500,
    "num_fundamental_agents": 1000
}

# Create an instance of BatchRunner
batch_run = BatchRunner(
    FinancialMarketModel,
    fixed_parameters=model_params,
    iterations=5,  # Number of iterations
    max_steps=1000,  # Number of steps per iteration
    model_reporters={"Price": lambda m: m.price_history[-1] if m.price_history else None}
)

batch_run.run_all()

# Extract and analyze results
data = batch_run.get_model_vars_dataframe()
print(data)

ImportError: cannot import name 'BatchRunner' from 'mesa.batchrunner' (/usr/local/lib/python3.10/dist-packages/mesa/batchrunner.py)

In [None]:
from mesa import Agent, Model
from mesa.time import RandomActivation
from mesa.datacollection import DataCollector
from mesa.batchrunner import BatchRunner
import numpy as np
import pandas as pd

class TechnicalAgent(Agent):
    def __init__(self, unique_id, model, fast_window, slow_window, max_wait_time, profit_threshold):
        super().__init__(unique_id, model)
        self.fast_window = fast_window
        self.slow_window = slow_window
        self.max_wait_time = max_wait_time
        self.profit_threshold = profit_threshold
        self.last_signal_price = None
        self.wait_time = self.random.uniform(0, max_wait_time)

    def step(self):
        # Implement the technical analysis strategy
        pass

class FundamentalAgent(Agent):
    def __init__(self, unique_id, model, fundamental_price, market_threshold, opinion_threshold):
        super().__init__(unique_id, model)
        self.fundamental_price = fundamental_price
        self.market_threshold = market_threshold
        self.opinion_threshold = opinion_threshold

    def step(self):
        # Implement the fundamental analysis strategy
        pass

class FinancialMarketModel(Model):
    def __init__(self, num_technical_agents, num_fundamental_agents):
        self.schedule = RandomActivation(self)
        self.order_book = {'buy': [], 'sell': []}
        self.price_history = []

        # Create agents
        for i in range(num_technical_agents):
            agent = TechnicalAgent(i, self, fast_window=5, slow_window=10, max_wait_time=50, profit_threshold=0.01)
            self.schedule.add(agent)
        for i in range(num_fundamental_agents):
            agent = FundamentalAgent(i + num_technical_agents, self, fundamental_price=self.random.uniform(20, 25), market_threshold=0.05, opinion_threshold=0.1)
            self.schedule.add(agent)

        # Set up data collection
        self.datacollector = DataCollector(
            model_reporters={"Price": lambda m: m.price_history[-1] if m.price_history else None},
            agent_reporters={"Wealth": lambda a: a.wealth if hasattr(a, 'wealth') else None}
        )

    def step(self):
        self.schedule.step()
        self.match_orders()
        self.datacollector.collect(self)

    def place_order(self, agent_id, order_type, price):
        self.order_book[order_type].append((agent_id, price))

    def match_orders(self):
        # Implement order matching logic
        pass

# Define the model parameters
model_params = {
    "num_technical_agents": 750,
    "num_fundamental_agents": 1000
}

# Create an instance of BatchRunner
batch_run = BatchRunner(
    FinancialMarketModel,
    model_params=model_params,
    iterations=5,  # Number of iterations
    max_steps=1000,  # Number of steps per iteration
    model_reporters={"Price": lambda m: m.price_history[-1] if m.price_history else None}
)

# Run the batch run
batch_run.run_all()

# Extract and analyze results
data = batch_run.get_model_vars_dataframe()
print(data)

ImportError: cannot import name 'BatchRunner' from 'mesa.batchrunner' (/usr/local/lib/python3.10/dist-packages/mesa/batchrunner.py)

# MODEL OF HETEROGENEOUS MARKET WITH INTRINSIC RANDOMNESS

In [None]:
from mesa import Agent, Model
from mesa.time import RandomActivation
from mesa.space import Grid
import numpy as np

class TradingAgent(Agent):
    def __init__(self, unique_id, model):
        super().__init__(unique_id, model)
        self.holdings = np.random.uniform(0, 1)
        self.is_fundamentalist = np.random.choice([True, False])

    def step(self):
        market_price = self.model.market_price
        if self.is_fundamentalist:
            reference_price = np.log(market_price) + 0.5 * (np.log(self.model.fundamental_value) - np.log(market_price))
        else:
            reference_price = np.log(market_price) + 0.5 * (np.log(market_price) - np.log(market_price))

        excess_demand = self.holdings * np.log(reference_price / market_price)
        self.holdings += excess_demand

        if self.holdings > 1:
            self.holdings = 1
        elif self.holdings < 0:
            self.holdings = 0

        self.is_fundamentalist = self.holdings > 0.5

ImportError: cannot import name 'Grid' from 'mesa.space' (/usr/local/lib/python3.10/dist-packages/mesa/space.py)

# To be deleted

In [None]:
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import timedelta
import matplotlib.pyplot as plt
import os
import finnhub

In [None]:
finnhub_client = finnhub.Client(api_key="cm291hhr01qvesfhj9s0cm291hhr01qvesfhj9sg")

In [None]:
# res_df = pd.DataFrame([])

# for ticker in tickers:
#     res = finnhub_client.transcripts_list(ticker)
#     res = res['transcripts']
#     res = pd.DataFrame(res)
#     print(res.shape)
#     res_df = pd.concat([res_df, res])

# res_df.to_csv('earnings_res.csv', index=False)

In [None]:


# earnings_res = pd.read_csv('earnings_res.csv')

# res_dfs = pd.DataFrame([])

# for transcript_id in earnings_res['id'].to_list():
#     res = finnhub_client.transcripts(transcript_id)

#     res_df = pd.DataFrame(res['transcript'])
#     if len(res_df):
#         res_df['id'] = res['id']
#         res_df['time'] = res['time']
#         res_df['quarter'] = res['quarter']
#         res_df['symbol'] = res['symbol']

#     res_dfs = pd.concat([res_dfs, res_df])

# res_dfs.to_csv('earnings_res_details.csv', index=False)

In [None]:
[j for j in os.listdir('../data') if 'ibe' in j]

In [None]:
details = pd.read_csv('../data/ibes_us_details_200501_202402.csv').sort_values(['TICKER', 'FPEDATS']).dropna(subset=['VALUE'])
actuals = pd.read_csv('../data/ibes_us_actuals_200501_202402.csv').sort_values(['TICKER', 'PENDS']).dropna(subset=['VALUE'])

In [None]:
tickers = list(details['TICKER'].unique())

In [None]:
symbols = list(actuals['TICKER'].unique())

In [None]:
price_df = yf.download(symbols)
price_df = price_df.dropna(how='all', axis=1)
price_df = price_df.stack().reset_index()
price_df = price_df.rename(columns={'Date':'ANNDATS', 'Ticker':'TICKER'})

In [None]:
# PENDS - Financial Dates
# ANNDATS - Annoucements Dates
# Assume that between the PENDS and ACTDATS for the Actuals, the analyst details ACTDATS has to be within that range

# Use ANALYS to know if it is more star analyst or not
# Need the PENDS to merge but need the ANNDATS to trade

# Care about QTR vs ANN?
# Just take the latest or take the change: latest for now

In [None]:
# # Filter for test purposes only
# details = details[details['FPEDATS'] >= '2021-01-01']
# actuals = actuals[actuals['PENDS'] >= '2021-01-01']

In [None]:
details = details[details['report_curr'] == 'USD']
actuals = actuals[actuals['CURR_ACT'] == 'USD']

In [None]:
print(details.shape)

In [None]:
filter = actuals.nunique().sort_values().reset_index()
actuals = actuals.drop(columns=filter[filter[0] < 2]['index'].to_list())

filter = details.nunique().sort_values().reset_index()
details = details.drop(columns=filter[filter[0] < 3]['index'].to_list())

In [None]:
actuals = actuals.drop(columns=[j for j in actuals.columns if 'TIMS' in j])
details = details.drop(columns=[j for j in details.columns if 'TIMS' in j])

In [None]:
# # Filter for analysts that write too many (can do more complex next time)
# details['max_date'] = np.max(details[['ACTDATS', 'FPEDATS', 'ANNDATS']], axis=1)
# details = details.sort_values(['TICKER', 'FPEDATS', 'ANALYS', 'max_date'])
# details = details.drop_duplicates(subset=['TICKER', 'FPEDATS', 'ANALYS'], keep='last')

In [None]:
details = details.sort_values(['TICKER', 'FPEDATS', 'VALUE'])
details['ANALYS_VALUE_diff'] = details.groupby(['TICKER', 'FPEDATS', 'ANALYS'])['VALUE'].diff()
details['VALUE_diff'] = details.groupby(['TICKER', 'FPEDATS'])['VALUE'].diff()
details['diff'] = np.where(details['ANALYS_VALUE_diff'].notnull(), details['ANALYS_VALUE_diff'], details['VALUE_diff'])
details = details.drop(columns=['ANALYS_VALUE_diff', 'VALUE_diff'])

In [None]:
max_diff = details.groupby(['TICKER', 'FPEDATS', 'ANALYS'], as_index=False)['diff'].max().rename(columns={'diff':'max_diff'})
max_diff = pd.merge(details, max_diff, how='left')
max_diff = max_diff[max_diff['diff'] == max_diff['max_diff']][['TICKER', 'FPEDATS', 'ANALYS', 'VALUE']].rename(columns={'VALUE':'max_VALUE'})
max_diff = max_diff.groupby(['TICKER', 'FPEDATS', 'ANALYS'], as_index=False).agg({'max_VALUE':'max'})
max_diff = max_diff.drop_duplicates()

In [None]:
details = pd.merge(details, max_diff, how='left')
details['max_VALUE'] = np.where(details['max_VALUE'].isnull(), details['VALUE'], details['max_VALUE'])
details['freq'] = np.where(details['VALUE'] < details['max_VALUE'], 'QTR', 'ANN')
details = details.drop(columns=['diff', 'max_VALUE'])
# a lot of assumptions

In [None]:
#can add more filters
expec_df = details.groupby(['TICKER', 'FPEDATS', 'freq'], as_index=False).agg({'VALUE':['median', 'std', 'count']})
expec_df.columns = ['TICKER', 'PENDS', 'PDICITY', 'consensus_median', 'consensus_dispersion', 'consensus_attention']

In [None]:
# actuals_expec = pd.merge(actuals, expec_df, how='left')
# actuals_expec['earnings_surprise'] = (actuals_expec['VALUE'] - actuals_expec['consensus_median'])/actuals_expec['VALUE']

# Assume ANN and QTR equally important
# 'TICKER', 'PENDS', 'ANNDATS' is not accurate because apparently multiple PENDS can be on the same ANNDATS (so just use ANNDATS)
actuals_expec_group = actuals_expec.groupby(['TICKER', 'ANNDATS'], as_index=False).agg({'earnings_surprise':'mean'})
actuals_expec_group['earnings_surprise_sign'] = np.sign(actuals_expec_group['earnings_surprise'])

In [None]:
actuals_expec[(actuals_expec['TICKER'] == 'RIMS') & (actuals_expec['ANNDATS'] == '2005-01-03')]

In [None]:
trading_days = sorted(list(actuals_expec_group['ANNDATS'].unique()))
symbols = sorted(list(actuals_expec_group['TICKER'].unique()))
rebalance_days = 30

In [None]:
actuals_expec_group['ANNDATS'] = pd.to_datetime(actuals_expec_group['ANNDATS'])
actuals_expec_group = pd.merge(price_df, actuals_expec_group, how='left')

# actuals_expec_group = actuals_expec_group.dropna(subset=['Adj Close'])
actuals_expec_group = actuals_expec_group.sort_values(['TICKER', 'ANNDATS'])

actuals_expec_group['rebalance_price'] = actuals_expec_group.groupby(['TICKER'])['Adj Close'].shift(-rebalance_days)
actuals_expec_group = actuals_expec_group.dropna(subset=['earnings_surprise'])

actuals_expec_group['returns'] = actuals_expec_group['rebalance_price']/actuals_expec_group['Adj Close']-1
actuals_expec_group['year'] = actuals_expec_group['ANNDATS'].dt.year

actuals_expec_group['month'] = actuals_expec_group['ANNDATS'].dt.month

In [None]:
calendar_weights = actuals_expec_group[['ANNDATS', 'earnings_surprise']].dropna(subset=['earnings_surprise'])
calendar_weights['year'] = calendar_weights['ANNDATS'].dt.year
calendar_weights['month'] = calendar_weights['ANNDATS'].dt.month
calendar_weights['date'] = calendar_weights['ANNDATS'].dt.day
calendar_weights = calendar_weights.groupby(['year', 'month'], as_index=False).agg({'date':'nunique'})
calendar_weights['month_weights'] = 1/calendar_weights['date']

In [None]:
actuals_expec_group = pd.merge(actuals_expec_group, calendar_weights[['year', 'month', 'month_weights']], how='left')

In [None]:
actuals_expec_group


In [None]:
# actuals_expec_group.isnull().sum()

In [None]:
max_cash = True

In [None]:
trade_dfs = pd.DataFrame([])

for trading_date in trading_days:
    if len(actuals_expec_group[actuals_expec_group['ANNDATS'] == trading_date]):
        trade_df = actuals_expec_group[actuals_expec_group['ANNDATS'] == trading_date]
        trade_df['weights'] = 1/len(trade_df)
        if max_cash:
            trade_df['weighted_returns'] = trade_df['earnings_surprise_sign'] * trade_df['returns'] * trade_df['weights'] * trade_df['month_weights']
        else:
            trade_df['weighted_returns'] = trade_df['earnings_surprise_sign'] * trade_df['returns'] * trade_df['weights'] * 1/rebalance_days

        trade_dfs = pd.concat([trade_dfs, trade_df[['ANNDATS', 'weighted_returns']]])

In [None]:
agg_trade_dfs = trade_dfs.groupby('ANNDATS', as_index=False).agg({'weighted_returns':'sum'})
agg_trade_dfs  = agg_trade_dfs[agg_trade_dfs['ANNDATS'] >= '2021-01-01']
agg_trade_dfs ['cumsum_returns'] = agg_trade_dfs ['weighted_returns'].cumsum()
agg_trade_dfs = agg_trade_dfs.set_index('ANNDATS')

In [None]:
spy = yf.download('SPY')
spy['returns'] = spy['Adj Close'].pct_change().shift(-1)
spy  = spy[spy.index >= '2021-01-01']
spy['cumsum_returns'] = spy['returns'].cumsum()

In [None]:
plt.figure(figsize=(10,7))
plt.title('Cum Returns for Earnings Surprise Computation just follow')
plt.plot(agg_trade_dfs['cumsum_returns'], label='earnings strat')
plt.plot(spy['cumsum_returns'], label = 'spy')
plt.legend()
plt.show()