In [32]:
from ib_insync import *
util.startLoop()

In [33]:
ib = IB()
ib.connect('127.0.0.1', 7497, clientId=1)

<IB connected to 127.0.0.1:7497 clientId=1>

In [34]:
contract = Future('ES', '202403', 'CME')
ib.qualifyContracts(contract)

[Future(conId=533620665, symbol='ES', lastTradeDateOrContractMonth='20240315', multiplier='50', exchange='CME', currency='USD', localSymbol='ESH4', tradingClass='ES')]

In [35]:
ib.reqMarketDataType(3)

In [39]:
# Request historical news headlines
contract_id = contract.conId
provider_codes = 'BRFG'
start_date = "2022-12-27 15:00:00-06:00"
end_date = "2024-03-12 11:00:00-05:00"
total_results = 300
headlines = ib.reqHistoricalNews(conId=contract_id, providerCodes=provider_codes, startDateTime=start_date, endDateTime=end_date, totalResults=total_results)

# Process the news headlines
for headline in headlines:
    print(f"Date: {headline.time}")
    print(f"Headline: {headline.headline}")
    print(f"Provider: {headline.providerCode}")
    print(f"Article ID: {headline.articleId}")
    print("-------------------------")

In [40]:
historical_data = ib.reqHistoricalData(
    contract, endDateTime='', durationStr='10 y',
    barSizeSetting='1 hour', whatToShow='TRADES', useRTH=True)

df = util.df(historical_data)
df.head()

Unnamed: 0,date,open,high,low,close,volume,average,barCount
0,2022-12-27 15:00:00-06:00,3986.0,3986.0,3986.0,3986.0,1.0,3986.0,1
1,2022-12-28 10:00:00-06:00,3955.0,3955.0,3955.0,3955.0,1.0,3955.0,1
2,2022-12-28 11:00:00-06:00,3955.0,3955.0,3955.0,3955.0,0.0,3955.0,0
3,2022-12-28 12:00:00-06:00,3955.0,3955.0,3955.0,3955.0,0.0,3955.0,0
4,2022-12-28 13:00:00-06:00,3955.0,3955.0,3955.0,3955.0,0.0,3955.0,0


In [41]:
# save to csv
df.to_csv('es_202403.csv')

In [47]:
import pandas as pd

df = pd.read_csv('es_202403.csv', index_col=0, parse_dates=True)


Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.



In [48]:
df.tail()

Unnamed: 0,date,open,high,low,close,volume,average,barCount
1668,2024-03-12 08:30:00-05:00,5143.75,5147.5,5117.75,5143.5,61601.0,5131.025,21691
1669,2024-03-12 09:00:00-05:00,5143.5,5174.5,5139.0,5169.0,79240.0,5158.15,24136
1670,2024-03-12 10:00:00-05:00,5169.0,5175.0,5161.0,5168.25,47084.0,5168.975,14952
1671,2024-03-12 11:00:00-05:00,5168.0,5169.0,5144.75,5153.25,38567.0,5156.175,12156
1672,2024-03-12 12:00:00-05:00,5153.25,5156.0,5146.25,5148.25,5217.0,5150.45,2061


In [49]:
import plotly.graph_objects as go

df['date'] = pd.to_datetime(df['date'], utc=True)

# set to US/Central timezone
df['date'] = df['date'].dt.tz_convert('US/Central')

# Create a hover text string that includes date and prices for each point
hover_texts = df['date'].dt.strftime('%Y-%m-%d %H:%M') + '<br>Open: ' + df['open'].astype(str) + '<br>High: ' + df['high'].astype(str) + '<br>Low: ' + df['low'].astype(str) + '<br>Close: ' + df['close'].astype(str)

# Create an interactive candlestick chart using Plotly with custom hover text
fig = go.Figure(data=[go.Candlestick(x=df.index,
                                     open=df['open'],
                                     high=df['high'],
                                     low=df['low'],
                                     close=df['close'],
                                     hovertext=hover_texts,
                                     hoverinfo='text')])  # Use 'text' for custom hovertext


# Find the index positions of the middle of each trading day
middle_indices = df.groupby(df['date'].dt.date).apply(lambda x: x.index[len(x)//2]).values

# Create custom x-axis labels based on the middle index positions
x_labels = df.loc[middle_indices, 'date'].dt.strftime('%m-%d')

# Correctly find the numeric index positions of the first and last entries for each trading day
first_indices = df.groupby(df['date'].dt.date).apply(lambda x: x.index[0]).values
last_indices = df.groupby(df['date'].dt.date).apply(lambda x: x.index[-1]).values


# Create vertical lines for the start and end of each trading day
shapes = [dict(type='line',
               x0=i, y0=df['low'].min(), x1=i, y1=df['high'].max(),
               xref='x', yref='y',
               line=dict(color='green', width=1, dash='dash'))
          for i in first_indices]

# Create a separate trace for the volume data
volume_trace = go.Bar(x=df.index, y=df['volume'], name='Volume', yaxis='y2')

# Add the volume trace to the figure
fig.add_trace(volume_trace)

# Customize the layout
fig.update_layout(
    title='Historical Prices',
    yaxis=dict(title='Price'),
    yaxis2=dict(title='Volume', overlaying='y', side='right'),
    xaxis=dict(
        tickmode='array',
        #tickvals=middle_indices,
        #ticktext=x_labels,
        tickangle=-45
    ),
    xaxis_rangeslider_visible=False,
    #shapes=shapes
)

# Display the interactive plot
fig.show()

In [53]:
# day scores
scores_df = pd.read_csv('daily_scores.csv')
scores_df['published_day'] = pd.to_datetime(scores_df['published_day'])

In [59]:
scores_df.head()

Unnamed: 0,published_day,centered_score
0,2024-02-12,0.105103
1,2024-02-13,0.665504
2,2024-02-14,0.249099
3,2024-02-15,-0.041641
4,2024-02-16,-0.207694


In [64]:
# Earlierst date for scores
earliest_date = scores_df['published_day'].min()
# Add 1 day to the earliest date
earliest_date = earliest_date + pd.Timedelta(days=1)
print(earliest_date)

2024-02-13 00:00:00


In [65]:
earliest_date = earliest_date.tz_localize('US/Central')
cropped_df = df[df['date'] > earliest_date]
cropped_df.head()

Unnamed: 0,date,open,high,low,close,volume,average,barCount
1516,2024-02-13 08:30:00-06:00,4975.0,4982.0,4966.0,4968.0,232229.0,4973.6,60496
1517,2024-02-13 09:00:00-06:00,4967.75,4982.75,4961.75,4975.25,287159.0,4973.175,69886
1518,2024-02-13 10:00:00-06:00,4975.5,4988.25,4975.5,4980.5,164094.0,4983.325,39615
1519,2024-02-13 11:00:00-06:00,4980.5,4983.0,4971.25,4972.75,129101.0,4977.85,31946
1520,2024-02-13 12:00:00-06:00,4972.75,4974.5,4963.25,4965.5,122061.0,4969.35,29737


In [67]:
# Assign a score to each trading day in the cropped_df
def assign_score(row):
    # extract the date with no time
    date = row['date'].date() - pd.Timedelta(days=1)
    # find the corresponding row in the scores_df
    score_row = scores_df[scores_df['published_day'].dt.date == date]
    # if there is no corresponding row, return a score of 0
    if score_row.empty:
        return 0
    # otherwise, return the score from the score_row
    return score_row['centered_score'].values[0]

cropped_df['prev_day_score'] = cropped_df.apply(assign_score, axis=1)
cropped_df.head(50)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,date,open,high,low,close,volume,average,barCount,prev_day_score
1516,2024-02-13 08:30:00-06:00,4975.0,4982.0,4966.0,4968.0,232229.0,4973.6,60496,0.105103
1517,2024-02-13 09:00:00-06:00,4967.75,4982.75,4961.75,4975.25,287159.0,4973.175,69886,0.105103
1518,2024-02-13 10:00:00-06:00,4975.5,4988.25,4975.5,4980.5,164094.0,4983.325,39615,0.105103
1519,2024-02-13 11:00:00-06:00,4980.5,4983.0,4971.25,4972.75,129101.0,4977.85,31946,0.105103
1520,2024-02-13 12:00:00-06:00,4972.75,4974.5,4963.25,4965.5,122061.0,4969.35,29737,0.105103
1521,2024-02-13 13:00:00-06:00,4965.25,4965.5,4940.25,4943.0,234850.0,4951.625,51690,0.105103
1522,2024-02-13 14:00:00-06:00,4943.0,4972.5,4936.5,4971.75,390812.0,4955.425,69362,0.105103
1523,2024-02-13 15:00:00-06:00,4971.75,4974.25,4968.25,4973.25,75018.0,4970.825,12618,0.105103
1524,2024-02-14 08:30:00-06:00,4997.25,5002.25,4986.75,4998.0,159257.0,4994.975,43413,0.665504
1525,2024-02-14 09:00:00-06:00,4998.0,5009.0,4988.5,4988.5,214926.0,4999.975,53786,0.665504


In [68]:
# save to csv
cropped_df.to_csv('es_202403_with_scores.csv')