In [28]:
import datetime
import ib_insync
import pandas as pd
import logging
import numpy as np
import time
import os

# Start ib_insync's event-loop support for notebooks before any IB call is made.
# NOTE(review): confirm against the installed ib_insync version that this is still required.
ib_insync.util.startLoop()

In [29]:
from ib_insync import*

In [30]:
LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
LOG_FILE = 'ib_downloader_1min_bar.log'


def attach_file_handler(target_logger, log_file, log_formatter):
    """
    Return the FileHandler of ``target_logger`` that writes to ``log_file``,
    creating and attaching it only when no such handler exists yet.

    Re-running this cell therefore reuses the existing handler instead of
    stacking another one, which would write every record to the file once
    per execution of the cell.

    Parameters
    ----------
    target_logger : logging.Logger
        Logger that should own the file handler.
    log_file : str
        Path of the log file.
    log_formatter : logging.Formatter
        Formatter applied to the (new or reused) handler.

    Returns
    -------
    logging.FileHandler
        The handler attached to ``target_logger`` for ``log_file``.
    """
    # FileHandler stores its target as an absolute path in ``baseFilename``.
    wanted_path = os.path.abspath(log_file)
    for existing in target_logger.handlers:
        if isinstance(existing, logging.FileHandler) and existing.baseFilename == wanted_path:
            existing.setFormatter(log_formatter)
            return existing

    new_handler = logging.FileHandler(log_file)
    new_handler.setFormatter(log_formatter)
    target_logger.addHandler(new_handler)
    return new_handler


# Console output for every logger; basicConfig is a no-op once the root logger has handlers.
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
logger = logging.getLogger("ib_downloader")
formatter = logging.Formatter(LOG_FORMAT)
handler = attach_file_handler(logger, LOG_FILE, formatter)

In [31]:
class IbDownloadTimer(object):
    """
    Client-side throttle for IB download requests.

    Keeps the timestamps of recent requests and makes ``add_new_event``
    block while more than ``max_requests`` of them lie inside the trailing
    ``window``. IB only allows 60 requests every 10 minutes; the default
    limit of 40 leaves a safety margin below that.

    Parameters
    ----------
    max_requests : int
        A new request is delayed while more than this many recorded
        timestamps are still inside the window. Must be >= 0.
    window : datetime.timedelta
        Length of the trailing time window.
    poll_seconds : float
        Pause between two checks while waiting for the window to clear.
        Must be >= 0.

    Raises
    ------
    ValueError
        If ``max_requests`` or ``poll_seconds`` is negative.
    """
    def __init__(self,
                 max_requests: int = 40,
                 window: datetime.timedelta = datetime.timedelta(minutes=10),
                 poll_seconds: float = 2):
        if max_requests < 0:
            # A negative limit could never be satisfied and would block forever.
            raise ValueError("max_requests must be >= 0")
        if poll_seconds < 0:
            raise ValueError("poll_seconds must be >= 0")
        self._max_requests = max_requests
        self._window = window
        self._poll_seconds = poll_seconds
        # Object dtype from the start: the array only ever holds datetime objects.
        self._t_array: np.ndarray = np.array([], dtype=object)

    def reset(self):
        """
        Reset timer
        """
        self._t_array = np.array([], dtype=object)

    def _drop_expired(self):
        """
        Forget every timestamp that is older than the window.
        """
        age = datetime.datetime.now() - self._t_array
        self._t_array = self._t_array[age <= self._window]

    def add_new_event(self):
        """
        Add a new download event into the time array.

        Blocks (polling every ``poll_seconds``) while more than
        ``max_requests`` timestamps are still inside the window, then
        records the current time as the new event.
        """
        self._drop_expired()

        if len(self._t_array) > self._max_requests:
            logger.info("Waiting until IB 60/10 minutes time buffer clears")
            while len(self._t_array) > self._max_requests:
                time.sleep(self._poll_seconds)
                self._drop_expired()

        self._t_array = np.append(self._t_array, datetime.datetime.now())

    @property
    def t_array(self):
        """Array of the request timestamps currently remembered by the timer."""
        return self._t_array
            

In [46]:
# Open the API session on 127.0.0.1:7497 as client 1. The IbDataDownloader
# methods below read this `ib` object as a global, so this cell has to run
# before any download is started.
ib = ib_insync.IB()
ib.connect('127.0.0.1', 7497, clientId=1)

2020-03-05 07:58:46,480 - ib_insync.client - INFO - Connecting to 127.0.0.1:7497 with clientId 1...
2020-03-05 07:58:46,482 - ib_insync.client - INFO - Connected
2020-03-05 07:58:46,595 - ib_insync.client - INFO - Logged on to server version 142
2020-03-05 07:58:46,652 - ib_insync.client - INFO - API connection ready
2020-03-05 07:58:46,830 - ib_insync.ib - INFO - Synchronization complete


<IB connected to 127.0.0.1:7497 clientId=1>

In [49]:
class IbDataDownloader(object):
    """
    Download historical bars for futures contracts and store them as CSV files.

    All requests go through the module-level ``ib`` connection, and every
    historical-data request is first registered with an ``IbDownloadTimer``.
    """
    def __init__(self):
        self._timer: IbDownloadTimer = IbDownloadTimer()
        # Filled by load_symbols_csv(); set here so the attribute always exists.
        self._symbols_df = None

    @property
    def t_array(self):
        """Timestamps currently remembered by the request timer."""
        return self._timer.t_array

    def reset_timer(self):
        """Forget all request timestamps recorded so far."""
        self._timer.reset()

    def load_symbols_csv(self,
                         csv_path: str):
        """Read the symbols table at ``csv_path`` into ``self._symbols_df``."""
        self._symbols_df = pd.read_csv(csv_path)

    def get_all_symbols_lifetime_data(self,
                                      symbols_list: list,
                                      exchanges_list: list,
                                      output_folder: str,
                                      duration_str='10 D',
                                      bar_size='1 min',
                                      what_to_Show='TRADES',
                                      useRTH=True,
                                      formatDate=1,
                                      max_retries=None):
        """
        Given a list of symbols and corresponding exchanges, download data and save to local csv files.

        One file named ``<localSymbol>_<lastTradeDateOrContractMonth>.csv`` is
        written into ``output_folder`` for every contract that returns data.

        Parameters
        ----------
        symbols_list : list
            Futures symbols to process.
        exchanges_list : list
            Exchange of each symbol, position by position.
        output_folder : str
            Target folder; created if it does not exist.
        duration_str, bar_size, what_to_Show, useRTH, formatDate
            Forwarded to ``get_one_symbol_lifetime_data``.
        max_retries : int or None
            How many times a failed contract download is retried before the
            contract is skipped. ``None`` (default) retries without limit.

        Raises
        ------
        ValueError
            If ``exchanges_list`` has fewer entries than ``symbols_list``.
        """
        # Fail before the first request instead of part-way through a long run.
        if len(exchanges_list) < len(symbols_list):
            raise ValueError("exchanges_list must provide one exchange per symbol")

        # A missing folder would otherwise make every to_csv call fail and be retried.
        os.makedirs(output_folder, exist_ok=True)

        for symbol, exchange in zip(symbols_list, exchanges_list):
            logger.info("Processing symbol: {}".format(symbol))
            for contract in self.get_sub_contracts(symbol, exchange):
                self._download_contract_with_retry(
                    contract,
                    output_folder,
                    max_retries,
                    duration_str=duration_str,
                    bar_size=bar_size,
                    whatToShow=what_to_Show,
                    useRTH=useRTH,
                    formatDate=formatDate)

    def _download_contract_with_retry(self, contract, output_folder, max_retries, **request_kwargs):
        """
        Download one contract and write its CSV, retrying after failures.

        Returns True once an attempt finishes without raising (even if there
        was no data to save) and False when ``max_retries`` is exhausted.
        """
        failures = 0
        while True:
            try:
                df = self.get_one_symbol_lifetime_data(contract=contract, **request_kwargs)
                # Make sure the data returned is a non-empty frame and not a None object
                if isinstance(df, pd.DataFrame) and not df.empty:
                    file_name = contract.localSymbol + "_" + contract.lastTradeDateOrContractMonth + ".csv"
                    df.to_csv(os.path.join(output_folder, file_name), index=False)
                return True
            except Exception:
                # ``Exception`` rather than a bare except, so KeyboardInterrupt
                # can still stop a long-running download from the notebook.
                failures += 1
                logger.exception("Download failed for contract: {} (failure #{})".format(str(contract), failures))
                if max_retries is not None and failures > max_retries:
                    logger.error("Giving up on contract: {}".format(str(contract)))
                    return False
                time.sleep(30)
                self._reconnect()

    def _reconnect(self):
        """
        Re-open the session of the module-level ``ib`` object.

        A failed reconnect is logged instead of raised so the caller's retry
        loop gets another chance after its next pause.
        """
        try:
            # Close a still-open session first so the new one does not
            # compete with it for the same clientId.
            if ib.isConnected():
                ib.disconnect()
            ib.connect('127.0.0.1', 7497, clientId=1)
        except Exception:
            logger.exception("Reconnecting to IB failed")

    def get_sub_contracts(self,
                          symbol: str,
                          exchange: str):
        """
        Given an symbol and exchange, return all possible sub contracts in a list.

        Only contracts listed on ``exchange`` whose last trade date lies less
        than 120 days in the future are returned (expired ones included).
        Returns an empty list when IB reports no contract details.
        """
        contract_details = ib.reqContractDetails(Future(symbol=symbol, includeExpired=True))
        time.sleep(10)
        if not contract_details:
            return []

        cd_frame = ib_insync.util.df([cd.contract for cd in contract_details])
        # Work on a copy so adding a column does not write into a slice of cd_frame.
        frame = cd_frame[cd_frame["exchange"] == exchange].copy()
        frame["LastdayDatetime"] = pd.to_datetime(frame["lastTradeDateOrContractMonth"], format='%Y%m%d')
        latest_last_day = datetime.datetime.now() + datetime.timedelta(days=120)
        frame = frame[frame["LastdayDatetime"] < latest_last_day]

        return [
            Future(localSymbol=local_symbol,
                   exchange=contract_exchange,
                   lastTradeDateOrContractMonth=last_trade_date,
                   includeExpired=True)
            for local_symbol, contract_exchange, last_trade_date in zip(
                frame["localSymbol"],
                frame["exchange"],
                frame["lastTradeDateOrContractMonth"])
        ]

    def get_one_symbol_lifetime_data(self,
                                     contract: ib_insync.Contract,
                                     duration_str='10 D',
                                     bar_size='1 min',
                                     whatToShow='TRADES',
                                     useRTH=True,
                                     formatDate=1):
        """
        Get one contract data in its entire lifetime. Return a pandas dataframe.

        Requests are issued backwards in time: each one ends at the date of
        the earliest bar received so far, and the loop stops as soon as a
        request returns fewer than two bars.

        Returns
        -------
        pandas.DataFrame or None
            All received bars in chronological request order. ``None`` when
            the contract cannot be qualified to exactly one match.
            NOTE(review): ``ib_insync.util.df`` presumably also yields None
            when no bars were collected - confirm; the caller already guards
            against a non-DataFrame result.
        """
        # Make sure the contract is good and unique
        if len(ib.qualifyContracts(contract)) != 1:
            logger.error("incorrect contract: {}".format(str(contract)))
            logger.info("sleep 10 sec")
            time.sleep(10)
            return None
        logger.info("sleep 10 sec")
        time.sleep(10)
        logger.info("downloading symbol: {}".format(str(contract)))

        # An empty end date asks for data up to the current time
        dt = ""
        barsList = []
        while True:
            # Blocks here if too many requests were made recently
            self._timer.add_new_event()
            logger.info("Start downloading data to date: {}".format(dt))
            start_time = time.time()
            bars = ib.reqHistoricalData(
                contract,
                endDateTime=dt,
                durationStr=duration_str,
                barSizeSetting=bar_size,
                whatToShow=whatToShow,
                useRTH=useRTH,
                formatDate=formatDate)
            end_time = time.time()

            # If no bars received or received bar data is always the last day, stop
            finished = not bars or len(bars) < 2
            if not finished:
                barsList.append(bars)
                dt = bars[0].date
                logger.info("downloading using time: {} sec".format(round(end_time - start_time, 2)))
                logger.info("Finish downloading data to date: {}".format(dt))

            # Sleep some time before next request due to IB does not like frequent requests
            logger.info("sleep 30 sec")
            time.sleep(30)
            if finished:
                break

        # Chunks were collected newest first; flatten them oldest first
        allBars = [b for bars in reversed(barsList) for b in bars]
        df = ib_insync.util.df(allBars)
        logger.info("Finish downloading all data for symbol: {}".format(str(contract)))
        return df

In [50]:
# One downloader instance (and therefore one request timer) is used for every download below.
downloader = IbDataDownloader()

```python
# single symbol download
contract = Future(localSymbol="CLM0", exchange = "NYMEX")
df = downloader.get_one_symbol_lifetime_data(contract,
                                             duration_str='120 D',
                                             bar_size='1 day',
                                             whatToShow='TRADES',
                                             useRTH=True,
                                             formatDate=1)
```

In [51]:
# Folder that receives one CSV file per downloaded contract.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
output_folder = r"C:\nutstore\Nutstore\future_1min_bars"

In [52]:
# Table of futures symbols; the "Symbol" and "Exchange" columns are used further down.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
symbol_df_path = r"C:\nutstore\Nutstore\my_interested_symbols.csv"
symbol_df = pd.read_csv(symbol_df_path)

In [53]:
# Display the symbol table loaded above.
symbol_df

Unnamed: 0,Symbol,Name,Exchange,Start Date,First Contract,Months,Tick Size,Pricing Unit,Deliverable,Big Point Value
0,AD,CME Australian Dollar AUD,CME,3/16/1987,H1987,HMUZ,$.0001 per Australian dollar increments ($10.0...,US dollars and cents per Australian dollars,"100,000 Australian dollars",100000.0
1,BO,CBOT Soybean Oil,CBOT,9/20/1961,U1961,FHKNQUVZ,1/100 of a cent ($0.0001) per pound ($6.00 per...,Cents per pound,Crude soybean oil meeting exchange-approved gr...,600.0
2,BP,CME British Pound GBP,CME,9/15/1975,U1975,HMUZ,$.0001 per British pound increments ($6.25/con...,US dollars and cents,"62,500 British pounds",62500.0
3,C,CBOT Corn,CBOT,3/22/1960,H1960,HKNUZ,1/4 of one cent per bushel ($12.50 per contract),Cents per bushel,"#2 Yellow at contract Price, #1 Yellow at a 1....",50.0
4,CD,CME Canadian Dollar CAD,CME,6/14/1977,M1977,HMUZ,$.0001 per Canadian dollar increments ($10.00/...,US dollars and cents,"100,000 Canadian dollars",100000.0
5,CL,NYMEX WTI Crude Oil,NYMEX,5/18/1983,M1983,FGHJKMNQUVXZ,$0.01per barrel,U.S. Dollars and Cents per barrel,Delivery shall be made free-on-board (F.O.B.) ...,1000.0
6,DA,CME Class III Milk,CME,12/31/1997,Z1997,FGHJKMNQUVXZ,$0.01 per cwt (= $20.00 per contract),Cents per hundredweight (cwt.),Class III Milk,
7,EC,CME Euro FX,CME,3/15/1999,H1999,HMUZ,$.0001 per euro increments ($12.50/contract). ...,US dollars and cents,"None, this contract is cash settled.",125000.0
8,ED,CME Eurodollar,CME,3/15/1982,H1982,HMUZ,Nearest expiring contract month: One quarter o...,Quoted in IMM Three-Month LIBOR index points o...,"None, this contract is cash settled",2500.0
9,ES,CME S&P 500 Index E-Mini,CME,12/18/1997,Z1997,HMUZ,0.25 index points=$12.50,Index Points,"None, this contract is cash settled",50.0


In [54]:
# Take the "Symbol" and "Exchange" columns from row 35 onwards as plain lists.
# NOTE(review): the offset 35 skips the first 35 rows - presumably to resume
# after symbols that were already downloaded; confirm before re-running.
symbols_list, exchanges_list = (
    symbol_df[column].tolist()[35:] for column in ("Symbol", "Exchange")
)

In [None]:

# Download 1-minute TRADES bars (regular trading hours only) for every selected
# symbol and write them to output_folder. This call blocks until all contracts
# have been processed and sleeps between requests, so expect a long run time.
downloader.get_all_symbols_lifetime_data(symbols_list=symbols_list,
                                         exchanges_list=exchanges_list,
                                         output_folder=output_folder,
                                         duration_str='10 D',
                                         bar_size='1 min',
                                         what_to_Show='TRADES',
                                         useRTH=True,
                                         formatDate=1)
        