# Price
* `/v8/finance/chart/AAPL?symbol=AAPL&period1=0&period2=9999999999&interval=3mo`  

#### Intervals:

* `&interval=3mo` 3 months, going back until initial trading date.
* `&interval=1d` 1 day, going back until initial trading date.
* `&interval=5m` 5 minutes, going back 80(ish) days.
* `&interval=1m` 1 minute, going back 4-5 days.

How far back you can go with each interval is a little confusing and seems inconsistent. My assumption is that internally Yahoo counts in trading days, and my naive approach did not account for holidays. That is only a guess, though, and YMMV.

`period1=`: unix timestamp representation of the date you wish to **start** at. Values below the initial trading date will be rounded up to the initial trading date.

`period2=`: unix timestamp representation of the date you wish to **end** at. Values greater than the last trading date will be rounded down to the most recent timestamp available.

**Note:** *If you query with a `period1=` (start date) that is too far in the past for the interval you've chosen, Yahoo will return prices in the `3mo` interval regardless of what interval you requested.*


In [1]:
# import libraries
import os
import difflib
import itertools
import pandas as pd
from multiprocessing.dummy import Pool
from datetime import datetime
import time
import urllib.request
import json
import http.client as httplib

In [2]:
# Let's make a code snippet which can tell if we have working internet connection or not.

def check_internet(host="www.google.com", timeout=5):
    """Report whether an HTTP request can be sent to ``host``.

    Opens a plain HTTP connection and sends a single ``HEAD /`` request. The
    response is not read; being able to send the request is taken as proof
    that the network is reachable.

    Args:
        host: Host to probe, optionally in ``host:port`` form.
        timeout: Socket timeout in seconds.

    Returns:
        True if the request was sent, False if a network or HTTP protocol
        error occurred.
    """
    conn = httplib.HTTPConnection(host, timeout=timeout)
    try:
        conn.request("HEAD", "/")
        return True
    except (OSError, httplib.HTTPException):
        # Only connectivity/protocol failures mean "offline". KeyboardInterrupt
        # and programming errors must propagate instead of being retried
        # forever by callers that loop on this function.
        return False
    finally:
        conn.close()

In [3]:
# Now write the function `get_historic_price`, which downloads the price history for a given `query_url`.

def get_historic_price(query_url, json_path, csv_path):
    """Download one ticker's chart data and store it as JSON and CSV.

    Waits (retrying every 5 seconds) until ``check_internet()`` succeeds, then
    fetches ``query_url``. The raw response is written to
    ``<json_path>/<symbol>.json`` and a table with the columns Date, Low, Open,
    Volume, High, Close and Adjusted Close is written to
    ``<csv_path>/<symbol>.csv``. Existing files are overwritten.

    Args:
        query_url: Chart API URL. The ticker symbol is taken from the text
            after ``symbol=`` and before the first ``&period``.
        json_path: Directory for the raw JSON response (created if missing).
        csv_path: Directory for the CSV table (created if missing).

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    # Local import: the file only imports `datetime` from the datetime module.
    from datetime import timezone

    while not check_internet():
        print("Could not connect, trying again in 5 seconds...")
        time.sleep(5)

    stock_id = query_url.split("&period")[0].split("symbol=")[1]

    # os.path.join works whether or not the directory ends with a separator;
    # plain string concatenation silently wrote next to the directory otherwise.
    json_file = os.path.join(json_path, stock_id + '.json')
    csv_file = os.path.join(csv_path, stock_id + '.csv')

    if os.path.exists(csv_file) and os.path.getsize(csv_file) != 0:
        print("<<<  Historical data of " + stock_id + " already exists, Updating data...")

    try:
        with urllib.request.urlopen(query_url) as url:
            parsed = json.loads(url.read().decode())
    except (OSError, ValueError, httplib.HTTPException):
        # URLError/HTTPError derive from OSError; malformed JSON, undecodable
        # bytes and unsupported URL schemes raise ValueError.
        print("|||  Historical data of " + stock_id + " doesn't exist")
        return

    for directory in (json_path, csv_path):
        # An empty string means "current directory"; os.makedirs('') would raise.
        if directory:
            os.makedirs(directory, exist_ok=True)

    # Mode 'w' truncates an existing file, so no prior os.remove is needed.
    with open(json_file, 'w', encoding='utf-8') as outfile:
        json.dump(parsed, outfile, indent=4)

    try:
        result = parsed['chart']['result'][0]
        quote = result['indicators']['quote'][0]
        dates = [
            datetime.fromtimestamp(int(stamp), tz=timezone.utc).strftime('%d-%m-%Y')
            for stamp in result['timestamp']
        ]
        rows = zip(
            dates,
            quote['low'],
            quote['open'],
            quote['volume'],
            quote['high'],
            quote['close'],
            result['indicators']['adjclose'][0]['adjclose'],
        )
        df = pd.DataFrame(
            list(rows),
            columns=['Date', 'Low', 'Open', 'Volume', 'High', 'Close', 'Adjusted Close'],
        )
    except (KeyError, IndexError, TypeError, ValueError, OverflowError):
        # The response was valid JSON but lacks (or has null) price series.
        print(">>>  Historical data of " + stock_id + " exists but has no trading data")
        return

    try:
        df.to_csv(csv_file, sep=',', index=None)
    except OSError as err:
        # Report write failures as such instead of as "no trading data".
        print("|||  Historical data of " + stock_id + " could not be written: " + str(err))
        return
    print(">>>  Historical data of " + stock_id + " saved")


In [4]:
# Get the stock data for several tickers in parallel using multithreading.

def get_stock_data(country_names=None, desired_company_list=None, max_workers=32):
    """Download daily price history for the requested companies.

    Company names are fuzzy-matched (``difflib.get_close_matches``) against the
    ``Name`` column of the ticker spreadsheet, optionally restricted to the
    given countries. Each matched ticker is downloaded with
    ``get_historic_price`` on a thread pool, and the files are written below
    ``historic_data/json`` and ``historic_data/csv``.

    Args:
        country_names: Country names to restrict the search to
            (case-insensitive). None or empty means all countries.
        desired_company_list: Company names to look up. None or empty means
            nothing is downloaded.
        max_workers: Upper bound on the number of download threads.

    Returns:
        None. Progress is reported with ``print``.
    """
    # Normalise user input: surrounding whitespace (e.g. from "India, USA")
    # would otherwise prevent a match, and blank entries are meaningless.
    wanted_countries = [
        name.strip().lower() for name in (country_names or []) if name and name.strip()
    ]
    wanted_companies = [
        name.strip() for name in (desired_company_list or []) if name and name.strip()
    ]

    # Load ticker data. The sheet keeps its real header in row index 2 and the
    # data from row index 3 on; columns 5-7 are dropped before re-heading.
    ticker_file_path = os.path.join("Assets", "Yahoo Ticker Symbols - September 2017.xlsx")
    temp_df = pd.read_excel(ticker_file_path)
    temp_df = temp_df.drop(temp_df.columns[[5, 6, 7]], axis=1)
    headers = temp_df.iloc[2]
    df = pd.DataFrame(temp_df.values[3:], columns=headers)

    # Filter ticker data based on country names
    if wanted_countries:
        new_df = df[df["Country"].str.lower().isin(wanted_countries)]
    else:
        new_df = df  # Retrieve data for all countries

    # difflib can only compare strings; a single NaN name would make every
    # lookup raise TypeError, so non-string names are left out of the search.
    known_names = [name for name in new_df['Name'] if isinstance(name, str)]

    # Get ticker list for desired companies
    ticker_list = []
    for company in wanted_companies:
        matches = difflib.get_close_matches(company, known_names)
        ticker = new_df.loc[new_df['Name'] == matches[0], 'Ticker'].iloc[0] if matches else None
        if not isinstance(ticker, str):
            print("Company name " + company + " not found.")
            continue
        ticker_list.append(ticker)

    # Two names can resolve to the same ticker; downloading it twice would
    # make two threads write the same files at once.
    ticker_list = list(dict.fromkeys(ticker_list))

    # Create query URLs for stock tickers
    query_urls = [
        "https://query1.finance.yahoo.com/v8/finance/chart/" + ticker + "?symbol=" + ticker
        + "&period1=0&period2=9999999999&interval=1d&includePrePost=true&events=div%2Csplit"
        for ticker in ticker_list
    ]

    if not query_urls:
        # Pool(processes=0) raises ValueError, so stop before creating a pool.
        print("No matching tickers found, nothing to download.")
        return

    # Set paths for saving JSON and CSV files
    json_path = os.path.join("historic_data", "json") + os.sep
    csv_path = os.path.join("historic_data", "csv") + os.sep

    # Get stock data using multithreading, with a bounded number of threads.
    worker_count = min(len(query_urls), max(1, max_workers))
    with Pool(processes=worker_count) as pool:
        pool.starmap(
            get_historic_price,
            zip(query_urls, itertools.repeat(json_path), itertools.repeat(csv_path)),
        )
    print("All downloads completed !")

# Example usage: ask for the filters, then download the matching tickers.
country_names_input = input("Enter country names separated by comma (leave blank for all countries): ")
desired_company_list_input = input("Enter desired company list separated by comma: ")

# Strip the whitespace around each entry and drop blanks, so "India, USA"
# becomes ["India", "USA"] and an empty answer becomes an empty list
# instead of [""].
country_names = [name.strip() for name in country_names_input.split(',') if name.strip()] or None
desired_company_list = [name.strip() for name in desired_company_list_input.split(',') if name.strip()]

get_stock_data(country_names, desired_company_list)

Enter country names separated by comma (leave blank for all countries): India
Enter desired company list separated by comma: State Bank of India, Tata Motors
<<<  Historical data of TATAMOTORS.NS already exists, Updating data...<<<  Historical data of SBIN.NS already exists, Updating data...

>>>  Historical data of SBIN.NS saved
>>>  Historical data of TATAMOTORS.NS saved
All downloads completed !
