#### Combine the tickers from the ticker files in the Downloads directory into a set of unique tickers
#### Save the unique tickers as a CSV file that Yloader uses to download the OHLCV data for these tickers

In [1]:
import sys
from pathlib import Path
import pandas as pd
import os
from IPython.display import display, Markdown  # Assuming you use these for display


# Configure pandas display so wide frames are shown without column truncation.
pd.set_option('display.max_columns', None)  # None = show all columns
# pd.set_option('display.max_rows', 10)       # (disabled) limit to 10 rows for readability
pd.set_option('display.width', 1000)  # allow up to 1000 characters per output line


# IPython magics (valid only inside a notebook/IPython session): load the
# autoreload extension and use mode 2, which reloads imported modules before
# each cell executes, so edits under src/ are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

# Resolve the project root from the current working directory: when the
# notebook runs from a directory named 'notebooks', the root is its parent;
# otherwise the working directory itself is treated as the root.
NOTEBOOK_DIR = Path.cwd()
ROOT_DIR = NOTEBOOK_DIR.parent if NOTEBOOK_DIR.name == 'notebooks' else NOTEBOOK_DIR

# Make modules in <root>/src importable (needed for `import utils` below).
# NOTE: there is no membership check, so re-running this cell appends the same
# entry to sys.path again.
sys.path.append(str(ROOT_DIR / 'src'))

# Print the full module search path so the src entry can be verified by eye.
print(f"Python will look in these locations:\n{sys.path}")


# --- Execute the processor ---
import utils


# Find candidate ticker files and prompt for one to select. Only the third
# return value (the list of matching file names) is used by later cells; the
# first two return values are discarded.
_, _, ticker_csv_files = utils.main_processor(
    data_dir='.\\',  # current working directory; presumably the project-side location to search — TODO confirm against utils.main_processor
    # data_dir='C:/Users/ping/Desktop/yloader',  # (disabled) alternative data_dir
    downloads_dir=None,  # None searches the Downloads dir, '' omits that search
    downloads_limit=10,  # scan only the latest 10 files in Downloads
    clean_name_override=None,  # optional filename override; presumably for the cleaned output file — verify in utils
    start_file_pattern='ticker_2025',  # match files whose names start with 'ticker_2025'
    contains_pattern='.csv',  # ...and whose names contain '.csv'
)


Python will look in these locations:
['C:\\Users\\ping\\.pyenv\\pyenv-win\\versions\\3.11.9\\python311.zip', 'C:\\Users\\ping\\.pyenv\\pyenv-win\\versions\\3.11.9\\DLLs', 'C:\\Users\\ping\\.pyenv\\pyenv-win\\versions\\3.11.9\\Lib', 'C:\\Users\\ping\\.pyenv\\pyenv-win\\versions\\3.11.9', 'c:\\Users\\ping\\Files_win10\\python\\py311\\.venv', '', 'c:\\Users\\ping\\Files_win10\\python\\py311\\.venv\\Lib\\site-packages', 'c:\\Users\\ping\\Files_win10\\python\\py311\\.venv\\Lib\\site-packages\\win32', 'c:\\Users\\ping\\Files_win10\\python\\py311\\.venv\\Lib\\site-packages\\win32\\lib', 'c:\\Users\\ping\\Files_win10\\python\\py311\\.venv\\Lib\\site-packages\\Pythonwin', 'c:\\Users\\ping\\Files_win10\\python\\py311\\stocks\\src']


<span style='color:#00ffff;font-weight:500'>[Downloads] Scanned latest 10 files • Found 3 'ticker_2025' matches</span>

**Available 'starting with 'ticker_2025' and containing '.csv'' files:**

- (1) `[DOWNLOADS]` `ticker_2025-05-09_stocks_etfs.csv` <span style='color:#00ffff'>(0.01 MB, 2025-05-09 20:12)</span>

- (2) `[DOWNLOADS]` `ticker_2025-05-08_stocks_etfs.csv` <span style='color:#00ffff'>(0.01 MB, 2025-05-08 20:16)</span>

- (3) `[DOWNLOADS]` `ticker_2025-05-07_stocks_etfs.csv` <span style='color:#00ffff'>(0.01 MB, 2025-05-07 20:30)</span>


Input a number to select file (1-3)



    **Selected paths:**
    - Source: `C:\Users\ping\Downloads\ticker_2025-05-09_stocks_etfs.csv`
    - Destination: `c:\Users\ping\Files_win10\python\py311\stocks\notebooks\ticker_2025-05-09_stocks_etfs_clean.csv`
    

In [2]:
# Directory containing the downloaded ticker CSV files that will be combined.
data_dir = 'C:/Users/ping/Downloads/'
# Directory where the combined tickers.csv is written for Yloader.
yloader_ticker_dir = r'C:\Users\ping\Desktop\yloader\tickers'

# Echo the inputs so the files being combined can be checked before processing.
# (Label corrected from "Date directory": the value printed is data_dir.)
print(f'Data directory: {data_dir}')
print(f"Ticker csv files: {ticker_csv_files}")


Date directory: C:/Users/ping/Downloads/
Ticker csv files: ['ticker_2025-05-09_stocks_etfs.csv', 'ticker_2025-05-08_stocks_etfs.csv', 'ticker_2025-05-07_stocks_etfs.csv']


In [3]:
import os
import pandas as pd

def combine_tickers_from_files(ticker_csv_files, data_dir):
    """
    Read ticker symbols from several CSV files and return them as one sorted,
    de-duplicated list.

    Every value is read as literal text, so symbols that pandas would otherwise
    treat as missing data (for example 'NA', 'NULL' or 'NaN') are kept as
    tickers instead of collapsing into the string 'nan'. Surrounding whitespace
    is stripped and blank entries are dropped.

    Processing is best-effort: a file that is not a CSV, is missing, is empty,
    or cannot be parsed is reported with a printed message and skipped; it
    never aborts the whole run.

    Args:
        ticker_csv_files (list): Filenames (e.g., ['file1.csv', 'file2.csv'])
                            located in data_dir. Each file should be a CSV
                            without a header and contain a single column of tickers.
        data_dir (str): The directory path where the ticker files are located.

    Returns:
        list: A sorted list of unique, non-empty ticker symbols (strings).
    """
    all_tickers = set()

    for file_name in ticker_csv_files:
        file_path = os.path.join(data_dir, file_name)
        # Compare case-insensitively so files named '*.CSV' are accepted too.
        if not file_path.lower().endswith('.csv'):
            print(f"Warning: File '{file_name}' is not a CSV file and will be skipped.")
            continue
        try:
            # dtype=str + keep_default_na=False: read every cell verbatim so a
            # ticker such as 'NA' is not converted to NaN by pandas' default
            # missing-value detection.
            df = pd.read_csv(
                file_path,
                header=None,
                names=['ticker'],
                dtype=str,
                keep_default_na=False,
            )
            if df.empty:
                print(f"Warning: File '{file_name}' is empty or not formatted as expected.")
                continue

            # Normalise to stripped strings; dropna() guards against any cell
            # pandas still had to fill as missing (e.g. ragged rows).
            cleaned = df['ticker'].dropna().astype(str).str.strip()
            # Discard entries that were blank or whitespace-only.
            all_tickers.update(cleaned[cleaned != ''])
        except FileNotFoundError:
            print(f"Error: File '{file_name}' not found in directory '{data_dir}'.")
        except pd.errors.EmptyDataError:
            print(f"Warning: File '{file_name}' is empty and will be skipped.")
        except Exception as e:
            # Deliberately broad: one unreadable file must not stop the others.
            print(f"An error occurred while processing file '{file_name}': {e}")

    return sorted(all_tickers)


In [4]:
# Merge the tickers from every selected file into one sorted, de-duplicated list.
ticker_list = combine_tickers_from_files(ticker_csv_files, data_dir)

# Report the number of unique tickers, followed by the tickers themselves.
print(f"\n{len(ticker_list)} Combined and Sorted Unique Tickers:")
print(f'ticker_list:\n{ticker_list}')



1576 Combined and Sorted Unique Tickers:
ticker_list:
['A', 'AA', 'AAL', 'AAON', 'AAPL', 'ABBV', 'ABEV', 'ABNB', 'ABT', 'ACGL', 'ACI', 'ACIW', 'ACM', 'ACN', 'ACWI', 'ACWV', 'ACWX', 'ADBE', 'ADC', 'ADI', 'ADM', 'ADMA', 'ADP', 'ADSK', 'ADT', 'AEE', 'AEG', 'AEM', 'AEP', 'AER', 'AES', 'AFG', 'AFL', 'AFRM', 'AGCO', 'AGG', 'AGI', 'AGNC', 'AIG', 'AIRR', 'AIT', 'AIZ', 'AJG', 'AKAM', 'AL', 'ALAB', 'ALB', 'ALC', 'ALGN', 'ALK', 'ALL', 'ALLE', 'ALLY', 'ALNY', 'ALSN', 'ALV', 'AM', 'AMAT', 'AMCR', 'AMD', 'AME', 'AMGN', 'AMH', 'AMLP', 'AMP', 'AMT', 'AMX', 'AMZN', 'AN', 'ANET', 'ANSS', 'AON', 'AOS', 'APA', 'APD', 'APG', 'APH', 'APO', 'APP', 'APPF', 'APTV', 'AR', 'ARCC', 'ARE', 'ARES', 'ARGX', 'ARKB', 'ARKK', 'ARM', 'ARMK', 'ARW', 'AS', 'ASML', 'ASND', 'ASR', 'ASTS', 'ASX', 'ATI', 'ATO', 'ATR', 'AU', 'AUR', 'AVAV', 'AVB', 'AVDE', 'AVDV', 'AVEM', 'AVGO', 'AVLV', 'AVTR', 'AVUS', 'AVUV', 'AVY', 'AWI', 'AWK', 'AXON', 'AXP', 'AXS', 'AXTA', 'AYI', 'AZEK', 'AZN', 'AZO', 'B', 'BA', 'BABA', 'BAC', 'BAH', 'BALL

In [5]:
import os
import csv

def save_tickers_to_csv(ticker_list, directory_path):
    """
    Write ticker symbols to 'tickers.csv' inside the given directory, one
    ticker per row.

    The directory is created first if it does not exist, and an existing
    'tickers.csv' is overwritten. Failures are reported with a printed message
    rather than raised, so the function always returns None.

    Args:
        ticker_list (list): A list of ticker symbols (strings).
        directory_path (str): The path to the directory where 'tickers.csv' will be saved.
    """
    # Full path of the output file; the name is fixed.
    file_path = os.path.join(directory_path, "tickers.csv")

    try:
        # Ensure the target directory (and any missing parents) exists.
        os.makedirs(directory_path, exist_ok=True)

        # newline='' lets the csv module control line endings itself.
        with open(file_path, 'w', newline='') as out_file:
            # Each ticker is wrapped in a one-element row.
            csv.writer(out_file).writerows([symbol] for symbol in ticker_list)

        print(f"Successfully saved tickers to: {file_path}")

    except OSError as e:  # IOError is an alias of OSError in Python 3
        print(f"Error: Could not write to file {file_path}. IOError: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")


In [7]:
# Write the combined ticker list to Yloader's ticker directory.
print(f"Attempting to save tickers to: {yloader_ticker_dir}")
save_tickers_to_csv(ticker_list, yloader_ticker_dir)

# NOTE: save_tickers_to_csv reports failures by printing and does not raise,
# so this count is printed whether or not the file was actually written.
print(f"number of tickers saved: {len(ticker_list)}")

Attempting to save tickers to: C:\Users\ping\Desktop\yloader\tickers
Successfully saved tickers to: C:\Users\ping\Desktop\yloader\tickers\tickers.csv
number of tickers saved: 1576
