### Ticker Generation Workflow

1.  Run the Finviz Colab notebook to generate `ticker_yyyy-mm-dd_stocks_etfs.csv`.
2.  Combine the tickers from the ticker files in the Downloads directory into a set of unique tickers.
3.  Save the unique tickers as a CSV file that `Yloader` uses to download the OHLCV data.

### Run the Finviz Colab notebook to generate `ticker_yyyy-mm-dd_stocks_etfs.csv`
### Combine the tickers from the download directory's ticker files into a set of unique tickers
### Save the unique tickers as a CSV file for Yloader to download the OHLCV data for these tickers

In [2]:
import glob
import os # Used for os.path.expanduser to robustly find the home directory
from pathlib import Path

def get_recent_downloads_csv_files(
    prefix: str = 'ticker',
    count: int = 10,
    directory_name: str = "Downloads"
) -> list[str]:
    """
    Return the names of the most recently modified CSV files whose filename
    starts with ``prefix`` inside ``<home>/<directory_name>``.

    The prefix is matched literally: glob metacharacters in it (``*``, ``?``,
    ``[``) are escaped, so a prefix such as ``'data[1]'`` only matches files
    whose names really begin with ``data[1]``.

    Args:
        prefix (str): The prefix the CSV filenames must start with (e.g., 'ticker').
        count (int): The maximum number of recent filenames to return.
                     Zero or a negative value yields an empty list.
        directory_name (str): The name of the subdirectory in the user's home
                              folder to search (e.g., "Downloads", "Documents").

    Returns:
        list[str]: Bare filenames (no directory component), sorted from most
                   recently modified to oldest. An empty list is returned if
                   the directory does not exist, nothing matches, or an error
                   occurs while scanning (the error is printed, not raised).
    """
    try:
        # 1. Resolve the directory to scan, relative to the user's home folder.
        target_dir = Path.home() / directory_name

        if not target_dir.is_dir():
            print(f"Error: Directory '{target_dir}' not found.")
            return []

        # 2. Collect regular files named <prefix>*.csv. The prefix is escaped
        #    so that it is treated as literal text rather than as part of the
        #    glob pattern.
        pattern = f"{glob.escape(prefix)}*.csv"
        candidate_files = [
            entry for entry in target_dir.glob(pattern)
            if entry.is_file()
        ]

        if not candidate_files:
            return []

        # 3. Order by last-modification time, newest first.
        newest_first = sorted(
            candidate_files,
            key=lambda entry: entry.stat().st_mtime,
            reverse=True,
        )

        # 4. Keep at most `count` names. A negative count would otherwise
        #    slice from the end of the list (dropping the oldest files), so it
        #    is clamped to zero; None is passed through and means "no limit".
        limit = None if count is None else max(count, 0)
        return [entry.name for entry in newest_first[:limit]]

    except Exception as e:
        # Best-effort helper: report the problem and fall back to "no files".
        print(f"An error occurred: {e}")
        return []

if __name__ == "__main__":
    # Fetch up to ten of the newest 'ticker*.csv' files from Downloads.
    # `recent_ticker_files` is reused by the cells that follow.
    recent_ticker_files = get_recent_downloads_csv_files(prefix='ticker', count=10)

    if not recent_ticker_files:
        print("No 'ticker' CSV files found in your Downloads directory, or an error occurred.")
    else:
        print("Most recent 'ticker' CSV files found in Downloads:")
        for i, filename in enumerate(recent_ticker_files):
            print(f"{i+1}. {filename}")

    # The same helper can scan another folder under the home directory, e.g.
    # get_recent_downloads_csv_files(prefix='data', count=3, directory_name="Documents").

Most recent 'ticker' CSV files found in Downloads:
1. ticker_2025-06-13_stocks_etfs.csv
2. ticker_2025-06-12_stocks_etfs.csv
3. ticker_2025-06-11_stocks_etfs.csv
4. ticker_2025-06-10_stocks_etfs.csv
5. ticker_2025-06-09_stocks_etfs.csv
6. ticker_2025-06-06_stocks_etfs.csv
7. ticker_2025-06-05_stocks_etfs.csv
8. ticker_2025-06-04_stocks_etfs.csv
9. ticker_2025-06-03_stocks_etfs.csv
10. ticker_2025-06-02_stocks_etfs.csv


In [3]:
# Derive both locations from the current user's home directory instead of
# hard-coding one user's absolute paths. `recent_ticker_files` was collected
# from Path.home() / "Downloads", so data_dir must point at that same folder
# for the bare filenames to resolve.
data_dir = str(Path.home() / "Downloads")
# Folder that receives the combined ticker file.
yloader_ticker_dir = str(Path.home() / "Desktop" / "yloader" / "tickers")

print(f'Data directory: {data_dir}')
print(f"Ticker csv files: {recent_ticker_files}")


Data directory: C:/Users/ping/Downloads/
Ticker csv files: ['ticker_2025-06-13_stocks_etfs.csv', 'ticker_2025-06-12_stocks_etfs.csv', 'ticker_2025-06-11_stocks_etfs.csv', 'ticker_2025-06-10_stocks_etfs.csv', 'ticker_2025-06-09_stocks_etfs.csv', 'ticker_2025-06-06_stocks_etfs.csv', 'ticker_2025-06-05_stocks_etfs.csv', 'ticker_2025-06-04_stocks_etfs.csv', 'ticker_2025-06-03_stocks_etfs.csv', 'ticker_2025-06-02_stocks_etfs.csv']


In [4]:
import os
import pandas as pd

def combine_tickers_from_files(recent_ticker_files, data_dir) -> list[str]:
    """
    Read ticker symbols from several single-column CSV files and return them
    as one sorted list without duplicates.

    Values are read as plain text with pandas' missing-value detection turned
    off, so symbols that look like NA markers (e.g. 'NA', 'NAN', 'NULL') are
    kept verbatim instead of being collapsed into the string 'nan'.
    Surrounding whitespace is stripped and blank entries are dropped.

    Args:
        recent_ticker_files (list): A list of filenames (e.g., ['file1.csv', 'file2.csv'])
                            located in data_dir. Each file should be a CSV
                            without a header and contain a single column of tickers.
                            Names not ending in '.csv' (case-insensitive) are skipped
                            with a warning.
        data_dir (str): The directory path where the ticker files are located.

    Returns:
        list: A sorted list of unique ticker symbols. Files that are missing,
              empty, or unreadable are reported via print() and skipped.
    """
    all_tickers = set()

    for file_name in recent_ticker_files:
        file_path = os.path.join(data_dir, file_name)
        if not file_path.lower().endswith('.csv'):
            print(f"Warning: File '{file_name}' is not a CSV file and will be skipped.")
            continue
        try:
            # dtype=str + na_filter=False: every cell stays exactly the text
            # found in the file (no NaN conversion, no bool/number inference).
            df = pd.read_csv(
                file_path,
                header=None,
                names=['ticker'],
                dtype=str,
                na_filter=False,
            )
            if df.empty or 'ticker' not in df.columns:
                print(f"Warning: File '{file_name}' is empty or not formatted as expected.")
                continue

            stripped = df['ticker'].astype(str).str.strip()
            # Drop cells that were empty or whitespace-only.
            all_tickers.update(stripped[stripped != ''].tolist())
        except FileNotFoundError:
            print(f"Error: File '{file_name}' not found in directory '{data_dir}'.")
        except pd.errors.EmptyDataError:
            print(f"Warning: File '{file_name}' is empty and will be skipped.")
        except Exception as e:
            print(f"An error occurred while processing file '{file_name}': {e}")

    return sorted(all_tickers)


In [5]:
# Merge the recent ticker files into one sorted, de-duplicated list.
ticker_list = combine_tickers_from_files(recent_ticker_files, data_dir)

# Report the number of unique tickers, then the full list.
print(f"\n{len(ticker_list)} Combined and Sorted Unique Tickers:")
print(f'ticker_list:\n{ticker_list}')



1588 Combined and Sorted Unique Tickers:
ticker_list:
['A', 'AA', 'AAL', 'AAON', 'AAPL', 'ABBV', 'ABEV', 'ABNB', 'ABT', 'ACGL', 'ACHR', 'ACI', 'ACM', 'ACN', 'ACWI', 'ACWV', 'ACWX', 'ADBE', 'ADC', 'ADI', 'ADM', 'ADP', 'ADSK', 'ADT', 'AEE', 'AEG', 'AEM', 'AEP', 'AER', 'AES', 'AFG', 'AFL', 'AFRM', 'AGCO', 'AGG', 'AGI', 'AGNC', 'AIG', 'AIQ', 'AIRR', 'AIT', 'AIZ', 'AJG', 'AKAM', 'AL', 'ALAB', 'ALB', 'ALC', 'ALGN', 'ALK', 'ALL', 'ALLE', 'ALLY', 'ALNY', 'ALSN', 'ALV', 'AM', 'AMAT', 'AMCR', 'AMD', 'AME', 'AMGN', 'AMH', 'AMLP', 'AMP', 'AMT', 'AMX', 'AMZN', 'AN', 'ANET', 'ANSS', 'AON', 'AOS', 'APA', 'APD', 'APG', 'APH', 'APO', 'APP', 'APPF', 'APTV', 'AR', 'ARCC', 'ARE', 'ARES', 'ARGX', 'ARKB', 'ARKK', 'ARM', 'ARMK', 'ARW', 'AS', 'ASML', 'ASND', 'ASR', 'ASTS', 'ASX', 'ATI', 'ATO', 'ATR', 'AU', 'AUR', 'AVAV', 'AVB', 'AVDE', 'AVDV', 'AVEM', 'AVGO', 'AVLV', 'AVTR', 'AVUS', 'AVUV', 'AVY', 'AWI', 'AWK', 'AXON', 'AXP', 'AXS', 'AXTA', 'AYI', 'AZEK', 'AZN', 'AZO', 'B', 'BA', 'BABA', 'BAC', 'BAH', 'BALL'

In [5]:
import os
import csv

def save_tickers_to_csv(ticker_list, directory_path):
    """
    Write the given tickers to 'tickers.csv' inside ``directory_path``,
    one ticker per row.

    The directory is created if it is missing. Failures are reported with
    print() rather than raised; the function returns None either way.

    Args:
        ticker_list (list): A list of ticker symbols (strings).
        directory_path (str): The path to the directory where 'tickers.csv' will be saved.
    """
    file_path = os.path.join(directory_path, "tickers.csv")

    try:
        # Ensure the destination folder exists before opening the file.
        os.makedirs(directory_path, exist_ok=True)

        # newline='' lets the csv module control line endings itself.
        with open(file_path, 'w', newline='') as out_file:
            # Each ticker is wrapped in a one-element row.
            csv.writer(out_file).writerows([symbol] for symbol in ticker_list)

        print(f"Successfully saved tickers to: {file_path}")

    except IOError as e:
        print(f"Error: Could not write to file {file_path}. IOError: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")


In [6]:
# Write the combined ticker list into the Yloader ticker directory.
print(f"Attempting to save tickers to: {yloader_ticker_dir}")
save_tickers_to_csv(ticker_list, yloader_ticker_dir)

# NOTE: this count is printed whether or not the save succeeded;
# save_tickers_to_csv reports its own failures via print().
print(f"number of tickers saved: {len(ticker_list)}")

Attempting to save tickers to: C:\Users\ping\Desktop\yloader\tickers
Successfully saved tickers to: C:\Users\ping\Desktop\yloader\tickers\tickers.csv
number of tickers saved: 1588
