In [161]:
# CSS selector handed to download_yahoo_finance_table for the screener pages.
selector = '.styled-table-new'

# Column names kept from each downloaded screener page, in output order.
columns_list = [
    'No.', 'Ticker', 'Company', 'Sector', 'Industry', 'Market Cap', 'Dividend',
    'Perf Week', 'Perf Month', 'Perf Quart', 'Perf Half', 'Perf Year', 'Perf YTD',
    'Beta', 'ATR', 'Volatility W', 'Volatility M',
    'SMA20', 'SMA50', 'SMA200', '52W High', '52W Low', 'RSI',
    'Volume', 'Price', 'Change',
    'Single Category', 'Asset Type', 'AUM',
    'Return% 1Y', 'Return% 3Y', 'Return% 5Y',
]

In [162]:
import requests
from bs4 import BeautifulSoup
import time
import random  # For a bit of randomness in the sleep time
import pandas as pd

def download_yahoo_finance_table(url, selector, timeout=30):
    """
    Downloads an HTML table from a web page and returns it as a DataFrame.

    The element matched by `selector` is searched for <tr> rows. Column names
    are taken from the <th> cells of the first row; every row that contains
    <td> cells becomes one record of stripped text values.

    This function performs a single request and does no rate limiting itself;
    callers are expected to pause between calls.

    Args:
        url (str): Page to fetch.
        selector (str): CSS selector locating the table (or an element that
            contains its rows).
        timeout (float or tuple): Seconds to wait for the server, passed to
            `requests.get`. Without a timeout a stalled connection would block
            the notebook indefinitely. Defaults to 30.

    Returns:
        pandas.DataFrame or None: The table contents as strings, or None when
        the request fails, the selector matches nothing, no rows or data cells
        are found, or the DataFrame cannot be built. The reason is printed.
    """
    try:
        # Add a User-Agent header to mimic a browser
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}  # Example User-Agent
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)

        soup = BeautifulSoup(response.content, 'html.parser')
        table_body = soup.select_one(selector)

        if table_body is None:
            print(f"Error: Table body not found using selector: {selector}")
            return None

        rows = table_body.find_all('tr')
        if not rows:
            print("Error: No rows found in the table.")
            return None

        # Extract headers from the first row (th elements)
        headers_list = [th.text.strip() for th in rows[0].find_all('th')]

        # Rows without <td> cells (such as the header row) yield an empty list
        # and are skipped.
        data = []
        for row in rows:
            row_data = [cell.text.strip() for cell in row.find_all('td')]
            if row_data:
                data.append(row_data)

        if not data:
            print("Error: No data found in the table rows.")
            return None

        return pd.DataFrame(data, columns=headers_list)

    except requests.exceptions.RequestException as e:
        # Covers connection errors, HTTP error statuses and timeouts.
        print(f"Error during request: {e}")
        return None
    except Exception as e:
        # Best-effort: any parsing or DataFrame construction problem is
        # reported and turned into None so a download loop can continue.
        print(f"An unexpected error occurred: {e}")
        return None

In [163]:
import random

# Query prefix; the comma-separated column ids are appended right after "&c=".
url_mktcap = 'https://finviz.com/screener.ashx?v=152&ft=4&o=-marketcap&c='

# Column ids requested by default (32 ids, one per entry in columns_list).
url_columns = '0,1,2,3,4,6,14,42,43,44,45,46,47,48,49,50,51,52,53,54,57,58,59,67,65,66,103,100,109,120,121,122'

# "&r=<offset>" fragments for offsets 1, 21, 41, ..., 981 (fifty in total).
url_rows = [f'&r={offset}' for offset in range(1, 1000, 20)]

# Visit the offsets in random order; random.sample leaves url_rows untouched.
shuffled_url_rows = random.sample(url_rows, k=len(url_rows))

def add_random_column(columns_str, max_column_id=130, max_new=3):
    """
    Returns `columns_str` with a few extra, randomly chosen column ids merged in.

    Between 1 and `max_new` ids that are not already present are drawn from
    0..`max_column_id`, added to the existing ids, and the whole list is
    returned sorted in ascending order.

    Args:
        columns_str (str): Comma-separated integer column ids, e.g. "0,1,2".
            Empty tokens (a trailing comma, a doubled comma) are ignored.
            An empty string or None is treated as "no columns yet".
        max_column_id (int): Largest id that may be drawn. Defaults to 130.
        max_new (int): Upper bound on how many ids are added. Defaults to 3.

    Returns:
        str: The sorted, comma-separated ids including the new ones, or
        `columns_str` unchanged when no id can be added (every id in range is
        already present, or `max_new` is below 1).

    Raises:
        ValueError: If a non-empty token is not an integer.
    """
    columns = []
    if columns_str:
        # Skip empty tokens so "1,2," does not crash in int('').
        columns = [int(token) for token in columns_str.split(',') if token.strip()]

    # Sorted so that a seeded `random` always sees the same population order.
    available_nums = sorted(set(range(max_column_id + 1)) - set(columns))

    max_k = min(max_new, len(available_nums))
    if max_k < 1:
        # Nothing can be added: hand back the input untouched.
        return columns_str

    k = random.randint(1, max_k)
    columns.extend(random.sample(available_nums, k))
    columns.sort()

    return ','.join(map(str, columns))
    
# Smoke-test the helper on the default column list. Note that len(random_columns)
# is the character count of the comma-separated string, not the number of columns.
random_columns = add_random_column(url_columns)
print(f"len: {len(random_columns)}, new_columns_str: {random_columns}")    

len: 101, new_columns_str: 0,1,2,3,4,6,14,40,42,43,44,45,46,47,48,49,50,51,52,53,54,55,57,58,59,65,66,67,100,103,109,120,121,122


In [164]:
# Build one URL per row offset, in shuffled order. Each URL gets its own
# column list from add_random_column, so the requested columns vary per URL.
urls = []

for _rows in shuffled_url_rows:
    random_columns = add_random_column(url_columns)
    url = url_mktcap + random_columns + _rows
    urls.append(url)


print(f'len(urls): {len(urls)}')
print(urls[0:3])  # Print the first three URLs as a sanity check

len(urls): 50
['https://finviz.com/screener.ashx?v=152&ft=4&o=-marketcap&c=0,1,2,3,4,6,14,42,43,44,45,46,47,48,49,50,51,52,53,54,57,58,59,65,66,67,87,100,103,109,120,121,122&r=981', 'https://finviz.com/screener.ashx?v=152&ft=4&o=-marketcap&c=0,1,2,3,4,6,14,42,43,44,45,46,47,48,49,50,51,52,53,54,57,58,59,65,66,67,71,100,103,109,120,121,122&r=761', 'https://finviz.com/screener.ashx?v=152&ft=4&o=-marketcap&c=0,1,2,3,4,6,14,42,43,44,45,46,47,48,49,50,51,52,53,54,57,58,59,65,66,67,100,103,104,109,110,120,121,122,130&r=801']


In [None]:
import pandas as pd

urls_to_download = urls[0:3]  # Adjust the range as needed
total_urls_to_download = len(urls_to_download)

# Collect the per-page tables and concatenate once after the loop; growing a
# DataFrame with pd.concat on every iteration copies all earlier rows each time.
frames = []
processed_count = 0

for processed_count, url in enumerate(urls_to_download, start=1):
    # Random pause before every request so the site is not hit at a fixed rate
    delay_seconds = random.uniform(2, 4.5)  # Sleep between 2 and 4.5 seconds
    print(f"Downloading {url}. Sleeping for {delay_seconds:.2f} seconds.  Processed {processed_count} / {total_urls_to_download} urls")
    time.sleep(delay_seconds)

    df_temp = download_yahoo_finance_table(url, selector)

    if df_temp is None:
        print(f"Failed to download data for {url}")
        continue

    # A page lacking an expected column would raise KeyError on selection and
    # abort the whole run; report it and move on to the next URL instead.
    missing_columns = [col for col in columns_list if col not in df_temp.columns]
    if missing_columns:
        print(f"Skipping {url}: missing expected columns {missing_columns}")
        continue

    # Keep only the expected columns (drops the randomly added extras)
    frames.append(df_temp[columns_list])

df = pd.concat(frames) if frames else pd.DataFrame()

In [None]:
# Print column names, dtypes and non-null counts of the combined table.
df.info()

In [None]:
# "&r=<offset>" fragments for offsets 1, 21, 41, ..., 981 (fifty in total).
url_rows = [f'&r={offset}' for offset in range(1, 1000, 20)]
shuffled_url_rows = random.sample(url_rows, len(url_rows))  # Returns a new shuffled list

# Show the ordered and the shuffled list under their own names. The original
# printed the shuffled list twice, labelled "url_rows" and "urls".
print(f"len: {len(url_rows)}, url_rows: {url_rows}")
print(f"len: {len(shuffled_url_rows)}, shuffled_url_rows: {shuffled_url_rows}")

In [None]:
# Row offsets 1, 21, 41, ..., 981: fifty values spaced 20 apart.
sequence = [*range(1, 1001, 20)]

# Turn every offset into its "&r=<offset>" query fragment.
url_rows = ["&r=" + str(num) for num in sequence]
print(url_rows)

In [None]:
# `url_mktcap_head` and `random_columns_str` are not defined anywhere in this
# notebook; the values they stand for are bound to `url_mktcap` and
# `random_columns` in the cells above.
my_url = url_mktcap + random_columns + url_rows[0]
my_url

In [None]:
df = download_yahoo_finance_table(my_url, selector)

# The helper returns None on any failure; calling .info() on None would raise.
if df is not None:
    df.info()
else:
    print(f"Failed to download data for {my_url}")

In [None]:
# Screener URL up to and including "&r="; a row offset is appended to it.
url_base = (
    'https://finviz.com/screener.ashx?v=152&ft=4&o=-marketcap'
    '&c=0,1,2,3,4,6,14,42,43,44,45,46,47,48,49,50,51,52,53,54,57,58,59,67,65,66,103,100,109,120,121,122'
    '&r='
)

In [None]:
import random

def get_random_sequence(sequence_list):
    """
    Returns the items of `sequence_list` in random order as a new list.

    Args:
        sequence_list: Any iterable of items (list, tuple, range, ...). It is
            copied first, so the caller's object is never modified.

    Returns:
        list: A new list holding every input item exactly once per occurrence,
        in random order. An empty input gives an empty list.
    """
    items = list(sequence_list)
    # Sampling all len(items) elements without replacement is a shuffle; this
    # replaces the choice()/remove() loop, which rescanned the list on every
    # pick and was therefore quadratic.
    return random.sample(items, len(items))

# Shuffle the row offsets, then print both lengths (they should match, since
# shuffling neither adds nor drops items) and the shuffled order itself.
randomized_sequence = get_random_sequence(sequence)
print(f"Randomized sequence length: {len(randomized_sequence)}")
print(f"Original sequence length: {len(sequence)}")
print(f'Randomized sequence: {randomized_sequence}')

In [None]:
# URLs for the first two row offsets: url_base followed by each offset.
urls_to_download = [f"{url_base}{num}" for num in sequence[:2]]
urls_to_download

In [None]:
# Two hand-written screener URLs. urls_to_download_1 differs from
# urls_to_download_0 in three ways: column ids 0 and 50 swap positions,
# id 126 is appended, and the row offset is 21 instead of 0.
urls_to_download_0 = 'https://finviz.com/screener.ashx?v=152&ft=4&o=-marketcap&c=0,1,2,3,4,6,14,42,43,44,45,46,47,48,49,50,51,52,53,54,57,58,59,67,65,66,103,100,109,120,121,122&r=0'
urls_to_download_1 = 'https://finviz.com/screener.ashx?v=152&ft=4&o=-marketcap&c=50,1,2,3,4,6,14,42,43,44,45,46,47,48,49,0,51,52,53,54,57,58,59,67,65,66,103,100,109,120,121,122,126&r=21'
# urls_to_download_2 = 'https://finviz.com/screener.ashx?v=152&ft=4&o=-marketcap&c=50,1,2,3,4,6,14,42,43,44,45,46,47,48,49,0,51,52,53,54,57,58,59,67,65,66,103,100,109,120,121,122&r=0,21'


# Replaces the urls_to_download list built from url_base with these two URLs.
urls_to_download = [urls_to_download_0, urls_to_download_1]


In [None]:
# Only the second URL is fetched here; the df_0 download is commented out,
# so this cell does not assign df_0.
# df_0 = download_yahoo_finance_table(urls_to_download_0, selector)
df_1 = download_yahoo_finance_table(urls_to_download_1, selector)

In [None]:
# df_0 is never assigned (its download is commented out above), so only df_1 is
# reported. DataFrame.info() prints its report and returns None, so it is
# called directly; wrapping it in display() would only show "None".
if df_1 is not None:
    df_1.info()
else:
    print("df_1 is None: the download failed")

In [None]:
import pandas as pd

# urls_to_download = urls[1:2]  # Must stay a list of URLs; adjust the range as needed
total_urls_to_download = len(urls_to_download)

# Collect the per-page tables and concatenate once after the loop; growing a
# DataFrame with pd.concat on every iteration copies all earlier rows each time.
frames = []
processed_count = 0

for processed_count, url in enumerate(urls_to_download, start=1):
    # Random pause before every request so the site is not hit at a fixed rate
    delay_seconds = random.uniform(2, 4.5)  # Sleep between 2 and 4.5 seconds
    print(f"Downloading {url}. Sleeping for {delay_seconds:.2f} seconds.  Processed {processed_count} / {total_urls_to_download} urls")
    time.sleep(delay_seconds)

    df_temp = download_yahoo_finance_table(url, selector)

    if df_temp is not None:
        frames.append(df_temp)
    else:
        print(f"Failed to download data for {url}")

# Pages may carry different column sets; concat keeps the union of columns.
df = pd.concat(frames) if frames else pd.DataFrame()

df

In [None]:
# download_yahoo_finance_table expects one URL string, but urls_to_download is
# a list; fetch its first entry instead of passing the whole list.
df_temp = download_yahoo_finance_table(urls_to_download[0], selector)
# df_temp.info()

In [None]:
# download_yahoo_finance_table expects one URL string, but urls_to_download is
# a list; fetch its first entry instead of passing the whole list.
df_temp = download_yahoo_finance_table(urls_to_download[0], selector)

# The helper returns None on failure; calling .info() on None would raise.
if df_temp is not None:
    df_temp.info()

In [None]:
# Re-inspect the df_temp left behind by an earlier cell; the download itself is
# commented out here, so this cell fails on a fresh kernel if run on its own.
# df_temp = download_yahoo_finance_table(urls_to_download, selector)
df_temp.info()

In [None]:
# download_yahoo_finance_table expects one URL string, but urls_to_download is
# a list; fetch its first entry instead of passing the whole list.
df_temp = download_yahoo_finance_table(urls_to_download[0], selector)
df_temp

In [None]:
# Fetch the screener with r=21 and no "&c=" column list in the URL, to see
# which columns come back when none are requested explicitly.
df_temp = download_yahoo_finance_table('https://finviz.com/screener.ashx?v=152&ft=4&o=-marketcap&r=21', selector)
df_temp

In [None]:
import pandas as pd

# urls_to_download = urls[1:2]  # Must stay a list of URLs; adjust the range as needed
total_urls_to_download = len(urls_to_download)

# Collect the per-page tables and concatenate once after the loop; growing a
# DataFrame with pd.concat on every iteration copies all earlier rows each time.
frames = []
processed_count = 0

for processed_count, url in enumerate(urls_to_download, start=1):
    # Random pause before every request so the site is not hit at a fixed rate
    delay_seconds = random.uniform(2, 4.5)  # Sleep between 2 and 4.5 seconds
    print(f"Downloading {url}. Sleeping for {delay_seconds:.2f} seconds.  Processed {processed_count} / {total_urls_to_download} urls")
    time.sleep(delay_seconds)

    df_temp = download_yahoo_finance_table(url, selector)

    if df_temp is not None:
        frames.append(df_temp)
    else:
        print(f"Failed to download data for {url}")

# Pages may carry different column sets; concat keeps the union of columns.
df = pd.concat(frames) if frames else pd.DataFrame()

df

In [None]:
import pandas as pd

# urls_to_download = urls[1:2]  # Must stay a list of URLs; adjust the range as needed
total_urls_to_download = len(urls_to_download)

# Collect the per-page tables and concatenate once after the loop; growing a
# DataFrame with pd.concat on every iteration copies all earlier rows each time.
frames = []
processed_count = 0

for processed_count, url in enumerate(urls_to_download, start=1):
    # Random pause before every request so the site is not hit at a fixed rate
    delay_seconds = random.uniform(2, 4.5)  # Sleep between 2 and 4.5 seconds
    print(f"Downloading {url}. Sleeping for {delay_seconds:.2f} seconds.  Processed {processed_count} / {total_urls_to_download} urls")
    time.sleep(delay_seconds)

    df_temp = download_yahoo_finance_table(url, selector)

    if df_temp is not None:
        frames.append(df_temp)
    else:
        print(f"Failed to download data for {url}")

# Pages may carry different column sets; concat keeps the union of columns.
df = pd.concat(frames) if frames else pd.DataFrame()

In [None]:
# Print column names, dtypes and non-null counts of the combined table.
df.info()

In [None]:
# Display the combined table built by the download loop.
df

In [None]:
# NOTE(review): `_df` is not assigned anywhere in the visible notebook, so this
# cell raises NameError on a fresh kernel. Presumably a leftover or a typo for
# `df` — confirm and remove or rename.
_df

**===== TURN ON POWERTOYS AWAKE TO KEEP THE CONNECTION ALIVE =====**

In [None]:
# Which part of `symbols` to download, expressed as slice(start, stop, step).
# Any bound may be None: start -> 0, stop -> end of the sequence, step -> 1.
start_index, end_index, step_value = None, None, None
# end_index = 3  # uncomment to stop before index 3

slice_obj = slice(start_index, end_index, step_value)

print(f'slice of symbols: symbols[{slice_obj}]')

In [None]:
# Directory holding the symbol list files. The second assignment overrides the
# first, so the Colab path is the one in effect; comment it out to use the PC path.
dir_path = 'G:/My Drive/stocks/'  # Run in PC, Replace with your actual directory path
dir_path = '/content/drive/MyDrive/stocks/'  # Run in Colab

# File names (inside dir_path) passed to read_stock_symbols.
symbols_stocks_file = 'symbols_stocks.txt'
symbols_ETFs_file = 'symbols_ETFs.txt'
# symbols_stocks_ETFs_file = 'symbols_stocks_ETFs.txt'

In [None]:
# NOTE: this rebinds the global `selector` that the screener cells above set to
# '.styled-table-new'. From here on it is used with the Yahoo Finance history
# pages; re-run the first cell before going back to the screener cells.
# selector = "#nimbus-app > section > section > section > article > div.container > div.table-container.yf-1jecxey > table > tbody"
selector = "#nimbus-app > section > section > section > article > div.container > div.table-container.yf-1jecxey"
# Column names assigned to every downloaded history table, replacing the page's own headers.
col_names = ['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']

In [None]:
import os

def read_stock_symbols(dir_path, symbols_stocks_file, symbols_ETFs_file):
    """
    Reads stock and ETF symbols from text files in the specified directory and returns them in two separate lists.

    Each file is expected to hold one symbol per line. Surrounding whitespace
    is stripped and blank lines are skipped, so an empty line in a file does
    not turn into an empty-string symbol.

    Args:
        dir_path (str): The directory path where the symbol files are located.
        symbols_stocks_file (str): The name of the file containing stock symbols.
        symbols_ETFs_file (str): The name of the file containing ETF symbols.

    Returns:
        tuple: A tuple containing two lists: (stock_symbols, etf_symbols).  Returns ([], [])
               if any error occurs during file reading, even when one of the
               two files was read successfully.
    """

    def _read_symbols(file_name):
        # Return the non-blank, stripped lines of one symbol file.
        with open(os.path.join(dir_path, file_name), 'r') as f:
            return [symbol for symbol in (line.strip() for line in f) if symbol]

    try:
        stock_symbols = _read_symbols(symbols_stocks_file)
        etf_symbols = _read_symbols(symbols_ETFs_file)
        return stock_symbols, etf_symbols

    except FileNotFoundError:
        print(f"Error: One or more files not found in directory: {dir_path}")
        return [], []
    except Exception as e:
        print(f"An error occurred: {e}")
        return [], []

In [None]:
import requests
from bs4 import BeautifulSoup
import time
import random  # For a bit of randomness in the sleep time

# NOTE: this notebook defines download_yahoo_finance_table twice; once this
# cell runs, this definition is the one in effect.
def download_yahoo_finance_table(url, selector, timeout=30):
    """
    Downloads an HTML table from a web page and returns it as a DataFrame.

    The element matched by `selector` is searched for <tr> rows. Column names
    are taken from the <th> cells of the first row; every row that contains
    <td> cells becomes one record of stripped text values.

    This function performs a single request and does no rate limiting itself;
    callers are expected to pause between calls.

    Args:
        url (str): Page to fetch.
        selector (str): CSS selector locating the table (or an element that
            contains its rows).
        timeout (float or tuple): Seconds to wait for the server, passed to
            `requests.get`. Without a timeout a stalled connection would block
            the notebook indefinitely. Defaults to 30.

    Returns:
        pandas.DataFrame or None: The table contents as strings, or None when
        the request fails, the selector matches nothing, no rows or data cells
        are found, or the DataFrame cannot be built. The reason is printed.
    """
    try:
        # Add a User-Agent header to mimic a browser
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}  # Example User-Agent
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)

        soup = BeautifulSoup(response.content, 'html.parser')
        table_body = soup.select_one(selector)

        if table_body is None:
            print(f"Error: Table body not found using selector: {selector}")
            return None

        rows = table_body.find_all('tr')
        if not rows:
            print("Error: No rows found in the table.")
            return None

        # Extract headers from the first row (th elements)
        headers_list = [th.text.strip() for th in rows[0].find_all('th')]

        # Rows without <td> cells (such as the header row) yield an empty list
        # and are skipped.
        data = []
        for row in rows:
            row_data = [cell.text.strip() for cell in row.find_all('td')]
            if row_data:
                data.append(row_data)

        if not data:
            print("Error: No data found in the table rows.")
            return None

        return pd.DataFrame(data, columns=headers_list)

    except requests.exceptions.RequestException as e:
        # Covers connection errors, HTTP error statuses and timeouts.
        print(f"Error during request: {e}")
        return None
    except Exception as e:
        # Best-effort: any parsing or DataFrame construction problem is
        # reported and turned into None so a download loop can continue.
        print(f"An unexpected error occurred: {e}")
        return None

In [None]:
import datetime
import pytz

def get_current_pst_time():
    """
    Return the current wall-clock time in the 'America/Los_Angeles' zone as text.

    Returns:
        str: The timestamp formatted as "YYYY-MM-DD HH:MM:SS".
    """
    timestamp_format = "%Y-%m-%d %H:%M:%S"
    los_angeles = pytz.timezone('America/Los_Angeles')
    return datetime.datetime.now(los_angeles).strftime(timestamp_format)

In [None]:
import pandas as pd
import numpy as np

def convert_df_data_types(df):
    """
    Convert a scraped OHLCV frame from text to typed values, in place.

    Steps, in order:
      1. The second element of every index entry is parsed with
         `pd.to_datetime` and the MultiIndex is rebuilt with the same names.
      2. 'Open', 'High', 'Low', 'Close' and 'Adj Close' have thousands commas
         removed and are cast to float.
      3. 'Volume' has '-' replaced by NaN, commas removed, and is cast to the
         nullable 'Int64' dtype.

    Args:
        df: DataFrame indexed by (ticker, date string) whose price and volume
            columns hold strings. The frame is modified directly.

    Returns:
        The same DataFrame object. If a step raises ValueError, a message is
        printed and the frame is returned as converted up to that point.
    """
    # Step 1: parse the date half of every (ticker, date) index entry.
    try:
        parsed_entries = []
        for entry in df.index:
            parsed_entries.append((entry[0], pd.to_datetime(entry[1])))
        df.index = pd.MultiIndex.from_tuples(parsed_entries, names=df.index.names)
    except ValueError as e:
        print(f"Error converting MultiIndex to datetime: {e}")
        return df

    # Step 2: price columns -> float. Commas must go first or float() rejects "1,234.5".
    for price_col in ('Open', 'High', 'Low', 'Close', 'Adj Close'):
        try:
            without_commas = df[price_col].str.replace(',', '', regex=False)
            df[price_col] = without_commas
            df[price_col] = df[price_col].astype(float)
        except ValueError as e:
            print(f"Error converting column '{price_col}' to float: {e}")
            return df

    # Step 3: volume -> nullable integer. '-' becomes NaN before the text cleanup.
    try:
        volume_with_gaps = df['Volume'].replace('-', np.nan)
        df['Volume'] = volume_with_gaps
        volume_as_float = df['Volume'].str.replace(',', '', regex=False).astype(float)
        df['Volume'] = volume_as_float.astype('Int64')  # Int64 can hold missing values
    except ValueError as e:
        print(f"Error converting column 'Volume' to int64: {e}")
        return df

    return df

In [None]:
import pandas as pd

def adjust_prices(df):
    """
    Adds adjusted Open, High and Low prices derived from the Adj Close / Close ratio.

    Each row's ratio `Adj Close / Close` is applied to 'Open', 'High' and
    'Low', and the results are stored in new columns 'Adj Open', 'Adj High'
    and 'Adj Low'. The original price columns, including 'Close', are left as
    they are.

    The input frame is not modified; a new DataFrame is returned. (The
    previous implementation added its working columns to the caller's frame.)

    Args:
        df: Pandas DataFrame with numeric 'Open', 'High', 'Low', 'Close', and
            'Adj Close' columns.

    Returns:
        Pandas DataFrame: a copy of `df` with 'Adj Open', 'Adj High' and
        'Adj Low' appended, and without an 'adjustment_ratio' column.
    """
    # Per-row scaling factor; kept as a local Series instead of a temporary column.
    adjustment_ratio = df['Adj Close'] / df['Close']

    adjusted = df.assign(**{
        'Adj Open': df['Open'] * adjustment_ratio,
        'Adj High': df['High'] * adjustment_ratio,
        'Adj Low': df['Low'] * adjustment_ratio,
    })

    # The returned frame never carried an 'adjustment_ratio' column; keep that
    # true even if the input happened to contain one.
    return adjusted.drop(columns='adjustment_ratio', errors='ignore')

# Example Usage (assuming 'df' is your cleaned DataFrame)
# df_adjusted = adjust_prices(df.copy())  # Create a copy
# print(df_adjusted.head())
# print(df_adjusted.info())

In [None]:
stocks, etfs = read_stock_symbols(dir_path, symbols_stocks_file, symbols_ETFs_file)

# read_stock_symbols returns two empty lists on failure, so "both empty" is
# treated as the failure signal.
if not (stocks or etfs):
    print("Failed to read stock symbols. Check the directory and file names.")
else:
    print(f"Stock Symbols (len = {len(stocks)}):")
    print(stocks)
    print(f"ETF Symbols (len = {len(etfs)}):")
    print(etfs)

In [None]:
# Full download universe: all stock symbols followed by all ETF symbols.
symbols = stocks + etfs
print(f"symbols (len = {len(symbols)}):")
print(symbols)

In [None]:
# Apply the slice configured earlier to pick the symbols for this run.
symbols_to_download = symbols[slice_obj]

# symbols_to_download = slice_string(symbols, symbol_start, symbol_end, symbol_step)  # Adjust the slice as needed
total_symbols_to_download = len(symbols_to_download)
# Progress counter incremented by the download loop; re-run this cell to reset
# it before re-running the loop.
processed_count = 0

print(f'symbols_to_download: symbols[{slice_obj}]')
print(f'total_symbols_to_download: {total_symbols_to_download}')
print(f'processed_count: {processed_count}')


In [None]:
# Record when the download starts; paired with the "End" timestamp printed after the loop.
current_pst_time = get_current_pst_time()
print(f"Start OHLCV download at PST time: {current_pst_time}")

In [None]:
import pandas as pd

# Collect one frame per symbol and concatenate once after the loop; growing a
# DataFrame with pd.concat on every iteration copies all earlier rows each time.
frames = []

# enumerate() restarts the progress counter at 1 on every run of this cell,
# so re-running the loop no longer keeps counting from the previous run.
for processed_count, symbol in enumerate(symbols_to_download, start=1):
    url = f"https://finance.yahoo.com/quote/{symbol}/history/"
    # Random pause before every request so the site is not hit at a fixed rate
    delay_seconds = random.uniform(2, 4.5)  # Sleep between 2 and 4.5 seconds
    print(f"Downloading {symbol}. Sleeping for {delay_seconds:.2f} seconds.  Processed {processed_count} / {total_symbols_to_download} symbols.")
    time.sleep(delay_seconds)

    df_temp = download_yahoo_finance_table(url, selector)

    if df_temp is None:
        print(f"Failed to download data for {symbol}")
        continue

    # Assigning col_names to a table with a different column count raises
    # ValueError, which would abort the run and lose every symbol fetched so far.
    if len(df_temp.columns) != len(col_names):
        print(f"Skipping {symbol}: expected {len(col_names)} columns, got {len(df_temp.columns)}")
        continue

    df_temp.columns = col_names  # Ensure the columns are what is expected
    df_temp = df_temp.set_index('Date')  # Set Date as Index
    # Create MultiIndex (Symbol, Date)
    df_temp.index = pd.MultiIndex.from_product([[symbol], df_temp.index], names=['Symbol', 'Date'])

    frames.append(df_temp)

df = pd.concat(frames) if frames else pd.DataFrame()

In [None]:
# Record when the download finished; compare with the "Start" timestamp above.
current_pst_time = get_current_pst_time()
print(f"End OHLCV download at PST time: {current_pst_time}")

In [None]:
# Display the raw combined OHLCV table (all values are still strings here).
df

In [None]:
# Print column names, dtypes and non-null counts of the raw OHLCV table.
df.info()

In [None]:
import datetime
import pytz

# Date-stamp the output file with today's date in the America/Los_Angeles zone.
pst = pytz.timezone('America/Los_Angeles')  # or 'US/Pacific'
current_date_pst = datetime.datetime.now(pst).strftime('%Y-%m-%d')

# Relative file name, e.g. df_OHLCV_<YYYY-MM-DD>.pkl
df_OHLCV_filename = f"df_OHLCV_{current_date_pst}.pkl"

print(f"df_OHLCV_filename: {df_OHLCV_filename}")

In [None]:
# Drop rows with any NaN values. dropna() returns a new frame, so the raw `df`
# stays available.
df_dropna = df.dropna()
df_dropna.info()

In [None]:
# convert_df_data_types modifies the frame it receives, hence the copy.
df_converted = convert_df_data_types(df_dropna.copy())  # Create a copy to avoid modifying the original
df_converted.info()

In [None]:
# Add the 'Adj Open' / 'Adj High' / 'Adj Low' columns on a copy of the converted frame.
df_adjusted = adjust_prices(df_converted.copy())  # Create a copy
df_adjusted.info()

In [None]:
# Save the DataFrame to a pickle file. The file name is relative, so the file
# is written to the current working directory.
df_adjusted.to_pickle(df_OHLCV_filename)  # Saves to the Colab's runtime environment

print(f"Dropped NaN, converted data types, adjusted OHLC and saved DataFrame saved as {df_OHLCV_filename}")

In [None]:
from google.colab import files

# Download the pickle file through the browser. `files` comes from
# google.colab, so this cell only works when the notebook runs in Colab.
files.download(df_OHLCV_filename)
print(f"Downloded {df_OHLCV_filename}")

In [None]:
# Display the final table: typed OHLCV values plus the adjusted price columns.
df_adjusted