In [1]:
# Import libraries/load environments

# Import required libraries
import os
import requests
import pandas as pd
from dotenv import load_dotenv

# Load environment variables
# The .env location defaults to the original absolute path but can be overridden
# by setting STOCKAPI_ENV_PATH, so the notebook is not tied to a single machine.
env_path = os.getenv(
    'STOCKAPI_ENV_PATH',
    '/Users/ronaldsheaks/Desktop/OSU AI Bootcamp/Group Project 2/GitHub Repo/Project_2_StockMrkt_ML/stockapi.env',
)
load_dotenv(env_path)
api_key = os.getenv('ALPHA_VANTAGE_API_KEY')

# Stop here if no key was found, instead of sending keyless requests later on.
if not api_key:
    raise RuntimeError(
        f"ALPHA_VANTAGE_API_KEY is not set; checked the process environment and {env_path!r}"
    )


In [2]:
# Define Paths and Parameters

# Define root paths for sub-sectors
# Defaults to the original absolute location; set IT_SECTOR_DATA_ROOT to write
# the CSV tree somewhere else without editing this cell.
root_path = os.getenv(
    'IT_SECTOR_DATA_ROOT',
    '/Users/ronaldsheaks/Desktop/OSU AI Bootcamp/Group Project 2/GitHub Repo/Project_2_StockMrkt_ML/ALL_STOCKMARKET_DATA_2020_24/IT_SECTOR_2020_24',
)

# Define years (kept as strings; they are used in folder and file names)
years = ["2020", "2021", "2022", "2023", "2024"]

# Define sub-sector symbols (these are examples, adjust as needed)
# NOTE: AAPL is listed under both Technology_Hardware and Consumer_Electronics,
# so it is downloaded and saved once for each of those sub-sectors.
# NOTE(review): "SNE" may be an outdated ticker for Sony - confirm it still resolves.
sub_sector_symbols = {
    "Semiconductors": ["NVDA", "INTC", "AMD", "AVGO", "QCOM"],
    "Software": ["MSFT", "ORCL", "ADBE", "CRM", "INTU"],
    "IT_Services": ["IBM", "ACN", "CTSH", "INFY"],
    "Technology_Hardware": ["AAPL", "HPQ", "DELL", "WDC"],
    "Communications_Equipment": ["CSCO", "JNPR", "ANET"],
    "Internet_Services": ["GOOGL", "AMZN", "AKAM"],
    "Electronic_Equipment": ["TEL", "KEYS", "GLW"],
    "Data_Processing": ["PYPL", "V", "MA"],
    "Consumer_Electronics": ["AAPL", "SNE", "GRMN"]
}

# Sub-sector paths: one folder per sub-sector, named after the upper-cased key.
# Deriving them from sub_sector_symbols keeps both tables on the same keys, so
# adding a sub-sector above cannot leave it without a path. Replace this with an
# explicit mapping if a sub-sector needs a custom folder.
sub_sector_paths = {
    name: f"{root_path}/{name.upper()}" for name in sub_sector_symbols
}


In [3]:
# Define the Data Fetching and Saving Function

# Function to fetch data from Alpha Vantage and save as CSV

# Full daily history per ticker symbol, filled on the first successful download.
# Requests are made with outputsize="full" and the year is only selected
# afterwards, so asking for several years of one symbol can reuse one download.
# Re-running this cell empties the cache and forces fresh downloads.
_DAILY_SERIES_CACHE = {}

# Column names assigned, in order, to the fields of each daily record.
_DAILY_COLUMNS = [
    "Open", "High", "Low", "Close", "Adjusted Close", "Volume",
    "Dividend Amount", "Split Coefficient"
]

# Keys looked up, in order, to explain a response that carries no time series.
# NOTE(review): "Note" / "Information" are assumed to be the keys Alpha Vantage
# uses for throttling and plan notices - confirm against the API documentation.
_MESSAGE_KEYS = ("Error Message", "Note", "Information")


def _load_daily_history(symbol, api_key):
    """Return the date-sorted daily history of `symbol`, or None if unavailable.

    Successful downloads are stored in `_DAILY_SERIES_CACHE`. Failures are
    reported with print() and are not cached, so a later call tries again.
    """
    cached = _DAILY_SERIES_CACHE.get(symbol)
    if cached is not None:
        return cached

    params = {
        "function": "TIME_SERIES_DAILY_ADJUSTED",
        "symbol": symbol,
        "outputsize": "full",
        "apikey": api_key,
        "datatype": "json"
    }

    try:
        # The timeout keeps a stalled connection from hanging the notebook.
        response = requests.get(
            "https://www.alphavantage.co/query?", params=params, timeout=30
        )
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers a response body that is not valid JSON.
        print(f"Error fetching data for {symbol}: {exc}")
        return None

    if not isinstance(data, dict):
        data = {}

    time_series = data.get("Time Series (Daily)")
    if not time_series:
        message = next(
            (data[key] for key in _MESSAGE_KEYS if key in data), "Unknown error"
        )
        print(f"Error fetching data for {symbol}: {message}")
        return None

    history = pd.DataFrame.from_dict(time_series, orient='index')
    history.index = pd.to_datetime(history.index)
    history.columns = _DAILY_COLUMNS
    history = history.sort_index()

    _DAILY_SERIES_CACHE[symbol] = history
    return history


def fetch_and_save_data(symbol, sub_sector, year, api_key, output_dir):
    """Save one calendar year of daily adjusted prices for `symbol` as a CSV.

    The history is requested from Alpha Vantage (TIME_SERIES_DAILY_ADJUSTED,
    full output) at most once per symbol; further calls for the same symbol
    reuse the downloaded data. The rows whose date falls in `year` are written
    to `<output_dir>/<symbol>_<year>.csv` with an extra "Index" column holding
    `sub_sector`.

    Args:
        symbol: Ticker symbol to request.
        sub_sector: Label written into the "Index" column of every row.
        year: Calendar year to keep, e.g. "2020" (anything int() accepts).
        api_key: Alpha Vantage API key.
        output_dir: Existing directory that receives the CSV file.

    Returns:
        None. Success and failure are both reported with print(); on failure
        (network/HTTP error, non-JSON body, or a response without a time
        series) no file is written.
    """
    history = _load_daily_history(symbol, api_key)
    if history is None:
        return

    # Filter data for the selected year and add the "Index" column.
    # assign() builds a new frame, so the cached history is never modified and
    # no column is set on a slice of another frame.
    yearly = history.loc[history.index.year == int(year)].assign(Index=sub_sector)

    # Save to CSV
    output_path = os.path.join(output_dir, f"{symbol}_{year}.csv")
    yearly.to_csv(output_path)
    print(f"Data for {symbol} ({sub_sector}, {year}) saved to {output_path}")


In [4]:
# Flexibility: The script is designed to be easily adaptable. 

# You can adjust the sub_sector_symbols, sub_sector_paths, and years as needed.

In [5]:
# Iterate Over Sub-Sectors and Years

# Walk through every sub-sector, then every year, then every ticker symbol.
# Each file ends up in <sub-sector folder>/<year>/.
for sub_sector in sub_sector_symbols:
    symbols = sub_sector_symbols[sub_sector]

    for year in years:
        # Folder for this sub-sector/year pair; created if it is not there yet.
        output_dir = "{}/{}".format(sub_sector_paths[sub_sector], year)
        os.makedirs(output_dir, exist_ok=True)

        # Download and store each symbol of the sub-sector for this year.
        for symbol in symbols:
            fetch_and_save_data(symbol, sub_sector, year, api_key, output_dir)



Data for NVDA (Semiconductors, 2020) saved to /Users/ronaldsheaks/Desktop/OSU AI Bootcamp/Group Project 2/GitHub Repo/Project_2_StockMrkt_ML/ALL_STOCKMARKET_DATA_2020_24/IT_SECTOR_2020_24/SEMICONDUCTORS/2020/NVDA_2020.csv
Data for INTC (Semiconductors, 2020) saved to /Users/ronaldsheaks/Desktop/OSU AI Bootcamp/Group Project 2/GitHub Repo/Project_2_StockMrkt_ML/ALL_STOCKMARKET_DATA_2020_24/IT_SECTOR_2020_24/SEMICONDUCTORS/2020/INTC_2020.csv
Data for AMD (Semiconductors, 2020) saved to /Users/ronaldsheaks/Desktop/OSU AI Bootcamp/Group Project 2/GitHub Repo/Project_2_StockMrkt_ML/ALL_STOCKMARKET_DATA_2020_24/IT_SECTOR_2020_24/SEMICONDUCTORS/2020/AMD_2020.csv
Data for AVGO (Semiconductors, 2020) saved to /Users/ronaldsheaks/Desktop/OSU AI Bootcamp/Group Project 2/GitHub Repo/Project_2_StockMrkt_ML/ALL_STOCKMARKET_DATA_2020_24/IT_SECTOR_2020_24/SEMICONDUCTORS/2020/AVGO_2020.csv
Data for QCOM (Semiconductors, 2020) saved to /Users/ronaldsheaks/Desktop/OSU AI Bootcamp/Group Project 2/GitHub R

Data for CRM (Software, 2022) saved to /Users/ronaldsheaks/Desktop/OSU AI Bootcamp/Group Project 2/GitHub Repo/Project_2_StockMrkt_ML/ALL_STOCKMARKET_DATA_2020_24/IT_SECTOR_2020_24/SOFTWARE/2022/CRM_2022.csv
Data for INTU (Software, 2022) saved to /Users/ronaldsheaks/Desktop/OSU AI Bootcamp/Group Project 2/GitHub Repo/Project_2_StockMrkt_ML/ALL_STOCKMARKET_DATA_2020_24/IT_SECTOR_2020_24/SOFTWARE/2022/INTU_2022.csv
Data for MSFT (Software, 2023) saved to /Users/ronaldsheaks/Desktop/OSU AI Bootcamp/Group Project 2/GitHub Repo/Project_2_StockMrkt_ML/ALL_STOCKMARKET_DATA_2020_24/IT_SECTOR_2020_24/SOFTWARE/2023/MSFT_2023.csv
Data for ORCL (Software, 2023) saved to /Users/ronaldsheaks/Desktop/OSU AI Bootcamp/Group Project 2/GitHub Repo/Project_2_StockMrkt_ML/ALL_STOCKMARKET_DATA_2020_24/IT_SECTOR_2020_24/SOFTWARE/2023/ORCL_2023.csv
Data for ADBE (Software, 2023) saved to /Users/ronaldsheaks/Desktop/OSU AI Bootcamp/Group Project 2/GitHub Repo/Project_2_StockMrkt_ML/ALL_STOCKMARKET_DATA_2020_2

Data for DELL (Technology_Hardware, 2021) saved to /Users/ronaldsheaks/Desktop/OSU AI Bootcamp/Group Project 2/GitHub Repo/Project_2_StockMrkt_ML/ALL_STOCKMARKET_DATA_2020_24/IT_SECTOR_2020_24/TECHNOLOGY_HARDWARE/2021/DELL_2021.csv
Data for WDC (Technology_Hardware, 2021) saved to /Users/ronaldsheaks/Desktop/OSU AI Bootcamp/Group Project 2/GitHub Repo/Project_2_StockMrkt_ML/ALL_STOCKMARKET_DATA_2020_24/IT_SECTOR_2020_24/TECHNOLOGY_HARDWARE/2021/WDC_2021.csv
Data for AAPL (Technology_Hardware, 2022) saved to /Users/ronaldsheaks/Desktop/OSU AI Bootcamp/Group Project 2/GitHub Repo/Project_2_StockMrkt_ML/ALL_STOCKMARKET_DATA_2020_24/IT_SECTOR_2020_24/TECHNOLOGY_HARDWARE/2022/AAPL_2022.csv
Data for HPQ (Technology_Hardware, 2022) saved to /Users/ronaldsheaks/Desktop/OSU AI Bootcamp/Group Project 2/GitHub Repo/Project_2_StockMrkt_ML/ALL_STOCKMARKET_DATA_2020_24/IT_SECTOR_2020_24/TECHNOLOGY_HARDWARE/2022/HPQ_2022.csv
Data for DELL (Technology_Hardware, 2022) saved to /Users/ronaldsheaks/Deskt

Data for GOOGL (Internet_Services, 2022) saved to /Users/ronaldsheaks/Desktop/OSU AI Bootcamp/Group Project 2/GitHub Repo/Project_2_StockMrkt_ML/ALL_STOCKMARKET_DATA_2020_24/IT_SECTOR_2020_24/INTERNET_SERVICES/2022/GOOGL_2022.csv
Data for AMZN (Internet_Services, 2022) saved to /Users/ronaldsheaks/Desktop/OSU AI Bootcamp/Group Project 2/GitHub Repo/Project_2_StockMrkt_ML/ALL_STOCKMARKET_DATA_2020_24/IT_SECTOR_2020_24/INTERNET_SERVICES/2022/AMZN_2022.csv
Data for AKAM (Internet_Services, 2022) saved to /Users/ronaldsheaks/Desktop/OSU AI Bootcamp/Group Project 2/GitHub Repo/Project_2_StockMrkt_ML/ALL_STOCKMARKET_DATA_2020_24/IT_SECTOR_2020_24/INTERNET_SERVICES/2022/AKAM_2022.csv
Data for GOOGL (Internet_Services, 2023) saved to /Users/ronaldsheaks/Desktop/OSU AI Bootcamp/Group Project 2/GitHub Repo/Project_2_StockMrkt_ML/ALL_STOCKMARKET_DATA_2020_24/IT_SECTOR_2020_24/INTERNET_SERVICES/2023/GOOGL_2023.csv
Data for AMZN (Internet_Services, 2023) saved to /Users/ronaldsheaks/Desktop/OSU AI 

Data for PYPL (Data_Processing, 2024) saved to /Users/ronaldsheaks/Desktop/OSU AI Bootcamp/Group Project 2/GitHub Repo/Project_2_StockMrkt_ML/ALL_STOCKMARKET_DATA_2020_24/IT_SECTOR_2020_24/DATA_PROCESSING/2024/PYPL_2024.csv
Data for V (Data_Processing, 2024) saved to /Users/ronaldsheaks/Desktop/OSU AI Bootcamp/Group Project 2/GitHub Repo/Project_2_StockMrkt_ML/ALL_STOCKMARKET_DATA_2020_24/IT_SECTOR_2020_24/DATA_PROCESSING/2024/V_2024.csv
Data for MA (Data_Processing, 2024) saved to /Users/ronaldsheaks/Desktop/OSU AI Bootcamp/Group Project 2/GitHub Repo/Project_2_StockMrkt_ML/ALL_STOCKMARKET_DATA_2020_24/IT_SECTOR_2020_24/DATA_PROCESSING/2024/MA_2024.csv
Data for AAPL (Consumer_Electronics, 2020) saved to /Users/ronaldsheaks/Desktop/OSU AI Bootcamp/Group Project 2/GitHub Repo/Project_2_StockMrkt_ML/ALL_STOCKMARKET_DATA_2020_24/IT_SECTOR_2020_24/CONSUMER_ELECTRONICS/2020/AAPL_2020.csv
Data for SNE (Consumer_Electronics, 2020) saved to /Users/ronaldsheaks/Desktop/OSU AI Bootcamp/Group Pro