# Problem 1: Data from yfinance

https://github.com/ranaroussi/yfinance

In [13]:
import yfinance as yf  # Yahoo Finance data
import pandas as pd   # ðŸ“š Reference: https://pandas.pydata.org/
import os             # ðŸ“š Reference: https://docs.python.org/3/library/os.html
from datetime import datetime  # ðŸ“š Reference: https://docs.python.org/3/library/datetime.html

def get_data():
    """Download hourly FAANG prices for the last five days and save them as CSV.

    The prices are fetched with ``yf.download``, the datetime index is
    converted to Irish local time (``Europe/Dublin``) and the DataFrame is
    written to ``data/<YYYYMMDD_HHMMSS>.csv``.

    Returns:
        None. The function returns early, without saving anything, when the
        ``data`` folder does not exist or when the download produced no rows.
    """
    # 📚 Reference: https://docs.python.org/3/tutorial/controlflow.html#defining-functions

    # Confirming that data folder exists.
    # This is checked first so that a missing folder does not waste a download.
    folder_name = 'data'
    if not os.path.isdir(folder_name):
        print(f"Folder '{folder_name}' not found! Please create it manually.")
        return  # Exits early if folder missing
    else:
        print(f"Folder '{folder_name}' found. Proceeding to save the data.")
    # 📚 Reference: https://www.w3schools.com/python/python_conditions.asp,
    # https://docs.python.org/3/library/os.path.html#os.path.isdir

    # The list of FAANG stock symbols
    faang = ['META', 'AAPL', 'AMZN', 'NFLX', 'GOOG']

    # Downloading the data from Yahoo Finance
    # Using yf.download() to get data for multiple tickers
    # 'period="5d"' means last 5 days
    # 'interval="1h"' gives hourly price data
    data = yf.download(tickers=faang, period='5d', interval='1h', group_by='ticker')
    # 📚 Reference: https://aroussi.com/post/python-yahoo-finance,
    # https://medium.com/@kasperjuunge/yfinance-10-ways-to-get-stock-data-with-python-6677f49e8282,
    # https://www.youtube.com/watch?v=j0sBKAB75oc

    # Stopping here if nothing came back: the timezone steps below need a
    # datetime index, and an empty CSV would not be useful anyway.
    if data.empty:
        print("No data was downloaded. Nothing to save.")
        return

    # Checking if data was downloaded successfully
    print("Downloaded data sample:\n")
    print(data.head())  # Printing first few rows for confirmation
    # 📚 Reference: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.head.html

    # Checking and printing the original timezone
    print("Original timezone information:")
    print(data.index.tz)
    # 📚 Reference: https://pandas.pydata.org/docs/reference/api/pandas.DatetimeIndex.tz.html
    # This checks if the datetime index contains timezone information.

    # Localizing and converting to Irish time
    if data.index.tz is None:
        # Localizing to New York time (exchange timezone for FAANG)
        data = data.tz_localize('America/New_York')
        # 📚 Reference: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.tz_localize.html
        # https://pandas.pydata.org/docs/user_guide/timeseries.html#localizing-time-zones

    data = data.tz_convert('Europe/Dublin')
    # 📚 Reference: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.tz_convert.html
    # https://pandas.pydata.org/docs/user_guide/timeseries.html#time-zone-handling
    # Converts timestamps to Irish local time.

    # Verify the conversion
    print("Converted to timezone:", data.index.tz)

    # Creating a timestamp for the filename in the format YYYYMMDD_HHMMSS
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    # 📚 Reference: https://www.geeksforgeeks.org/python/python-strftime-function/
    # https://stackoverflow.com/questions/32490629/getting-todays-date-in-yyyy-mm-dd-in-python
    # https://www.geeksforgeeks.org/python/convert-datetime-string-to-yyyy-mm-dd-hhmmss-format-in-python/

    # Creating the filename using the timestamp
    filename = str(timestamp) + ".csv"
    # 📚 Reference: https://docs.python.org/3/library/functions.html#func-str
    # https://www.geeksforgeeks.org/python/how-to-create-filename-containing-date-or-time-in-python/

    # Building the full path of the CSV file inside the data folder
    filepath = os.path.join(folder_name, filename)
    # 📚 Reference: https://docs.python.org/3/library/os.path.html#os.path.join

    # Saving the data to CSV
    data.to_csv(filepath)
    # 📚 Reference: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_csv.html

# Running the function when this cell is executed
get_data()


  data = yf.download(tickers=faang, period='5d', interval='1h', group_by='ticker')
[*********************100%***********************]  5 of 5 completed

Downloaded data sample:

Ticker                           META                                      \
Price                            Open        High         Low       Close   
Datetime                                                                    
2025-11-21 14:30:00+00:00  588.500000  596.000000  584.929993  585.119995   
2025-11-21 15:30:00+00:00  585.119995  587.909973  581.859985  587.742676   
2025-11-21 16:30:00+00:00  587.799988  593.909302  586.413086  592.979980   
2025-11-21 17:30:00+00:00  593.000122  595.000000  589.070007  591.625000   
2025-11-21 18:30:00+00:00  591.530029  598.119995  591.179993  597.548584   

Ticker                                    GOOG                          \
Price                       Volume        Open        High         Low   
Datetime                                                                 
2025-11-21 14:30:00+00:00  3693066  297.290009  301.920013  295.562805   
2025-11-21 15:30:00+00:00  2007970  296.739990  301.829987  29




## 1st part - importing

`import yfinance as yf`<br>
Loads the yfinance package, which connects to Yahoo Finance and allows me to download stock market data directly into Python.

At first, I got the error `ModuleNotFoundError: No module named 'yfinance'`. This happened because, even though Python was installed, the yfinance package wasn't, and because VS Code sometimes uses the wrong Python interpreter. I had to run `python -m pip install yfinance` in the terminal, making sure it installed into the same environment VS Code was using. These issues occurred after transferring my work from GitHub Codespaces to VS Code due to Codespace downtime and problems with syncing VS Code and GitHub.

`import pandas as pd`<br>
Imports pandas and is used to manage and save tabular data.

`import os`<br>
Provides tools for interacting with the operating system, like checking if folders exist or creating new ones.

`from datetime import datetime`<br>
Helps manipulate dates and times and helps create timestamps for filenames.

**📚 References:**<br>
- https://packaging.python.org/en/latest/tutorials/installing-packages/
- https://code.visualstudio.com/docs/python/environments
- https://code.visualstudio.com/docs/python/environments#_select-and-activate-an-environment
- https://pip.pypa.io/en/stable/cli/pip_install/
- https://pandas.pydata.org/
- https://docs.python.org/3/library/os.html
- https://docs.python.org/3/library/datetime.html
- https://stackoverflow.com/questions/15707532/import-datetime-v-s-from-datetime-import-datetime
- https://www.geeksforgeeks.org/python/python-datetime-module/


## 2nd part - downloading and defining data

`def get_data():`<br>
Defines a new function. The indented lines underneath it form the function body and run each time the function is called.

`faang = ['META', 'AAPL', 'AMZN', 'NFLX', 'GOOG']`<br>
This line creates a list of ticker symbols for the 5 FAANG companies and tells `yfinance` which stocks to download data for. At first, I struggled with the following error repeatedly: `NameError: name 'faang' is not defined` until I realised the issue was indentation.

`data = yf.download(tickers=faang, period='5d', interval='1h', group_by='ticker')`<br>
`yf.download()` function retrieves the data.<br>
`tickers=faang` means I am downloading multiple stocks at once.<br>
`period='5d'` requests the last five days.<br>
`interval='1h'` retrieves hourly price data.<br>
`group_by='ticker'` organises the data by company.<br>

`msft = yf.Ticker("MSFT")` wasn't used after realising I need a solution that covers multiple tickers.

**📚 References:**<br>
- https://docs.python.org/3/tutorial/controlflow.html#defining-functions
- https://aroussi.com/post/python-yahoo-finance
- https://medium.com/@kasperjuunge/yfinance-10-ways-to-get-stock-data-with-python-6677f49e8282
- https://www.youtube.com/watch?v=j0sBKAB75oc


## 3rd part - timezone conversion

I realised that FAANG companies trade on NASDAQ and NYSE, which use the U.S. Eastern Time Zone.<br>
To make the dataset consistent with my local time in Ireland I made the following changes:
- Checked the timezone of the data with `print(data.index.tz)`
- Localised timestamps to America/New_York (the stock exchange timezone) if none was found, with `data = data.tz_localize('America/New_York')`.
- Converted the localised timestamps to Irish time (Europe/Dublin) with `data = data.tz_convert('Europe/Dublin')`
- Verified the conversion with `print(data.index.tz)`

**📚 References:**<br>
- https://www.ig.com/sg/trading-strategies/nasdaq-opening-and-closing-times--when-can-you-trade--230527
- https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.tz_localize.html
- https://pandas.pydata.org/docs/user_guide/timeseries.html#localizing-time-zones
- https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.tz_convert.html
- https://pandas.pydata.org/docs/reference/api/pandas.DatetimeIndex.tz.html
- https://stackoverflow.com/questions/16628819/convert-pandas-timezone-aware-datetimeindex-to-naive-timestamp-but-in-certain-t
- https://www.geeksforgeeks.org/pandas/pandas-series-dt-tz_localize/
- https://pandas.pydata.org/docs/user_guide/timeseries.html#time-zone-handling


## 4th part - checking the data

`print("Downloaded data sample:\n")` outputs preview text, and `\n` adds a new line for better readability. <br>
`print(data.head())` displays the first five rows of the data frame to confirm that the download worked.

```python
folder_name = 'data'
if not os.path.isdir(folder_name):
    print(f"Folder '{folder_name}' not found! Please create it manually.")
    return
else:
    print(f"Folder '{folder_name}' found. Proceeding to save the data.")
```

This part checks if a folder named `data` exists. The folder was created beforehand, and this checks that the program can find it.<br>
`os.path.isdir(folder_name)` tells me if the folder is found: `Folder 'data' found. Proceeding to save the data.`<br>

**📚 References:**<br>
- https://www.geeksforgeeks.org/python/difference-between-newline-and-carriage-return-in-python/
- https://docs.python.org/3/library/functions.html#print
- https://docs.python.org/3/library/os.path.html#os.path.isdir
- https://www.w3schools.com/python/python_conditions.asp
- https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.head.html
- https://www.geeksforgeeks.org/python/python-os-path-isdir-method/


## 5th part - timestamp for the filename

`timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')` gets current time and date and then formats it into a string.<br>
This ensures that each file has a unique timestamp.<br>
`filename = str(timestamp) + ".csv"` builds the filename as a string by appending the `.csv` extension to the timestamp. <br>

**📚 References:**<br>
- https://www.geeksforgeeks.org/python/python-strftime-function/
- https://stackoverflow.com/questions/32490629/getting-todays-date-in-yyyy-mm-dd-in-python
- https://www.geeksforgeeks.org/python/convert-datetime-string-to-yyyy-mm-dd-hhmmss-format-in-python/
- https://docs.python.org/3/library/functions.html#func-str
- https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes
- https://docs.python.org/3/library/datetime.html#datetime.datetime.now
- https://docs.python.org/3/library/datetime.html#datetime.date.strftime


## 6th part - saving the data

`filepath = os.path.join(folder_name, filename)` joins folder name and filename into one path.<br>
`data.to_csv(filepath)` saves the entire data frame to a csv file in the data folder. Each time the function runs, a new file is created with a unique timestamp name.<br>
`get_data()` calls the function defined earlier with `def get_data():`. At this point the program downloads the FAANG stock data, checks the folder and saves the CSV file.

**📚 References:**<br>
- https://docs.python.org/3/tutorial/controlflow.html#defining-functions
- https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_csv.html
- https://docs.python.org/3/library/os.path.html#os.path.join
- https://docs.python.org/3/library/os.path.html#os

# Problem 2: Plotting Data

In [14]:
import matplotlib.pyplot as plt 
import numpy as np 

def plot_data():
    """Plot the closing prices stored in the most recent CSV of the data folder.

    The most recently modified ``.csv`` file in ``data`` is loaded, the
    ``Close`` column of every ticker is drawn on one figure, and the figure
    is saved as ``plots/faang_close_prices.png`` (the ``plots`` folder is
    created if it does not exist).

    Returns:
        None. The function returns early, after printing a message, when the
        ``data`` folder is missing or contains no CSV files.
    """
    # Folder paths
    data_folder = "data"
    plot_folder = "plots"

    # Without the data folder there is nothing to list, so stop with a message
    # instead of letting os.listdir() raise FileNotFoundError.
    if not os.path.isdir(data_folder):
        print(f"Folder '{data_folder}' not found. Nothing to plot.")
        return

    # Listing all files in the data folder
    files = os.listdir(data_folder)
    print("All files found:", files)
    # https://docs.python.org/3/library/os.html#os.listdir
    # https://docs.python.org/3/library/functions.html#print

    # Filtering CSV files by extension (endswith avoids matching e.g. "x.csv.bak")
    csv_files = [f for f in files if f.endswith(".csv")]
    print("CSV files found:", csv_files)
    # https://docs.python.org/3/tutorial/datastructures.html#list-comprehensions
    # https://docs.python.org/3/library/stdtypes.html#str.endswith

    # max() raises ValueError on an empty list, so stop here if there is no CSV
    if not csv_files:
        print(f"No CSV files found in '{data_folder}'. Nothing to plot.")
        return

    csv_paths = [os.path.join(data_folder, f) for f in csv_files]  # Full paths to CSV files
    # Picking the latest file
    latest_file = max(csv_paths, key=os.path.getmtime)  # Selecting most recently modified file

    print("Latest file picked:", latest_file)
    # https://docs.python.org/3/tutorial/introduction.html#lists
    # https://docs.python.org/3/library/os.path.html#os.path.getmtime
    # https://stackoverflow.com/questions/39327032/how-to-get-the-latest-file-in-a-folder

    # Loading the CSV into pandas.
    # The two header rows hold the (ticker, price field) column MultiIndex.
    df = pd.read_csv(latest_file, header=[0, 1], index_col=0)
    # https://www.geeksforgeeks.org/pandas/python-read-csv-using-pandas-read_csv/
    # https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html

    # The timestamps come back from the CSV as plain text. Converting them to
    # real datetimes in Irish time gives the plot a proper time axis.
    # utc=True lets timestamps with different UTC offsets be parsed together.
    df.index = pd.to_datetime(df.index, utc=True).tz_convert("Europe/Dublin")
    print(df.head())
    # https://pandas.pydata.org/docs/reference/api/pandas.to_datetime.html

    # Selecting only the 'Close' prices for all tickers
    close_data = df.loc[:, (slice(None), 'Close')].copy()
    # https://pandas.pydata.org/docs/user_guide/advanced.html#multiindex-advanced-indexing
    # https://www.geeksforgeeks.org/python/python-pandas-dataframe-loc/
    # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.MultiIndex.html
    # https://www.datacamp.com/tutorial/pandas-multi-index

    # Flattening the MultiIndex columns to single level (ticker names only)
    close_data.columns = close_data.columns.get_level_values(0)
    # https://pandas.pydata.org/docs/reference/api/pandas.Index.get_level_values.html
    # https://stackoverflow.com/questions/39080555/pandas-get-level-values-for-multiple-columns

    # Plotting all FAANG close prices
    plt.figure(figsize=(12, 6))
    for ticker in close_data.columns:
        # Plotting each ticker's close price over time
        plt.plot(close_data.index, close_data[ticker], label=ticker)
        # https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.plot.html
        # https://matplotlib.org/stable/tutorials/introductory/pyplot.html
        # https://blog.quantinsti.com/python-matplotlib-tutorial/

    # Adding titles and labels
    plt.title("FAANG Stock Close Prices", fontsize=14)
    plt.xlabel("Date and Time (Irish Local Time)", fontsize=12)
    plt.ylabel("Stock Closing Price", fontsize=12)
    plt.legend(title="Ticker", loc="upper left")
    plt.grid(True)
    # https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.legend.html
    # https://www.geeksforgeeks.org/python/matplotlib-pyplot-legend-in-python/
    # https://stackoverflow.com/questions/19125722/adding-a-matplotlib-legend

    # Creating the plots folder if it does not exist yet
    if not os.path.isdir(plot_folder):
        os.makedirs(plot_folder)  # Creating the folder
        print(f"Created folder: {plot_folder}")
        # https://docs.python.org/3/library/os.html#os.makedirs
        # https://www.geeksforgeeks.org/python-os-makedirs-method/
        # https://stackoverflow.com/questions/273192/how-can-i-create-a-directory-in-python

    # Saving the plot into plots folder
    plt.savefig(os.path.join(plot_folder, "faang_close_prices.png"), dpi=300, bbox_inches='tight')
    plt.close()  # Closing the figure so it is not kept in memory
    print(f"Plot saved successfully in '{plot_folder}' folder as 'faang_close_prices.png'.")
    # https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.savefig.html
    # https://www.geeksforgeeks.org/python/saving-a-plot-as-an-image-in-python/
    # https://docs.python.org/3/library/os.path.html#os.path.join
    
# Running the function when this cell is executed
plot_data()
   
            

All files found: ['20251018_105950.csv', '20251018_110232 .csv', '20251022_221515.csv', '20251022_221612.csv', '20251022_221845.csv', '20251022_223230.csv', '20251026_141153.csv', '20251026_141202.csv', '20251027_191000.csv', '20251027_191225.csv', '20251027_191236.csv', '20251101_140747.csv', '20251101_140758.csv', '20251101_141627.csv', '20251101_142723.csv', '20251101_143026.csv', '20251109_180831.csv', '20251109_183335.csv', '20251113_174926.csv', '20251113_175104.csv', '20251113_180028.csv', '20251113_180929.csv', '20251113_181718.csv', '20251113_181743.csv', '20251115_174229.csv', '20251115_174627.csv', '20251115_182327.csv', '20251115_184004.csv', '20251115_184014.csv', '20251115_184142.csv', '20251123_105440.csv', '20251123_105455.csv', '20251123_145713.csv', '20251123_195532.csv', '20251123_195725.csv', '20251123_195826.csv', '20251123_200439.csv', '20251123_200837.csv', '20251123_201824.csv', '20251123_202001.csv', '20251123_202407.csv', '20251123_202926.csv', '20251123_20293

## End