In [None]:
# Install yfinance if it is not already installed.
# %pip install yfinance --upgrade --no-cache-dir

In [None]:
# Load packages
import yfinance as yf
import numpy as np
import pandas as pd
from datetime import datetime
import calendar
import plotly.express as px
import plotly.graph_objects as go

In [None]:
# Load the daily euro exchange-rate history into a pandas DataFrame.
# Source: https://www.kaggle.com/datasets/lsind18/euro-exchange-daily-rates-19992020/
# The relative path means the CSV must sit in the notebook's working directory.
rates_csv = 'euro-daily-hist_1999_2022.csv'
ex_rates = pd.read_csv(rates_csv)

In [None]:
# Inspect the column headers available in the raw exchange-rate table
ex_rates.columns.tolist()

In [None]:
# We only need the exchange rate between USD and EUR, so keep just the date
# column and the US dollar column, renaming both to friendlier headers.
usd_columns = {'Period\\Unit:': 'Date', '[US dollar ]': 'USDEUR'}
USDEUR = ex_rates[list(usd_columns)].rename(columns=usd_columns)

In [None]:
# Parse the date strings into real timestamps and use them as the row index.
# `infer_datetime_format` is intentionally not passed: it is deprecated since
# pandas 2.0, where consistent-format inference is already the default.
# `assign` builds a new frame instead of writing a column into a derived one.
USDEUR = (
    USDEUR
    .assign(Date=pd.to_datetime(USDEUR['Date']))
    .set_index('Date')
)

In [None]:
# With a proper DatetimeIndex we can keep only the relevant dates (calendar
# year 2021) and convert the exchange rates to float.
# - The index is sorted first so the label slice does not depend on the order
#   in which the CSV happens to list its rows.
# - `pd.to_numeric(..., errors='coerce')` turns any non-numeric placeholder
#   into NaN instead of raising; such rows are removed by the later dropna().
USDEUR = (
    USDEUR
    .sort_index()
    .loc['2021-01-01':'2021-12-31']
    .apply(pd.to_numeric, errors='coerce')
    .astype('float64')
)

In [None]:
# yfinance is an API package which downloads historical finance data
# and returns it as a pandas DataFrame.
# NOTE: yfinance treats `end` as exclusive, so "2022-01-01" is required to
# include the last trading day of 2021 (the FX data is sliced up to and
# including 2021-12-31).
prices_raw = yf.download(
    tickers="AAPL BTC-USD",
    start="2021-01-01",
    end="2022-01-01",
    interval="1d",
    auto_adjust=True,
    prepost=True,
    threads=True,
    proxy=None,
)

# Keep only the opening prices. Columns are selected by ticker label rather
# than renamed by position, so a change in the order yfinance returns the
# tickers can never silently swap the AAPL and BTC series.
df = (
    prices_raw["Open"][["AAPL", "BTC-USD"]]
    .rename(columns={"BTC-USD": "BTCUSD"})
    .rename_axis(columns=None)
)

In [None]:
# Combine the FX rates and the asset prices on their shared dates.
# An inner merge on the index keeps only dates that exist in both tables, and
# dropna() then removes any row that still has a missing value.
# Equivalent alternative: pd.concat([USDEUR, df], axis=1).dropna() -- the outer
# join leaves NaN on non-shared dates, which dropna() removes as well. Only one
# of the two approaches is executed so no result is computed and thrown away.
master_USD = pd.merge(USDEUR, df, left_index=True, right_index=True).dropna()

In [None]:
# Convert the USD prices into EUR by dividing by the USD-per-EUR rate, and
# stack the results in long format: one row per (date, ticker).
eur_frames = [
    pd.DataFrame({'EUR': master_USD[usd_column].div(master_USD['USDEUR']), 'Ticker': ticker_label})
    for usd_column, ticker_label in (('AAPL', "AAPL"), ('BTCUSD', "BTCEUR"))
]
master_EUR = pd.concat(eur_frames)

In [None]:
# Record the calendar month (1-12) of every row's date in a new 'Month' column
master_EUR = master_EUR.assign(Month=pd.DatetimeIndex(master_EUR.index).month)

In [None]:
# View the data in chronological order (sort_index sorts ascending by default)
master_EUR.sort_index()

In [None]:
# Order the data by ticker first and by date within each ticker (both ascending)
sort_keys = ['Ticker', 'Date']
master_EUR_sorted = master_EUR.sort_values(by=sort_keys, ascending=True)

In [None]:
# Monthly summary statistics of the EUR price for every ticker.
# Named aggregation produces the final column headers directly.
master_EUR_agg = (
    master_EUR
    .groupby(['Ticker', 'Month'])['EUR']
    .agg(Min='min', Mean='mean', Max='max', STD='std')
)
master_EUR_agg

In [None]:
# Number of (non-missing) entries recorded for each ticker
master_EUR_sorted.groupby('Ticker').count()

In [None]:
# Use a loop to extract the data as plain Python lists, one pair of lists
# (prices, formatted dates) per ticker.
# Each row is routed by its 'Ticker' value; without this check both pairs of
# lists would receive every row of both tickers.
AAPL_EUR = []
AAPL_EUR_Date = []
BTCEUR = []
BTCEUR_Date = []
for lab, row in master_EUR_sorted.iterrows():
    # `lab` is the row's index label (the date); format it as dd/mm/yyyy
    date_text = lab.to_pydatetime().strftime("%d/%m/%Y")
    if row["Ticker"] == "AAPL":
        AAPL_EUR.append(row["EUR"])
        AAPL_EUR_Date.append(date_text)
    elif row["Ticker"] == "BTCEUR":
        BTCEUR.append(row["EUR"])
        BTCEUR_Date.append(date_text)

In [None]:
# The parallel lists can be turned into dictionaries keyed by the date string.
# If a date string occurs more than once, the last price seen for it wins.
AAPL_EUR_Dict = {day: price for day, price in zip(AAPL_EUR_Date, AAPL_EUR)}
BTCEUR_Dict = {day: price for day, price in zip(BTCEUR_Date, BTCEUR)}

In [None]:
# Alternatively, we can turn these into numpy arrays, which are better suited
# to data analysis (apart from the DataFrame itself).
# NOTE: this rebinds AAPL_EUR and BTCEUR from lists to arrays.

# Create zero-filled arrays for storage: one row per entry of the ticker and
# two columns (column 0 = date, column 1 = EUR price).
rows_per_ticker = master_EUR_sorted.groupby(['Ticker']).count()['EUR']
AAPL_EUR = np.zeros(shape=(rows_per_ticker['AAPL'], 2))
BTCEUR = np.zeros(shape=(rows_per_ticker['BTCEUR'], 2))

# Extraction
# Index-based i, j loops would work too, but iterrows() is used as covered in this course.
# The date is written into a float array, so column 0 holds a numeric
# representation of the timestamp rather than a date object.
# NOTE(review): presumably nanoseconds since the epoch -- confirm.
i = j = 0
for lab, row in master_EUR_sorted.iterrows():
    stamp = lab.to_numpy()
    if row["Ticker"] != "AAPL":
        # Every non-AAPL row is treated as BTC
        BTCEUR[j, 0] = stamp
        BTCEUR[j, 1] = row["EUR"]
        j += 1
        continue
    AAPL_EUR[i, 0] = stamp
    AAPL_EUR[i, 1] = row["EUR"]
    i += 1

In [None]:
# numpy's `round()` simplifies the data; here every value is rounded to 0 decimal places.
np.round(AAPL_EUR, decimals=0)

In [None]:
# Preparation for visualisation: duplicate the (multi-)index levels as ordinary
# columns so the plotting calls can refer to them by name.
# The columns are assigned in one vectorised step from the index levels rather
# than cell by cell while iterating over the same frame.
master_EUR_agg["Ticker"] = master_EUR_agg.index.get_level_values("Ticker")
# Month numbers (1-12) are translated to month names, e.g. 1 -> "January"
master_EUR_agg["Month"] = [
    calendar.month_name[month_number]
    for month_number in master_EUR_agg.index.get_level_values("Month")
]
master_EUR_agg

In [None]:
# Scatter plot of the monthly price standard deviation against the monthly mean,
# one colour per ticker; hovering a point shows its month.
# Not very meaningful as-is: BTC mean and STD values are much bigger than AAPL's.
scatter_options = dict(x='Mean', y='STD', color="Ticker", hover_data=['Month'])
fig = px.scatter(master_EUR_agg, **scatter_options)
fig.show()

In [None]:
# Same plot restricted to the AAPL rows, so the axis scales are much smaller.
# Points are coloured by their STD value.
aapl_monthly = master_EUR_agg.loc[master_EUR_agg['Ticker'] == "AAPL"]
fig = px.scatter(aapl_monthly, x='Mean', y='STD', color="STD", hover_data=['Month'], title="AAPL")
fig.show()

In [None]:
# Line plot of the EUR prices over time, one line per ticker.
# Again, the AAPL prices are barely visible next to BTC.
date_axis = master_EUR_sorted.index
fig = px.line(master_EUR_sorted, x=date_axis, y='EUR', color="Ticker", title="EUR per unit")
fig.show()

In [None]:
# Same plot restricted to the AAPL rows, so the axis scales are much smaller.
# The filtered frame is built once and reused for both the data and the x axis.
aapl_daily = master_EUR_sorted[master_EUR_sorted['Ticker'] == "AAPL"]
fig = px.line(aapl_daily, x=aapl_daily.index, y='EUR', title="AAPL")
fig.show()

In [None]:
# Bar plot of the AAPL mean price (in EUR) for each month
aapl_monthly = master_EUR_agg.loc[master_EUR_agg['Ticker'] == "AAPL"]
fig = px.bar(aapl_monthly, x='Month', y='Mean', title="AAPL")
fig.show()

In [None]:
# Bar plot of the BTC mean price (in EUR) for each month
btc_monthly = master_EUR_agg.loc[master_EUR_agg['Ticker'] == "BTCEUR"]
fig = px.bar(btc_monthly, x='Month', y='Mean', title="BTC")
fig.show()

In [None]:
# Download the 2021 daily price history again, this time for AAPL only, so the
# full open/high/low/close data is available for the candlestick chart.
# NOTE: yfinance treats `end` as exclusive, so "2022-01-01" is required to
# include the last trading day of 2021.
df2 = yf.download(
    tickers="AAPL",
    start="2021-01-01",
    end="2022-01-01",
    interval="1d",
    auto_adjust=True,
    prepost=True,
    threads=True,
    proxy=None,
)

In [None]:
# Interactive candlestick plot for AAPL.
# Some yfinance versions return two-level (field, ticker) columns even for a
# single ticker; in that case df2['Open'] would be a DataFrame rather than a
# Series. Dropping the last column level gives plain 'Open'/'High'/'Low'/'Close'
# columns in both cases.
ohlc = df2
if isinstance(ohlc.columns, pd.MultiIndex):
    ohlc = ohlc.droplevel(-1, axis=1)

candles = go.Candlestick(
    x=ohlc.index,
    open=ohlc['Open'],
    high=ohlc['High'],
    low=ohlc['Low'],
    close=ohlc['Close'],
)
fig = go.Figure(data=[candles])

fig.show()

# Project Report

[GitHub URL](https://github.com/marcohoucheng/UCDPA_MarcoLam)

## Abstract
(Short overview of the entire project and features)

## Introduction
(Explain why you chose this project use case)

## Dataset
(Provide a description of your dataset and source. Also justify why you chose this source)

[Kaggle - Daily Exchange Rates per Euro 1999-2022](https://www.kaggle.com/datasets/lsind18/euro-exchange-daily-rates-19992020/)

## Implementation Process
(Describe your entire process in detail)

## Results
(Include the charts and describe them)

## Insights
(Point out at least 5 insights in bullet points)

## References
(Include any references if required)