In [1]:
# pip install yahoo_fin
# pip install pycoingecko
# pip install yfinance
# pip install yfinance plotly
# pip install mplfinance 

### Import Library

In [4]:
%matplotlib inline
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt


from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

import warnings
# Suppress every warning category for the rest of the session (keeps cell output quiet,
# but also hides deprecation notices from pandas / scikit-learn).
warnings.filterwarnings('ignore')
# Seed NumPy's global random generator so random draws repeat between runs.
np.random.seed(42)

### Load dataset

In [5]:
import yfinance as yf
from datetime import datetime

# Yahoo Finance symbol for the gold futures contract.
ticker = 'GC=F'

# Download up to today's date, formatted the way yf.download expects.
end_date = datetime.today().strftime('%Y-%m-%d')

# Get the latest data for gold futures.
# auto_adjust=False keeps the separate 'Adj Close' column that later cells read;
# recent yfinance releases default to auto_adjust=True, which drops that column.
gold_data = yf.download(ticker, start='2004-01-01', end=end_date, auto_adjust=False)

# Fail loudly here instead of letting every later cell run on an empty frame.
if gold_data.empty:
    raise RuntimeError(
        f"No data returned for {ticker!r}; check the network connection and the ticker symbol."
    )

# Recent yfinance releases return (field, ticker) MultiIndex columns even for a
# single ticker. Keep only the field level so goldDF['Close'] etc. work as plain columns.
if isinstance(gold_data.columns, pd.MultiIndex):
    gold_data.columns = gold_data.columns.get_level_values(0)
gold_data.columns.name = None

# Move the date index into a regular 'Date' column.
goldDF = gold_data.reset_index()
goldDF

ModuleNotFoundError: No module named 'yfinance'

In [None]:
# Show the dtype of every column in goldDF.
goldDF.dtypes

In [None]:
# Print the frame summary (column names, non-null counts, dtypes, memory usage).
goldDF.info()

### Data Cleaning

In [None]:
# Count the NaN entries in each column (isna is the same check as isnull).
missing_values = goldDF.isna().sum()

print("Missing Values:\n", missing_values)

In [None]:
# Drop every row that contains at least one missing value (no-op if there are none).
goldDF = goldDF.dropna(axis=0, how='any')

### Exploratory Data Analysis (EDA)

In [None]:
# Summary statistics: describe() output for goldDF, kept in summary_stats and printed.

summary_stats = goldDF.describe()
print("Summary Statistics:\n", summary_stats)

In [None]:
# Gap between the adjusted and the raw closing price, computed once and then summarised.
adj_minus_close = goldDF['Adj Close'].sub(goldDF['Close'])

mean_difference = adj_minus_close.mean()
std_difference = adj_minus_close.std()

print(f"Mean Difference: {mean_difference:.2f} USD")
print(f"Standard Deviation of Difference: {std_difference:.2f} USD")

In [None]:
# Average of the 'Close' column over the whole frame.
mean = goldDF['Close'].mean()

In [None]:
# Overlay the adjusted close and the raw close on a single time axis.
fig, ax = plt.subplots(figsize=(12, 8))

ax.plot(goldDF['Date'], goldDF['Adj Close'], label='Adjusted Close Price', linestyle='-')
ax.plot(goldDF['Date'], goldDF['Close'], label='Close Price', linestyle='-')

# Legend, title and axis labels
ax.legend()
ax.set_title('Adjusted Close Price vs. Closed Market Price')
ax.set_xlabel('Date')
ax.set_ylabel('Price (USD)')

plt.show()

In [None]:
# Standard deviation of each numeric column. The datetime 'Date' column is excluded
# explicitly, because how DataFrame.std() treats non-numeric columns differs between
# pandas versions.
goldDF.select_dtypes(include='number').std()

In [None]:
# Standardise the feature columns and check the spread of the scaled values.
# The feature frame is built from goldDF right here because `X` is only assigned in the
# later "Feature selection" cell; referencing it at this point fails on a fresh kernel.
scaler = StandardScaler()
X_scaler = scaler.fit_transform(goldDF.drop(columns=['Date', 'Adj Close']))
X_scaler.std()

In [None]:
# Pairwise correlation between the numeric columns only. The datetime 'Date' column is
# excluded up front: corr() no longer drops non-numeric columns silently in pandas >= 2.0.
goldDF_corr = goldDF.select_dtypes(include='number').corr()
goldDF_corr

### Feature selection

In [None]:
# Feature matrix: every column except the date and the adjusted close.
X = goldDF.drop(columns=['Date', 'Adj Close'])
# NOTE(review): the target is the Date column itself; confirm this is the intended label.
y = goldDF['Date']

In [None]:
# Preview the first rows of the feature matrix.
X.head()

In [None]:
# Preview the first values of the target series.
y.head()

---

In [None]:
# Opening vs. closing price over the full history, drawn on one Axes.
fig, ax = plt.subplots(figsize=(12, 8))

ax.plot(goldDF['Date'], goldDF['Open'], label='Open', linestyle='-', color='blue')
ax.plot(goldDF['Date'], goldDF['Close'], label='Close', linestyle='-', color='red')

ax.legend()
ax.set_title('Gold Price Open Market vs. Closed Market')
ax.set_xlabel('Date')
ax.set_ylabel('Price (USD)')

plt.show()

In [None]:
# Keep only the rows dated in calendar year 2023.
year2023 = goldDF.loc[goldDF['Date'].dt.year.eq(2023)]

# Opening vs. closing price for that year, drawn on one Axes.
fig, ax = plt.subplots(figsize=(12, 8))

ax.plot(year2023['Date'], year2023['Open'], label='Open', linestyle='-', color='blue')
ax.plot(year2023['Date'], year2023['Close'], label='Close', linestyle='-', color='red')

ax.legend()
ax.set_title('Gold Price Open Market vs. Closed Market - Year 2023')
ax.set_xlabel('Date')
ax.set_ylabel('Price (USD)')

plt.show()


In [None]:
import mplfinance as mpf

# Date-indexed copy of the price frame; goldDF itself is left unchanged.
Fdf = goldDF.set_index('Date')

# Despite its name, dec_2023 holds the whole of 2023 (1 Jan through 31 Dec).
dec_2023 = Fdf.loc['2023-01-01':'2023-12-31']

# Candlestick chart with 3/6/9-period moving averages and a volume panel.
mpf.plot(
    dec_2023,
    type='candle',
    mav=(3, 6, 9),
    figratio=(12, 8),
    volume=True,
    title='Candle Chart 2023',
    show_nontrading=True,
)

mpf.show()


In [None]:
# Rows dated in calendar year 2023.
goldDF_2023 = goldDF.loc[goldDF['Date'].dt.year.eq(2023)]

# For each calendar month, the index label of the row with the largest Volume.
top_volume_per_month_2023 = goldDF_2023['Volume'].groupby(goldDF_2023['Date'].dt.month).idxmax()

# Look those rows up to get the full record of each month's busiest day.
top_volume_data_2023 = goldDF_2023.loc[top_volume_per_month_2023]

print(top_volume_data_2023)

In [None]:
# Make sure 'Date' is a datetime column so the .dt accessors below work.
goldDF['Date'] = pd.to_datetime(goldDF['Date'])

# Rows dated in December 2023.
dec_2023_data = goldDF[(goldDF['Date'].dt.year == 2023) & (goldDF['Date'].dt.month == 12)]

# The five highest-volume days of that month. The original took head(10), which
# contradicted both this variable's name and the "Top 5 Volume Days" chart title.
top_5_volume_dec_2023 = dec_2023_data.sort_values(by='Volume', ascending=False).head(5)

print(top_5_volume_dec_2023)

In [None]:
# import mplfinance as mpf

# # Assuming 'goldDF' contains the necessary data, and the index is already a DatetimeIndex
# # Make sure your DataFrame has a DatetimeIndex, if not, set it using goldDF.set_index('your_datetime_column', inplace=True)

# # Filter the DataFrame for December 2023
# dec_2023_data = goldDF[(goldDF.index.year == 2023) & (goldDF.index.month == 12)]

# # Sort the DataFrame by volume in descending order and get the top 5 rows
# top_5_volume_dec_2023 = dec_2023_data.sort_values(by='Volume').head(5)
# # Plot the candlestick chart for the top 5 volume days in December 2023
# mpf.plot(top_5_volume_dec_2023, type='candle', figratio=(12, 8),
#          title='Candlestick Chart - Top 5 Volume Days - December 2023',)

# # Display the chart
# mpf.show()


In [None]:
# Bar chart of traded volume for the selected high-volume December 2023 days.
fig, ax = plt.subplots(figsize=(12, 8))
ax.bar(top_5_volume_dec_2023['Date'], top_5_volume_dec_2023['Volume'], color='blue', alpha=0.7)

ax.legend(['Volume'])
ax.set_title('Volume Distribution - Top 5 Volume Days - December 2023')
ax.set_xlabel('Date')
ax.set_ylabel('Volume')

plt.show()


2023-12-04 -

2023-12-13 - CPI報告：美国CPI在11月增长了3.1%，市场预计美联储将在周三继续推高利率。這個報告利多黃金，所以交易者都會選擇在這個時間入場。


2023-12-22 - GDP報告：年化增长率为4.9%，低于之前的5.2%。這個報告利多黃金，所以交易者都會選擇在這個時間入場。

黃金升、美金跌的主要原因：
经济不确定性 Economic Uncertainty
通胀对冲 Inflation Hedge
货币贬值 Currency Depreciation