In [11]:
import os
import pandas as pd
import requests
from alpha_vantage.timeseries import TimeSeries
from datetime import datetime

# Alpha Vantage API key.
# Read from the ALPHAVANTAGE_API_KEY environment variable when it is set, so
# the real key never has to be typed into (and saved with) the notebook.
# Without that variable the placeholder below is used, exactly as before.
API_KEY = os.environ.get("ALPHAVANTAGE_API_KEY", "INSERT API KEY")
BASE_URL = "https://www.alphavantage.co/query"

# Every CSV produced by this cell is written under this folder.
data_folder = "data"
os.makedirs(data_folder, exist_ok=True)

# Define the date range (fixed start; the end is the day the cell is run)
start_date = "2022-01-01"
end_date = datetime.today().strftime("%Y-%m-%d")

# Function to save DataFrame to CSV with timestamp column
def save_df_to_csv(df, filename, start=None, end=None, folder=None):
    """Write the rows of ``df`` that fall inside a date window to a CSV file.

    The index is parsed as datetimes, sorted oldest-first, sliced to the
    window and written out as a column named "timestamp". The caller's
    frame is left untouched: all work happens on a copy.

    Args:
        df: DataFrame whose index holds dates (strings or datetimes).
        filename: Name of the CSV file to create inside the output folder.
        start: First date to keep (inclusive). Defaults to the module-level
            ``start_date``.
        end: Last date to keep (inclusive). Defaults to the module-level
            ``end_date``.
        folder: Output directory. Defaults to the module-level ``data_folder``.

    Returns:
        None. The result is the file written to disk.
    """
    # Fall back to the notebook-wide settings only when no override is given.
    start = start_date if start is None else start
    end = end_date if end is None else end
    folder = data_folder if folder is None else folder

    # Copy first: assigning a new index directly on ``df`` would silently
    # change the frame the caller still holds.
    out = df.copy()
    out.index = pd.to_datetime(out.index)
    out = out.sort_index()  # Ensure chronological order
    out = out.loc[start:end]
    out.index.name = "timestamp"
    out.to_csv(os.path.join(folder, filename))

# Fetch S&P 500 Data using SPY ETF as a proxy
print("Fetching S&P 500 (SPY ETF) data...")
# Client from the alpha_vantage package; pandas output is requested.
ts = TimeSeries(key=API_KEY, output_format='pandas')
# The call is unpacked as a pair; only the first element (the price frame) is
# kept, the second is discarded.
spy_data, _ = ts.get_daily(symbol="SPY", outputsize="full")
# Relabel the columns positionally. This assumes the frame has exactly these
# five columns in this order; a different column count raises here.
spy_data.columns = ["open", "high", "low", "close", "volume"]
# save_df_to_csv applies the start_date/end_date window before writing.
save_df_to_csv(spy_data, "sp500.csv")

# Fetch Gold Data using TIME_SERIES_DAILY
print("Fetching Gold data...")
def fetch_gold_data():
    """Download the full daily price history for the "GOLD" symbol.

    Queries the TIME_SERIES_DAILY function of the Alpha Vantage endpoint
    stored in the module-level ``BASE_URL``, authenticating with ``API_KEY``.

    Returns:
        pandas.DataFrame | None: Rows sorted oldest-first on a DatetimeIndex
        named "timestamp", with numeric columns "Open", "High", "Low",
        "Close" and "Volume". ``None`` (after printing the reason) when the
        request fails, the body is not JSON, or the response carries no
        "Time Series (Daily)" payload.
    """
    # NOTE(review): "GOLD" is sent as an equity ticker symbol; confirm that it
    # is the intended proxy for the gold price.
    params = {
        "function": "TIME_SERIES_DAILY",
        "symbol": "GOLD",
        "apikey": API_KEY,
        "outputsize": "full"
    }
    try:
        # A timeout keeps a stalled connection from hanging the notebook.
        response = requests.get(BASE_URL, params=params, timeout=30)
        data = response.json()
    except (requests.RequestException, ValueError) as exc:
        # Network failures and non-JSON bodies are reported the same way as
        # an unexpected payload: print and hand back None.
        print("Error fetching data:", exc)
        return None

    if "Time Series (Daily)" not in data:
        print("Error fetching data:", data)
        return None

    # Convert to DataFrame: one row per date, one column per price field
    df = pd.DataFrame.from_dict(data["Time Series (Daily)"], orient="index")
    df = df.rename(columns={
        "1. open": "Open",
        "2. high": "High",
        "3. low": "Low",
        "4. close": "Close",
        "5. volume": "Volume"
    })

    # The JSON payload carries every value as a string; convert so the frame
    # can be used for arithmetic (unparseable cells become NaN).
    df = df.apply(pd.to_numeric, errors="coerce")

    df.index = pd.to_datetime(df.index)
    df = df.sort_index()
    df.index.name = "timestamp"
    return df

# Run the gold download. fetch_gold_data returns None when the response has
# no daily series, in which case no gold.csv is written.
gold_data = fetch_gold_data()
if gold_data is not None:
    save_df_to_csv(gold_data, "gold.csv")

# Function to fetch Treasury yield data
def fetch_treasury_yield(period, filename, column_name):
    """Download a daily Treasury yield series and save it as a CSV file.

    Calls the TREASURY_YIELD function of the Alpha Vantage endpoint in the
    module-level ``BASE_URL`` and hands the result to ``save_df_to_csv``,
    which applies the notebook's date window and writes the file.

    Args:
        period: Maturity passed to the API, e.g. "3month" or "10year".
        filename: Name of the CSV file to create in the data folder.
        column_name: Header used for the yield column in the CSV.

    Returns:
        None. On any failure the reason is printed and no file is written.
    """
    print(f"Fetching Treasury {period} data...")
    # Passing the query as a dict lets requests do the URL encoding.
    params = {
        "function": "TREASURY_YIELD",
        "interval": "daily",
        "maturity": period,
        "apikey": API_KEY,
    }
    try:
        # A timeout keeps a stalled connection from hanging the notebook.
        response = requests.get(BASE_URL, params=params, timeout=30).json()
    except (requests.RequestException, ValueError) as exc:
        print(f"Error fetching {period} treasury yield: {exc}")
        return

    if "data" not in response:
        print(f"Error fetching {period} treasury yield: {response}")
        return

    treasury_data = {item['date']: item['value'] for item in response["data"]}
    df = pd.DataFrame.from_dict(treasury_data, orient='index', columns=[column_name])
    # Values arrive as strings. Anything that is not a number (presumably a
    # placeholder for days without a quote; verify against the API docs)
    # becomes NaN instead of staying in the column as text.
    df[column_name] = pd.to_numeric(df[column_name], errors="coerce")
    # Shared helper: parses the dates, sorts, filters to the date window,
    # names the index "timestamp" and writes the CSV.
    save_df_to_csv(df, filename)

# Fetch the Treasury series: 3-month first, then 10-year.
# Each entry is (API maturity, output file name, CSV column header).
for _maturity, _csv_name, _column in (
    ("3month", "treasury_3m.csv", "treasury_3m"),
    ("10year", "treasury_10y.csv", "treasury_10y"),
):
    fetch_treasury_yield(_maturity, _csv_name, _column)

print("Data extraction complete. Files saved in the 'data' folder.")


Fetching S&P 500 (SPY ETF) data...
Fetching Gold data...
Fetching Treasury 3month data...
Fetching Treasury 10year data...
Data extraction complete. Files saved in the 'data' folder.


In [13]:
import os
import pandas as pd
from pytrends.request import TrendReq
from datetime import datetime, timedelta

# Search terms whose Google Trends interest is tracked
keywords = [
    "sp500",
    "SPX",
    "index fund",
    "ETF",
]

# Output directory for the generated CSV (created if it does not exist yet)
data_folder = "data"
os.makedirs(data_folder, exist_ok=True)

# Query window: fixed start, ending on the day the cell is run
start_date = "2022-01-01"
end_date = datetime.today().date().isoformat()

# Shared pytrends client used by fetch_google_trends.
# NOTE(review): tz=360 is presumably a timezone offset in minutes (US Central);
# confirm against the pytrends documentation.
pytrends = TrendReq(hl="en-US", tz=360)

# Function to fetch Google Trends data at daily granularity
def fetch_google_trends():
    """Query Google Trends for ``keywords`` and return a daily series.

    Uses the module-level ``pytrends`` client with the ``start_date`` to
    ``end_date`` window, restricted to the US. Whatever granularity comes
    back is put on a daily grid, with the gaps filled by interpolation.

    Returns:
        pandas.DataFrame | None: One column per keyword on an index named
        "timestamp", sorted ascending. ``None`` (after printing an error)
        when the service returns an empty frame.
    """
    print("Fetching Google Trends data...")
    window = f"{start_date} {end_date}"
    pytrends.build_payload(kw_list=keywords, timeframe=window, geo="US")
    raw = pytrends.interest_over_time()

    # Guard clause: nothing came back, so there is nothing to reshape.
    if raw.empty:
        print("Error fetching Google Trends data: No data returned.")
        return None

    daily = (
        raw
        .drop(columns=["isPartial"], errors="ignore")  # bookkeeping flag, not a keyword
        .resample("D")                                 # one row per calendar day
        .interpolate()                                 # fill the newly created gaps
    )
    daily.index.name = "timestamp"
    return daily.sort_index()

# Run the download, then either report the failure or persist the result
google_trends_data = fetch_google_trends()
if google_trends_data is None:
    print("Failed to fetch Google Trends data.")
else:
    google_trends_data.to_csv(
        os.path.join(data_folder, "google_trends.csv")
    )
    print("Google Trends data saved successfully.")

Fetching Google Trends data...
Google Trends data saved successfully.


In [37]:
import os
import pandas as pd
import requests
from datetime import datetime

# Alpha Vantage API key.
# Read from the ALPHAVANTAGE_API_KEY environment variable when it is set, so
# the real key never has to be typed into (and saved with) the notebook.
# Without that variable the placeholder below is used, exactly as before.
API_KEY = os.environ.get("ALPHAVANTAGE_API_KEY", "INSERT API KEY")

# Define the data folder (created if it does not exist yet)
data_folder = "data"
os.makedirs(data_folder, exist_ok=True)

# Forex pairs to download, each as (from_currency, to_currency)
forex_pairs = [
    ("EUR", "USD"),
    ("USD", "JPY"),
    ("GBP", "USD"),
    ("AUD", "USD"),
    ("USD", "CAD"),
]

# Define the date range (fixed start; the end is the day the cell is run)
start_date = "2022-01-01"
end_date = datetime.today().strftime("%Y-%m-%d")

# Function to fetch Forex data
def fetch_forex_data(from_currency, to_currency):
    """Download the daily exchange-rate history for one currency pair.

    Calls Alpha Vantage's FX_DAILY function with ``outputsize=full`` and keeps
    only the rows between the module-level ``start_date`` and ``end_date``.

    Args:
        from_currency: Base currency code, e.g. "EUR".
        to_currency: Quote currency code, e.g. "USD".

    Returns:
        pandas.DataFrame | None: A "timestamp" column plus numeric
        ``open_<pair>``, ``high_<pair>``, ``low_<pair>`` and ``close_<pair>``
        columns, where ``<pair>`` is ``<from>_<to>``. ``None`` (after printing
        the reason) when the request fails or returns no usable series.
    """
    print(f"Fetching Forex data for {from_currency}/{to_currency} from Alpha Vantage...")

    # Passing the query as a dict lets requests do the URL encoding.
    params = {
        "function": "FX_DAILY",
        "from_symbol": from_currency,
        "to_symbol": to_currency,
        "apikey": API_KEY,
        "outputsize": "full",
    }
    try:
        # A timeout keeps a stalled connection from hanging the notebook.
        response = requests.get("https://www.alphavantage.co/query", params=params, timeout=30)
    except requests.RequestException as exc:
        print(f"Error fetching data for {from_currency}/{to_currency}: {exc}")
        return None
    if response.status_code != 200:
        print(f"Error fetching data for {from_currency}/{to_currency}: {response.status_code}")
        return None

    try:
        data = response.json()
    except ValueError as exc:
        print(f"Error: {exc} for {from_currency}/{to_currency}")
        return None

    series = data.get("Time Series FX (Daily)") if isinstance(data, dict) else None
    if not series:
        # Besides "Error Message", the service can explain a refusal (such as
        # rate limiting) under "Note" or "Information"; surface whichever is
        # present instead of a generic message.
        details = data if isinstance(data, dict) else {}
        reason = (
            details.get("Error Message")
            or details.get("Note")
            or details.get("Information")
            or "No data returned."
        )
        print(f"Error: {reason} for {from_currency}/{to_currency}")
        return None

    # Convert JSON data to DataFrame: one row per date
    df = pd.DataFrame.from_dict(series, orient="index")
    # The payload carries every rate as a string; convert before the date
    # index is turned into a regular column.
    df = df.apply(pd.to_numeric, errors="coerce")
    df = df.reset_index()

    # Rename columns with currency pair name so frames for different pairs
    # can be merged side by side without clashing.
    pair_name = f"{from_currency}_{to_currency}"
    df.columns = ["timestamp", f"open_{pair_name}", f"high_{pair_name}", f"low_{pair_name}", f"close_{pair_name}"]

    # Convert timestamp column
    df["timestamp"] = pd.to_datetime(df["timestamp"])

    # Filter by date range; copy so callers get an independent frame rather
    # than a slice of the unfiltered one.
    in_range = (df["timestamp"] >= start_date) & (df["timestamp"] <= end_date)
    return df.loc[in_range].copy()

# Download every configured pair and outer-join the frames on "timestamp",
# so a date present for only some pairs is still kept.
merged_df = None
for from_currency, to_currency in forex_pairs:
    forex_data = fetch_forex_data(from_currency, to_currency)
    if forex_data is None:
        continue  # this pair failed; carry on with the remaining ones
    merged_df = (
        forex_data
        if merged_df is None
        else merged_df.merge(forex_data, on="timestamp", how="outer")
    )

# Persist the combined table, or report that nothing could be downloaded
if merged_df is None:
    print("Failed to fetch Forex data for any currency pair.")
else:
    merged_df.sort_values("timestamp", inplace=True)
    merged_df.to_csv(os.path.join(data_folder, "forex.csv"), index=False)
    print("Forex data for top 5 pairs saved successfully as forex.csv.")


Fetching Forex data for EUR/USD from Alpha Vantage...
Fetching Forex data for USD/JPY from Alpha Vantage...
Fetching Forex data for GBP/USD from Alpha Vantage...
Fetching Forex data for AUD/USD from Alpha Vantage...
Fetching Forex data for USD/CAD from Alpha Vantage...
Forex data for top 5 pairs saved successfully as forex.csv.
