In [117]:
pip install yfinance pandas

Note: you may need to restart the kernel to use updated packages.


In [118]:
import yfinance as yf
import pandas as pd
import os

In [119]:
START_DATE = "2000-01-01"
END_DATE = "2025-01-01"

# Endeksler
INDEX_SYMBOLS = {
    "S&P 500": "^GSPC",
    "NASDAQ Composite": "^IXIC",
    "Dow Jones": "^DJI"
}

# Emtialar ve faiz oranları
COMMODITIES = {
    "Gold": "GC=F",
    "Oil": "CL=F",
    "10Y Treasury": "^TNX",
    "Fed Funds Rate": "^IRX"
}

# Döviz kurları
CURRENCIES = {
    "EUR/USD": "EURUSD=X",
    "USD/JPY": "JPY=X"
}

# Hisse senetleri (10 farklı sektörden büyük şirketler)
STOCK_SYMBOLS = {
    "Microsoft": "MSFT",
    "Walmart": "WMT",
    "Disney": "DIS",
    "Apple": "AAPL",
    "JP Morgan": "JPM",
    "Exxon Mobil": "XOM",
    "Johnson & Johnson": "JNJ",
    "Google": "GOOGL",
    "Coca-Cola": "KO",
    "Intel": "INTC"
}


### VERİ ÇEKME FONKSİYONU

In [120]:
def download_data(symbol_dict, start, end, folder):
    os.makedirs(folder, exist_ok=True)  # klasör yoksa oluştur
    for name, symbol in symbol_dict.items():
        print(f"Downloading {name} ({symbol})...")
        df = yf.download(symbol, start=start, end=end)
        df.to_csv(f"{folder}/{name.replace('/', '-')}.csv")  # veriyi CSV olarak kaydet

In [121]:
download_data(INDEX_SYMBOLS, START_DATE, END_DATE, "data/indices")
download_data(COMMODITIES, START_DATE, END_DATE, "data/commodities")
download_data(CURRENCIES, START_DATE, END_DATE, "data/currencies")
download_data(STOCK_SYMBOLS, START_DATE, END_DATE, "data/stocks")

  df = yf.download(symbol, start=start, end=end)
[*********************100%***********************]  1 of 1 completed
  df = yf.download(symbol, start=start, end=end)
[*********************100%***********************]  1 of 1 completed
  df = yf.download(symbol, start=start, end=end)
[*********************100%***********************]  1 of 1 completed
  df = yf.download(symbol, start=start, end=end)
[*********************100%***********************]  1 of 1 completed

Downloading S&P 500 (^GSPC)...
Downloading NASDAQ Composite (^IXIC)...
Downloading Dow Jones (^DJI)...
Downloading Gold (GC=F)...
Downloading Oil (CL=F)...



  df = yf.download(symbol, start=start, end=end)
[*********************100%***********************]  1 of 1 completed
  df = yf.download(symbol, start=start, end=end)
[*********************100%***********************]  1 of 1 completed
  df = yf.download(symbol, start=start, end=end)
[*********************100%***********************]  1 of 1 completed
  df = yf.download(symbol, start=start, end=end)
[*********************100%***********************]  1 of 1 completed
  df = yf.download(symbol, start=start, end=end)
[*********************100%***********************]  1 of 1 completed

Downloading 10Y Treasury (^TNX)...
Downloading Fed Funds Rate (^IRX)...
Downloading EUR/USD (EURUSD=X)...
Downloading USD/JPY (JPY=X)...



  df = yf.download(symbol, start=start, end=end)
[*********************100%***********************]  1 of 1 completed
  df = yf.download(symbol, start=start, end=end)
[*********************100%***********************]  1 of 1 completed
  df = yf.download(symbol, start=start, end=end)
[*********************100%***********************]  1 of 1 completed


Downloading Microsoft (MSFT)...
Downloading Walmart (WMT)...
Downloading Disney (DIS)...
Downloading Apple (AAPL)...


  df = yf.download(symbol, start=start, end=end)
[*********************100%***********************]  1 of 1 completed
  df = yf.download(symbol, start=start, end=end)
[*********************100%***********************]  1 of 1 completed
  df = yf.download(symbol, start=start, end=end)
[*********************100%***********************]  1 of 1 completed
  df = yf.download(symbol, start=start, end=end)


Downloading JP Morgan (JPM)...
Downloading Exxon Mobil (XOM)...
Downloading Johnson & Johnson (JNJ)...


[*********************100%***********************]  1 of 1 completed
  df = yf.download(symbol, start=start, end=end)
[*********************100%***********************]  1 of 1 completed
  df = yf.download(symbol, start=start, end=end)
[*********************100%***********************]  1 of 1 completed
  df = yf.download(symbol, start=start, end=end)
[*********************100%***********************]  1 of 1 completed

Downloading Google (GOOGL)...
Downloading Coca-Cola (KO)...
Downloading Intel (INTC)...





In [122]:
import pandas as pd
import os

# Kategoriler: klasör isimleri
categories = ["stocks", "indices", "commodities", "currencies"]

# Beklenen sütun yapısı
expected_cols = ["Close", "High", "Low", "Open", "Volume"]

for category in categories:
    input_folder = f"data/{category}"
    output_folder = f"data/{category}_cleaned"
    os.makedirs(output_folder, exist_ok=True)

    print(f"\n📂 {category.upper()} klasörü işleniyor...")

    for filename in os.listdir(input_folder):
        if filename.endswith(".csv"):
            file_path = os.path.join(input_folder, filename)

            try:
                # 1. İlk iki satırı atlayarak oku
                df = pd.read_csv(file_path, skiprows=2)

                # 2. Tarih sütunu kontrolü ve datetime dönüşümü
                if "Date" not in df.columns:
                    print(f"[UYARI] {filename} içinde 'Date' sütunu yok, atlandı.")
                    continue

                df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
                df.dropna(subset=["Date"], inplace=True)
                df.set_index("Date", inplace=True)

                # 3. Beklenen sütun kontrolü
                if len(df.columns) == len(expected_cols):
                    df.columns = expected_cols
                else:
                    print(f"[UYARI] {filename} beklenen sütun sayısına uymuyor, atlandı.")
                    continue

                # 4. Kaydet
                cleaned_file_path = os.path.join(output_folder, filename.replace(".csv", "_cleaned.csv"))
                df.to_csv(cleaned_file_path)
                print(f"[OK] {filename} → {category}_cleaned klasörüne kaydedildi.")

            except Exception as e:
                print(f"[HATA] {filename} işlenemedi: {e}")


📂 STOCKS klasörü işleniyor...
[OK] Google.csv → stocks_cleaned klasörüne kaydedildi.
[OK] Walmart.csv → stocks_cleaned klasörüne kaydedildi.
[OK] Apple.csv → stocks_cleaned klasörüne kaydedildi.
[OK] Intel.csv → stocks_cleaned klasörüne kaydedildi.
[OK] Coca-Cola.csv → stocks_cleaned klasörüne kaydedildi.
[OK] Exxon Mobil.csv → stocks_cleaned klasörüne kaydedildi.
[OK] Disney.csv → stocks_cleaned klasörüne kaydedildi.
[OK] Johnson & Johnson.csv → stocks_cleaned klasörüne kaydedildi.
[OK] Microsoft.csv → stocks_cleaned klasörüne kaydedildi.
[OK] JP Morgan.csv → stocks_cleaned klasörüne kaydedildi.

📂 INDICES klasörü işleniyor...
[OK] Dow Jones.csv → indices_cleaned klasörüne kaydedildi.
[OK] S&P 500.csv → indices_cleaned klasörüne kaydedildi.
[OK] NASDAQ Composite.csv → indices_cleaned klasörüne kaydedildi.

📂 COMMODITIES klasörü işleniyor...
[OK] 10Y Treasury.csv → commodities_cleaned klasörüne kaydedildi.
[OK] Gold.csv → commodities_cleaned klasörüne kaydedildi.
[OK] Fed Funds Rate.c

In [123]:
import os
import pandas as pd

def load_close_prices_from_folder(folder_path):
    combined = pd.DataFrame()

    for file in os.listdir(folder_path):
        if file.endswith(".csv"):
            symbol = file.replace("_cleaned.csv", "").replace(".csv", "")
            path = os.path.join(folder_path, file)

            try:
                df = pd.read_csv(path, parse_dates=["Date"], index_col="Date")
                if "Close" in df.columns:
                    df = df[["Close"]].rename(columns={"Close": symbol})
                    combined = df if combined.empty else combined.join(df, how="outer")
            except Exception as e:
                print(f"[HATA] {file} okunamadı: {e}")

    return combined.sort_index()

In [124]:
stocks_df = load_close_prices_from_folder("data/stocks_cleaned")
indices_df = load_close_prices_from_folder("data/indices_cleaned")
commodities_df = load_close_prices_from_folder("data/commodities_cleaned")
currencies_df = load_close_prices_from_folder("data/currencies_cleaned")

In [125]:
# Ana tabloyu başlat (stocks)
combined_all = stocks_df

# Diğer kategorileri sırayla ekle
for df in [indices_df, commodities_df, currencies_df]:
    combined_all = combined_all.join(df, how="outer")

# Tarih sırasına göre sırala
combined_all = combined_all.sort_index()

# İlk satırları görüntüle
combined_all.head()


Unnamed: 0_level_0,Disney,Microsoft,Walmart,Google,Exxon Mobil,Apple,Intel,JP Morgan,Johnson & Johnson,Coca-Cola,NASDAQ Composite,Dow Jones,S&P 500,Fed Funds Rate,Oil,Gold,10Y Treasury,EUR-USD,USD-JPY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2000-01-03,22.833183,35.726707,14.272882,,17.568422,0.841048,24.710649,23.237137,23.727488,13.913956,4131.149902,11357.509766,1455.219971,5.27,,,6.548,,101.690002
2000-01-04,24.170692,34.519848,13.738815,,17.231915,0.770139,23.556784,22.727282,22.85882,13.929386,3901.689941,10997.929688,1399.420044,5.27,,,6.485,,103.139999
2000-01-05,25.173819,34.883846,13.458438,,18.171329,0.781409,23.752058,22.586996,23.100109,14.05279,3877.540039,11122.650391,1402.109985,5.27,,,6.599,,104.089996
2000-01-06,24.170692,33.715286,13.6053,,19.110748,0.713787,22.367401,22.907658,23.824001,14.068213,3727.129883,11253.259766,1403.449951,5.24,,,6.549,,105.230003
2000-01-07,23.788546,34.15588,14.633372,,19.054665,0.747598,23.290504,23.328535,24.83745,14.99376,3882.620117,11522.55957,1441.469971,5.22,,,6.504,,105.330002


In [126]:
os.makedirs("data/combined", exist_ok=True)
combined_all.to_csv("data/combined/market_all_combined.csv")

In [127]:
for col in combined_all.columns:
    first_date = combined_all[col].first_valid_index()
    print(f"{col}: starts on {first_date}")

Disney: starts on 2000-01-03 00:00:00
Microsoft: starts on 2000-01-03 00:00:00
Walmart: starts on 2000-01-03 00:00:00
Google: starts on 2004-08-19 00:00:00
Exxon Mobil: starts on 2000-01-03 00:00:00
Apple: starts on 2000-01-03 00:00:00
Intel: starts on 2000-01-03 00:00:00
JP Morgan: starts on 2000-01-03 00:00:00
Johnson & Johnson: starts on 2000-01-03 00:00:00
Coca-Cola: starts on 2000-01-03 00:00:00
NASDAQ Composite: starts on 2000-01-03 00:00:00
Dow Jones: starts on 2000-01-03 00:00:00
S&P 500: starts on 2000-01-03 00:00:00
Fed Funds Rate: starts on 2000-01-03 00:00:00
Oil: starts on 2000-08-23 00:00:00
Gold: starts on 2000-08-30 00:00:00
10Y Treasury: starts on 2000-01-03 00:00:00
EUR-USD: starts on 2003-12-01 00:00:00
USD-JPY: starts on 2000-01-03 00:00:00


In [128]:
import pandas as pd

# Tüm birleştirilmiş veri 
df = pd.read_csv("data/combined/market_all_combined.csv", parse_dates=["Date"], index_col="Date")

# Google’ın veri vermeye başladığı ilk tarih
google_start = df["Google"].first_valid_index()
print("Google ilk veri verdiği tarih:", google_start)

Google ilk veri verdiği tarih: 2004-08-19 00:00:00


In [129]:
stocks_df = stocks_df.loc[google_start:].copy()
indices_df = indices_df.loc[google_start:].copy()
commodities_df = commodities_df.loc[google_start:].copy()
currencies_df = currencies_df.loc[google_start:].copy()

In [130]:
combined_all = stocks_df
for df in [indices_df, commodities_df, currencies_df]:
    combined_all = combined_all.join(df, how="outer")

combined_all = combined_all.sort_index()
combined_all.to_csv("data/combined/market_all_combined.csv")

In [131]:
# market_data_google_start_aligned.csv dosyasını oku
df = pd.read_csv("data/combined/market_all_combined.csv", parse_dates=["Date"], index_col="Date")

# Linear interpolation ile eksik verileri doldur
df_interp = df.interpolate(method="linear", limit_direction="both")

# Sonuçları kaydet
df_interp.to_csv("data/combined/market_data_interpolated.csv")

# Eksik veri kaldı mı?
print(df_interp.isna().sum().sort_values(ascending=False))

Disney               0
NASDAQ Composite     0
EUR-USD              0
10Y Treasury         0
Gold                 0
Oil                  0
Fed Funds Rate       0
S&P 500              0
Dow Jones            0
Coca-Cola            0
Microsoft            0
Johnson & Johnson    0
JP Morgan            0
Intel                0
Apple                0
Exxon Mobil          0
Google               0
Walmart              0
USD-JPY              0
dtype: int64
