In [4]:
import yfinance as yf
import pandas as pd
import numpy as np
import os

# Symbols of the ten stocks that make up the "Extra10" set
tickers = [
    "AMZN",
    "GOOGL",
    "META",
    "NFLX",
    "NVDA",
    "BRK-B",
    "TSLA",
    "V",
    "PFE",
    "KO",
]

# Directory (relative to the working directory) that receives the Extra10 CSV files
folder_Extra10 = 'data/Extra10'

# Create the directory up front; exist_ok=True makes re-running this cell harmless
os.makedirs(folder_Extra10, exist_ok=True)

# Helper: derive the exported indicator columns from one ticker's price frame
def _compute_indicators(prices):
    """Build the frame that is written to CSV for a single ticker.

    Parameters
    ----------
    prices : pandas.DataFrame
        Must contain the columns 'Date', 'Close', 'Adj Close' and 'Volume'.
        The frame is only read, never modified.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: Date, Close, Adj Close, Volume, Daily Return,
        Rolling Volatility, MA_20, MA_50, RSI, Log Return. Every derived
        column is computed from 'Adj Close'; rows without a full look-back
        window hold NaN.
    """
    adj_close = prices['Adj Close']

    # Log return and simple (percentage) return versus the previous row
    log_return = np.log(adj_close / adj_close.shift(1))
    daily_return = adj_close.pct_change()

    # RSI over 14 rows, using plain rolling means of gains and losses.
    # clip() keeps the NaN of the first row (it has no previous price), so the
    # first RSI value is only produced once 14 real price changes are available
    # instead of counting that undefined change as a zero gain / zero loss.
    change = adj_close.diff()
    avg_gain = change.clip(lower=0).rolling(window=14).mean()
    avg_loss = (-change).clip(lower=0).rolling(window=14).mean()
    rs = avg_gain / avg_loss  # inf when the window has no losses -> RSI of 100
    rsi = 100 - (100 / (1 + rs))

    return pd.DataFrame({
        'Date': prices['Date'],
        'Close': prices['Close'],
        'Adj Close': adj_close,
        'Volume': prices['Volume'],
        'Daily Return': daily_return,
        # Volatility = standard deviation of the log returns over 20 rows
        'Rolling Volatility': log_return.rolling(window=20).std(),
        'MA_20': adj_close.rolling(window=20).mean(),
        'MA_50': adj_close.rolling(window=50).mean(),
        'RSI': rsi,
        'Log Return': log_return,
    })


# Function to fetch data from Yahoo Finance and perform calculations
def fetch_and_process_data(tickers, folder):
    """Download daily prices for each ticker and save them with indicators.

    For every symbol in ``tickers`` the history requested from Yahoo Finance
    runs from 2018-01-01 to 2024-01-01. The indicator columns are added by
    ``_compute_indicators`` and the result is written to
    ``<folder>/<ticker>_2020_2024.csv`` (without the index).

    Parameters
    ----------
    tickers : iterable of str
        Yahoo Finance symbols to download.
    folder : str
        Output directory; it is created if it does not exist yet.

    Returns
    -------
    None
        Progress is reported with ``print``; tickers for which no rows come
        back are skipped with a message instead of producing an empty CSV.
    """
    os.makedirs(folder, exist_ok=True)

    for ticker in tickers:
        # auto_adjust=False is passed explicitly because the calculations need
        # the 'Adj Close' column; NOTE(review): newer yfinance releases default
        # to adjusted prices without that column - confirm against the
        # installed version.
        prices = yf.download(ticker, start="2018-01-01", end="2024-01-01", auto_adjust=False)

        # A failed or empty download would otherwise yield a header-only CSV
        # that is still reported as "processed".
        if prices is None or prices.empty:
            print(f"No data returned for {ticker}; skipping")
            continue

        # Some yfinance versions return (field, ticker) MultiIndex columns even
        # for a single symbol; keep only the field level ('Close', 'Volume', ...).
        if isinstance(prices.columns, pd.MultiIndex):
            prices.columns = prices.columns.get_level_values(0)

        # Turn the Date index into a regular column
        prices = prices.reset_index()

        result = _compute_indicators(prices)

        # NOTE(review): the file name says 2020_2024 although the download starts
        # in 2018; the name is kept unchanged so existing readers of these files
        # keep working.
        result.to_csv(os.path.join(folder, f"{ticker}_2020_2024.csv"), index=False)
        print(f"Processed and saved data for {ticker}")

# Run the download-and-process step for every ticker in `tickers`;
# the resulting CSV files are written into folder_Extra10
fetch_and_process_data(tickers, folder_Extra10)


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Processed and saved data for AMZN
Processed and saved data for GOOGL
Processed and saved data for META
Processed and saved data for NFLX
Processed and saved data for NVDA
Processed and saved data for BRK-B
Processed and saved data for TSLA
Processed and saved data for V


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

Processed and saved data for PFE
Processed and saved data for KO





In [3]:
import pandas as pd

# Load the CSV file into a DataFrame.
# The path is relative (same 'data/...' layout the download cell writes to)
# instead of an absolute path that only exists on one machine.
# NOTE(review): assumes the notebook runs from the Fetch_data directory - confirm.
df = pd.read_csv('data/US-30/Amgen_AMGN_2020_2024.csv')

# Check if the date column is the index.
# The comparison ignores case so that an index named 'Date' is recognised too.
# Note: read_csv is called without index_col, so the index is an unnamed
# default index and the else branch is the expected outcome here.
index_name = df.index.name
if isinstance(index_name, str) and index_name.lower() == 'date':
    print("The date column is set as the index.")
else:
    print("The date column is NOT the index.")


The date column is NOT the index.
