In [10]:
# import relevant libraries

import pandas as pd
import requests
import time
import os

## Extract

In [8]:
# API configuration

api_key = 'QOP90VK9XOHUXLHY'
base_url = 'https://www.alphavantage.co/query'

# function that fetchs stock data for a given symbol

def fetch_stock_info(symbol):
    params = {
        'function': 'TIME_SERIES_DAILY',
        'symbol': symbol,
        'apikey': api_key,
        'outputsize': 'compact'  # last 100 days (approximately 5 months)
    }
    response = requests.get(base_url, params=params)
    return response.json()

In [9]:
# test function
# Test by fetching stock data for Apple (AAPL)
data = fetch_stock_info('AAPL')
data

{'Meta Data': {'1. Information': 'Daily Prices (open, high, low, close) and Volumes',
  '2. Symbol': 'AAPL',
  '3. Last Refreshed': '2024-09-13',
  '4. Output Size': 'Compact',
  '5. Time Zone': 'US/Eastern'},
 'Time Series (Daily)': {'2024-09-13': {'1. open': '223.5800',
   '2. high': '224.0400',
   '3. low': '221.9100',
   '4. close': '222.5000',
   '5. volume': '36766619'},
  '2024-09-12': {'1. open': '222.5000',
   '2. high': '223.5500',
   '3. low': '219.8200',
   '4. close': '222.7700',
   '5. volume': '37498225'},
  '2024-09-11': {'1. open': '221.4550',
   '2. high': '223.0900',
   '3. low': '217.8900',
   '4. close': '222.6600',
   '5. volume': '44587072'},
  '2024-09-10': {'1. open': '218.9200',
   '2. high': '221.4800',
   '3. low': '216.7300',
   '4. close': '220.1100',
   '5. volume': '51591033'},
  '2024-09-09': {'1. open': '220.8200',
   '2. high': '221.2700',
   '3. low': '216.7100',
   '4. close': '220.9100',
   '5. volume': '67179965'},
  '2024-09-06': {'1. open': '223

## Transform

In [11]:
# define function that processes and transforms the stock data
def process_stock_data(data, symbol):
    # convert raw time series data into a DataFrame
    time_series = data['Time Series (Daily)']
    df = pd.DataFrame.from_dict(time_series, orient='index')
    df.columns = ['open', 'high', 'low', 'close', 'volume']
    df.index = pd.to_datetime(df.index)
    
    # ensure data types are float
    df['open'] = df['open'].astype(float)
    df['high'] = df['high'].astype(float)
    df['low'] = df['low'].astype(float)
    df['close'] = df['close'].astype(float)
    df['volume'] = df['volume'].astype(int)
    df['symbol'] = symbol
    
    # handle any missing values by forward filling
    df.fillna(method='ffill', inplace=True)
    
    # calculate moving averages
    df['moving_avg_7'] = df['close'].rolling(window=7).mean()
    df['moving_avg_30'] = df['close'].rolling(window=30).mean()
    
    # normalize the close price
    df['normalized_close'] = (df['close'] - df['close'].min()) / (df['close'].max() - df['close'].min())
    
    # flag high volatility days
    df['volatility'] = df['high'] - df['low']
    df['high_volatility_flag'] = df['volatility'] > df['volatility'].mean()
    
    # detect outliers in the close price
    df['price_outlier'] = (df['close'] > df['close'].mean() + 3 * df['close'].std()) | (df['close'] < df['close'].mean() - 3 * df['close'].std())
    
    # return the transformed DataFrame
    return df

## Load (to local storage)

In [12]:
# directory that saves the 100 days stock data
data_dir = "C:\Users\adityamxr\Desktop\Projects\stock-market-analysis\data\stock_data"
os.makedirs(data_dir, exist_ok=True)

SyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes in position 2-3: truncated \UXXXXXXXX escape (35867119.py, line 2)