In [None]:
# Pick the top-10 momentum stocks and report the returns, standard deviation, VaR and Sharpe ratio for the portfolio for 2024

import numpy as np
import yfinance as yf
import pandas as pd

def get_sp500_tickers():
    """Return the ticker symbols listed on Wikipedia's S&P 500 page.

    Parses every HTML table on the page and reads the ``Symbol`` column of
    the first one.

    Returns:
        list: The symbols as they appear in that column, unmodified.
    """
    wiki_url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
    first_table = pd.read_html(wiki_url)[0]
    return first_table['Symbol'].tolist()

def fetch_and_analyze_data(ticker, start_date, end_date):
    """Download one ticker's price history and derive its summary statistics.

    Dots in ``ticker`` are replaced with dashes before the yfinance lookup.
    The Sharpe ratio reads the module-level ``risk_free_rate``.

    Args:
        ticker: Symbol to look up.
        start_date: Start of the history window, passed to ``history()``.
        end_date: End of the history window, passed to ``history()``.

    Returns:
        tuple: ``(summary, stock_data)``. ``summary`` is a dict with keys
        ``'Ticker'``, ``'Return'`` (percent change from first to last close),
        ``'Std Dev'`` (daily-return standard deviation scaled by sqrt(252)),
        ``'Sharpe Ratio'``, ``'P/E Ratio'``, ``'Momentum Health Indicator'``
        (percent of up days minus percent of down days) and ``'VAR'``
        (one-day 95% parametric threshold, as a fractional return).
        ``stock_data`` is the price history with ``'Daily Return'``,
        ``'Momentum Health Indicator'`` and ``'VAR'`` columns added.
        ``(None, None)`` is returned when no rows come back or when any
        exception is raised; the exception is printed, not propagated.
    """
    try:
        adjusted_ticker = ticker.replace('.', '-')
        # One Ticker handle serves both the price history and the P/E lookup.
        security = yf.Ticker(adjusted_ticker)
        stock_data = security.history(start=start_date, end=end_date)

        if stock_data.empty:
            return None, None

        stock_data.index = stock_data.index.tz_localize(None)

        start_price = stock_data.iloc[0]['Close']
        end_price = stock_data.iloc[-1]['Close']
        annual_return = (end_price - start_price) / start_price * 100

        stock_data['Daily Return'] = stock_data['Close'].pct_change()
        daily_std = stock_data['Daily Return'].std()
        std_dev = daily_std * (252**0.5)
        sharpe_ratio = (annual_return / 100 - risk_free_rate) / std_dev

        # NOTE(review): 'trailingPE' presumably reflects the latest data, not
        # the requested window -- confirm before treating it as point-in-time.
        pe_ratio = security.info.get('trailingPE', None)

        # The first row's Daily Return is NaN, so it counts as neither up nor
        # down but still contributes to the denominator.
        up_days = (stock_data['Daily Return'] > 0).sum()
        down_days = (stock_data['Daily Return'] < 0).sum()
        up_days_percentage = up_days / len(stock_data) * 100
        down_days_percentage = down_days / len(stock_data) * 100
        momentum_health = up_days_percentage - down_days_percentage
        stock_data['Momentum Health Indicator'] = momentum_health

        # Parametric VaR: both terms stay on a one-day horizon (combining the
        # daily mean with the annualised std_dev would mix units), and 1.645
        # is the one-sided 95% quantile of the standard normal distribution.
        z_score_95 = 1.645
        expected_return = stock_data['Daily Return'].mean()
        var_95 = expected_return - z_score_95 * daily_std
        stock_data['VAR'] = var_95

        summary = {
            'Ticker': ticker,
            'Return': annual_return,
            'Std Dev': std_dev,
            'Sharpe Ratio': sharpe_ratio,
            'P/E Ratio': pe_ratio,
            'Momentum Health Indicator': stock_data['Momentum Health Indicator'].iloc[-1],
            'VAR': stock_data['VAR'].iloc[-1],
        }
        return summary, stock_data
    except Exception as e:
        # Deliberate best-effort: one bad ticker must not abort the whole scan.
        print(f"Skipping {ticker} due to error: {e}")
        return None, None

def fetch_future_returns(ticker, start_date, end_date):
    """Return the percent change from first to last close over a window.

    Dots in ``ticker`` are replaced with dashes before the yfinance lookup.

    Args:
        ticker: Symbol to look up.
        start_date: Start of the history window, passed to ``history()``.
        end_date: End of the history window, passed to ``history()``.

    Returns:
        The percentage change between the first and last ``'Close'`` values,
        or ``None`` when no rows come back or any exception is raised (the
        exception is printed, not propagated).
    """
    try:
        symbol = ticker.replace('.', '-')
        history = yf.Ticker(symbol).history(start=start_date, end=end_date)
        if history.empty:
            return None

        history.index = history.index.tz_localize(None)
        first_close = history.iloc[0]['Close']
        last_close = history.iloc[-1]['Close']
        return (last_close - first_close) / first_close * 100
    except Exception as e:
        print(f"Skipping {ticker} for future returns due to error: {e}")
        return None

def process_year(year, start_date, end_date):
    """Screen every S&P 500 ticker for one window and pick the top 10.

    Reads the module-level ``sp500_tickers`` and writes to the module-level
    Excel ``writer``: one sheet per ticker that returned data, plus a
    ``'Top 10 Stocks {year}'`` sheet. Also prints the average P/E ratio.

    Screening steps:
      1. Drop rows whose P/E ratio is missing, non-numeric or >= 1000.
      2. Keep rows with P/E below the remaining average and a
         ``'Momentum Health Indicator'`` above 10.
      3. Sort by ``'Return'`` descending and keep the first 10.

    Args:
        year: Label used in sheet names and the printed message.
        start_date: Start of the analysis window.
        end_date: End of the analysis window.

    Returns:
        pandas.DataFrame: The selected rows with an added ``'2024 Return'``
        column (percent change between 2023-12-29 and 2024-12-31). Empty,
        but with all columns present, when nothing passes the screen.
    """
    # Pinning the columns keeps the filters below valid even when every
    # ticker was skipped and returns_data is empty.
    result_columns = [
        'Ticker', 'Return', 'Std Dev', 'Sharpe Ratio', 'P/E Ratio',
        'Momentum Health Indicator', 'VAR',
    ]

    returns_data = []
    for ticker in sp500_tickers:
        result, stock_data = fetch_and_analyze_data(ticker, start_date, end_date)
        if result and stock_data is not None:
            returns_data.append(result)
            stock_data.to_excel(writer, sheet_name=f'{ticker}_{year}')

    returns_df = pd.DataFrame(returns_data, columns=result_columns)
    returns_df['P/E Ratio'] = pd.to_numeric(returns_df['P/E Ratio'], errors='coerce')
    returns_df = returns_df.dropna(subset=['P/E Ratio'])
    returns_df = returns_df[returns_df['P/E Ratio'] < 1000]

    average_pe_ratio = returns_df['P/E Ratio'].mean()

    print(f"Average PE Ratio for {year}: ", average_pe_ratio, "\n")

    below_average_pe = returns_df['P/E Ratio'] < average_pe_ratio
    healthy_momentum = returns_df['Momentum Health Indicator'] > 10
    filtered_df = returns_df[below_average_pe & healthy_momentum]

    # .copy() makes the selection an independent frame, so adding a column
    # below does not trigger pandas' SettingWithCopyWarning.
    top_10_stocks = filtered_df.sort_values(by='Return', ascending=False).head(10).copy()

    top_10_stocks['2024 Return'] = top_10_stocks['Ticker'].apply(
        lambda symbol: fetch_future_returns(symbol, '2023-12-29', '2024-12-31')
    )

    top_10_stocks.to_excel(writer, sheet_name=f'Top 10 Stocks {year}', index=False)

    return top_10_stocks

# Module-level inputs read by the functions above.
sp500_tickers = get_sp500_tickers()
risk_free_rate = 0.02

# Analysis windows keyed by label: (start_date, end_date).
years = {
    '2023': ('2022-12-31', '2023-12-31')
}

summary_columns = [
    'Ticker', 'Return', 'Std Dev', 'Sharpe Ratio', 'P/E Ratio',
    'Momentum Health Indicator', 'VAR', '2024 Return',
]

# process_year reads `writer` as a module-level name. The with-block closes
# the workbook even if processing raises part-way through.
with pd.ExcelWriter('sp500_data.xlsx', engine='openpyxl') as writer:
    for year, (start_date, end_date) in years.items():
        top_10_stocks = process_year(year, start_date, end_date)

        # Investing an equal amount in every pick yields the arithmetic mean
        # of the individual percentage returns. Series.mean() skips picks
        # with no 2024 data and gives NaN (not ZeroDivisionError) when the
        # screen selected nothing.
        future_returns = pd.to_numeric(top_10_stocks['2024 Return'], errors='coerce')
        portfolio_return_percentage = future_returns.mean()

        print(f"\nTop 10 Stocks for {year} Summary:")
        print(top_10_stocks[summary_columns])
        print(f"Portfolio return for 2024: {portfolio_return_percentage}%")
        # The three figures below are simple averages across the selected
        # stocks, not statistics of the combined portfolio.
        print(f"\nStandard Deviation: {top_10_stocks['Std Dev'].mean()}")
        print(f"VaR (Log Normal 95%): {top_10_stocks['VAR'].mean()}")
        print(f"Sharpe Ratio: {top_10_stocks['Sharpe Ratio'].mean()}\n")


Skipping DOV due to error: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))


$GEV: possibly delisted; no price data found  (1d 2022-12-31 -> 2023-12-31) (Yahoo error = "Data doesn't exist for startDate = 1672462800, endDate = 1703998800")
$SW: possibly delisted; no price data found  (1d 2022-12-31 -> 2023-12-31) (Yahoo error = "Data doesn't exist for startDate = 1672462800, endDate = 1703998800")
$SOLV: possibly delisted; no price data found  (1d 2022-12-31 -> 2023-12-31) (Yahoo error = "Data doesn't exist for startDate = 1672462800, endDate = 1703998800")


Average PE Ratio for 2023:  36.5789668731501 



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_10_stocks['2024 Return'] = top_10_stocks['Ticker'].apply(lambda x: fetch_future_returns(x, '2023-12-29', '2024-12-31'))



Top 10 Stocks for 2023 Summary:
    Ticker      Return   Std Dev  Sharpe Ratio  P/E Ratio  \
424   SMCI  238.969731  0.751333      3.153990  15.584080   
403    RCL  165.838665  0.364839      4.490713  24.139780   
76    BLDR  155.455251  0.417931      3.671780  16.047410   
453   UBER  142.783905  0.361867      3.890491  33.527092   
385    PHM  125.444359  0.300122      4.113136   8.643542   
207     GE   92.971719  0.242983      3.743956  35.805620   
260    JBL   90.273934  0.365913      2.412433  15.406427   
286    LII   87.906619  0.291485      2.947209  30.956232   
68    BKNG   74.549886  0.249525      2.907514  33.516434   
154    DHI   69.149843  0.281647      2.384183  10.337518   

     Momentum Health Indicator       VAR  2024 Return  
424                       13.2 -1.466602     7.929359  
403                       14.4 -0.710890    80.581388  
76                        16.4 -0.815027   -14.118849  
453                       12.4 -0.705434    -1.299333  
385            