In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
from pyspark.sql.functions import *

In [None]:
from pyspark.sql import DataFrame, SparkSession
from typing import List
import pyspark.sql.types as T
import pyspark.sql.functions as F

# Obtain the Spark session for this notebook under the application name
# "tickerData" (getOrCreate hands back an already-running session if there is one).
spark = (
    SparkSession.builder
    .appName("tickerData")
    .getOrCreate()
)

# Bare expression so the notebook renders the session summary.
spark

## Access Gmail credentials

### Subtask:
Retry securely retrieving the Gmail address and app password from Colab secrets after the previous attempt failed due to a `SecretNotFoundError`.

In [None]:
from google.colab import userdata

# Read both sender credentials from the Colab secret store, in the same
# order as before: address first, then app password.
gmail_address, gmail_app_password = (
    userdata.get(secret_name)
    for secret_name in ('GMAIL_ADDRESS', 'GMAIL_APP_PASSWORD')
)

# Only a confirmation is printed; the secret values themselves are never echoed.
print("Gmail address retrieved.")

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the CSV paths used by later cells
# live under /content/drive/MyDrive/.
drive.mount('/content/drive')

In [None]:
# Load the stored price history from Drive (header row present, column types inferred).
ticker_price_history_df = spark.read.csv(
    "/content/drive/MyDrive/ticker_prices_nif_100_1/1_year_history.csv",
    header=True,
    inferSchema=True,
)

# Register it as the table "ticker_price_history", which the merge cell reads back.
# mode("overwrite") keeps the cell re-runnable: the default save mode raises
# as soon as the table already exists from an earlier run.
ticker_price_history_df.write.mode("overwrite").saveAsTable("ticker_price_history")

In [None]:
from statsmodels.tsa.stattools import coint
# Symbols to track (all carry the ".NS" suffix). The set literal drops any
# accidental duplicate and sorted() gives the list a stable alphabetical order.
tickers = sorted({
    "SWIGGY.NS", "ABB.NS", "ACC.NS", "AUBANK.NS", "ADANIENSOL.NS", "ADANIENT.NS",
    "ADANIGREEN.NS", "ADANIPOWER.NS", "ATGL.NS", "ABCAPITAL.NS", "ALKEM.NS",
    "AMBUJACEM.NS", "APOLLOHOSP.NS", "ASHOKLEY.NS", "ASIANPAINT.NS", "ASTRAL.NS",
    "AUROPHARMA.NS", "DMART.NS", "AXISBANK.NS", "BSE.NS", "BAJAJ-AUTO.NS",
    "BAJFINANCE.NS", "BAJAJFINSV.NS", "BAJAJHLDNG.NS", "BANKBARODA.NS",
    "BANKINDIA.NS", "BDL.NS", "BEL.NS", "BHARATFORG.NS", "BHEL.NS", "BPCL.NS",
    "BHARTIARTL.NS", "BHARTIHEXA.NS", "BIOCON.NS", "BLUESTARCO.NS", "BOSCHLTD.NS",
    "BRITANNIA.NS", "CGPOWER.NS", "CANBK.NS", "CHOLAFIN.NS", "COALINDIA.NS",
    "COCHINSHIP.NS", "COLPAL.NS", "CONCOR.NS", "COROMANDEL.NS", "CUMMINSIND.NS",
    "DLF.NS", "DABUR.NS", "DIVISLAB.NS", "DIXON.NS", "EICHERMOT.NS", "EXIDEIND.NS",
    "NYKAA.NS", "FEDERALBNK.NS", "FORTIS.NS", "GAIL.NS", "GMRAIRPORT.NS",
    "GLENMARK.NS", "GODFRYPHLP.NS", "GODREJCP.NS", "GODREJPROP.NS", "GRASIM.NS",
    "HCLTECH.NS", "HDFCAMC.NS", "HDFCBANK.NS", "HDFCLIFE.NS", "HAVELLS.NS",
    "HEROMOTOCO.NS", "HINDALCO.NS", "HAL.NS", "HINDPETRO.NS", "HINDUNILVR.NS",
    "POWERINDIA.NS", "HUDCO.NS", "HYUNDAI.NS", "ICICIBANK.NS", "ICICIGI.NS",
    "IRB.NS", "ITCHOTELS.NS", "INDIANB.NS", "IOC.NS", "IRCTC.NS", "IRFC.NS",
    "IREDA.NS", "IGL.NS", "INDUSTOWER.NS", "INDUSINDBK.NS", "NAUKRI.NS",
    "INDIGO.NS", "JSWENERGY.NS",
})
# Download the last five daily bars for every ticker and stage them as a single
# Spark CSV dataset in "5_days_history.csv".
#
# The first successful write replaces whatever a previous run left behind and
# later writes append to it, so re-running the cell no longer duplicates rows.
# If every download comes back empty, the previous output is left untouched.
first_write = True
for i in tickers:
    data = yf.download(i, period="5d", interval="1d")
    if data.empty:
        # Nothing came back for this symbol; skip it rather than writing a header-only file.
        print(f"No price data returned for {i}; skipping.")
        continue

    # Round-trip through a per-ticker CSV so Spark can ingest the download.
    data.to_csv(f"{i}.csv")
    data_df = spark.read.csv(f"{i}.csv", header=True, inferSchema=True)
    data_df = data_df.withColumn("Ticker", F.lit(f"{i}"))

    # Drop the rows whose first cell is "Ticker" or "Date" (presumably the extra
    # header rows yfinance writes — TODO confirm), then name that first column "Date".
    data_df_filtered = (
        data_df
        .filter((F.col("Price") != F.lit("Ticker")) & (F.col("Price") != F.lit("Date")))
        .withColumnRenamed("Price", "Date")
    )

    (
        data_df_filtered.write
        .option("header", "true")
        .mode("overwrite" if first_write else "append")
        .csv("5_days_history.csv")
    )
    first_write = False

In [None]:
# Read back the staged five-day download (header row present, column types inferred).
ticker_5day_df = spark.read.csv("5_days_history.csv", header=True, inferSchema=True)

# Register it as the table "ticker_5day_history", the "source" side of the merge cell.
# mode("overwrite") keeps the cell re-runnable: the default save mode raises
# once the table already exists from an earlier run.
ticker_5day_df.write.mode("overwrite").saveAsTable("ticker_5day_history")

In [None]:
# Pull both price tables back out of the Spark catalog:
# target = stored history, source = fresh five-day download.
target_df = spark.read.table("ticker_price_history")
source_df = spark.read.table("ticker_5day_history")

# A full outer join on (Date, Ticker) keeps rows that exist on either side.
merged_df = target_df.join(source_df, on=["Date", "Ticker"], how="fullouter")

# For every output column prefer the freshly downloaded value and fall back
# to the stored history when the download has none.
merged_columns = ["Date", "Ticker", "Close", "High", "Low", "Open", "Volume"]
merged_df = merged_df.select(
    *[
        F.coalesce(source_df[name], target_df[name]).alias(name)
        for name in merged_columns
    ]
)

# Persist the merged history: first to Drive (the copy later cells read),
# then to an optional local copy. The catalog table itself is not overwritten here.
for output_path in (
    "/content/drive/MyDrive/ticker_prices_nif_100_1/1_year_history.csv",
    "1_year_history.csv",
):
    merged_df.write.option("header", "true").mode("overwrite").csv(output_path)

# Sanity check: show the date range covered after the merge.
merged_df.agg(F.min("Date")).show()
merged_df.agg(F.max("Date")).show()

In [None]:
# Re-read the merged history that the previous cell wrote to Drive
# (header row present, column types inferred).
ticker_final_df = (
    spark.read
    .option("header", True)
    .option("inferSchema", True)
    .csv("/content/drive/MyDrive/ticker_prices_nif_100_1/1_year_history.csv")
)

In [None]:
# ticker_final_df.write.option("header","true")\
#   .mode("overwrite")\
#   .csv("/content/drive/MyDrive/ticker_prices/1_year_history.csv")

In [None]:
# ticker_final_df_1= spark.read.csv("/content/drive/MyDrive/ticker_prices/1_year_history.csv", header=True, inferSchema=True)

In [None]:
# Quick sanity check of the merged history: show the distinct ticker symbols
# it contains, then return its total row count as the cell output.
ticker_final_df.select("Ticker").distinct().show()
ticker_final_df.count()

In [None]:
# Collect the whole Spark DataFrame into a pandas DataFrame and preview
# its first rows.
one_yr_df = ticker_final_df.toPandas()
one_yr_df.head()

In [None]:
NUMERIC_PRICE_COLUMNS = ['Close', 'High', 'Low', 'Open', 'Volume']


def add_technical_indicators(prices: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of one ticker's price rows with indicator columns added.

    Parameters
    ----------
    prices : pd.DataFrame
        Rows for a single ticker, already ordered by date, with the columns
        'Close', 'High', 'Low', 'Open' and 'Volume'.

    Returns
    -------
    pd.DataFrame
        The input columns (price/volume coerced to numeric, unparseable values
        becoming NaN) plus SMA20, SMA50, SMA200, MACD, MACD_Signal, RSI,
        Williams_%R, MFI, cci, OBV and AD_Line.
    """
    out = prices.copy()
    for col_name in NUMERIC_PRICE_COLUMNS:
        out[col_name] = pd.to_numeric(out[col_name], errors='coerce')
    close, high, low, volume = out['Close'], out['High'], out['Low'], out['Volume']

    # Simple moving averages of the close.
    for window in (20, 50, 200):
        out[f'SMA{window}'] = close.rolling(window=window).mean()

    # MACD: 12-span EMA minus 26-span EMA, with a 9-span EMA signal line.
    ema12 = close.ewm(span=12, adjust=False).mean()
    ema26 = close.ewm(span=26, adjust=False).mean()
    out['MACD'] = ema12 - ema26
    out['MACD_Signal'] = out['MACD'].ewm(span=9, adjust=False).mean()

    # RSI with Wilder-style smoothing (alpha = 1/14).
    delta = close.diff()
    avg_gain = delta.clip(lower=0).ewm(alpha=1/14, min_periods=14, adjust=False).mean()
    avg_loss = (-delta.clip(upper=0)).ewm(alpha=1/14, min_periods=14, adjust=False).mean()
    out['RSI'] = 100 - (100 / (1 + avg_gain / avg_loss))

    # Williams %R over a 14-row window.
    high14 = high.rolling(window=14).max()
    low14 = low.rolling(window=14).min()
    out['Williams_%R'] = -100 * (high14 - close) / (high14 - low14)

    # Typical price, shared by MFI and CCI.
    tp = (high + low + close) / 3

    # Money Flow Index over 14 rows; a zero negative flow becomes NaN to avoid dividing by zero.
    mf = tp * volume
    pos_mf = mf.where(tp > tp.shift(1), 0)
    neg_mf = mf.where(tp < tp.shift(1), 0)
    mfr = pos_mf.rolling(window=14).sum() / neg_mf.rolling(window=14).sum().replace(0, np.nan)
    out['MFI'] = 100 - (100 / (1 + mfr))

    # CCI over 20 rows. raw=True hands NumPy arrays to the lambda, which is much
    # faster than building a Series per window; np.abs replaces Series.abs().
    sma_tp = tp.rolling(window=20).mean()
    mad = tp.rolling(window=20).apply(lambda w: np.abs(w - w.mean()).mean(), raw=True)
    out['cci'] = (tp - sma_tp) / (0.015 * mad)

    # On-Balance Volume: add the volume on an up close, subtract it on a down
    # close, carry forward otherwise. The first row contributes 0. A missing
    # volume counts as 0 instead of turning every later value into NaN.
    direction = np.sign(close.diff()).fillna(0)
    out['OBV'] = (direction * volume).fillna(0).cumsum()

    # Accumulation/Distribution line: cumulative money-flow volume.
    # High == Low gives a 0/0 or x/0 multiplier, which is treated as 0.
    mfm = ((close - low) - (high - close)) / (high - low)
    mfm = mfm.replace([np.inf, -np.inf], np.nan).fillna(0)
    out['AD_Line'] = (mfm * volume).cumsum()

    return out


# Collect the history once (one Spark job) instead of filtering and collecting per ticker.
all_prices_pdf = ticker_final_df.orderBy("Ticker", "Date").toPandas()
prices_grouped = all_prices_pdf.groupby("Ticker", sort=False)

# Build one frame per ticker and concatenate once at the end; growing a
# DataFrame with pd.concat inside the loop copies all earlier rows each time.
indicator_frames = []
for i in tickers:
    if i not in prices_grouped.groups:
        continue  # symbol has no rows in the stored history
    tick_one_yr_df = add_technical_indicators(prices_grouped.get_group(i).reset_index(drop=True))
    indicator_frames.append(tick_one_yr_df)

combined_tick_df = (
    pd.concat(indicator_frames, ignore_index=True) if indicator_frames else pd.DataFrame()
)

# Date range covered by the combined data (printed once, not once per ticker).
if not combined_tick_df.empty:
    print(combined_tick_df["Date"].min())
    print(combined_tick_df["Date"].max())

# combined_tick_df now holds every ticker's rows together with the indicator columns.

In [None]:
# import matplotlib.pyplot as plt
# import matplotlib.dates as mdates

# # Convert 'Date' column to datetime objects if not already
# combined_tick_df['Date'] = pd.to_datetime(combined_tick_df['Date'])

# # Get the list of unique tickers
# unique_tickers = combined_tick_df['Ticker'].unique()

# # Plot for each ticker
# for ticker in unique_tickers:
#     ticker_df = combined_tick_df[combined_tick_df['Ticker'] == ticker].copy()

#     plt.figure(figsize=(12, 6))
#     plt.plot(ticker_df['Date'], ticker_df['Close'] , label='Close Price')
#     plt.plot(ticker_df['Date'], ticker_df['SMA20'] , label='SMA 20')
#     plt.plot(ticker_df['Date'], ticker_df['SMA50'] , label='SMA 50')
#     plt.plot(ticker_df['Date'], ticker_df['SMA200'] , label='SMA 200')

#     plt.xlabel('Date')
#     plt.ylabel('Price')
#     plt.title(f'{ticker} Stock Price and SMAs over Time')
#     plt.legend()
#     plt.grid(True)

#     # Format x-axis to show months
#     ax = plt.gca()
#     formatter = mdates.DateFormatter('%Y-%m')
#     ax.xaxis.set_major_formatter(formatter)
#     plt.xticks(rotation=45)

#     plt.tight_layout()
#     plt.show()

In [None]:
# Convert 'Date' column to datetime objects if not already
combined_tick_df['Date'] = pd.to_datetime(combined_tick_df['Date'])

# Get the list of unique tickers
unique_tickers = combined_tick_df['Ticker'].unique()

# Previous-row indicator values within each ticker. This relies on each ticker's
# rows being in date order, as the indicator cell builds them.
# groupby(...).shift replaces the per-ticker loop and concat-in-loop.
rows_by_ticker = combined_tick_df.groupby('Ticker', sort=False)
previous_values = {
    'RSI_prev': rows_by_ticker['RSI'].shift(1),
    'MACD_prev': rows_by_ticker['MACD'].shift(1),
    'Williams_%R_prev': rows_by_ticker['Williams_%R'].shift(1),
    'cci_prev': rows_by_ticker['cci'].shift(1),
}

# Filtering conditions. A comparison against a missing previous value is
# False, so the first row of every ticker can never qualify.
condition_rsi = (combined_tick_df['RSI'] >= 55) & (combined_tick_df['RSI'] > previous_values['RSI_prev'])
condition_williams = (combined_tick_df['Williams_%R'] >= -22) & (
    combined_tick_df['Williams_%R'] > previous_values['Williams_%R_prev']
)
condition_macd = ((combined_tick_df['MACD'] - combined_tick_df['MACD_Signal']) > 0) & (
    combined_tick_df['MACD'] > previous_values['MACD_prev']
)
condition_cci = (combined_tick_df['cci'] > -200) & (combined_tick_df['cci'] < 200)

# A row must satisfy all four conditions (CCI included).
all_conditions = condition_rsi & condition_macd & condition_williams & condition_cci

# Keep the qualifying rows, carrying the *_prev helper columns along.
filtered_tickers_df = (
    combined_tick_df
    .assign(**previous_values)
    .loc[all_conditions]
    .reset_index(drop=True)
)

# Display the tickers and dates that meet the criteria
display(filtered_tickers_df[['Date', 'Ticker', 'RSI', 'MACD', 'MACD_Signal', 'Williams_%R', 'cci']])

In [None]:
# Step 1: keep, for every ticker, only the rows on its two most recent dates.
latest_frames = []
for ticker_name, ticker_rows in combined_tick_df.groupby('Ticker', sort=False):
    ticker_rows = ticker_rows.sort_values(by='Date', ascending=True)
    latest_dates = ticker_rows['Date'].tail(2)
    latest_frames.append(ticker_rows[ticker_rows['Date'].isin(latest_dates)])

latest_two_days_df = (
    pd.concat(latest_frames, ignore_index=True)
    if latest_frames
    else combined_tick_df.iloc[0:0].copy()
)

# Step 2: score those rows. The previous-day values come from the other row of
# the same ticker, so only the later row can satisfy an "increasing" condition.
scored_df = latest_two_days_df.copy()
previous_day = scored_df.groupby('Ticker', sort=False)[['RSI', 'MACD', 'Williams_%R', 'cci']].shift(1)
scored_df['RSI_prev'] = previous_day['RSI']
scored_df['MACD_prev'] = previous_day['MACD']
scored_df['Williams_%R_prev'] = previous_day['Williams_%R']
scored_df['cci_prev'] = previous_day['cci']

condition_rsi = (scored_df['RSI'] >= 55) & (scored_df['RSI'] > scored_df['RSI_prev'])
condition_williams = (scored_df['Williams_%R'] >= -22) & (scored_df['Williams_%R'] > scored_df['Williams_%R_prev'])
condition_macd = ((scored_df['MACD'] - scored_df['MACD_Signal']) > 0) & (scored_df['MACD'] > scored_df['MACD_prev'])
# CCI is reported in the description only; it does not count towards the category.
condition_cci = (scored_df['cci'] > -200) & (scored_df['cci'] < 200)

# Number of the three scoring conditions (RSI, Williams %R, MACD) met per row.
scored_df['satisfied_conditions_count'] = (
    condition_rsi.astype(int) + condition_williams.astype(int) + condition_macd.astype(int)
)

# The category depends on the count alone: 3 -> Diamond, 2 -> Golden, 1 -> Silver.
# (No additional RSI requirement is applied to Silver picks.)
scored_df['Recommendation'] = (
    scored_df['satisfied_conditions_count']
    .map({3: 'Diamond Pick', 2: 'Golden Pick', 1: 'Silver Pick'})
    .fillna('None')
)

# Per-row, comma-separated list of the conditions that row satisfied.
condition_flags = pd.DataFrame({
    'RSI >= 55 & Increasing': condition_rsi,
    'Williams %R >= -22 & Increasing': condition_williams,
    'MACD > Signal & Increasing': condition_macd,
    'CCI within thresholds': condition_cci,
})
scored_df['Satisfied_Conditions_Description'] = [
    ', '.join(label for label, met in zip(condition_flags.columns, row_flags) if met)
    for row_flags in condition_flags.to_numpy()
]

# Keep only rows that earned a Diamond, Golden or Silver pick.
filtered_latest_two_days_df = scored_df[scored_df['Recommendation'] != 'None'].reset_index(drop=True)

# Display the tickers, dates, and recommendations
display(filtered_latest_two_days_df[['Date', 'Ticker', 'RSI', 'MACD', 'MACD_Signal', 'Williams_%R', 'cci', 'satisfied_conditions_count', 'Recommendation', 'Satisfied_Conditions_Description']])

In [None]:
# combined_tick_df is a pandas DataFrame (built with pd.concat), so count()
# reports the number of non-null values per column, not a single row count.
combined_tick_df.count()

# Task
Send an email to "jonnadularohit@gmail.com" containing the shortlisted tickers from the `filtered_latest_two_days_df` DataFrame and their corresponding technical indicator plots, using the Gmail address and app password stored in Colab secrets.

## Access Gmail credentials

### Subtask:
Securely retrieve the Gmail address and app password from Colab secrets.


**Reasoning**:
Access the Gmail address and app password from Colab secrets using the `google.colab` library.



# Task
Backtest the historical performance of the "Diamond Pick," "Golden Pick," and "Silver Pick" recommendations based on the indicator filtering logic in cell `0e47eb98`.

## Define backtesting period

### Subtask:
Determine the historical date range over which you want to perform the backtesting.


**Reasoning**:
Determine the earliest date after which all technical indicators are calculated in `combined_tick_df` to set the backtesting start date, and set the end date to the latest date in the data.



In [None]:
# Preview the rows dated after 2024-01-01. The preview is not the last statement
# of the cell, so it must be passed to display() explicitly or it is discarded.
display(combined_tick_df[combined_tick_df["Date"] > "2024-01-01"].head())

# Earliest available date per ticker.
print(combined_tick_df.groupby('Ticker')['Date'].min())

In [None]:
# # Determine the earliest date for each ticker where SMA200 is not NaN
# earliest_sma200_dates_per_ticker = combined_tick_df.dropna(subset=['SMA200', 'RSI', 'MFI', 'MACD', 'Williams_%R', 'cci']).groupby('Ticker')['Date'].min()

# # Display the earliest date for SMA200 for each ticker
# print("Earliest Date for SMA200 Availability per Ticker:")
# display(earliest_sma200_dates_per_ticker)

In [None]:
# # Determine the earliest date for each ticker after which all indicators have valid data
# # The SMA200 has the longest lookback period (200 days)
# # We need at least 200 data points for SMA200 to have a value
# earliest_dates_per_ticker_with_all_indicators = combined_tick_df.dropna(subset=['SMA200', 'RSI', 'MFI', 'MACD', 'Williams_%R', 'cci']).groupby('Ticker')['Date'].min()

# print("Earliest Date per Ticker with All Indicators Available:")
# display(earliest_dates_per_ticker_with_all_indicators)

# # Determine the overall backtesting start date (the latest of the earliest dates across all tickers)
# earliest_date_with_all_indicators = earliest_dates_per_ticker_with_all_indicators.max()


# # Determine the latest date in the data
# latest_date = combined_tick_df['Date'].max()

# print(f"\nBacktesting start date (latest of the above): {earliest_date_with_all_indicators.strftime('%Y-%m-%d')}")
# print(f"Backtesting end date: {latest_date.strftime('%Y-%m-%d')}")

# # Define the backtesting period
# backtesting_dates = combined_tick_df[(combined_tick_df['Date'] >= earliest_date_with_all_indicators) & (combined_tick_df['Date'] <= latest_date)]['Date'].unique()
# backtesting_dates = sorted(backtesting_dates)

In [None]:
# # Filter the Series to find tickers with earliest date in 2025
# tickers_with_earliest_date_in_2025 = earliest_dates_per_ticker_with_all_indicators[
#     (earliest_dates_per_ticker_with_all_indicators.dt.year == 2025)
# ]

# # Display the tickers and their earliest dates in 2025
# print("Tickers with Earliest Date (All Indicators Available) in 2025:")
# display(tickers_with_earliest_date_in_2025)

In [None]:
# Backtest: for every ticker, replay the pick logic on each day from the first
# date on which all indicators are available up to the latest date in the data.

PICK_COLUMNS = [
    'Date', 'Ticker', 'Recommendation', 'satisfied_conditions_count',
    'Close_Price_on_Pick_Date', 'RSI', 'MACD', 'MACD_Signal', 'Williams_%R', 'cci',
    'Satisfied_Conditions_Description',
]
# The category depends only on how many of the three scoring conditions hold.
RECOMMENDATION_BY_COUNT = {3: 'Diamond Pick', 2: 'Golden Pick', 1: 'Silver Pick'}

# Determine the overall latest date in the data (used as the end date for all tickers' backtesting)
overall_latest_date = combined_tick_df['Date'].max()

# Earliest date per ticker on which every listed indicator has a value.
earliest_dates_per_ticker_with_all_indicators = (
    combined_tick_df
    .dropna(subset=['SMA200', 'RSI', 'MFI', 'MACD', 'Williams_%R', 'cci'])
    .groupby('Ticker')['Date']
    .min()
)

pick_frames = []
for ticker in combined_tick_df['Ticker'].unique():
    # Tickers that never have all indicators at once cannot be backtested.
    if ticker not in earliest_dates_per_ticker_with_all_indicators.index:
        print(f"Skipping ticker {ticker}: No data with all indicators available.")
        continue
    ticker_backtesting_start_date = earliest_dates_per_ticker_with_all_indicators.loc[ticker]

    # This ticker's rows inside its own backtesting window, in date order.
    ticker_historical_data = combined_tick_df[(combined_tick_df['Ticker'] == ticker) &
                                               (combined_tick_df['Date'] >= ticker_backtesting_start_date) &
                                               (combined_tick_df['Date'] <= overall_latest_date)].copy()
    ticker_historical_data = ticker_historical_data.sort_values(by='Date', ascending=True).reset_index(drop=True)

    # Previous-day values inside the window. The first row has none, so its
    # "increasing" comparisons are False and it can never become a pick.
    previous_day = ticker_historical_data[['RSI', 'MACD', 'Williams_%R']].shift(1)

    condition_rsi = (ticker_historical_data['RSI'] >= 55) & (ticker_historical_data['RSI'] > previous_day['RSI'])
    condition_williams = (ticker_historical_data['Williams_%R'] >= -22) & (
        ticker_historical_data['Williams_%R'] > previous_day['Williams_%R']
    )
    condition_macd = ((ticker_historical_data['MACD'] - ticker_historical_data['MACD_Signal']) > 0) & (
        ticker_historical_data['MACD'] > previous_day['MACD']
    )
    # CCI is reported in the description only; it does not count towards the category.
    condition_cci = (ticker_historical_data['cci'] > -200) & (ticker_historical_data['cci'] < 200)

    satisfied_conditions_count = (
        condition_rsi.astype(int) + condition_williams.astype(int) + condition_macd.astype(int)
    )
    is_pick = satisfied_conditions_count > 0
    if not is_pick.any():
        continue

    # Per-row, comma-separated list of the conditions that row satisfied.
    condition_flags = pd.DataFrame({
        'RSI >= 55 & Increasing': condition_rsi,
        'Williams %R >= -22 & Increasing': condition_williams,
        'MACD > Signal & Increasing': condition_macd,
        'CCI within thresholds': condition_cci,
    })
    descriptions = [
        ', '.join(label for label, met in zip(condition_flags.columns, row_flags) if met)
        for row_flags in condition_flags.to_numpy()
    ]

    ticker_picks = pd.DataFrame({
        'Date': ticker_historical_data['Date'],
        'Ticker': ticker,
        'Recommendation': satisfied_conditions_count.map(RECOMMENDATION_BY_COUNT),
        'satisfied_conditions_count': satisfied_conditions_count,
        'Close_Price_on_Pick_Date': ticker_historical_data['Close'],
        'RSI': ticker_historical_data['RSI'],  # indicator values kept for context
        'MACD': ticker_historical_data['MACD'],
        'MACD_Signal': ticker_historical_data['MACD_Signal'],
        'Williams_%R': ticker_historical_data['Williams_%R'],
        'cci': ticker_historical_data['cci'],
        'Satisfied_Conditions_Description': descriptions,
    })
    pick_frames.append(ticker_picks[is_pick])

# One concat at the end; an empty result still carries the expected columns.
historical_picks_df = (
    pd.concat(pick_frames, ignore_index=True) if pick_frames else pd.DataFrame(columns=PICK_COLUMNS)
)
# Same picks as a list of dicts, kept under the name this cell has always defined.
historical_picks = historical_picks_df.to_dict('records')

# Display the historical picks
print("Historical Picks (Ticker-Specific Backtesting):")
display(historical_picks_df.head())
print("\nTotal Historical Picks:")
display(historical_picks_df.shape)

# Task
Visualize the cumulative returns of each recommendation type based on the historical picks.

## Calculate returns for historical picks

### Subtask:
For each historical pick in `historical_picks_df`, determine the price change or return over a defined period following the pick date.


**Reasoning**:
Calculate the return for each historical pick over a defined holding period by comparing the closing price on the pick date with the highest `High` reached during the holding period that follows.



In [None]:
# 1. Define the holding period, counted in trading days (i.e. price rows of the
#    ticker that follow the pick date), not calendar days.
holding_period = 5  # Example: 5 trading days (approximately one week)

# Result columns; they stay NaN/NaT for picks with no later price data.
historical_picks_df['Return'] = np.nan
historical_picks_df['Max_Price_in_Holding_Period'] = np.nan
historical_picks_df['Date_of_Max_Price'] = pd.NaT

# Date-ordered price rows per ticker, built once instead of re-filtering the
# whole frame for every pick.
prices_by_ticker = {
    ticker_name: ticker_rows.sort_values('Date').reset_index(drop=True)
    for ticker_name, ticker_rows in combined_tick_df.groupby('Ticker', sort=False)
}

# 2. For each pick, find the highest High within the holding period.
for index, row in historical_picks_df.iterrows():
    ticker = row['Ticker']
    pick_date = row['Date']
    price_on_pick_date = row['Close_Price_on_Pick_Date']

    ticker_prices = prices_by_ticker.get(ticker)
    if ticker_prices is None:
        continue

    # Position of the first row strictly after the pick date, then the next
    # `holding_period` rows from there.
    first_later_row = ticker_prices['Date'].searchsorted(pick_date, side='right')
    holding_period_data = ticker_prices.iloc[first_later_row:first_later_row + holding_period]

    # Skip picks with no later rows or no usable High in the window.
    highs = holding_period_data['High'].dropna()
    if highs.empty:
        continue

    max_price = highs.max()
    date_of_max_price = holding_period_data.loc[highs.idxmax(), 'Date']

    # Percentage gain from the pick-date close to the best High in the window.
    calculated_return = ((max_price - price_on_pick_date) / price_on_pick_date) * 100

    historical_picks_df.loc[index, 'Return'] = calculated_return
    historical_picks_df.loc[index, 'Max_Price_in_Holding_Period'] = max_price
    historical_picks_df.loc[index, 'Date_of_Max_Price'] = date_of_max_price


# Display the updated DataFrame with returns and target dates
display(historical_picks_df.head())

In [None]:
# Analyze performance metrics for each recommendation type

# performance_metrics_by_recommendation = {}

# for recommendation_type in historical_picks_df['Recommendation'].unique():
#     category_picks = historical_picks_df[historical_picks_df['Recommendation'] == recommendation_type].copy()

#     if not category_picks.empty:
#         total_picks = len(category_picks)
#         winning_picks = category_picks[category_picks['Return'] > 0]
#         losing_picks = category_picks[category_picks['Return'] <= 0] # Consider 0 return as not a win

#         win_rate = len(winning_picks) / total_picks if total_picks > 0 else 0
#         loss_rate = len(losing_picks) / total_picks if total_picks > 0 else 0

#         average_winning_return = winning_picks['Return'].mean() if not winning_picks.empty else 0
#         average_losing_return = losing_picks['Return'].mean() if not losing_picks.empty else 0
#         average_total_return = category_picks['Return'].mean()


#         performance_metrics_by_recommendation[recommendation_type] = {
#             'Total Picks': total_picks,
#             'Winning Picks': len(winning_picks),
#             'Losing Picks': len(losing_picks),
#             'Win Rate (%)': win_rate * 100,
#             'Loss Rate (%)': loss_rate * 100,
#             'Average Winning Return (%)': average_winning_return, # Removed * 100
#             'Average Losing Return (%)': average_losing_return, # Removed * 100
#             'Average Total Return (%)': average_total_return # Removed * 100
#         }

# # Convert the performance metrics dictionary to a DataFrame for better display
# performance_metrics_by_recommendation_df = pd.DataFrame.from_dict(performance_metrics_by_recommendation, orient='index')

# # Display the performance metrics by recommendation type
# print("Performance Metrics by Recommendation Type:")
# display(performance_metrics_by_recommendation_df)


# Analyze performance metrics for each ticker
# performance_metrics_by_ticker = {}

# for ticker in historical_picks_df['Ticker'].unique():
#     ticker_picks = historical_picks_df[historical_picks_df['Ticker'] == ticker].copy()

#     if not ticker_picks.empty:
#         total_picks = len(ticker_picks)
#         winning_picks = ticker_picks[ticker_picks['Return'] > 0]
#         losing_picks = ticker_picks[ticker_picks['Return'] <= 0] # Consider 0 return as not a win

#         win_rate = len(winning_picks) / total_picks if total_picks > 0 else 0
#         loss_rate = len(losing_picks) / total_picks if total_picks > 0 else 0

#         average_winning_return = winning_picks['Return'].mean() if not winning_picks.empty else 0
#         average_losing_return = losing_picks['Return'].mean() if not losing_picks.empty else 0
#         average_total_return = ticker_picks['Return'].mean()


#         performance_metrics_by_ticker[ticker] = {
#             'Total Picks': total_picks,
#             'Winning Picks': len(winning_picks),
#             'Losing Picks': len(losing_picks),
#             'Win Rate (%)': win_rate * 100,
#             'Loss Rate (%)': loss_rate * 100,
#             'Average Winning Return (%)': average_winning_return, # Removed * 100
#             'Average Losing Return (%)': average_losing_return, # Removed * 100
#             'Average Total Return (%)': average_total_return # Removed * 100
#         }

# # Convert the performance metrics dictionary to a DataFrame for better display
# performance_metrics_by_ticker_df = pd.DataFrame.from_dict(performance_metrics_by_ticker, orient='index')

# # Display the performance metrics by ticker
# print("\nPerformance Metrics by Ticker:")
# display(performance_metrics_by_ticker_df)


# Analyze performance metrics grouped by Ticker, Recommendation Type, and Year.
# Produces `performance_metrics_by_ticker_recommendation` (dict keyed by
# (ticker, recommendation_type, year)) and the DataFrame
# `performance_metrics_by_ticker_recommendation_df` with one row per key.
performance_metrics_by_ticker_recommendation = {}

# Column order of the resulting metrics table; also used to give an empty
# result (no picks at all) a well-formed set of columns.
performance_metric_columns = [
    'Total Picks',
    'Winning Picks',
    'Losing Picks',
    'Win Rate (%)',
    'Loss Rate (%)',
    'Average Winning Return (%)',
    'Average Losing Return (%)',
    'Average Total Return (%)',
]

# Add a 'Year' column to historical_picks_df for grouping
historical_picks_df['Year'] = historical_picks_df['Date'].dt.year

# Group by Ticker, Recommendation Type, and Year
grouped_performance = historical_picks_df.groupby(['Ticker', 'Recommendation', 'Year'])

# groupby only yields non-empty groups, so no emptiness / zero-division guards
# are needed inside the loop.
for (ticker, recommendation_type, year), category_picks in grouped_performance:
    total_picks = len(category_picks)
    winning_picks = category_picks[category_picks['Return'] > 0]
    losing_picks = category_picks[category_picks['Return'] <= 0] # Consider 0 return as not a win
    # NOTE(review): a NaN 'Return' fails both comparisons above, so such a pick
    # counts towards 'Total Picks' but is neither winning nor losing; win rate
    # and loss rate then sum to less than 100%. Confirm this is intended.

    win_rate = len(winning_picks) / total_picks
    loss_rate = len(losing_picks) / total_picks

    # Report 0 instead of NaN when a group has no winners / no losers.
    average_winning_return = winning_picks['Return'].mean() if not winning_picks.empty else 0
    average_losing_return = losing_picks['Return'].mean() if not losing_picks.empty else 0
    average_total_return = category_picks['Return'].mean()

    # Store metrics using a tuple (ticker, recommendation_type, year) as the key.
    # The average-return values are stored as-is (no * 100 scaling is applied here).
    performance_metrics_by_ticker_recommendation[(ticker, recommendation_type, year)] = {
        'Total Picks': total_picks,
        'Winning Picks': len(winning_picks),
        'Losing Picks': len(losing_picks),
        'Win Rate (%)': win_rate * 100,
        'Loss Rate (%)': loss_rate * 100,
        'Average Winning Return (%)': average_winning_return,
        'Average Losing Return (%)': average_losing_return,
        'Average Total Return (%)': average_total_return
    }

# Convert the performance metrics dictionary to a DataFrame for better display.
# The MultiIndex is built explicitly (with its level names) so that an empty
# result still gets a 3-level index instead of failing when the names are set.
performance_metrics_by_ticker_recommendation_df = pd.DataFrame(
    list(performance_metrics_by_ticker_recommendation.values()),
    index=pd.MultiIndex.from_tuples(
        list(performance_metrics_by_ticker_recommendation.keys()),
        names=['Ticker', 'Recommendation', 'Year'],
    ),
    columns=performance_metric_columns,
)


# Display the performance metrics grouped by Ticker and Recommendation Type
print("\nPerformance Metrics by Ticker, Recommendation Type, and Year:")
display(performance_metrics_by_ticker_recommendation_df)

## Backtesting Analysis for New Signal Conditions

### Subtask:
Perform backtesting analysis for `signal_1`, `signal_2`, and `signal_3` conditions and calculate the winning percentage for each signal.

**Reasoning**:
Iterate through historical dates, apply the new complex signal conditions, record each trigger, check whether the highest `High` during the holding period after the trigger exceeds the trigger-day close, and summarize the winning percentage per signal and ticker over the backtesting period.

In [None]:
# Backtest the three new signal conditions.
#
# For every ticker, walk its date-sorted history, evaluate signal_1..signal_3 on
# each day, and for every trigger check whether the highest 'High' over the
# following `holding_period_signals` trading days exceeds the trigger-day close.
# Results: `signal_triggers` (raw trigger records), `signal_triggers_df`
# (one DataFrame per signal) and `signal_performance_by_ticker_df`
# (winning percentage indexed by (Signal, Ticker)).

# Holding period after a trigger, counted in trading days, i.e. rows of the
# ticker's own date-sorted history (not calendar days, which would shrink the
# window whenever it spans a weekend or holiday).
holding_period_signals = 5 # Example: 5 trading days

# RSI / Williams %R bands that distinguish the three signals. All other
# conditions (MACD and the confirmation check) are shared between them.
new_signal_band_checks = {
    'signal_1': lambda rsi, williams_r: (55 <= rsi <= 60) and (-22 <= williams_r <= -16),
    'signal_2': lambda rsi, williams_r: (60 < rsi <= 65) and (-20 < williams_r <= -9),
    'signal_3': lambda rsi, williams_r: (65 < rsi <= 70) and (-16 < williams_r <= -2),
}


def _evaluate_new_signals(current_day_data, previous_day_data, rolling_vol_mean_20):
    """Return ``{signal_name: bool}`` for the three new signals on one day.

    Args:
        current_day_data: row for the day being evaluated; reads 'RSI',
            'Williams_%R', 'MACD', 'MACD_Signal', 'cci', 'OBV' and 'Volume'.
        previous_day_data: row for the preceding day; only 'OBV' is read.
        rolling_vol_mean_20: trailing 20-day mean volume, or NaN when fewer
            than 20 rows are available.

    A signal fires when RSI and Williams %R fall inside its band, MACD is above
    both its signal line and zero, and at least one confirmation holds: CCI
    strictly between -200 and 200, OBV rising versus the previous day, or volume
    above 1.5x the rolling mean. Comparisons against NaN evaluate to False.
    """
    macd = current_day_data['MACD']
    macd_bullish = (macd > current_day_data['MACD_Signal']) and (macd > 0)

    volume_spike = (
        not pd.isna(rolling_vol_mean_20)
        and current_day_data['Volume'] > 1.5 * rolling_vol_mean_20
    )
    confirmed = (
        (-200 < current_day_data['cci'] < 200)
        or (current_day_data['OBV'] > previous_day_data['OBV'])
        or volume_spike
    )

    shared_conditions_met = bool(macd_bullish and confirmed)
    rsi = current_day_data['RSI']
    williams_r = current_day_data['Williams_%R']
    return {
        signal_name: shared_conditions_met and bool(in_band(rsi, williams_r))
        for signal_name, in_band in new_signal_band_checks.items()
    }


# Initialize lists to store signal triggers and their performance
signal_triggers = {signal_name: [] for signal_name in new_signal_band_checks}

# Determine the overall latest date in the data
overall_latest_date = combined_tick_df['Date'].max()

# Earliest date per ticker on which all of the listed indicators are available
earliest_dates_per_ticker_with_all_indicators = combined_tick_df.dropna(subset=['SMA200', 'RSI', 'MFI', 'MACD', 'Williams_%R', 'cci']).groupby('Ticker')['Date'].min()


# Iterate through each unique ticker
for ticker in combined_tick_df['Ticker'].unique():
    if ticker not in earliest_dates_per_ticker_with_all_indicators.index:
        # No row of this ticker has every indicator populated, so there is nothing to backtest.
        print(f"Skipping ticker {ticker}: No data with all indicators available for backtesting new signals.")
        continue
    ticker_backtesting_start_date = earliest_dates_per_ticker_with_all_indicators.loc[ticker]

    # Filter data for the current ticker within its specific backtesting period
    ticker_historical_data = combined_tick_df[(combined_tick_df['Ticker'] == ticker) &
                                               (combined_tick_df['Date'] >= ticker_backtesting_start_date) &
                                               (combined_tick_df['Date'] <= overall_latest_date)].copy()

    # Sort by date and renumber rows 0..n-1 so that positional offsets below
    # correspond to consecutive trading days.
    ticker_historical_data = ticker_historical_data.sort_values(by='Date', ascending=True).reset_index(drop=True)

    # Start from the second day to allow for the previous-day comparison
    for i in range(1, len(ticker_historical_data)):
        current_day_data = ticker_historical_data.iloc[i]
        previous_day_data = ticker_historical_data.iloc[i-1]
        current_date = current_day_data['Date']

        # Skip if previous day indicators are not available
        if pd.isna(previous_day_data['RSI']) or pd.isna(previous_day_data['MACD']) or pd.isna(previous_day_data['Williams_%R']) or pd.isna(previous_day_data['cci']):
            continue

        # Trailing 20-row volume mean ending on the current day. `.loc` slicing is
        # label-inclusive, so i-19..i covers exactly 20 rows after reset_index.
        if i >= 19:
            rolling_vol_mean_20 = ticker_historical_data.loc[i-19:i, 'Volume'].mean()
        else:
            rolling_vol_mean_20 = np.nan # Not enough data for rolling mean

        triggered_signals = _evaluate_new_signals(current_day_data, previous_day_data, rolling_vol_mean_20)
        if not any(triggered_signals.values()):
            continue

        # Holding window: the next `holding_period_signals` trading days of this
        # ticker, starting the day after the trigger. It may be shorter (or
        # empty) near the end of the available history.
        holding_period_data = ticker_historical_data.iloc[i + 1:i + 1 + holding_period_signals]
        price_on_trigger_date = current_day_data['Close']

        # A trigger wins when the highest 'High' in the holding window exceeds the
        # trigger-day close. An empty window is recorded as not winning.
        winning_trigger = False
        if not holding_period_data.empty:
            max_price = holding_period_data['High'].max()
            winning_trigger = bool(max_price > price_on_trigger_date)

        for signal_name, is_triggered in triggered_signals.items():
            if is_triggered:
                signal_triggers[signal_name].append({
                    'Date': current_date,
                    'Ticker': ticker,
                    'Close_Price_on_Trigger_Date': price_on_trigger_date,
                    'Winning_Trigger': winning_trigger
                })


# Convert signal triggers lists to DataFrames
signal_triggers_df = {signal: pd.DataFrame(triggers) for signal, triggers in signal_triggers.items()}

# Calculate performance (winning percentage) for each signal, grouped by ticker
signal_performance_by_ticker = {}

for signal, df in signal_triggers_df.items():
    signal_performance_by_ticker[signal] = {} # Nested dictionary for this signal

    if not df.empty:
        # groupby only yields tickers with at least one trigger, so the division is safe
        for ticker, ticker_triggers_df in df.groupby('Ticker'):
            total_triggers = len(ticker_triggers_df)
            winning_triggers = int(ticker_triggers_df['Winning_Trigger'].sum())

            signal_performance_by_ticker[signal][ticker] = {
                'Total Triggers': total_triggers,
                'Winning Triggers (based on Max Price)': winning_triggers,
                'Winning Percentage (%)': (winning_triggers / total_triggers) * 100
            }
    else:
        # If no triggers for this signal, add a zero entry for each ticker that has enough data for backtesting
        for ticker in earliest_dates_per_ticker_with_all_indicators.index.tolist():
            signal_performance_by_ticker[signal][ticker] = {
                'Total Triggers': 0,
                'Winning Triggers (based on Max Price)': 0,
                'Winning Percentage (%)': 0
            }


# Flatten the nested dictionary into parallel lists of (signal, ticker) keys and metric rows
index_tuples = []
data_list = []
for signal, ticker_data in signal_performance_by_ticker.items():
    for ticker, metrics in ticker_data.items():
        index_tuples.append((signal, ticker))
        data_list.append(metrics)

# Passing names and columns explicitly keeps the frame well-formed even when
# there is no (signal, ticker) entry at all.
signal_performance_by_ticker_df = pd.DataFrame(
    data_list,
    index=pd.MultiIndex.from_tuples(index_tuples, names=['Signal', 'Ticker']),
    columns=['Total Triggers', 'Winning Triggers (based on Max Price)', 'Winning Percentage (%)'],
)


# Display the winning percentages for each signal, grouped by ticker
print("Winning Percentage for New Signal Conditions (Grouped by Ticker):")
display(signal_performance_by_ticker_df)

## Identify Latest New Signals Triggered by Shortlisted Tickers

### Subtask:
Check which of the new signal conditions (`signal_1`, `signal_2`, `signal_3`) were triggered by the shortlisted tickers on the latest date.

**Reasoning**:
Iterate through the shortlisted tickers from `filtered_latest_two_days_df`, find the latest date for each, and apply the new signal conditions to determine which signals were triggered on that specific date.

In [None]:
# Check which of the new signals fired for each shortlisted ticker on its
# latest shortlisted date. Result: `latest_new_signals_for_shortlisted`, a dict
# mapping ticker -> comma-separated signal labels (or an explanatory message).
latest_new_signals_for_shortlisted = {}

# Display label plus the RSI / Williams %R band of each signal. The remaining
# conditions (MACD and the confirmation check) are shared by all three.
latest_signal_band_checks = [
    ('Signal 1', lambda rsi, williams_r: (55 <= rsi <= 60) and (-22 <= williams_r <= -16)),
    ('Signal 2', lambda rsi, williams_r: (60 < rsi <= 65) and (-20 < williams_r <= -9)),
    ('Signal 3', lambda rsi, williams_r: (65 < rsi <= 70) and (-16 < williams_r <= -2)),
]

if not filtered_latest_two_days_df.empty:
    for ticker in filtered_latest_two_days_df['Ticker'].unique():
        # Latest shortlisted date for this ticker; taking the max avoids relying
        # on the row order of the filtered DataFrame.
        latest_date = filtered_latest_two_days_df.loc[
            filtered_latest_two_days_df['Ticker'] == ticker, 'Date'
        ].max()

        # Full history of the ticker up to that date, explicitly sorted so that
        # the last two rows really are the latest day and the day before it.
        ticker_full_data_up_to_latest_date = combined_tick_df[
            (combined_tick_df['Ticker'] == ticker) &
            (combined_tick_df['Date'] <= latest_date)
        ].sort_values(by='Date', ascending=True)

        if len(ticker_full_data_up_to_latest_date) <= 1:
            # Need at least two days for the previous-day OBV comparison
            latest_new_signals_for_shortlisted[ticker] = 'Not enough data for signal check'
            continue

        current_day_data = ticker_full_data_up_to_latest_date.iloc[-1]
        previous_day_data = ticker_full_data_up_to_latest_date.iloc[-2]

        # Rolling 20-day volume mean ending on the latest date (NaN if fewer than 20 rows)
        if len(ticker_full_data_up_to_latest_date) >= 20:
            rolling_vol_mean_20 = ticker_full_data_up_to_latest_date['Volume'].rolling(window=20).mean().iloc[-1]
        else:
            rolling_vol_mean_20 = np.nan

        rsi = current_day_data['RSI']
        williams_r = current_day_data['Williams_%R']
        macd = current_day_data['MACD']
        macd_signal = current_day_data['MACD_Signal']
        cci = current_day_data['cci']
        obv = current_day_data['OBV']
        prev_obv = previous_day_data['OBV']
        volume = current_day_data['Volume']

        # Conditions shared by every signal: MACD above its signal line and above
        # zero, plus at least one confirmation (CCI inside +/-200, rising OBV, or
        # volume above 1.5x its 20-day mean). Comparisons with NaN are False.
        macd_bullish = (macd > macd_signal) and (macd > 0)
        volume_spike = (not pd.isna(rolling_vol_mean_20)) and (volume > 1.5 * rolling_vol_mean_20)
        confirmed = (-200 < cci < 200) or (obv > prev_obv) or volume_spike

        triggered_signals_list = [
            label
            for label, in_band in latest_signal_band_checks
            if macd_bullish and confirmed and in_band(rsi, williams_r)
        ]

        latest_new_signals_for_shortlisted[ticker] = ', '.join(triggered_signals_list) if triggered_signals_list else 'None of the new signals triggered'

# Display the latest new signals triggered for each shortlisted ticker
print("Latest New Signals Triggered by Shortlisted Tickers:")
if latest_new_signals_for_shortlisted:
    for ticker, signals in latest_new_signals_for_shortlisted.items():
        print(f"{ticker}: {signals}")
else:
    print("No shortlisted tickers found or not enough data to check signals.")

## Calculate daily returns for each category

### Subtask:
Aggregate the returns of the picks for each day and each recommendation type to get daily portfolio returns for each category.


**Reasoning**:
Aggregate the returns by date and recommendation type and pivot the data for cumulative return calculation.



In [None]:
# # Group by Date and Recommendation and calculate the mean return
# daily_returns = historical_picks_df.groupby(['Date', 'Recommendation'])['Return'].mean().reset_index()

# # Pivot the DataFrame to have Dates as index and Recommendation types as columns
# daily_portfolio_returns = daily_returns.pivot(index='Date', columns='Recommendation', values='Return')

# # Fill missing values with 0
# daily_portfolio_returns = daily_portfolio_returns.fillna(0)

# # Sort the DataFrame by date
# daily_portfolio_returns = daily_portfolio_returns.sort_index()

# # Display the daily portfolio returns
# display(daily_portfolio_returns.head())
# display(daily_portfolio_returns.shape)

## Calculate cumulative returns

### Subtask:
Calculate the cumulative returns for each recommendation type over the backtesting period.


**Reasoning**:
Calculate the cumulative returns for each recommendation category by adding 1 to the daily return and then calculating the cumulative product.



In [None]:
# # Calculate cumulative returns
# cumulative_returns = (daily_portfolio_returns + 1).cumprod()

# # Display the head of the cumulative returns DataFrame
# display(cumulative_returns.head())

## Visualize cumulative returns

### Subtask:
Visualize the cumulative returns of each recommendation type based on the historical picks.


**Reasoning**:
Create a line plot of the cumulative returns of each recommendation type.



In [None]:
# plt.figure(figsize=(12, 6))
# plt.plot(cumulative_returns.index, cumulative_returns['Diamond Pick'], label='Diamond Pick')
# plt.plot(cumulative_returns.index, cumulative_returns['Golden Pick'], label='Golden Pick')
# plt.plot(cumulative_returns.index, cumulative_returns['Silver Pick'], label='Silver Pick')
# plt.title('Cumulative Returns of Recommendation Types')
# plt.xlabel('Date')
# plt.ylabel('Cumulative Return')
# plt.legend()
# plt.grid(True)
# plt.show()

## Generate charts and save them as image files

### Subtask:
Modify the plotting code to save each combined technical indicator chart as a temporary image file.


**Reasoning**:
Modify the existing plotting code to save each combined technical indicator chart as a temporary image file and store the filenames for later use.



In [None]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
import os

# Convert 'Date' column to datetime objects if not already
combined_tick_df['Date'] = pd.to_datetime(combined_tick_df['Date'])

# Get the list of unique tickers from the filtered latest two days dataframe
# This ensures we only plot for tickers that met the initial filtering criteria
unique_shortlisted_tickers = filtered_latest_two_days_df['Ticker'].unique()

# List to store temporary plot filenames
temp_plot_files = []

# Plot for each shortlisted ticker
indicators = ['RSI', 'MFI', 'MACD', 'Williams_%R', 'cci', 'OBV', 'AD_Line']
num_indicators = len(indicators)

for ticker in unique_shortlisted_tickers:
    # Find the latest date entry for the current ticker in the filtered DataFrame
    latest_date_entry = filtered_latest_two_days_df[filtered_latest_two_days_df['Ticker'] == ticker].iloc[-1]
    date_of_signal = latest_date_entry['Date'].strftime('%Y-%m-%d')

    # Find the corresponding full data for plotting from the combined_tick_df
    ticker_plot_df = combined_tick_df[combined_tick_df['Ticker'] == ticker].copy()

    # Create a figure with subplots for SMA and Technical Indicators
    # Adjust figure size
    fig, axes = plt.subplots(nrows=num_indicators + 1, ncols=1, figsize=(12, 2 * (num_indicators + 1)), sharex=True)
    fig.suptitle(f'Technical Analysis ({date_of_signal})', y=1.02) # Add a title for the entire figure

    # Add ticker name to the top left
    fig.text(0.01, 0.99, ticker, # Position in the top left of the figure
             horizontalalignment='left', verticalalignment='top', transform=fig.transFigure,
             fontsize=12, fontweight='bold')


    # Plot SMA on the top subplot
    ax_sma = axes[0]
    ax_sma.plot(ticker_plot_df['Date'], ticker_plot_df['Close'] , label='Close Price') # Display actual price
    ax_sma.plot(ticker_plot_df['Date'], ticker_plot_df['SMA20'] , label='SMA 20')
    ax_sma.plot(ticker_plot_df['Date'], ticker_plot_df['SMA50'] , label='SMA 50')
    ax_sma.plot(ticker_plot_df['Date'], ticker_plot_df['SMA200'] , label='SMA 200')

    ax_sma.set_ylabel('Price') # Changed ylabel to Price
    ax_sma.set_title('Stock Price and SMAs')
    ax_sma.grid(True)

    # Display current values of Close and SMAs within the subplot (aligned right)
    current_close = ticker_plot_df['Close'].iloc[-1]
    current_sma20 = ticker_plot_df['SMA20'].iloc[-1]
    current_sma50 = ticker_plot_df['SMA50'].iloc[-1]
    current_sma200 = ticker_plot_df['SMA200'].iloc[-1]
    text_to_display_sma = f'Current:\nClose: {current_close:.2f}\nSMA20: {current_sma20:.2f}\nSMA50: {current_sma50:.2f}\nSMA200: {current_sma200:.2f}'
    ax_sma.text(1.01, 0.5, text_to_display_sma, # Position to the right of the plot area
             horizontalalignment='left', verticalalignment='center', transform=ax_sma.transAxes,
             bbox=dict(boxstyle='round,pad=0.5', fc='wheat', alpha=0.5))
    ax_sma.legend(loc='upper left', bbox_to_anchor=(0, 1.02)) # Move legend to top left inside plot


    # Plot Technical Indicators on the subsequent subplots
    for i, indicator in enumerate(indicators):
        ax = axes[i + 1] # Start from the second subplot
        ax.plot(ticker_plot_df['Date'], ticker_plot_df[indicator], label=indicator)

        # Add threshold lines and shaded areas based on the indicator
        if indicator == 'MACD':
            ax.axhline(0, color='red', linestyle='--')
            # Shade area above zero
            ax.fill_between(ticker_plot_df['Date'], ticker_plot_df[indicator], 0, where=(ticker_plot_df[indicator] > 0), color='green', alpha=0.3, interpolate=True)
            # Shade area below zero
            ax.fill_between(ticker_plot_df['Date'], ticker_plot_df[indicator], 0, where=(ticker_plot_df[indicator] < 0), color='red', alpha=0.3, interpolate=True)

            # Overlay MACD Signal
            ax.plot(ticker_plot_df['Date'], ticker_plot_df['MACD_Signal'], label='MACD Signal', linestyle='--')

            # Display current values and difference within the subplot (aligned right)
            current_macd = ticker_plot_df['MACD'].iloc[-1]
            current_signal = ticker_plot_df['MACD_Signal'].iloc[-1]
            macd_diff = current_macd - current_signal
            text_to_display_corner = f'Current:\nMACD: {current_macd:.2f}\nSignal: {current_signal:.2f}\nDiff: {macd_diff:.2f}'
            ax.text(1.01, 0.5, text_to_display_corner, # Position to the right of the plot area
                     horizontalalignment='left', verticalalignment='center', transform=ax.transAxes,
                     bbox=dict(boxstyle='round,pad=0.5', fc='wheat', alpha=0.5))


        elif indicator == 'Williams_%R':
            threshold_upper = -20
            threshold_lower = -80
            ax.axhline(threshold_upper, color='red', linestyle='--')
            ax.axhline(threshold_lower, color='red', linestyle='--')
            # Shade area above -20
            ax.fill_between(ticker_plot_df['Date'], ticker_plot_df[indicator], threshold_upper, where=(ticker_plot_df[indicator] > threshold_upper), color='red', alpha=0.3, interpolate=True)
            # Shade area below -80
            ax.fill_between(ticker_plot_df['Date'], ticker_plot_df[indicator], threshold_lower, where=(ticker_plot_df[indicator] < threshold_lower), color='green', alpha=0.3, interpolate=True)

            # Display current value within the subplot (aligned right)
            current_value = ticker_plot_df[indicator].iloc[-1]
            ax.text(1.01, 0.5, f'Current {indicator}: {current_value:.2f}', # Position to the right of the plot area
                     horizontalalignment='left', verticalalignment='center', transform=ax.transAxes,
                     bbox=dict(boxstyle='round,pad=0.5', fc='wheat', alpha=0.5))

            # Add threshold labels within the subplot (aligned right), vertically aligned with lines
            ax.text(1.01, threshold_upper, f'Upper: {threshold_upper}', # Position to the right of the plot area, aligned with line
                     horizontalalignment='left', verticalalignment='center', transform=ax.get_yaxis_transform(), fontsize=9) # Use yaxis transform for data alignment
            ax.text(1.01, threshold_lower, f'Lower: {threshold_lower}', # Position to the right of the plot area, aligned with line
                     horizontalalignment='left', verticalalignment='center', transform=ax.get_yaxis_transform(), fontsize=9) # Use yaxis transform for data alignment


        elif indicator == 'RSI':
            threshold_upper = 70
            threshold_lower = 30
            ax.axhline(threshold_upper, color='red', linestyle='--')
            ax.axhline(threshold_lower, color='red', linestyle='--')
            # Shade area above 70
            ax.fill_between(ticker_plot_df['Date'], ticker_plot_df[indicator], threshold_upper, where=(ticker_plot_df[indicator] > threshold_upper), color='red', alpha=0.3, interpolate=True)
            # Shade area below 30
            ax.fill_between(ticker_plot_df['Date'], ticker_plot_df[indicator], threshold_lower, where=(ticker_plot_df[indicator] < threshold_lower), color='green', alpha=0.3, interpolate=True)
            current_value = ticker_plot_df[indicator].iloc[-1]
            ax.text(1.01, 0.5, f'Current {indicator}: {current_value:.2f}', # Position to the right of the plot area
                     horizontalalignment='left', verticalalignment='center', transform=ax.transAxes,
                     bbox=dict(boxstyle='round,pad=0.5', fc='wheat', alpha=0.5))
            ax.text(1.01, threshold_upper, f'Upper: {threshold_upper}', # Position to the right of the plot area, aligned with line
                     horizontalalignment='left', verticalalignment='center', transform=ax.get_yaxis_transform(), fontsize=9) # Use yaxis transform for data alignment
            ax.text(1.01, threshold_lower, f'Lower: {threshold_lower}', # Position to the right of the plot area, aligned with line
                     horizontalalignment='left', verticalalignment='center', transform=ax.get_yaxis_transform(), fontsize=9) # Use yaxis transform for data alignment


        elif indicator == 'MFI':
            threshold_upper = 80
            threshold_lower = 20
            ax.axhline(threshold_upper, color='red', linestyle='--')
            ax.axhline(threshold_lower, color='red', linestyle='--')
            # Shade area above 80
            ax.fill_between(ticker_plot_df['Date'], ticker_plot_df[indicator], threshold_upper, where=(ticker_plot_df[indicator] > threshold_upper), color='red', alpha=0.3, interpolate=True)
            # Shade area below 20
            ax.fill_between(ticker_plot_df['Date'], ticker_plot_df[indicator], threshold_lower, where=(ticker_plot_df[indicator] < threshold_lower), color='green', alpha=0.3, interpolate=True)
            current_value = ticker_plot_df[indicator].iloc[-1]
            ax.text(1.01, 0.5, f'Current {indicator}: {current_value:.2f}', # Position to the right of the plot area
                     horizontalalignment='left', verticalalignment='center', transform=ax.transAxes,
                     bbox=dict(boxstyle='round,pad=0.5', fc='wheat', alpha=0.5))
            ax.text(1.01, threshold_upper, f'Upper: {threshold_upper}', # Position to the right of the plot area, aligned with line
                     horizontalalignment='left', verticalalignment='center', transform=ax.get_yaxis_transform(), fontsize=9) # Use yaxis transform for data alignment
            ax.text(1.01, threshold_lower, f'Lower: {threshold_lower}', # Position to the right of the plot area, aligned with line
                     horizontalalignment='left', verticalalignment='center', transform=ax.get_yaxis_transform(), fontsize=9) # Use yaxis transform for data alignment


        elif indicator == 'cci':
            threshold_upper = 200
            threshold_lower = -200
            ax.axhline(threshold_upper, color='red', linestyle='--')
            ax.axhline(threshold_lower, color='red', linestyle='--')
            # Shade area above 200
            ax.fill_between(ticker_plot_df['Date'], ticker_plot_df[indicator], threshold_upper, where=(ticker_plot_df[indicator] > threshold_upper), color='red', alpha=0.3, interpolate=True)
            # Shade area below -200
            ax.fill_between(ticker_plot_df['Date'], ticker_plot_df[indicator], threshold_lower, where=(ticker_plot_df[indicator] < threshold_lower), color='green', alpha=0.3, interpolate=True)
            current_value = ticker_plot_df[indicator].iloc[-1]
            ax.text(1.01, 0.5, f'Current {indicator}: {current_value:.2f}', # Position to the right of the plot area
                     horizontalalignment='left', verticalalignment='center', transform=ax.transAxes,
                     bbox=dict(boxstyle='round,pad=0.5', fc='wheat', alpha=0.5))
            ax.text(1.01, threshold_upper, f'Upper: {threshold_upper}', # Position to the right of the plot area, aligned with line
                     horizontalalignment='left', verticalalignment='center', transform=ax.get_yaxis_transform(), fontsize=9) # Use yaxis transform for data alignment
            ax.text(1.01, threshold_lower, f'Lower: {threshold_lower}', # Position to the right of the plot area, aligned with line
                     horizontalalignment='left', verticalalignment='center', transform=ax.get_yaxis_transform(), fontsize=9) # Use yaxis transform for data alignment


        elif indicator == 'OBV':
             # Display current value within the subplot (aligned right)
            current_value = ticker_plot_df[indicator].iloc[-1]
            ax.text(1.01, 0.5, f'Current {indicator}: {current_value:.2f}', # Position to the right of the plot area
                     horizontalalignment='left', verticalalignment='center', transform=ax.transAxes,
                     bbox=dict(boxstyle='round,pad=0.5', fc='wheat', alpha=0.5))


        elif indicator == 'AD_Line':
             # Display current value within the subplot (aligned right)
            current_value = ticker_plot_df[indicator].iloc[-1]
            ax.text(1.01, 0.5, f'Current {indicator}: {current_value:.2f}', # Position to the right of the plot area
                     horizontalalignment='left', verticalalignment='center', transform=ax.transAxes,
                     bbox=dict(boxstyle='round,pad=0.5', fc='wheat', alpha=0.5))


        ax.set_ylabel(indicator)
        ax.legend(loc='upper left', bbox_to_anchor=(0, 1.02)) # Move legend to top left inside plot


        # Format x-axis to show months for the bottom subplot
        if i == num_indicators - 1:
            formatter = mdates.DateFormatter('%Y-%m')
            ax.xaxis.set_major_formatter(formatter)
            plt.xticks(rotation=45)
        else:
            ax.tick_params(labelbottom=False) # Hide x-axis labels for upper subplots

        ax.grid(True) # Keep grid


    plt.tight_layout() # Use default tight_layout


    # Save the combined plot to a temporary file
    combined_plot_filename = f'{ticker}_technical_analysis_plot.png'
    plt.savefig(combined_plot_filename)
    plt.close(fig) # Close the figure to free up memory
    temp_plot_files.append(combined_plot_filename) # Add to list for cleanup

print(f"Generated and saved {len(temp_plot_files)} temporary plot files.")

**Reasoning**:
Create a new folder in Google Drive, named with the current date, to store the technical analysis plots, and create one subfolder inside it for each recommendation type (Diamond Pick, Golden Pick, Silver Pick).



In [None]:
from google.colab import drive
from google.colab import userdata
import os
from datetime import datetime

# Purpose: build today's output tree on Google Drive,
#   <drive_base_path>/<YYYY-MM-DD>/<Recommendation_Type>/
# and record each subfolder path in `drive_subfolders` (keyed by the original
# recommendation label) so the copy step can route plots by recommendation.

# Make sure Drive is really mounted before creating anything. Without this guard,
# os.makedirs() below would quietly create the same path on the Colab VM's local
# disk when Drive is not mounted, and the plots would never reach Drive.
drive_mount_point = '/content/drive'
if not os.path.isdir(os.path.join(drive_mount_point, 'MyDrive')):
    drive.mount(drive_mount_point)

# Define the base target directory in Google Drive
drive_base_path = '/content/drive/MyDrive/stock_analysis_plots' # Adjust this path as needed

# Create a date-stamped folder name (local runtime date, formatted YYYY-MM-DD)
date_today = datetime.now().strftime('%Y-%m-%d')
drive_date_folder = os.path.join(drive_base_path, date_today)

# Create the date directory if it doesn't exist (exist_ok makes re-running the cell safe)
os.makedirs(drive_date_folder, exist_ok=True)

# Define and create subfolders for each recommendation type within the date folder
recommendation_types = ['Diamond Pick', 'Golden Pick', 'Silver Pick']
drive_subfolders = {}
for rec_type in recommendation_types:
    # Create a folder name that is filesystem-friendly (replace spaces with underscores)
    folder_name = rec_type.replace(' ', '_')
    subfolder_path = os.path.join(drive_date_folder, folder_name)
    os.makedirs(subfolder_path, exist_ok=True)
    # Keyed by the label with spaces: the copy step looks paths up by this original label
    drive_subfolders[rec_type] = subfolder_path

print(f"Google Drive target date folder created: {drive_date_folder}")
print("Google Drive subfolders for recommendations created.")

**Reasoning**:
Copy each temporary plot file into the Google Drive subfolder that matches its ticker's latest recommendation, inside the newly created date folder.



In [None]:
import os
import shutil
import pandas as pd # Import pandas to access filtered_latest_two_days_df

# Purpose: file each saved plot under the Drive subfolder matching its ticker's recommendation.
# Relies on names from earlier cells: temp_plot_files, drive_subfolders, and
# filtered_latest_two_days_df (read here through its 'Ticker' and 'Recommendation' columns).

# Suffix the plotting cell appends to the ticker when naming each PNG
PLOT_FILE_SUFFIX = '_technical_analysis_plot.png'

# Last-listed recommendation per ticker, built once instead of re-filtering the DataFrame
# for every file. "Last" means the last row in the DataFrame's current order --
# presumably chronological; verify against the cell that builds the DataFrame.
latest_recommendation_by_ticker = (
    filtered_latest_two_days_df
    .drop_duplicates(subset='Ticker', keep='last')
    .set_index('Ticker')['Recommendation']
)

copied_count = 0

# Copy each temporary plot file to the appropriate Google Drive subfolder
for plot_file in temp_plot_files:
    # Recover the ticker by stripping the known suffix, so a ticker that itself contains
    # an underscore is not truncated; fall back to the first '_'-separated token otherwise.
    plot_name = os.path.basename(plot_file)
    if plot_name.endswith(PLOT_FILE_SUFFIX):
        ticker = plot_name[:-len(PLOT_FILE_SUFFIX)]
    else:
        ticker = plot_name.split('_')[0]

    # The email cell deletes the temporary files, so a re-run of this cell may find them gone
    if not os.path.exists(plot_file):
        print(f"Warning: Plot file {plot_file} no longer exists. Skipping copy for this plot.")
        continue

    if ticker not in latest_recommendation_by_ticker.index:
        print(f"Warning: Could not find recommendation data for ticker {ticker} in filtered_latest_two_days_df. Skipping copy for this plot.")
        continue

    recommendation = latest_recommendation_by_ticker.loc[ticker]

    # Get the target subfolder path based on the recommendation
    # (drive_subfolders is the dictionary created in the previous cell)
    if recommendation in drive_subfolders:
        target_subfolder = drive_subfolders[recommendation]
        shutil.copy(plot_file, target_subfolder)
        copied_count += 1
        print(f"Copied {plot_file} to {target_subfolder}")
    else:
        print(f"Warning: Could not find a matching subfolder for recommendation '{recommendation}' for ticker {ticker}. Skipping copy for this plot.")


# Report what actually happened rather than claiming every file was copied
print(f"Copied {copied_count} of {len(temp_plot_files)} temporary plot files to Google Drive subfolders.")

**Reasoning**:
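Generate a shareable link for the newly created Google Drive folder containing the plots and include this link in the email report. Note that the email cell below does not generate a link programmatically; it tells the reader where the dated folder lives so that a shareable link can be created manually.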



**Reasoning**:
The previous command failed because `receiver_email` was not defined. Define the `receiver_email` variable and retry sending the email with the shortlisted tickers report, performance metrics, new signal analysis, and the Google Drive folder reference, including the cleanup of temporary plot files.



In [None]:
import html
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
import os
from datetime import datetime

# Purpose: assemble an HTML report for the shortlisted tickers, send it through Gmail
# over SMTP-SSL, and finally delete the local temporary plot files.
# Relies on names from earlier cells: gmail_address, gmail_app_password and
# filtered_latest_two_days_df. The optional names latest_new_signals_for_shortlisted,
# performance_metrics_by_ticker_recommendation_df, signal_performance_by_ticker_df and
# temp_plot_files are used only when they exist in the notebook namespace.

# Email details
sender_email = gmail_address
sender_password = gmail_app_password
receiver_email = gmail_address # The report is mailed back to the sender's own address
subject = "Performance Reports for Shortlisted Tickers" # Updated subject

# HTML fragments are collected here and joined once into email_body further down
email_parts = []

if not filtered_latest_two_days_df.empty:
    shortlisted_tickers_list = filtered_latest_two_days_df['Ticker'].unique().tolist()

    # Add the shortlisted tickers data (Diamond, Golden, Silver Picks)
    email_parts.extend([
        "<h2>Shortlisted Stock Tickers (Diamond, Golden, and Silver Picks)</h2>\n",
        "<p>Here are the stock tickers that met the filtering criteria for the latest trading sessions:</p>\n",
        filtered_latest_two_days_df[['Date', 'Ticker', 'Recommendation', 'satisfied_conditions_count', 'Satisfied_Conditions_Description']].to_html(index=False),
        "\n<br>\n", # Add a break
    ])

    # Add the latest new signals triggered by shortlisted tickers
    if 'latest_new_signals_for_shortlisted' in locals() and latest_new_signals_for_shortlisted:
        email_parts.extend([
            "<h2>Latest New Signals Triggered by Shortlisted Tickers</h2>\n",
            "<p>For the latest trading session, the following new signals were triggered by the shortlisted tickers:</p>\n",
            "<ul>\n",
        ])
        for ticker, signals in latest_new_signals_for_shortlisted.items():
            # Escape &, < and > so signal text such as "RSI < 30" cannot break the HTML.
            # quote=False leaves quotes alone; they are harmless in element content.
            safe_ticker = html.escape(str(ticker), quote=False)
            safe_signals = html.escape(str(signals), quote=False)
            email_parts.append(f"<li><b>{safe_ticker}:</b> {safe_signals}</li>\n")
        email_parts.extend([
            "</ul>\n",
            "\n<br>\n", # Add a break
        ])

    if 'performance_metrics_by_ticker_recommendation_df' in locals() and not performance_metrics_by_ticker_recommendation_df.empty:
        # Filter the performance metrics DataFrame for original picks for only the shortlisted tickers
        # (selection is done on the 'Ticker' level of the index)
        performance_metrics_shortlisted_original = performance_metrics_by_ticker_recommendation_df.loc[performance_metrics_by_ticker_recommendation_df.index.get_level_values('Ticker').isin(shortlisted_tickers_list)].copy()

        # Add the original performance metrics as an HTML table, followed by column descriptions
        email_parts.extend([
            "<h2>Historical Performance Metrics for Original Picks (Grouped by Ticker and Recommendation)</h2>\n",
            performance_metrics_shortlisted_original.to_html(),
            "<p><b>Column Descriptions:</b></p>\n",
            "<ul>\n",
            "<li><b>Total Picks:</b> The total number of times this Ticker and Recommendation combination occurred in the backtesting period.</li>\n",
            "<li><b>Winning Picks:</b> The number of times the price reached a new high within the holding period after the pick date (indicating a positive return).</li>\n",
            "<li><b>Losing Picks:</b> The number of times the price did not reach a new high within the holding period (indicating a zero or negative return).</li>\n",
            "<li><b>Win Rate (%):</b> The percentage of Winning Picks out of Total Picks.</li>\n",
            "<li><b>Loss Rate (%):</b> The percentage of Losing Picks out of Total Picks.</li>\n",
            "<li><b>Average Winning Return (%):</b> The average percentage return for all Winning Picks in this category.</li>\n",
            "<li><b>Average Losing Return (%):</b> The average percentage return for all Losing Picks in this category.</li>\n",
            "<li><b>Average Total Return (%):</b> The average percentage return for all Picks (Winning and Losing) in this category.</li>\n",
            "</ul>\n",
            "\n<br>\n", # Add a break between sections
        ])

    if 'signal_performance_by_ticker_df' in locals() and not signal_performance_by_ticker_df.empty:
        # Filter the performance metrics for new signals for only the shortlisted tickers.
        # The index of signal_performance_by_ticker_df is MultiIndex (Signal, Ticker),
        # so the filter is applied on the Ticker level of the index.
        signal_performance_shortlisted_new = signal_performance_by_ticker_df.loc[signal_performance_by_ticker_df.index.get_level_values('Ticker').isin(shortlisted_tickers_list)].copy()

        # Add the new signal performance metrics as an HTML table, followed by column descriptions
        email_parts.extend([
            "<h2>Winning Percentage for New Signal Conditions (Grouped by Signal and Ticker)</h2>\n",
            signal_performance_shortlisted_new.to_html(),
            "<p><b>Column Descriptions:</b></p>\n",
            "<ul>\n",
            "<li><b>Total Triggers:</b> The total number of times this Signal and Ticker combination was triggered in the backtesting period.</li>\n",
            "<li><b>Winning Triggers (based on Max Price):</b> The number of times the price reached a new high within the holding period after the signal trigger date (indicating a positive return).</li>\n",
            "<li><b>Winning Percentage (%):</b> The percentage of Winning Triggers out of Total Triggers for this Signal and Ticker.</li>\n",
            "</ul>\n",
            "\n<br>\n", # Add a break
        ])

    # Define the target directory in Google Drive (replicated from the folder-creation cell)
    date_today = datetime.now().strftime('%Y-%m-%d')
    drive_base_path = '/content/drive/MyDrive/stock_analysis_plots' # Adjust this path as needed
    drive_target_folder = os.path.join(drive_base_path, date_today)

    # Add the instruction pointing the reader to the Google Drive folder (no link is generated)
    drive_link_instruction = f"Please find the technical analysis plots in the Google Drive folder '{date_today}' within '{drive_base_path}'. You can navigate to this folder in your Google Drive and generate a shareable link if needed."
    email_parts.append(f"<p>{drive_link_instruction}</p>\n")

    # Add technical indicator significance
    email_parts.extend([
        "<h2>Technical Indicator Significance</h2>\n",
        "<p>Here is a brief explanation of the technical indicators used in the analysis and plots:</p>\n",
        "<ul>\n",
        "<li><b>RSI (Relative Strength Index):</b> A momentum oscillator that measures the speed and change of price movements. RSI values range from 0 to 100. Generally, RSI above 70 indicates overbought conditions, and below 30 indicates oversold conditions.</li>\n",
        "<li><b>MFI (Money Flow Index):</b> A momentum indicator that uses both price and volume to measure buying and selling pressure. MFI values range from 0 to 100. Generally, MFI above 80 indicates overbought conditions, and below 20 indicates oversold conditions.</li>\n",
        "<li><b>MACD (Moving Average Convergence Divergence):</b> A trend-following momentum indicator that shows the relationship between two moving averages of a security’s price. The MACD line crossing above the Signal Line is typically a bullish signal, and crossing below is a bearish signal.</li>\n",
        "<li><b>Williams %R:</b> A momentum indicator that measures overbought and oversold levels. Williams %R values range from 0 to -100. Generally, a reading between 0 and -20 is considered overbought, and between -80 and -100 is considered oversold.</li>\n",
        "<li><b>CCI (Commodity Channel Index):</b> An oscillator used to identify cyclical trends. CCI measures the difference between a security's price change and its average price change. Generally, readings above +100 suggest overbought conditions, and below -100 suggest oversold conditions. Some traders use wider thresholds like +200 and -200 for stronger signals.</li>\n",
        "<li><b>OBV (On-Balance Volume):</b> A momentum indicator that relates volume to price change. OBV is a running total of volume, where volume is added on up days and subtracted on down days. A rising OBV confirms a rising price trend, while a falling OBV confirms a falling price trend. Divergences between price and OBV can signal potential reversals.</li>\n",
        "<li><b>A/D Line (Accumulation/Distribution Line):</b> A volume-based indicator that assesses buying and selling pressure by measuring the cumulative flow of money into and out of a security. A rising A/D line suggests accumulation (buying pressure), while a falling A/D line suggests distribution (selling pressure). It's often used to confirm trends or spot divergences.</li>\n",
        "</ul>\n",
        "\n<br>\n",
    ])

else:
    # The message is attached as HTML below, so the fallback uses <p> tags;
    # bare "\n" line breaks would be collapsed by the mail client.
    email_parts.append("<p>Dear User,</p>\n<p>No relevant backtesting performance metrics found for the shortlisted tickers.</p>\n")

email_body = "".join(email_parts)


# Create the MIMEMultipart message
msg = MIMEMultipart() # Re-initialize msg
msg['From'] = sender_email
msg['To'] = receiver_email
msg['Subject'] = subject
msg.attach(MIMEText(email_body, 'html'))

# Note: The plot files were generated and saved to Google Drive in previous steps.
# We are not attaching them here, only pointing to the Drive folder in the email body.

# Connect to the Gmail SMTP server and send the email
try:
    with smtplib.SMTP_SSL('smtp.gmail.com', 465) as server:
        server.login(sender_email, sender_password)
        server.sendmail(sender_email, receiver_email, msg.as_string())
    print("Email with performance reports and Google Drive link sent successfully!")

except Exception as e:
    # Best effort: report the failure instead of aborting the notebook run
    print(f"Error sending email: {e}")

finally:
    # Clean up the local temporary plot files whether or not the send succeeded.
    # 'temp_plot_files' comes from the plotting cell and may be missing or empty.
    if 'temp_plot_files' in locals() and temp_plot_files:
        for plot_file in temp_plot_files:
            if os.path.exists(plot_file):
                os.remove(plot_file)
        print("Temporary plot files removed.")
    else:
        print("No temporary plot files to remove.")

## Summary:

### Data Analysis Key Findings

*   The process successfully generated and saved 19 temporary technical analysis plot files for the shortlisted tickers.
*   A new folder named with the current date was created in the specified Google Drive location.
*   All generated temporary plot files were successfully copied to their recommendation subfolders (Diamond_Pick, Golden_Pick, Silver_Pick) inside the date-stamped Google Drive folder.
*   The email report was successfully sent, containing the shortlisted tickers data, historical performance metrics for original picks and new signal conditions, and an instruction to access the technical analysis plots in the Google Drive folder.
*   The temporary plot files were successfully cleaned up after the email was sent.

### Insights or Next Steps

*   The current process provides instructions on how to find the plots in Google Drive but does not include a direct shareable link. A next step could be to programmatically generate a shareable link for the uploaded folder and include that link directly in the email for easier access.
