Importing Necessary Libraries


In [None]:
import pandas as pd
import numpy as np
import yfinance as yf
import datetime

Defining the Custom DataFrame Creation Function


In [None]:
# Helper used by create_custom_df: fetch one symbol's daily closes as a two-column frame
def _download_close_prices(symbol, start_date, end_date, column_name):
    """Download daily close prices for ``symbol`` as a ``['Date', column_name]`` frame.

    Args:
        symbol: Ticker symbol passed to ``yf.download``.
        start_date: First date to request (``pd.Timestamp``).
        end_date: Last date to request, inclusive (``pd.Timestamp``).
        column_name: Name given to the close-price column in the result.

    Returns:
        DataFrame with a ``Date`` column and one close-price column named
        ``column_name``.

    Raises:
        KeyError: If the downloaded frame has no ``Date``/``Close`` columns
            (for example when nothing was downloaded).
    """
    # yfinance treats ``end`` as exclusive; request one extra day so the end
    # date itself is covered, matching the inclusive filter applied to ``days``.
    price_data = yf.download(
        symbol,
        start=start_date,
        end=end_date + pd.Timedelta(days=1),
        interval='1d',
    )
    # Some yfinance versions return (field, ticker) MultiIndex columns even for
    # a single symbol; keep only the field level so 'Close' is a plain column
    # that can be merged with the single-level custom DataFrame.
    if isinstance(price_data.columns, pd.MultiIndex):
        price_data.columns = price_data.columns.get_level_values(0)
    if 'Date' not in price_data.columns:
        price_data = price_data.reset_index()
    return price_data[['Date', 'Close']].rename(columns={'Close': column_name})


# Function to create the custom DataFrame
def create_custom_df(start_date, end_date, finder_indexes, nasdaq_etfs, days, market_caps, primary_market_caps, most_frequent_tickers):
    """Build one DataFrame of Finder indexes, their top tickers and NASDAQ ETFs by date.

    Args:
        start_date: First date to include (anything ``pd.to_datetime`` accepts).
        end_date: Last date to include, inclusive.
        finder_indexes: Iterable of Finder index names, with or without the
            ``_MC`` suffix.
        nasdaq_etfs: Mapping of ETF symbol -> display name; each ETF's close is
            added as a ``NASDAQ <name>`` column.
        days: DataFrame with a datetime ``Date`` column listing candidate dates.
        market_caps: Currently unused; kept so existing callers keep working.
        primary_market_caps: Mapping of sheet name -> DataFrame. Each used sheet
            needs a ``Date`` column and either an ``index`` column or a column
            named after the Finder index. These frames are not modified.
        most_frequent_tickers: Mapping of sheet name -> ticker symbol whose
            close is added as a ``<ticker>-<index name>`` column.

    Returns:
        DataFrame with a ``Date`` column plus one column per merged series.
        Rows in which every non-``Date`` column is NaN are dropped.
    """
    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)

    # Start from the requested (inclusive) date range only.
    in_range = (days['Date'] >= start_date) & (days['Date'] <= end_date)
    custom_df = days.loc[in_range, ['Date']].copy()
    custom_df['Date'] = pd.to_datetime(custom_df['Date'])

    # Add Finder sector indexes data
    for index in finder_indexes:
        index_sheet = index if index.endswith('_MC') else f'{index}_MC'
        index_name = index.replace('_MC', '')

        if index_sheet not in primary_market_caps:
            print(f"Warning: sheet {index_sheet} not found in primary_market_caps")
            continue

        sheet_df = primary_market_caps[index_sheet]
        if 'Date' not in sheet_df.columns:
            print(f"Error: 'Date' column not found in sheet {index_sheet}")
            continue

        if 'index' in sheet_df.columns:
            value_column = 'index'
        elif index in sheet_df.columns:
            value_column = index
        else:
            print(f"Warning: Column {index} not found in sheet {index_sheet}")
            continue

        # Work on a fresh two-column frame so the caller's sheet is left untouched.
        df_index = (
            sheet_df[['Date', value_column]]
            .rename(columns={value_column: f'Finder {index_name}'})
            .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
        )
        print(f"Merging {index_sheet} with {len(df_index)} rows")
        custom_df = custom_df.merge(df_index, on='Date', how='left')

        # Add the most frequent ticker data
        if index_sheet in most_frequent_tickers:
            most_frequent_ticker = most_frequent_tickers[index_sheet]
            try:
                ticker_data = _download_close_prices(
                    most_frequent_ticker, start_date, end_date,
                    f'{most_frequent_ticker}-{index_name}',
                )
                print(f"Merging Ticker {most_frequent_ticker}-{index_name} with {len(ticker_data)} rows")
                custom_df = custom_df.merge(ticker_data, on='Date', how='left')
            except Exception as e:
                # Best effort: a missing ticker must not abort the whole build.
                print(f"Failed to download data for ticker {most_frequent_ticker}: {e}")

    # Add NASDAQ sector ETFs data
    for etf, etf_name in nasdaq_etfs.items():
        try:
            etf_data = _download_close_prices(etf, start_date, end_date, f'NASDAQ {etf_name}')
            print(f"Merging ETF NASDAQ {etf_name} with {len(etf_data)} rows")
            custom_df = custom_df.merge(etf_data, on='Date', how='left')
        except Exception as e:
            # Best effort: a missing ETF must not abort the whole build.
            print(f"Failed to download data for ETF {etf}: {e}")

    # Remove rows where all elements are NaN except the 'Date' column
    value_columns = custom_df.columns.difference(['Date'])
    custom_df = custom_df.dropna(how='all', subset=value_columns)

    return custom_df


Defining the Normalization Function


In [None]:
def normalize_dataframe(df):
    """Rebase every non-'Date' column so its first valid value equals 100.

    Args:
        df: DataFrame with a 'Date' column and numeric value columns.

    Returns:
        A copy of ``df`` in which each value column is divided by its first
        non-NaN value and multiplied by 100. A column without any valid value
        is filled with NaN and a message is printed.
    """
    normalized_df = df.copy()
    value_columns = [name for name in df.columns if name != 'Date']

    for name in value_columns:
        anchor = df[name].first_valid_index()
        if anchor is not None:
            base = df.at[anchor, name]
            normalized_df[name] = (df[name] / base) * 100
            # Pin the anchor row to exactly 100.
            normalized_df.at[anchor, name] = 100
            continue
        print(f"No valid data for {name}. Skipping normalization for this column.")
        normalized_df[name] = np.nan

    print(f"Normalized DataFrame:\n{normalized_df.head()}")
    return normalized_df


Loading Data from Excel File


In [None]:
# Load data from the provided Excel file.
# NOTE(review): absolute, user-specific path — consider moving it to a config cell.
file_path = 'C:\\Users\\odeya.h\\SNC Dropbox\\Odeya Hazani Cohen\\פרוייקט\\calculate data.xlsx'
# The workbook handle stays open because a later cell calls xl.parse() again.
xl = pd.ExcelFile(file_path)

# Read necessary sheets into DataFrames
market_caps = xl.parse('Overall Market Caps_MC')

# Extract primary categories and their market caps, excluding 'Failed Indexes', 'Failed Tickers', 'NASDAQ Index ', and 'Overall Market Caps_MC'
exclude_sheets = ['Failed Indexes', 'Failed Tickers', 'NASDAQ Index ', 'Overall Market Caps_MC']
primary_market_caps = {
    sheet_name: xl.parse(sheet_name)
    for sheet_name in xl.sheet_names
    if sheet_name not in exclude_sheets
}

# Ensure the 'Date' column in all DataFrames is datetime64[ns]
for key, sheet_df in primary_market_caps.items():
    if 'Date' not in sheet_df.columns:
        print(f"Warning: 'Date' column not found in sheet {key}")
        # Skip the conversion: indexing the missing column would raise KeyError.
        continue
    sheet_df['Date'] = pd.to_datetime(sheet_df['Date'])


Creating the Days DataFrame and Defining Sector Indexes and ETFs


In [None]:
# Calendar of every day in the analysis window (inclusive on both ends)
start_date = datetime.date(2019, 1, 1)
end_date = datetime.date(2024, 6, 1)
calendar_index = pd.date_range(start=start_date, end=end_date, freq='D', name='Date')
days = pd.DataFrame({'Date': pd.to_datetime(calendar_index)})

# Finder sector index sheet names (one per sector, '_MC' suffix)
finder_indexes = [
    'Retail & Marketing_MC',
    'Content & Media_MC',
    'Enterprise, IT & Data Infras_MC',
    'Security Technologies_MC',
    'FinTech_MC',
    'Industrial Technologies_MC',
    'Smart Mobility_MC',
    'Life Sciences & HealthTech_MC',
    'Energy-tech_MC',
    'Aerospace & Aviation_MC',
    'AgriFood-tech & Water_MC',
]

# NASDAQ sector ETF symbol -> full sector name it is compared against
nasdaq_etfs = dict([
    ('XRT', 'Retail & Marketing'),
    ('PBS', 'Content & Media'),
    ('QQQ', 'Enterprise, IT & Data Infras'),
    ('CIBR', 'Security Technologies'),
    ('FINX', 'FinTech'),
    ('PRN', 'Industrial Technologies'),
    ('IDRV', 'Smart Mobility'),
    ('IBB', 'Life Sciences & HealthTech'),
    ('QCLN', 'Energy-tech'),
    ('PPA', 'Aerospace & Aviation'),
    ('PHO', 'AgriFood-tech & Water'),
])


Finding the Most Frequent Tickers


In [None]:
# Dictionary to store the most frequent tickers (sheet name -> ticker)
most_frequent_tickers = {}

# Find the most frequent ticker in the 'highest value ticker' column for each Finder sector
for sheet_name in finder_indexes:
    sector_df = xl.parse(sheet_name)
    if 'highest value ticker' not in sector_df.columns:
        continue
    ticker_modes = sector_df['highest value ticker'].mode()
    # mode() ignores NaN, so a column holding only NaN yields an empty Series;
    # indexing it with [0] would raise instead of skipping the sector.
    if ticker_modes.empty:
        print(f"Warning: no ticker values found in sheet {sheet_name}")
        continue
    # On ties mode() returns several values; the first one is used.
    most_frequent_tickers[sheet_name] = ticker_modes.iloc[0]


Creating the Custom DataFrame and Normalizing It


In [None]:
# Assemble the combined frame from the inputs prepared in the cells above
custom_df = create_custom_df(
    start_date=start_date,
    end_date=end_date,
    finder_indexes=finder_indexes,
    nasdaq_etfs=nasdaq_etfs,
    days=days,
    market_caps=market_caps,
    primary_market_caps=primary_market_caps,
    most_frequent_tickers=most_frequent_tickers,
)

# Show a preview of the raw values, then rebase every series to 100
print(f"Custom DataFrame before normalization:\n{custom_df.head()}")
normalized_df = normalize_dataframe(custom_df)


Saving the DataFrames to an Excel File


In [None]:
# Write the raw and the normalized frames to one workbook, one sheet each
output_file_path = 'custom_data_indexes_only_with_full_names.xlsx'
sheets_to_write = (
    ('Custom Data', custom_df),
    ('Normalized Data', normalized_df),
)
with pd.ExcelWriter(output_file_path) as writer:
    for sheet_title, sheet_frame in sheets_to_write:
        sheet_frame.to_excel(writer, sheet_name=sheet_title, index=False)

print(f"DataFrames have been saved to {output_file_path}")


Saving the DataFrames to a Second Excel File (xlsxwriter Engine)

In [None]:
# NOTE(review): absolute, user-specific path — consider moving it to a config cell.
output_path = 'C:\\Users\\odeya.h\\SNC Dropbox\\Odeya Hazani Cohen\\פרוייקט\\sector_data_2019.xlsx'

# The chart has to be built while the writer is still open: leaving the `with`
# block closes the workbook and writes the file, so anything added to
# `writer.book` afterwards never reaches the saved file. For that reason the
# sheet export and all chart cells live in this single cell.
with pd.ExcelWriter(output_path, engine='xlsxwriter') as writer:
    custom_df.to_excel(writer, sheet_name='Custom Data', index=False)
    normalized_df.to_excel(writer, sheet_name='Normalized Data', index=False)

    # --- Creating and Configuring the Chart ---
    # Create a chart object
    workbook = writer.book
    worksheet = writer.sheets['Normalized Data']
    chart = workbook.add_chart({'type': 'line'})

    # Row 0 holds the header, so the data occupies rows 1..len(normalized_df).
    last_data_row = len(normalized_df)

    # Configure the series of the chart from the DataFrame data
    for i, column in enumerate(normalized_df.columns[1:]):  # Skip the 'Date' column
        chart.add_series({
            'name':       [worksheet.name, 0, i + 1],
            'categories': [worksheet.name, 1, 0, last_data_row, 0],
            'values':     [worksheet.name, 1, i + 1, last_data_row, i + 1],
        })

    # --- Formatting the Chart ---
    # Configure the chart title with professional formatting
    chart.set_title({
        'name': 'Index Comparison',
        'name_font': {
            'bold': True,
            'size': 14,
            'name': 'Arial'
        }
    })

    # Configure the chart axes with professional formatting and the new date format
    chart.set_x_axis({
        'name': 'Date',
        'date_axis': True,
        'num_format': 'dd-mm-yyyy',
        'name_font': {
            'bold': True,
            'size': 12,
            'name': 'Arial'
        },
        'label_font': {
            'size': 10,
            'name': 'Arial'
        },
        'major_gridlines': {
            'visible': True,
            'line': {'width': 0.75, 'dash_type': 'dash'}
        },
        'minor_gridlines': {
            'visible': True,
            'line': {'width': 0.25, 'dash_type': 'dash'}
        }
    })
    chart.set_y_axis({
        'name': 'Normalized Value',
        'name_font': {
            'bold': True,
            'size': 12,
            'name': 'Arial'
        },
        'label_font': {
            'size': 10,
            'name': 'Arial'
        },
        'major_gridlines': {
            'visible': True,
            'line': {'width': 0.75, 'dash_type': 'dash'}
        },
        'minor_gridlines': {
            'visible': True,
            'line': {'width': 0.25, 'dash_type': 'dash'}
        }
    })

    # --- Customizing the Legend and Inserting the Chart ---
    # Customize the legend
    chart.set_legend({
        'position': 'bottom',
        'font': {
            'size': 10,
            'name': 'Arial'
        }
    })

    # Adjust the chart size and positioning
    worksheet.insert_chart('G2', chart, {'x_offset': 25, 'y_offset': 10, 'x_scale': 1.5, 'y_scale': 1.5})


Print Confirmation Message

In [None]:
# Tell the user where the xlsxwriter workbook (output_path) was written.
print(f"Custom data with graph has been saved to {output_path}")