### this is just to check functionalities

#### lets run s&p 500 code in parallel in fragments


In [41]:
import pandas as pd
import torch


import yfinance as yf
import pandas_ta as ta
import os


In [66]:
# Define a function to format numbers based on their value
def custom_format(value):
    if abs(value) >= 10000:
        # Format numbers >= 100,000 with commas and no decimals
        return f'{value:,.0f}'
    elif abs(value) >= 1000:
        # Format numbers > 1000 without commas and decimals
        return f'{value:.0f}'
    else:
        return f'{value:.2f}'

# Define a styling function for red negative numbers with parentheses
def format_negative_red(value):
    if value < 0:
        return f'color: red; font-weight: bold;'  # CSS for red color
    return None

def format_negative_parentheses(value):
    formatted_value = custom_format(value)
    return f'({formatted_value})' if value < 0 else formatted_value

def display_df(df):
    return df.style.applymap(format_negative_red).format(format_negative_parentheses)

In [67]:
# Function to get S&P 500 constituents
def get_sp500_constituents():
    sp500_url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
    df = pd.read_html(sp500_url, header=0)[0]
    return df


# Function to calculate weights based on market cap
def calculate_weights(df):
    df['Weight'] = df['Market Cap'] / df['Market Cap'].sum() * 100
    return df


# function to deal with multi-level index
def organize_columns(stock_data):
    refined_stock_data = stock_data.reset_index()
    # test_df = analysis_results['AAPL']['Close'].reset_index()
    refined_stock_data.columns = ['Date', 'Close']
    refined_stock_data.set_index(['Date'], inplace=True)
    
    return refined_stock_data


# Function to get stock data and technical analysis
def analyze_stock(stock_symbol, start_date, end_date):
    # Download historical data from Yahoo Finance
    stock_data = yf.download(stock_symbol, start=start_date, end=end_date)
    close_px = organize_columns(stock_data=stock_data['Close'])
    # display(close_px)

    # Calculate technical indicators
    stock_data['SMA_50'] = ta.sma(close_px['Close'], length=50)  # 50-day Simple Moving Average
    # print(ta.sma(close_px['Close'], length=50))
    stock_data['SMA_200'] = ta.sma(close_px['Close'], length=200)  # 200-day Simple Moving Average
    stock_data['RSI'] = ta.rsi(close_px['Close'], length=14)  # Relative Strength Index

    # MACD
    macd = ta.macd(close_px['Close'])
    stock_data['MACD'] = macd['MACD_12_26_9']
    stock_data['Signal'] = macd['MACDs_12_26_9']
    stock_data['Histogram'] = macd['MACDh_12_26_9']

    return stock_data


In [71]:
def get_sp500():
    sp500_df = get_sp500_constituents()

    sp500_df['Market Cap'] = [100000 + i * 1000 for i in range(len(sp500_df))] # just random numbers
    sp500_df = calculate_weights(sp500_df)

    # folder_name = "data"
    # if not os.path.exists(folder_name):
    #     os.makedirs(folder_name)

    return sp500_df

# Analyze each stock
def analyse_stock(symbol):
    start_date = "2023-01-01"
    end_date = "2023-12-01"
    analysis_results = {}

    # for symbol in sp500_df['Symbol']:
    try:
        stock_symbol = f"{symbol}"  # S&P 500 symbols don't require suffixes like ".NS"
        stock_analysis = analyze_stock(stock_symbol, start_date, end_date)
        analysis_results[symbol] = stock_analysis
        print(f"Analyzed {symbol}")
        return stock_analysis
    
    except Exception as e:
        print(f"Error analyzing {symbol}: {e}")

    


In [78]:
# sp500_df['Symbol'].sample(5)
# stock_symbol = "AAPL"
output_folder = os.path.join("data", "us_markets")
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

# Save the SP500 constituents to a CSV file in the created subfolder
output_file = os.path.join(output_folder, f"sp500_constituents.csv")
# override = False
if os.path.exists(output_file):
    sp500_df = pd.read_csv(output_file)
    print(f"SP500 loaded from {output_file}")
else:
    sp500_df = get_sp500()
    sp500_df.to_csv(output_file)
    print(f"SP500 saved to {output_file}")

analysis_output_folder = os.path.join("data", "us_markets", "analysis")
if not os.path.exists(analysis_output_folder):
    os.makedirs(analysis_output_folder)

analysis_results = {}
for symbol in sp500_df['Symbol']:
    output_file = os.path.join(analysis_output_folder, f"{symbol}_analysis.csv")
    if os.path.exists(output_file):
        temp_df = pd.read_csv(output_file)
        analysis_results[symbol] = temp_df
        print(f"Analysis of {symbol} loaded from {output_file}")
    else:
        temp_df = analyse_stock(symbol=symbol)
        if temp_df is not None:
            temp_df.to_csv(output_file)
            analysis_results[symbol] = temp_df
            print(f"Analysis of {symbol} saved to {output_file}")
        else:
            print(f"Analysis of {symbol} failed")





SP500 loaded from data/us_markets/sp500_constituents.csv
Analysis of MMM loaded from data/us_markets/analysis/MMM_analysis.csv
Analysis of AOS loaded from data/us_markets/analysis/AOS_analysis.csv
Analysis of ABT loaded from data/us_markets/analysis/ABT_analysis.csv
Analysis of ABBV loaded from data/us_markets/analysis/ABBV_analysis.csv
Analysis of ACN loaded from data/us_markets/analysis/ACN_analysis.csv
Analysis of ADBE loaded from data/us_markets/analysis/ADBE_analysis.csv
Analysis of AMD loaded from data/us_markets/analysis/AMD_analysis.csv
Analysis of AES loaded from data/us_markets/analysis/AES_analysis.csv
Analysis of AFL loaded from data/us_markets/analysis/AFL_analysis.csv
Analysis of A loaded from data/us_markets/analysis/A_analysis.csv
Analysis of APD loaded from data/us_markets/analysis/APD_analysis.csv
Analysis of ABNB loaded from data/us_markets/analysis/ABNB_analysis.csv
Analysis of AKAM loaded from data/us_markets/analysis/AKAM_analysis.csv
Analysis of ALB loaded from d

[*********************100%***********************]  1 of 1 completed
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['AMTM']: YFPricesMissingError('$%ticker%: possibly delisted; no price data found  (1d 2023-01-01 -> 2023-12-01) (Yahoo error = "Data doesn\'t exist for startDate = 1672549200, endDate = 1701406800")')


Error analyzing AMTM: 'NoneType' object is not subscriptable
Analysis of AMTM failed
Analysis of AEE loaded from data/us_markets/analysis/AEE_analysis.csv
Analysis of AEP loaded from data/us_markets/analysis/AEP_analysis.csv
Analysis of AXP loaded from data/us_markets/analysis/AXP_analysis.csv
Analysis of AIG loaded from data/us_markets/analysis/AIG_analysis.csv
Analysis of AMT loaded from data/us_markets/analysis/AMT_analysis.csv
Analysis of AWK loaded from data/us_markets/analysis/AWK_analysis.csv
Analysis of AMP loaded from data/us_markets/analysis/AMP_analysis.csv
Analysis of AME loaded from data/us_markets/analysis/AME_analysis.csv
Analysis of AMGN loaded from data/us_markets/analysis/AMGN_analysis.csv
Analysis of APH loaded from data/us_markets/analysis/APH_analysis.csv
Analysis of ADI loaded from data/us_markets/analysis/ADI_analysis.csv
Analysis of ANSS loaded from data/us_markets/analysis/ANSS_analysis.csv
Analysis of AON loaded from data/us_markets/analysis/AON_analysis.csv
A

[*********************100%***********************]  1 of 1 completed
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['BRK.B']: YFTzMissingError('$%ticker%: possibly delisted; no timezone found')
[*********************100%***********************]  1 of 1 completed
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['BF.B']: YFPricesMissingError('$%ticker%: possibly delisted; no price data found  (1d 2023-01-01 -> 2023-12-01)')


Error analyzing BRK.B: 'NoneType' object is not subscriptable
Analysis of BRK.B failed
Analysis of BBY loaded from data/us_markets/analysis/BBY_analysis.csv
Analysis of TECH loaded from data/us_markets/analysis/TECH_analysis.csv
Analysis of BIIB loaded from data/us_markets/analysis/BIIB_analysis.csv
Analysis of BLK loaded from data/us_markets/analysis/BLK_analysis.csv
Analysis of BX loaded from data/us_markets/analysis/BX_analysis.csv
Analysis of BK loaded from data/us_markets/analysis/BK_analysis.csv
Analysis of BA loaded from data/us_markets/analysis/BA_analysis.csv
Analysis of BKNG loaded from data/us_markets/analysis/BKNG_analysis.csv
Analysis of BWA loaded from data/us_markets/analysis/BWA_analysis.csv
Analysis of BSX loaded from data/us_markets/analysis/BSX_analysis.csv
Analysis of BMY loaded from data/us_markets/analysis/BMY_analysis.csv
Analysis of AVGO loaded from data/us_markets/analysis/AVGO_analysis.csv
Analysis of BR loaded from data/us_markets/analysis/BR_analysis.csv
Ana

[*********************100%***********************]  1 of 1 completed
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['GEV']: YFPricesMissingError('$%ticker%: possibly delisted; no price data found  (1d 2023-01-01 -> 2023-12-01) (Yahoo error = "Data doesn\'t exist for startDate = 1672549200, endDate = 1701406800")')


Error analyzing GEV: 'NoneType' object is not subscriptable
Analysis of GEV failed
Analysis of GEN loaded from data/us_markets/analysis/GEN_analysis.csv
Analysis of GNRC loaded from data/us_markets/analysis/GNRC_analysis.csv
Analysis of GD loaded from data/us_markets/analysis/GD_analysis.csv
Analysis of GIS loaded from data/us_markets/analysis/GIS_analysis.csv
Analysis of GM loaded from data/us_markets/analysis/GM_analysis.csv
Analysis of GPC loaded from data/us_markets/analysis/GPC_analysis.csv
Analysis of GILD loaded from data/us_markets/analysis/GILD_analysis.csv
Analysis of GPN loaded from data/us_markets/analysis/GPN_analysis.csv
Analysis of GL loaded from data/us_markets/analysis/GL_analysis.csv
Analysis of GDDY loaded from data/us_markets/analysis/GDDY_analysis.csv
Analysis of GS loaded from data/us_markets/analysis/GS_analysis.csv
Analysis of HAL loaded from data/us_markets/analysis/HAL_analysis.csv
Analysis of HIG loaded from data/us_markets/analysis/HIG_analysis.csv
Analysis 

[*********************100%***********************]  1 of 1 completed
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['SW']: YFPricesMissingError('$%ticker%: possibly delisted; no price data found  (1d 2023-01-01 -> 2023-12-01) (Yahoo error = "Data doesn\'t exist for startDate = 1672549200, endDate = 1701406800")')


Error analyzing SW: 'NoneType' object is not subscriptable
Analysis of SW failed
Analysis of SNA loaded from data/us_markets/analysis/SNA_analysis.csv


[*********************100%***********************]  1 of 1 completed
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['SOLV']: YFPricesMissingError('$%ticker%: possibly delisted; no price data found  (1d 2023-01-01 -> 2023-12-01) (Yahoo error = "Data doesn\'t exist for startDate = 1672549200, endDate = 1701406800")')


Error analyzing SOLV: 'NoneType' object is not subscriptable
Analysis of SOLV failed
Analysis of SO loaded from data/us_markets/analysis/SO_analysis.csv
Analysis of LUV loaded from data/us_markets/analysis/LUV_analysis.csv
Analysis of SWK loaded from data/us_markets/analysis/SWK_analysis.csv
Analysis of SBUX loaded from data/us_markets/analysis/SBUX_analysis.csv
Analysis of STT loaded from data/us_markets/analysis/STT_analysis.csv
Analysis of STLD loaded from data/us_markets/analysis/STLD_analysis.csv
Analysis of STE loaded from data/us_markets/analysis/STE_analysis.csv
Analysis of SYK loaded from data/us_markets/analysis/SYK_analysis.csv
Analysis of SMCI loaded from data/us_markets/analysis/SMCI_analysis.csv
Analysis of SYF loaded from data/us_markets/analysis/SYF_analysis.csv
Analysis of SNPS loaded from data/us_markets/analysis/SNPS_analysis.csv
Analysis of SYY loaded from data/us_markets/analysis/SYY_analysis.csv
Analysis of TMUS loaded from data/us_markets/analysis/TMUS_analysis.c

In [None]:
# # Save results
# sp500_df.to_csv("sp500_constituents.csv", index=False)
# for symbol, analysis in analysis_results.items():
#     analysis.to_csv(f"{symbol}_analysis.csv")

# print("Analysis complete. Files saved.")

# Save results
# file_path = os.path.join(folder_name, "sp500_constituents.csv")
# sp500_df.to_csv(file_path, index=False)
# for symbol, analysis in analysis_results.items():
#     temp_path = os.path.join(folder_name, f"{symbol}_analysis.csv")
#     analysis.to_csv(temp_path)

# print("Analysis complete. Files saved.")



In [None]:
# # need to write code to load data and results

# # stock_symbol = "AAPL"
# output_folder = os.path.join("data", "us_markets")
# if not os.path.exists(output_folder):
#     os.makedirs(output_folder)

# # Save the SP500 constituents to a CSV file in the created subfolder
# output_file = os.path.join(output_folder, f"sp500_constituents.csv")
# override = False
# if os.path.exists(output_file) and not override:
#     gdp_df = pd.read_csv(output_file)
#     print(f"SP500 loaded from {output_file}")
# else:
#     gdp_df = get_aapl_data()
#     gdp_df.to_csv(output_file)
#     print(f"SP500 saved to {output_file}")


In [5]:
import yfinance as yf
import plotly.graph_objs as go

def create_market_cap_chart(symbol, start_date, end_date):
    # Fetch historical data
    stock = yf.Ticker(symbol)
    hist = stock.history(start=start_date, end=end_date)
    
    # Calculate market cap (assuming shares outstanding remains constant)
    shares_outstanding = stock.info['sharesOutstanding']
    hist['Market Cap'] = hist['Close'] * shares_outstanding
    
    # Create the chart
    fig = go.Figure()
    
    # Add price line
    fig.add_trace(go.Scatter(
        x=hist.index,
        y=hist['Close'],
        name='Stock Price',
        line=dict(color='blue')
    ))
    
    # Add market cap line
    fig.add_trace(go.Scatter(
        x=hist.index,
        y=hist['Market Cap'],
        name='Market Cap',
        yaxis='y2',
        line=dict(color='green')
    ))
    
    # Update layout
    fig.update_layout(
        title=f'{symbol} Stock Price and Market Cap Over Time',
        xaxis_title='Date',
        yaxis_title='Stock Price (USD)',
        yaxis2=dict(
            title='Market Cap (USD)',
            overlaying='y',
            side='right'
        ),
        legend=dict(x=0, y=1, traceorder='normal')
    )
    
    return fig

# Example usage
symbol = 'AAPL'
start_date = '2020-01-01'
end_date = '2024-12-22'
fig = create_market_cap_chart(symbol, start_date, end_date)
fig.show()


In [63]:
import yfinance as yf
import pandas as pd


def get_aapl_data():

    # Define the stock symbol (e.g., Apple: AAPL) and the time range
    stock_symbol = "AAPL"  # Replace with the stock symbol of your choice
    start_date = "1993-01-01"  # 30 years back from 2023
    end_date = "2023-12-31"

    # Fetch historical stock data
    stock_data = yf.Ticker(stock_symbol)
    historical_data = stock_data.history(start=start_date, end=end_date)

    # Fetch additional information like shares outstanding
    shares_outstanding = stock_data.info.get("sharesOutstanding", "N/A")

    # Format the historical data to include shares outstanding
    historical_data["Shares Outstanding"] = shares_outstanding

    # Save or display the data
    # display_df(historical_data.sample(50))

    return historical_data

stock_symbol = "AAPL"
output_folder = os.path.join("data", "us_markets")
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

# Save the GDP data to a CSV file in the created subfolder
output_file = os.path.join(output_folder, f"{stock_symbol}_historical_stock_data.csv")
if os.path.exists(output_file):
    gdp_df = pd.read_csv(output_file)
    print(f"GDP data loaded from {output_file}")
else:
    gdp_df = get_aapl_data()
    gdp_df.to_csv(output_file)
    print(f"GDP data saved to {output_file}")

# # Save to CSV (optional)
# historical_data.to_csv(f"{stock_symbol}_historical_stock_data.csv")


GDP data saved to data/us_markets/AAPL_historical_stock_data.csv


In [8]:
file_path = os.path.join(output_folder, "plot_aapl.html")

fig.write_html(file_path)
# folder_name


In [58]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import datetime

# Define a function to fetch current S&P 500 constituents
def fetch_sp500_constituents():
    url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table', {'id': 'constituents'})
    
    # Parse the table to get the list of companies
    df = pd.read_html(str(table))[0]
    return df['Symbol'].tolist()

# Fetch current constituents as a starting point
current_constituents = fetch_sp500_constituents()

# Placeholder for historical data collection logic
# This would typically involve accessing a financial data API or database
def fetch_historical_rebalance_data():
    # Example structure for storing historical data
    # In practice, this would be filled with real historical data
    historical_data = {
        '1999-03-31': ['AAPL', 'MSFT', 'GOOGL'],  # Example constituents at a past date
        '2000-06-30': ['AAPL', 'MSFT', 'AMZN'],   # Changes over time
        # ... more historical dates and constituents ...
    }
    return historical_data

# Analyze changes in constituents over time
def analyze_rebalance_changes(historical_data):
    changes = {}
    previous_constituents = set()
    
    for date, constituents in sorted(historical_data.items()):
        current_set = set(constituents)
        added = current_set - previous_constituents
        removed = previous_constituents - current_set
        
        changes[date] = {
            'added': added,
            'removed': removed
        }
        
        previous_constituents = current_set
    
    return changes

# Fetch historical rebalance data (this is a placeholder function)
historical_data = fetch_historical_rebalance_data()

# Analyze changes in the S&P 500 constituents over time
rebalance_changes = analyze_rebalance_changes(historical_data)

# Output the rebalance changes for inspection
for date, change in rebalance_changes.items():
    print(f"Date: {date}")
    print(f"Added: {change['added']}")
    print(f"Removed: {change['removed']}")
    print("\n")


Date: 1999-03-31
Added: {'GOOGL', 'AAPL', 'MSFT'}
Removed: set()


Date: 2000-06-30
Added: {'AMZN'}
Removed: {'GOOGL'}





Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.



#### this is with options data

In [None]:
# Analyze each stock and options -- nothing came out of this
start_date = "2023-01-01"
end_date = "2023-12-01"
analysis_results = {}
options_results = {}

for symbol in sp500_df['Symbol'].sample(5):
    try:
        stock_symbol = f"{symbol}"  # S&P 500 symbols don't require suffixes like ".NS"

        # Analyze stock data
        stock_analysis = analyze_stock(stock_symbol, start_date, end_date)
        analysis_results[symbol] = stock_analysis

        print(f"Analyzed {symbol}")
    except Exception as e:
        print(f"Error analyzing {symbol}: {e}")

In [None]:
# Main script
if __name__ == "__main__":
    try:
        # Get S&P 500 constituents
        sp500_df = get_sp500_constituents()
        
        # Add market capitalization column manually (replace this with API if available)
        sp500_df['Market Cap'] = [100000 + i * 1000 for i in range(len(sp500_df))]  # Placeholder values

        # Calculate index weights
        sp500_df = calculate_weights(sp500_df)

        # Analyze each stock
        start_date = "2023-01-01"
        end_date = "2023-12-01"
        analysis_results = {}
        options_results = {}

        for symbol in sp500_df['Symbol']:
            try:
                stock_symbol = f"{symbol}"  # S&P 500 symbols don't require suffixes like ".NS"

                # Analyze stock data
                stock_analysis = analyze_stock(stock_symbol, start_date, end_date)
                analysis_results[symbol] = stock_analysis

                # # Fetch options data
                # options_data = get_options_data(stock_symbol)
                # options_results[symbol] = options_data

                print(f"Analyzed {symbol}")
            except Exception as e:
                print(f"Error analyzing {symbol}: {e}")

        # Save results
        sp500_df.to_csv("sp500_constituents.csv", index=False)
        for symbol, analysis in analysis_results.items():
            analysis.to_csv(f"{symbol}_analysis.csv")

        # for symbol, options in options_results.items():
        #     with open(f"{symbol}_options.json", "w") as f:
        #         f.write(pd.DataFrame(options).to_json())

        print("Analysis complete. Files saved.")
    except Exception as e:
        print(f"Error: {e}")

### Creating proper visualization of WB gdp data

In [None]:
# combining all parts here

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

import wbdata
import pandas as pd
import datetime


In [32]:
#  load actual gdp data 

def get_gdp_data():
    # Define the date range (past 35 years)
    start_date = datetime.datetime(1989, 1, 1)
    end_date = datetime.datetime(2024, 1, 1)

    # Define the top 25 economies by ISO country codes (based on search results)
    countries = [
        'US', 'CN', 'JP', 'DE', 'IN', 'GB', 'FR', 'BR', 'IT', 'CA',
        'KR', 'RU', 'AU', 'ES', 'MX', 'ID', 'NL', 'SA', 'TR', 'CH',
        'TW', 'PL', 'SE', 'TH', 'BE'
    ]

    # Define the GDP indicator (current US$)
    indicator = {'NY.GDP.MKTP.CD': 'GDP (current US$)'}

    # Retrieve GDP data from the World Bank API
    df = wbdata.get_dataframe(indicator, country=countries, date=(start_date, end_date), 
                            #   convert_date=False
                            )

    # Reshape the data for better readability
    df = df.reset_index()
    df_pivot = df.pivot(index='date', columns='country', values='GDP (current US$)')

    return df_pivot



output_folder = os.path.join("data", "gdp_data")
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

# Save the GDP data to a CSV file in the created subfolder
output_file = os.path.join(output_folder, "top_25_economies_gdp_1989_2024.csv")
if os.path.exists(output_file):
    gdp_df = pd.read_csv(output_file)
    print(f"GDP data loaded from {output_file}")
else:
    gdp_df = get_gdp_data()
    gdp_df.to_csv(output_file)
    print(f"GDP data saved to {output_file}")





GDP data loaded from data/gdp_data/top_25_economies_gdp_1989_2024.csv


In [None]:
# df = pd.DataFrame(data)

# Melt the DataFrame for Plotly compatibility
df_melted = gdp_df.melt(id_vars=["date"], var_name="Country", value_name="GDP")

# # Sort the data dynamically by GDP within each year
# def reorder_categories(frame):
#     sorted_countries = frame.sort_values(by="GDP", ascending=False)["Country"].tolist()
#     frame["Country"] = pd.Categorical(frame["Country"], categories=sorted_countries, ordered=True)
#     return frame

# df_melted = df_melted.groupby("Year", group_keys=False).apply(reorder_categories)

# Create the Plotly animation
fig = px.bar(
    df_melted,
    x="Country",
    y="GDP",
    color="Country",
    animation_frame="date",
    title="Top 5 Economies Over Time",
    labels={"GDP": "GDP (Trillions USD)"},
    range_y=[0, gdp_df.drop('date', axis=1).max().max() * 1.1],
)

# Update layout for dynamic sorting of x-axis
fig.update_layout(
    xaxis_title="Country",
    yaxis_title="GDP (Trillions USD)",
    legend_title="Country",
    template="plotly_white",
    xaxis=dict(categoryorder="total descending"), # used for dynamically rearranging x axis
    xaxis_tickangle=45,  # Tilt the x-axis labels
)

# Show the interactive animation
fig.show()

In [54]:
display_df(gdp_df)
# gdp_df


Styler.applymap has been deprecated. Use Styler.map instead.



Unnamed: 0,date,Australia,Belgium,Brazil,Canada,China,France,Germany,India,Indonesia,Italy,Japan,"Korea, Rep.",Mexico,Netherlands,Poland,Russian Federation,Saudi Arabia,Spain,Sweden,Switzerland,Thailand,Turkiye,United Kingdom,United States
0,1989,299875906679,164221056511,412990820287,567211993243,347768051312,1016742237302,1404092925205,296042052945,94451427877,930801709004,3109455047824,246928837311,221403098267,258716904292,,506631299735,95344459279,414460794097,217634838449,208105846934,72250748100,107127191329,926884816754,5641580000000
1,1990,311426665220,205331747948,464989098145,596075591361,360857912566,1257649439827,1778162195860,320979026420,106140727334,1183945130899,3185904656664,283365844161,261253675693,318799003994,65977748211.0,517014446228,117630173565,536175326477,261469578631,265763573999,85343190719,150655500192,1093169389205,5963144000000
2,1991,325975319456,210510999409,342534090909,612527712316,383373318084,1258961748634,1875792575133,270105341879,116621996217,1249092439519,3648065760649,330647042837,313139656146,327982316124,85500935935.0,517962962963,132223230975,576753902322,273834608020,268901693863,98234714971,151034731544,1142797178131,6158129000000
3,1992,325525379567,234781652447,328191909882,594387358319,426915712716,1389663073110,2141377582968,288208070278,128026966580,1323204350354,3980702922118,355524903068,363157832924,363497050125,94337050693.0,460290556901,137087850467,630465353486,283912173667,279921425828,111452746518,159104772992,1179659529660,6520327000000
4,1993,312133923553,224721795709,368292034381,579053561739,444731282436,1314383368080,2078954217438,279295648983,158006700302,1067412587671,4536940479038,392665710525,530160763984,354070495966,96043157273.0,435083713851,132967957276,524700572624,212647043708,272237527071,128889262951,180415757852,1061388722256,6858559000000
5,1994,322806641301,244884129491,525369467296,579944346807,564321854521,1385822778828,2215282632277,327274843459,176892143932,1101750159702,4998797547741,463619823515,553618247901,379688232232,110803635288.0,395077301248,135174899866,530183651720,228701658891,301375055202,146683778959,130650447499,1140489745944,7287236000000
6,1995,368166023166,288025588396,769333050987,605961090061,734484834574,1595219345512,2593053091306,360281909643,202132028723,1177369428266,5545563663890,566581003128,380157469867,452967334614,142838527115.0,395537185735,143343124166,614170002669,267052836695,352835806342,169278916593,169319579016,1344240176740,7639749000000
7,1996,401341880621,279201433225,850426432992,630607994133,863749314719,1598889216566,2506576553158,392896866205,227369679375,1314776508972,4923391533852,610167053824,432157945024,451372549020,160813026223.0,391724890744,158662483311,642251412058,291952430659,340103959547,183035237429,181464408820,1419645865835,8073122000000
8,1997,435642611297,252708051421,883206179730,654986999856,961601980985,1449392222971,2218790886533,415867563593,215748998610,1244988176444,4492448605639,569755022973,523449530464,417506211882,159893964917.0,404928954192,165963684913,589739772727,268251974486,294788198856,150180456566,189878399895,1560911918795,8577552000000
9,1998,399674421759,258528339631,863710759256,634000000000,1029060747621,1496906382032,2247760364566,421351317225,95445547873,1272729786997,4098362709531,383331833682,557461102631,438612530549,175282269667.0,270955486862,146775466667,618731484575,270889570938,303459014347,113675596788,275941990764,1653694932097,9062817000000


In [44]:
import pandas as pd
import numpy as np

# Sample DataFrame
data = {
    'Column1': [1234567.89, -9876543.21, 3456789.01],
    'Column2': [-12345.67, 98765.43, -56789.01]
}
df = pd.DataFrame(data)

# Step 1: Set display options for large numbers with commas
pd.set_option('display.float_format', '{:,.2f}'.format)

# Step 2: Define a styling function for red negative numbers with parentheses
def format_negative_red(value):
    if value < 0:
        return f'color: red; font-weight: bold;'  # CSS for red color
    return None

def format_negative_parentheses(value):
    return f'({abs(value):,.2f})' if value < 0 else f'{value:,.2f}'

def display_df(df):
    return df.style.applymap(format_negative_red).format(format_negative_parentheses)

# Apply formatting for display
# styled_df = df.style.applymap(format_negative_red).format(format_negative_parentheses)

# Display the styled DataFrame in Jupyter Notebook or save as HTML
# styled_df.to_html("styled_output.html")  # Save as HTML for visualization

display_df(df)



Styler.applymap has been deprecated. Use Styler.map instead.



Unnamed: 0,Column1,Column2
0,1234567.89,"(12,345.67)"
1,"(9,876,543.21)",98765.43
2,3456789.01,"(56,789.01)"


In [36]:
# df_melted.head(50)
display(df)

df_melted

Unnamed: 0,Year,United States,China,Japan,Germany,India,United Kingdom,France,Brazil,Canada,Italy
0,2000,10.0,1.2,4.5,2.5,0.5,2.3,2.1,1.8,1.6,1.9
1,2001,11.2,2.7,4.6,2.7,0.8,2.45,2.22,1.9,1.68,1.95
2,2002,12.4,4.2,4.7,2.9,1.1,2.6,2.34,2.0,1.76,2.0
3,2003,13.6,5.7,4.8,3.1,1.4,2.75,2.46,2.1,1.84,2.05
4,2004,14.8,7.2,4.9,3.3,1.7,2.9,2.58,2.2,1.92,2.1
5,2005,16.0,8.7,5.0,3.5,2.0,3.05,2.7,2.3,2.0,2.15
6,2006,17.2,10.2,5.1,3.7,2.3,3.2,2.82,2.4,2.08,2.2
7,2007,18.4,11.7,5.2,3.9,2.6,3.35,2.94,2.5,2.16,2.25
8,2008,19.6,13.2,5.3,4.1,2.9,3.5,3.06,2.6,2.24,2.3
9,2009,20.8,14.7,5.4,4.3,3.2,3.65,3.18,2.7,2.32,2.35


Unnamed: 0,Year,Country,GDP
0,2000,United States,10.00
1,2001,United States,11.20
2,2002,United States,12.40
3,2003,United States,13.60
4,2004,United States,14.80
...,...,...,...
245,2020,Italy,2.90
246,2021,Italy,2.95
247,2022,Italy,3.00
248,2023,Italy,3.05


In [None]:
file_path = os.path.join(output_folder, "sp500_constituents.csv")

In [None]:
folder_name = "data"
if not os.path.exists(folder_name):
    os.makedirs(folder_name)

In [28]:
output_folder = os.path.join("data", "gdp_data")
if not os.path.exists(output_folder):
    os.makedirs(output_folder)
output_folder

'data/gdp_data'


Styler.applymap has been deprecated. Use Styler.map instead.



Unnamed: 0,Column1,Column2
0,1234568,"(-12,346)"
1,"(-9,876,543)",98765
2,3456789,"(-56,789)"
3,100000,123457
