In [None]:
import yfinance as yf
import pandas as pd
import concurrent.futures
import networkx as nx
import plotly.graph_objs as go
from sklearn.cluster import SpectralClustering
from time import sleep
from random import randint

# Download the list of S&P 500 companies from Wikipedia
url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
tables = pd.read_html(url)
# The first table on the page is assumed to be the constituents table
# (the one carrying the 'Symbol' column).
sp500_table = tables[0]
# Wikipedia writes share classes with a dot (e.g. BRK.B, BF.B) whereas Yahoo
# Finance expects a dash (BRK-B, BF-B). Without this normalisation those
# tickers fail every download attempt and silently drop out of the analysis.
sp500_symbols = (
    sp500_table['Symbol']
    .astype(str)
    .str.strip()
    .str.replace('.', '-', regex=False)
    .tolist()
)
selected_symbols = sp500_symbols

# Download Historical Data
def fetch_data(symbol, retries=5, start='2020-01-01', end='2024-07-31'):
    """Download the 'Adj Close' price series for one ticker, retrying on failure.

    Args:
        symbol: Ticker symbol passed to ``yf.download``.
        retries: Maximum number of download attempts.
        start: Start date forwarded to ``yf.download``.
        end: End date forwarded to ``yf.download``.

    Returns:
        A ``(symbol, series)`` tuple on success, or ``(symbol, None)`` when
        every attempt failed. Failures are printed, never raised.
    """
    # NOTE(review): this function is called from several worker threads;
    # confirm that yf.download is safe to call concurrently in the installed
    # yfinance version.
    for attempt in range(retries):
        try:
            # auto_adjust=False keeps the 'Adj Close' column available on
            # yfinance versions whose default is auto_adjust=True;
            # progress=False avoids one progress bar per ticker.
            adj_close = yf.download(
                symbol,
                start=start,
                end=end,
                auto_adjust=False,
                progress=False,
            )['Adj Close']
            # Some yfinance versions return MultiIndex columns even for a
            # single ticker, in which case the selection above is a
            # one-column DataFrame rather than a Series.
            if isinstance(adj_close, pd.DataFrame):
                adj_close = adj_close.iloc[:, 0]
            if adj_close.empty:
                raise ValueError(f"No data found for {symbol}")
            return symbol, adj_close
        except Exception as e:
            print(f"Attempt {attempt+1} failed for {symbol}: {e}")
            # Back off before the next attempt, but do not waste time
            # sleeping once the last attempt has already failed.
            if attempt + 1 < retries:
                sleep(randint(1, 3))
    return symbol, None

# Use threading to download data for each stock.
# fetch_data always returns a (symbol, series_or_None) tuple, so only the
# series needs to be checked.
data_frames = {}
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
    futures = {executor.submit(fetch_data, symbol): symbol for symbol in selected_symbols}
    for future in concurrent.futures.as_completed(futures):
        fetched_symbol, fetched_series = future.result()
        if fetched_series is not None:
            data_frames[fetched_symbol] = fetched_series

if not data_frames:
    raise RuntimeError("No price data could be downloaded for any symbol")

# Combine all data into a single DataFrame (one column per ticker).
# Columns are sorted because as_completed() yields results in completion
# order, which would otherwise make the column order differ between runs.
valid_data = dict(data_frames)
data = pd.DataFrame(valid_data).sort_index(axis=1)

# Drop tickers that have no prices at all.
data = data.dropna(axis=1, how='all')

# Daily simple returns. fill_method=None avoids implicit forward-filling of
# gaps. Only rows where *every* ticker is missing are dropped: dropping rows
# with *any* missing value would cut the whole sample down to the period in
# which the most recently listed constituent traded.
returns = data.pct_change(fill_method=None).dropna(how='all')

# DataFrame.corr() excludes missing values pair by pair, so each pair of
# tickers is correlated over its own overlapping history.
avg_corr = returns.corr()

In [20]:
# Create Interactive Network Graph for the Average Correlation Matrix using Plotly
def plot_avg_corr_network(correlations, threshold=0.7, n_clusters=10, seed=42):
    """Plot an interactive network of strongly correlated symbols.

    Symbols become nodes; an edge joins two symbols whose (non-negative)
    correlation is strictly greater than ``threshold``. Symbols without any
    such edge are not drawn. Node colour shows the spectral-clustering group
    computed on the full cleaned correlation matrix.

    Args:
        correlations: Square DataFrame of pairwise correlations whose index
            and columns hold the same symbols.
        threshold: Minimum correlation (exclusive) for drawing an edge.
        n_clusters: Number of groups requested from SpectralClustering.
        seed: Seed for the spring layout and the clustering, so that repeated
            runs produce the same picture.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    # Remove symbols with missing correlations *symmetrically* (same labels
    # from rows and columns) so the matrix stays square. First drop symbols
    # that are entirely NaN, then any symbol that still has a NaN entry.
    corr = correlations.dropna(axis=0, how='all').dropna(axis=1, how='all')
    shared = corr.index.intersection(corr.columns)
    corr = corr.loc[shared, shared]
    complete = corr.index[corr.notna().all(axis=1)]
    corr = corr.loc[complete, complete]

    # Negative correlations are treated as "no affinity".
    corr = corr.clip(lower=0)

    symbols = list(corr.columns)
    weights = corr.to_numpy()

    G = nx.Graph()

    # Add nodes and edges with weights. The matrix is symmetric, so scanning
    # the upper triangle visits every pair exactly once.
    for a in range(len(symbols)):
        for b in range(a + 1, len(symbols)):
            weight = weights[a, b]
            if weight > threshold:
                G.add_edge(symbols[a], symbols[b], weight=float(weight))

    pos = nx.spring_layout(G, seed=seed)

    # Clustering. Labels come back in the order of the matrix rows, so they
    # are keyed by symbol here; graph nodes are a subset in a different order.
    if len(symbols) > n_clusters:
        clustering = SpectralClustering(
            n_clusters=n_clusters,
            affinity='precomputed',
            assign_labels='discretize',
            random_state=seed,
        ).fit(weights)
        cluster_of = {sym: int(label) for sym, label in zip(symbols, clustering.labels_)}
    else:
        # Too few symbols to form the requested number of groups.
        cluster_of = {sym: 0 for sym in symbols}

    # Create edge traces (one per edge, because line width varies per edge).
    edge_traces = []
    for source, target, attrs in G.edges(data=True):
        x0, y0 = pos[source]
        x1, y1 = pos[target]
        edge_traces.append(go.Scatter(
            x=[x0, x1, None], y=[y0, y1, None],
            line=dict(width=attrs['weight']*5, color='#888'),
            hoverinfo='none',
            mode='lines'))

    nodes = list(G.nodes())
    node_x = [pos[node][0] for node in nodes]
    node_y = [pos[node][1] for node in nodes]
    node_color = [cluster_of[node] for node in nodes]
    # The connection count is shown on hover rather than as the colour, so
    # the colour bar really does represent the cluster group.
    node_hover = [
        f'{node}<br>Cluster: {cluster_of[node]}<br># of connections: {G.degree[node]}'
        for node in nodes
    ]

    node_trace = go.Scatter(
        x=node_x, y=node_y,
        mode='markers+text',
        text=nodes,
        hovertext=node_hover,
        textposition="bottom center",
        hoverinfo='text',
        marker=dict(
            showscale=True,
            colorscale='YlGnBu',
            size=10,
            color=node_color,
            colorbar=dict(
                thickness=15,
                # Nested title dict replaces the removed `titleside` shortcut.
                title=dict(text='Cluster Group', side='right'),
                xanchor='left'
            ),
            line=dict(width=2)))

    fig = go.Figure(data=edge_traces + [node_trace],
                    layout=go.Layout(
                        # Nested title dict replaces the removed `titlefont` shortcut.
                        title=dict(
                            text='Average Stock Correlation Network (Selected S&P 500 Tickers)',
                            font=dict(size=16)),
                        width=1800,
                        height=1400,
                        showlegend=False,
                        hovermode='closest',
                        margin=dict(b=20,l=5,r=5,t=40),
                        xaxis=dict(showgrid=False, zeroline=False),
                        yaxis=dict(showgrid=False, zeroline=False))
                    )
    fig.show()

# Render the correlation network from the average correlation matrix
plot_avg_corr_network(correlations=avg_corr)


DataFrame.applymap has been deprecated. Use DataFrame.map instead.

