In [1]:
#Import requirements

import subprocess
import sys

import re
import time
import webbrowser
import numpy as np
import pandas as pd
import seaborn as sns
import yfinance as yf
import networkx as nx
from datetime import datetime
from functools import reduce
import matplotlib.pyplot as plt
import yahoo_fin.stock_info as si

import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.graph_objs as go



import warnings
warnings.filterwarnings("ignore")


In [3]:
main_file = pd.read_csv('merged_df.csv')

# Closing-price columns are named "<ticker>: Close"; the capture group is the ticker.
pattern = r"^(.*?): Close$"
column_names = main_file.columns

# Tickers (in file order) of every column whose name matches the pattern
close_matches = (re.match(pattern, name) for name in column_names)
ticker_names = [found.group(1) for found in close_matches if found]

# Full "<ticker>: Close" labels used to select the price columns from main_file
target_col_names = [f'{ticker}: Close' for ticker in ticker_names]

# Short labels assigned positionally to the selected columns below, so this
# list must have exactly one entry per element of target_col_names.
new_column_names = [
    'AAPL', 'META', 'V', 'MC.PA', 'NFLX', 'NKE', 'JPM', 'BAC', 'C',
    'NVDA', 'LLY', 'TSLA', '^SPX', '^DJI', '^RUT', 'IAU', 'CPER',
    'GSG', 'CL=F', 'TLT', 'JNK', 'LQD', 'BTC-USD', 'ETH-USD', 'XRP-USD',
]

# Closing prices rounded to 2 decimals, indexed by main_file's 'Date' column
main_df = main_file[target_col_names].round(2).set_index(main_file['Date'])

# NOTE(review): the converted index is not assigned back, so main_df.index keeps
# the values exactly as read from the CSV; this call can only raise if a value
# does not match the format. Later cells look dates up as 'YYYY-MM-DD' strings.
pd.to_datetime(main_df.index, format='%Y-%m-%d')

main_df.columns = new_column_names


In [4]:
def rolling_corr(df, ref_date, span):
    """Correlation matrix of the `span` rows that immediately precede `ref_date`.

    Parameters
    ----------
    df : pandas.DataFrame
        One column per series; the index holds the dates.
    ref_date : str or index label
        Reference date. A string is parsed with ``pd.to_datetime`` and looked
        up in the index as ``YYYY-MM-DD``. Any other value (for example a
        ``pd.Timestamp`` taken from a ``DatetimeIndex``) is looked up as-is.
    span : int
        Window length in rows; must be a positive integer.

    Returns
    -------
    pandas.DataFrame
        ``df.iloc[max(position - span, 0):position].corr().round(2)`` where
        ``position`` is the row of ``ref_date``. The reference row itself is
        not part of the window.
    str
        The literal ``"Date not found"`` when ``ref_date`` is not in
        ``df.index`` (kept as a sentinel for existing callers).
    None
        When a string ``ref_date`` cannot be parsed, or the index lookup
        fails; a message is printed in both cases.
    """
    assert isinstance(span, int) and span > 0, "span must be a positive integer"

    if ref_date not in df.index:
        return "Date not found"

    if isinstance(ref_date, str):
        try:
            starting_point = pd.to_datetime(ref_date)
        except ValueError as err:
            print(f'DateParseError : {err}')
            print('Please enter an input of the format : YYYY-MM-DD')
            return None
        # Normalise the string so the lookup always uses the YYYY-MM-DD form
        lookup_key = starting_point.strftime('%Y-%m-%d')
    else:
        # Non-string labels already passed the membership test above; they used
        # to fall through and silently produce None.
        lookup_key = ref_date

    try:
        position = df.index.get_loc(lookup_key)
    except KeyError as err:
        print(f'Error due to wrong date input: {err}.')
        print(f'Recall date range of input dataframe: {df.index[0], df.index[-1]}')
        return None

    # Clamp the window start at the first row when fewer than `span` rows exist
    start_position = max(position - span, 0)
    if position - span < 0:
        print('Correlation period less than span. Check datetime range.')

    window = df.iloc[start_position:position]
    return window.corr().round(2)


In [6]:
def relative_change(corr1, corr2):
    """Return the move from `corr1` to `corr2` as a percentage of a range of 2.

    The difference ``corr2 - corr1`` is divided by 2 and multiplied by 100.
    The divisor is presumably the width of the correlation interval [-1, 1],
    so a swing from -1 to +1 yields 100.
    """
    full_range = 2
    return ((corr2 - corr1) / full_range) * 100



def matrix_difference(matrix1, matrix2):
    """Element-wise `relative_change` from `matrix1` to `matrix2`.

    Parameters
    ----------
    matrix1, matrix2 : pandas.DataFrame
        Numeric frames of identical shape (earlier and later matrix).

    Returns
    -------
    pandas.DataFrame
        Float frame of the same shape. Columns carry `matrix1`'s column
        labels; the row index is the default 0..n-1 range.

    Raises
    ------
    ValueError
        If the two inputs differ in shape.
    """
    if matrix1.shape != matrix2.shape:
        raise ValueError("Matrices must have the same shape")

    # One vectorised call replaces the cell-by-cell loop. Labels now come from
    # the input instead of a fixed 25-name global list, so any width works.
    pct_change = relative_change(
        matrix1.to_numpy(dtype=float),
        matrix2.to_numpy(dtype=float),
    )
    return pd.DataFrame(pct_change, columns=list(matrix1.columns))



def matrix_difference_qual(matrix1, matrix2, heatmap=True):
    """Classify, cell by cell, how each value moved from `matrix1` to `matrix2`.

    Categories (first match wins):

    * ``'UNCH'``         - ``matrix1 / matrix2`` lies in [0.95, 1.05], or both are 0
    * ``'Neg Stronger'`` - the new value is negative and either more negative
      than the old one or the old one was not negative
    * ``'Pos Stronger'`` - the new value is positive and either larger than
      the old one or the old one was not positive
    * ``'Neg Weaker'``   - any remaining cell whose old value was negative
    * ``'Pos Weaker'``   - any remaining cell whose old value was positive

    Cells where either input is NaN are left as NaN.

    Parameters
    ----------
    matrix1, matrix2 : pandas.DataFrame
        Numeric frames of identical shape (earlier and later matrix).
    heatmap : bool, default True
        When True return numeric codes (-10, -5, 0, 5, 10) suitable for a
        heatmap; when False print a notice and return the string labels.

    Returns
    -------
    pandas.DataFrame
        Same shape as the inputs, columns labelled like `matrix1`, default
        0..n-1 row index. Codes are returned as floats so NaN can be kept.

    Raises
    ------
    ValueError
        If the two inputs differ in shape.
    """
    if matrix1.shape != matrix2.shape:
        raise ValueError("Matrices must have the same shape")

    category_map = {'Neg Stronger': -10, 'Neg Weaker': -5,
                    'Pos Stronger': 10, 'Pos Weaker': 5,
                    'UNCH': 0}

    before = matrix1.to_numpy(dtype=float)
    after = matrix2.to_numpy(dtype=float)

    # Division by zero / NaN comparisons are expected here and handled by the
    # rule order, so silence the corresponding numpy warnings.
    with np.errstate(divide='ignore', invalid='ignore'):
        ratio = before / after
        rules = [
            # Checked first: placed last it could never match, because every
            # same-sign pair was already claimed by a Stronger/Weaker rule.
            ('UNCH', ((ratio >= 0.95) & (ratio <= 1.05))
                     | ((before == 0) & (after == 0))),
            # A negative correlation strengthens when it moves further below zero.
            ('Neg Stronger', (after < 0) & ((before >= 0) | (after < before))),
            ('Pos Stronger', (after > 0) & ((before <= 0) | (after > before))),
            ('Neg Weaker', before < 0),
            ('Pos Weaker', before > 0),
        ]

    labels = np.full(before.shape, np.nan, dtype=object)
    scores = np.full(before.shape, np.nan)
    unlabelled = ~(np.isnan(before) | np.isnan(after))
    for label, mask in rules:
        hit = mask & unlabelled
        labels[hit] = label
        scores[hit] = category_map[label]
        unlabelled &= ~hit

    column_labels = list(matrix1.columns)
    if heatmap:
        return pd.DataFrame(scores, columns=column_labels)

    print("Can't return a heatmap - Categ Variables of String Type")
    return pd.DataFrame(labels, columns=column_labels)

    
    
    
def _plot_corr_heatmap(matrix, title, cmap, annot, fmt):
    """Draw `matrix` as one 20x20-inch heatmap labelled with its own columns and show it."""
    tick_labels = list(matrix.columns)
    fig, ax = plt.subplots(figsize=(20, 20))
    sns.heatmap(matrix, annot=annot, cmap=cmap, fmt=fmt,
                xticklabels=tick_labels, yticklabels=tick_labels, ax=ax)
    ax.set_title(title)
    ax.set_xlabel('Assets')
    ax.set_ylabel('Assets')
    fig.tight_layout()
    plt.show()


def rolling_corr_difference(df, ref_date, span):
    """Compare the rolling correlation at `ref_date` with the one `span` rows earlier.

    Two matrices are computed with `rolling_corr`: one referenced at
    `ref_date` and one referenced `span` rows before it (clamped to the first
    row of `df`, with a printed notice). Their difference is then expressed
    numerically (`matrix_difference`) and categorically
    (`matrix_difference_qual`), and each result is drawn as a heatmap.

    Parameters
    ----------
    df : pandas.DataFrame
        One column per series; the index holds the dates.
    ref_date : index label
        Reference date; must be present in ``df.index``.
    span : int
        Window length in rows; must be a positive integer.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(corr_diff, corr_diff_qual)``.

    Raises
    ------
    KeyError
        If `ref_date` is not in ``df.index``.
    ValueError
        If `rolling_corr` does not return a DataFrame for either date.
    """
    assert isinstance(span, int) and span > 0, "span must be a positive integer"

    # Raises KeyError for an unknown date before any further work is done
    index_position = df.index.get_loc(ref_date)

    corr_matrix_current = rolling_corr(df, ref_date, span=span)

    # Reference row of the previous window, clamped to the start of the data
    prev_position = index_position - span
    if prev_position < 0:
        print(f'Period out of bound. Setting reference date to {df.index[0]}')
        prev_position = 0
    new_ref_date = df.index[prev_position]

    corr_matrix_prev = rolling_corr(df, new_ref_date, span=span)

    # rolling_corr signals failure with a string or None instead of raising
    for label, matrix in ((ref_date, corr_matrix_current), (new_ref_date, corr_matrix_prev)):
        if not isinstance(matrix, pd.DataFrame):
            raise ValueError(f'No correlation matrix could be computed for {label!r}')

    corr_diff = matrix_difference(corr_matrix_prev, corr_matrix_current)
    corr_diff_qual = matrix_difference_qual(corr_matrix_prev, corr_matrix_current)

    red_to_green = sns.blend_palette([(1, 0, 0), (1, 1, 0.75), (0, 0.5, 0)], as_cmap=True)
    _plot_corr_heatmap(
        corr_diff,
        f'Relative Range Percentage Change of Rolling Correlations between Assets Log Returns, {span} freq periods.',
        cmap=red_to_green, annot=True, fmt=".1f",
    )
    # The directional view plots the categorical codes; it previously re-drew
    # corr_diff, so both figures showed the same data.
    _plot_corr_heatmap(
        corr_diff_qual,
        f'Directional Difference between Rolling Correlation Matrices of Assets Log Returns, {span} freq periods.',
        cmap='coolwarm', annot=False, fmt=".2f",
    )

    return corr_diff, corr_diff_qual

In [20]:
#KAMADA KAWAI LAYOUT


def graph_net(df, ref_date, corr_threshold, span, show=False):
    """Render the correlation network at `ref_date` and return it as a PNG buffer.

    An edge joins two tickers when the absolute value of their rolling
    correlation (from `rolling_corr`) exceeds `corr_threshold`. Positive edges
    are drawn in shades of green, negative ones in shades of red, and nodes
    are coloured by asset category.

    Parameters
    ----------
    df : pandas.DataFrame
        Price frame handed to `rolling_corr`.
    ref_date : index label
        Reference date handed to `rolling_corr`.
    corr_threshold : float
        Minimum absolute correlation (exclusive) for an edge to be drawn.
    span : int
        Window length handed to `rolling_corr`.
    show : bool, default False
        Also call ``plt.show()`` before the figure is closed.

    Returns
    -------
    io.BytesIO
        PNG image of the figure, positioned at offset 0.

    Raises
    ------
    ValueError
        If `rolling_corr` does not return a DataFrame for `ref_date`.
    """
    # Local imports: neither name is imported at the top of the notebook.
    import io
    import matplotlib.patches as mpatches

    corr_matrix = rolling_corr(df, ref_date, span)
    if not isinstance(corr_matrix, pd.DataFrame):
        raise ValueError(f'No correlation matrix available for ref_date {ref_date!r}')

    # Ticker categories and the node colour used for each
    ticker_categs = {
        'Equity': ['AAPL', 'META', 'V', 'MC.PA', 'NFLX', 'NKE', 'JPM', 'BAC', 'C', 'NVDA', 'LLY', 'TSLA'],
        'Index': ['^SPX', '^DJI', '^RUT'],
        'Credit': ['TLT', 'JNK', 'LQD'],
        'Commodities': ['IAU', 'CPER', 'GSG', 'CL=F'],
        'Crypto': ['BTC-USD', 'ETH-USD', 'XRP-USD']
    }
    colors = {
        'Equity': 'yellow',
        'Index': 'blue',
        'Credit': 'red',
        'Commodities': 'orange',
        'Crypto': 'green'
    }

    # G keeps the signed correlations as edge weights
    G = nx.Graph()
    for col1 in corr_matrix.columns:
        for col2 in corr_matrix.index:
            # Skip self-pairs and pairs already added from the mirrored cell
            if col1 == col2 or G.has_edge(col1, col2):
                continue
            corr = corr_matrix.loc[col1, col2]
            if abs(corr) > corr_threshold:
                G.add_edge(col1, col2, weight=corr)

    # H is a copy with absolute weights, used only to compute the layout.
    # NOTE(review): the layout receives |corr| as the edge weight; confirm that
    # placing strongly correlated tickers this way is the intended geometry.
    H = G.copy()
    for _, _, data in H.edges(data=True):
        data['weight'] = abs(data['weight'])
    pos = nx.kamada_kawai_layout(H)

    positive_edges = [(u, v, d) for u, v, d in G.edges(data=True) if d['weight'] > 0]
    negative_edges = [(u, v, d) for u, v, d in G.edges(data=True) if d['weight'] < 0]
    max_positive_corr = max(abs(d['weight']) for u, v, d in positive_edges) if positive_edges else 0
    max_negative_corr = max(abs(d['weight']) for u, v, d in negative_edges) if negative_edges else 0

    def get_color_intensity(corr_value, max_corr):
        """Cubic scaling of corr_value relative to max_corr; 0.0 when max_corr is 0."""
        return corr_value**3 / max_corr**3 if max_corr else 0.0

    # The figure must exist before anything is drawn and must be saved before
    # it is shown or closed; otherwise the PNG comes from an empty figure.
    fig, ax = plt.subplots(figsize=(10, 10))
    try:
        # Base layer: every node in a neutral colour
        nx.draw_networkx_nodes(G, pos, node_size=700, node_color='skyblue', alpha=0.6, ax=ax)

        # Overlay the category colour on nodes that belong to a known category
        for category, nodes in ticker_categs.items():
            valid_nodes = [node for node in nodes if node in G.nodes()]
            nx.draw_networkx_nodes(G, pos,
                                   nodelist=valid_nodes,
                                   node_size=700,
                                   node_color=colors[category],
                                   alpha=0.6,
                                   ax=ax)

        # Positive edges in shades of green
        for u, v, d in positive_edges:
            intensity = get_color_intensity(abs(d['weight']), max_positive_corr)
            nx.draw_networkx_edges(G, pos, edgelist=[(u, v)], width=2,
                                   edge_color=[(0, intensity, 0, 1)], ax=ax)

        # Negative edges in shades of red
        for u, v, d in negative_edges:
            intensity = get_color_intensity(abs(d['weight']), max_negative_corr)
            nx.draw_networkx_edges(G, pos, edgelist=[(u, v)], width=2,
                                   edge_color=[(intensity, 0, 0, 1)], ax=ax)

        nx.draw_networkx_labels(G, pos, font_size=10, ax=ax)

        legend_patches = [mpatches.Patch(color=color, label=category)
                          for category, color in colors.items()]
        ax.legend(handles=legend_patches)
        ax.axis('off')

        buf = io.BytesIO()
        fig.savefig(buf, format='png', bbox_inches='tight')
        buf.seek(0)

        if show:
            plt.show()
    finally:
        # Always release the figure, including when drawing fails
        plt.close(fig)

    return buf

In [21]:
# Global Vars
x = 0.9                    # absolute-correlation cutoff
ref_date = '2024-02-14'    # default reference date (YYYY-MM-DD)
span = 5                   # default window length, in rows

new_column_names = [
    # Equities
    'AAPL', 'META', 'V', 'MC.PA', 'NFLX', 'NKE', 'JPM', 'BAC', 'C', 'NVDA', 'LLY', 'TSLA',
    # Indices
    '^SPX', '^DJI', '^RUT',
    # Commodities
    'IAU', 'CPER', 'GSG', 'CL=F',
    # Credit
    'TLT', 'JNK', 'LQD',
    # Crypto
    'BTC-USD', 'ETH-USD', 'XRP-USD',
]

# for dash app: dash_df is another name for main_df (same object, not a copy)
dash_df = main_df
dates_list = main_df.index.tolist()

In [24]:
from flask import Flask, render_template, request, send_file


# Flask application object; the @app.route decorators below register views on it.
app = Flask(__name__)


@app.route('/')
def index():
    """Render index.html with the dates a user may pick from.

    The first 11 entries of `dates_list` are left out.
    NOTE(review): presumably so every offered date has enough earlier rows for
    the correlation windows - confirm where the 11 comes from.
    """
    return render_template('index.html', available_dates=dates_list[11:])

@app.route('/plot', methods=['POST'])
def plot():
    """Return the correlation-network PNG for the date submitted in the form.

    Reads the ``selected_date`` form field, checks that it is a label of
    ``dash_df.index`` and passes it to `graph_net` together with a fixed
    threshold and span.

    Returns
    -------
    A ``send_file`` response with mimetype ``image/png``, or a plain message
    with HTTP status 400 when the field is missing or not a known date.
    """
    selected_date = request.form.get('selected_date')

    # Reject bad input up front; the submitted value is not echoed back.
    if not selected_date or selected_date not in dash_df.index:
        return 'Unknown or missing date', 400

    corr_threshold = 0.5  # Adjust as needed
    span = 5  # Fixed span

    # Plot the date the user picked (the global ref_date was used before,
    # so every request returned the same image).
    plot_buf = graph_net(dash_df, selected_date, corr_threshold, span)

    return send_file(plot_buf, mimetype='image/png')

if __name__ == '__main__':
    # Debug mode turns the auto-reloader on by default. The reloader restarts
    # the launching process, which does not work from a notebook kernel, so it
    # is disabled here while the debugger stays on.
    app.run(debug=True, use_reloader=False)


 * Serving Flask app '__main__'
 * Debug mode: on


Address already in use
Port 5000 is in use by another program. Either identify and stop that program, or start the server with a different port.
On macOS, try disabling the 'AirPlay Receiver' service from System Preferences -> Sharing.


AssertionError: 