In [None]:
import pandas as pd
import numpy as np
import psycopg2

# Open a connection to the local 'newyork' PostgreSQL database.
# NOTE(review): user/password are blank here -- presumably scrubbed before
# sharing or left to the driver's defaults; confirm before running.
conn = psycopg2.connect(
    dbname = 'newyork',
    user = '',
    password = '',
    host = 'localhost',
    port = '5432'
)

# Nodes whose rows are pulled from realtime_lbmp for the analysis below.
best_nodes = ['ELLENBURG_WT_PWR', 'CHATEAUG_WT_PWR', 'CLINTON_WT_PWR', 'NORTH___COUNTRY_ESR', 'JERICHO_RISE_WT_PWR',
              'CHAT_HIGH_FALL_HYD', 'NEG NORTH_KES_CHATEGAY', 'CHATEAUG_35_KV_LOAD', 'KNTFSRNC_46_KV_46KV_LOAD',
              'NEG NORTH___LWR_SARANAC', 'NEG NORTH___ALICE_FALLS', 'NEG NORTH___PLATTSBURG', 'CLINTON___LFGE',
              'NEG NORTH_FLCN_SEA', 'PLATSBRG_115KV_PMLD1', 'MARBLE_RIVER_WT_PWR', 'V_XM_10_SYNC_DSASP',
              'FALCON___SEABRD_CC1', 'V_CMP_10_SYNC_DSASP', 'FALCON___SEABRD_CC2', 'ALTONA_WT_PWR']

# There are 58'943'742 elements in the file, so roughly 80k points per node
# The node list is passed as a single tuple parameter; psycopg2 adapts a Python
# tuple to a parenthesised SQL list, so `IN %s` needs no string formatting.
query = "SELECT * FROM realtime_lbmp WHERE node IN %s"

# Close the connection even if the read fails, so a failed cell run does not
# leave an open connection behind in the kernel.
try:
    df = pd.read_sql_query(query, conn, params=(tuple(best_nodes),))
finally:
    conn.close()

df.head()

In [None]:
# Select each node's price column once (row order of `df` is kept), instead of
# re-filtering the whole frame twice per node.
node_prices = {nodei: df.loc[df.node == nodei, 'price'] for nodei in best_nodes}

# Length of the shortest per-node series; 0 when there are no nodes at all.
min_length = min((len(series) for series in node_prices.values()), default=0)

# Crop to shortest list
# NOTE(review): series are paired purely by position after cropping. The query
# has no ORDER BY, so this assumes every node's rows come back in the same
# time order and start at the same timestamp -- confirm before trusting the
# correlations computed from `prices`.
prices = {nodei: list(series[:min_length]) for nodei, series in node_prices.items()}

# Quick sanity peek at the first few prices per node.
for key, value in prices.items():
    print(f'{key}: {value[:10]}...')

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# One column per node (equal lengths after cropping), then the pairwise
# correlation between those columns.
node_price_frame = pd.DataFrame(prices)
correlation_matrix = node_price_frame.corr()

# Annotated heatmap of the node-by-node correlations, drawn on an explicit axes.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt=".1f", ax=ax)
ax.set_title('Correlation matrix')
plt.show()