# Discovery and characterization of operating modes in a fleet of assets via hypergraph clustering

In [None]:
import seaborn as sns
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans,DBSCAN,AgglomerativeClustering
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import AgglomerativeClustering
from sklearn.preprocessing import MinMaxScaler
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.decomposition import PCA

from sklearn.manifold import TSNE
import umap

import sys
import cluster_validation_metrics as cvm

import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

%load_ext autoreload
%autoreload 2
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')
import glob
import seaborn as sns

import sweetviz as sv

import os
import kahypar as kahypar
from scipy.sparse import csr_matrix

## Data
- Onshore wind farm in Scotland - 14 wind turbines with 2 MW rated power
- 5 years of SCADA data with 10 minutes sampling rate


In [None]:
data_path = os.path.join('../app/data/Penmanshiel/')

In [None]:
def read_data_penmanshiel(turbine_number):
    """Read the SCADA and log CSV files of one turbine from ``data_path``.

    Args:
        turbine_number: Integer turbine id; it is zero-padded to two digits
            to build the file names.

    Returns:
        Tuple ``(df_scada, df_logs)``: the SCADA table indexed by its
        'Datetime' column and the log table as read from disk.
    """
    scada_csv = os.path.join(data_path, f'scada_T{turbine_number:02d}.csv')
    logs_csv = os.path.join(data_path, f'logs_T{turbine_number:02d}.csv')

    scada_table = pd.read_csv(scada_csv).set_index('Datetime',drop=True)
    logs_table = pd.read_csv(logs_csv)
    return scada_table, logs_table

In [None]:
# Load every turbine, keep the 2018-2022 window and stack everything into one table.
# The frames are collected in a list and concatenated once: calling pd.concat inside
# the loop re-copies all previously loaded rows on every iteration, and seeding the
# result with an empty DataFrame relies on pandas' deprecated empty-entry concat behaviour.
turbine_frames = []
for t_id in [1,2,4,5,6,7,8,9,10,11,12,13,14,15]:
    print(t_id)
    df_scada, _ = read_data_penmanshiel(t_id)
    # The index still holds the raw 'Datetime' strings from the CSV, so this is a
    # string comparison — presumably ISO-formatted timestamps; TODO confirm.
    in_window = (df_scada.index>="2018-01-01 00:00:00") & (df_scada.index<="2022-12-31 23:50:00")
    df_scada = df_scada[in_window].copy()
    df_scada.reset_index(inplace=True)
    df_scada["Turbine"] = f'T{t_id:02d}'
    turbine_frames.append(df_scada)
df_penmanshiel = pd.concat(turbine_frames)

In [None]:
df_penmanshiel['Datetime']=pd.to_datetime(df_penmanshiel['Datetime'])

In [None]:
# df_penmanshiel.to_csv("penmanshiel_preprocessed.csv")

In [None]:
# df_penmanshiel=pd.read_csv("penmanshiel_preprocessed.csv")
# df_penmanshiel['Datetime']=pd.to_datetime(df_penmanshiel['Datetime'])

In [None]:
df_penmanshiel_filtered = df_penmanshiel.copy()

### Feature Engineering
- Motor current = Mean (Motor current axis 1,axis 2,axis 3)
- Motor temperature = Mean (Motor temperature axis 1,axis 2,axis 3)
- Blade angle = Mean (Blade angle A, B, C)

In [None]:
# Collapse the three per-axis motor signals into one value per row.
df_penmanshiel_filtered["Motor temperature (°C)"] = df_penmanshiel_filtered[['Temperature motor axis 1 (°C)',
                                          'Temperature motor axis 2 (°C)',
                                          'Temperature motor axis 3 (°C)']].mean(axis=1)

df_penmanshiel_filtered["Motor current (A)"] = df_penmanshiel_filtered[['Motor current axis 1 (A)',
                                          'Motor current axis 2 (A)',
                                          'Motor current axis 3 (A)',]].mean(axis=1)

# Blade angles are averaged as unit vectors (circular mean) rather than arithmetically.
# NumPy ufuncs are applied to the whole frame at once; the previous row-wise
# DataFrame.apply made one Python-level call per row and returned the same values.
temp = np.radians(df_penmanshiel_filtered[['Blade angle (pitch position) A (°)','Blade angle (pitch position) B (°)','Blade angle (pitch position) C (°)']])
# Convert angular values to Cartesian coordinates
x = np.cos(temp)
y = np.sin(temp)
# Average the Cartesian coordinates
mean_x = x.mean(axis=1)
mean_y = y.mean(axis=1)
# Convert the average Cartesian coordinates back to an angle in radians
mean_angle_radians = np.arctan2(mean_y, mean_x)
df_penmanshiel_filtered['Blade Angle (pitch position) (°)'] = np.degrees(mean_angle_radians)

In [None]:
# Histograms of the raw per-axis signals next to their engineered aggregates.
engineered_check_columns = [
    'Temperature motor axis 1 (°C)','Temperature motor axis 2 (°C)','Temperature motor axis 3 (°C)',"Motor temperature (°C)",
    'Motor current axis 1 (A)','Motor current axis 2 (A)','Motor current axis 3 (A)',"Motor current (A)",
    'Blade angle (pitch position) A (°)','Blade angle (pitch position) B (°)','Blade angle (pitch position) C (°)','Blade Angle (pitch position) (°)',
]
df_penmanshiel_filtered[engineered_check_columns].hist(figsize=(12,12))
plt.show()

##### Convert angular features to their sine and cosine components

In [None]:
# Encode each angular column as a cosine/sine pair; the new column names reuse the
# text in front of the "(°)" unit suffix.
for angle_column in ['Nacelle position (°)','Vane position 1+2 (°)','Blade Angle (pitch position) (°)']:
    name_prefix = angle_column.split("(°)")[0]
    angle_in_radians = np.radians(df_penmanshiel_filtered[angle_column])
    df_penmanshiel_filtered[name_prefix+"cos"] = np.cos(angle_in_radians)
    df_penmanshiel_filtered[name_prefix+"sin"] = np.sin(angle_in_radians)

##### Convert Wind direction and wind speed to u and v components

In [None]:
def convert_angles_lengths_to_u_v(angles, lengths, conversion='trigonometric', kind='deg'):
    """Convert direction/magnitude pairs into u and v vector components.

    The components are ``u = -sin(angle) * length`` and
    ``v = -cos(angle) * length``.

    Args:
        angles: Direction(s) as a scalar, NumPy array or pandas Series.
        lengths: Magnitude(s), broadcastable against ``angles``.
        conversion: Accepted for backward compatibility; it has no effect
            on the result.
        kind: Unit of ``angles``: ``'deg'`` (default) or ``'rad'``.

    Returns:
        Tuple ``(u, v)`` with the same shape as the broadcast inputs.

    Raises:
        ValueError: If ``kind`` is neither ``'deg'`` nor ``'rad'``.
    """
    if kind == 'deg':
        angles_rad = angles * np.pi / 180
    elif kind == 'rad':
        # Previously this argument was ignored and radians were treated as degrees.
        angles_rad = angles
    else:
        raise ValueError(f"kind must be 'deg' or 'rad', got {kind!r}")

    u = -np.sin(angles_rad) * lengths
    v = -np.cos(angles_rad) * lengths
    return u, v

In [None]:
# Wind direction and speed combined into u/v components, stored as two new columns.
wind_u, wind_v = convert_angles_lengths_to_u_v(
    df_penmanshiel_filtered['Wind direction (°)'],
    df_penmanshiel_filtered['Wind speed (m/s)'],
)
df_penmanshiel_filtered["Wind direction u"] = wind_u
df_penmanshiel_filtered["Wind direction v"] = wind_v

In [None]:
# Columns kept for the analysis: identifiers first, then the model features, then power.
final_columns = [
    'Datetime','Turbine',
    'Long Term Wind (m/s)','Wind direction u','Wind direction v',
    'Nacelle position cos', 'Nacelle position sin','Vane position 1+2 cos', 'Vane position 1+2 sin','Blade Angle (pitch position) cos','Blade Angle (pitch position) sin',
    'Generator bearing rear temperature (°C)','Generator bearing front temperature (°C)','Generator RPM (RPM)','Rotor bearing temp (°C)','Drive train acceleration (mm/ss)',
    'Gear oil temperature (°C)','Gear oil inlet temperature (°C)','Gear oil pump pressure (bar)',
    'Motor temperature (°C)','Motor current (A)',
    'Tower Acceleration X (mm/ss)','Tower Acceleration y (mm/ss)','Power (kW)',
]
df_penmanshiel_final = df_penmanshiel_filtered[final_columns].copy()

In [None]:
df_penmanshiel_final.drop(["Datetime","Turbine",'Power (kW)'],axis=1).corr().stack().value_counts().sort_index(ascending=False)

Correlation between features ranges from -0.88 to 0.82

In [None]:
df_penmanshiel_final.drop(['Turbine','Datetime'],axis=1).hist(figsize=(20,20))
plt.show()

## Atomic asset behaviour extraction
- Divide data into weeks
- Calculate median per week per turbine for each feature

In [None]:
# One row per (7-day window, turbine) holding the median of every remaining column.
# NOTE(review): freq='7D' produces fixed 7-day bins rather than calendar weeks —
# confirm this is the intended definition of "week".
df_penmanshiel_per_week_median = df_penmanshiel_final.groupby([pd.Grouper(key='Datetime', freq='7D'),'Turbine']).median()

In [None]:
# Expose both levels of the (window start, turbine) index as regular columns,
# number the windows from 1 in order of first appearance and build the
# "W<week>:<turbine>" identifier of every row.
window_start = df_penmanshiel_per_week_median.index.get_level_values(0)
turbine_name = df_penmanshiel_per_week_median.index.get_level_values(1)

df_penmanshiel_per_week_median["Datetime"] = window_start
df_penmanshiel_per_week_median["week"] = pd.factorize(window_start)[0] + 1
df_penmanshiel_per_week_median["Turbine"] = turbine_name
df_penmanshiel_per_week_median["week-turbine"] = (
    "W" + df_penmanshiel_per_week_median["week"].astype(str)
    + ":" + df_penmanshiel_per_week_median["Turbine"]
)

In [None]:
df_penmanshiel_per_week_median.set_index(["week-turbine"],inplace=True)

In [None]:
df_penmanshiel_per_week_median.drop(['week', 'Turbine','Datetime'],axis=1).hist(figsize=(20,20))
plt.show()

In [None]:
fig=px.line(df_penmanshiel_per_week_median.drop(['Datetime', 'week', 'Turbine','Power (kW)'],axis=1))
fig.show()

## Discovery of Elementary modes

### Layering

In [None]:
# Keep only the windows whose 'Datetime' label falls in 2018. The year is compared
# directly instead of substring-matching "2018" in the stringified timestamp.
df_penmanshiel_per_week_median=df_penmanshiel_per_week_median[df_penmanshiel_per_week_median["Datetime"].dt.year == 2018].copy()

In [None]:
# creating layers
# Each layer is the 'week-turbine' id plus one group of related features; rows with a
# missing value in that group are dropped from that layer only.
weekly_table = df_penmanshiel_per_week_median.reset_index()

layer1_columns = ['week-turbine','Wind direction u','Wind direction v']
layer2_columns = ['week-turbine','Nacelle position cos', 'Nacelle position sin','Vane position 1+2 cos', 'Vane position 1+2 sin','Blade Angle (pitch position) cos','Blade Angle (pitch position) sin']
layer3_columns = ['week-turbine','Generator bearing rear temperature (°C)','Generator bearing front temperature (°C)','Generator RPM (RPM)','Rotor bearing temp (°C)','Drive train acceleration (mm/ss)']
layer4_columns = ['week-turbine','Gear oil temperature (°C)','Gear oil inlet temperature (°C)','Gear oil pump pressure (bar)']
layer5_columns = ['week-turbine','Motor temperature (°C)','Motor current (A)']
layer6_columns = ['week-turbine','Tower Acceleration X (mm/ss)','Tower Acceleration y (mm/ss)']

Layer1 = weekly_table[layer1_columns].dropna().copy()
Layer2 = weekly_table[layer2_columns].dropna().copy()
Layer3 = weekly_table[layer3_columns].dropna().copy()
Layer4 = weekly_table[layer4_columns].dropna().copy()
Layer5 = weekly_table[layer5_columns].dropna().copy()
Layer6 = weekly_table[layer6_columns].dropna().copy()

In [None]:
layer_data=[Layer1,Layer2,Layer3,Layer4,Layer5,Layer6]
layer_dict={}

# For every layer: keep the frame, min-max scale its features and run the
# cluster-count search of cluster_validation_metrics on the scaled values.
for position, layer_frame in enumerate(layer_data):
    layer_key = 'Layer '+str(position+1)
    layer_entry = layer_dict[layer_key] = {}
    layer_entry["Layer_data"] = layer_frame

    scaled_features = MinMaxScaler().fit_transform(layer_frame.drop(["week-turbine"],axis=1).to_numpy())
    layer_entry["Data_transformed"] = scaled_features
    layer_entry["Clustering_result"] = cvm.find_optimal_number_of_clusters(
        scaled_features,
        algorithm=AgglomerativeClustering,
        display= True,
        seed= 0,
        col_wrap=4,
        score_metrics = ['silhouette_score',
                         'calinski_harabasz_score',
                         'davies_bouldin_score',
                         'connectivity_score'],
        max_number_clusters=15,
    )

#### Layer1

In [None]:
Layer1.drop(['week-turbine'],axis=1).hist()
plt.show()

In [None]:
# 2-D embeddings of the scaled Layer 1 features for the scatter plots of this layer.
# t-SNE and UMAP are stochastic, so their seeds are pinned (0, as for the other
# seeded steps of this notebook) to keep the figures reproducible across runs.
tsne_layer1 = TSNE(n_components=2, random_state=0).fit_transform(layer_dict["Layer 1"]["Data_transformed"])
pca_layer1 = PCA(n_components=2).fit_transform(layer_dict["Layer 1"]["Data_transformed"])
umap_layer1 = umap.UMAP(random_state=0).fit_transform(layer_dict["Layer 1"]["Data_transformed"])

In [None]:
# DBSCAN on the scaled Layer 1 features, shown in the original feature space and in
# the three 2-D embeddings; points are coloured by cluster label.
layer1_scaled = layer_dict["Layer 1"]["Data_transformed"]
db = DBSCAN(eps=0.1).fit(layer1_scaled)
layer1_colors = db.labels_.astype(str)
layer1_pca_title = "PCA (" +str(np.round(np.sum(PCA(n_components=2).fit(layer1_scaled).explained_variance_ratio_),2)) +")"

fig=px.scatter(Layer1,x=Layer1[Layer1.columns[1]],y=Layer1[Layer1.columns[2]],color=layer1_colors,hover_data=["week-turbine"],title="Original")
fig.show()
for plot_title, embedding in (("TSNE", tsne_layer1), (layer1_pca_title, pca_layer1), ("UMAP", umap_layer1)):
    fig=px.scatter(x=embedding[:, 0],y=embedding[:, 1],color=layer1_colors,title=plot_title)
    fig.show()

In [None]:
# Store the DBSCAN label of every row in both references to the Layer 1 frame.
db = DBSCAN(eps=0.1).fit(layer_dict["Layer 1"]["Data_transformed"])
layer1_labels = db.labels_
Layer1["cluster"] = layer1_labels
layer_dict["Layer 1"]["Layer_data"]["cluster"] = layer1_labels

#### Layer2

In [None]:
Layer2.drop(['week-turbine'],axis=1).hist(figsize=(10,10))
plt.show()

In [None]:
# 2-D embeddings of the scaled Layer 2 features for the scatter plots of this layer.
# t-SNE and UMAP are stochastic, so their seeds are pinned (0, as for the other
# seeded steps of this notebook) to keep the figures reproducible across runs.
tsne_layer2= TSNE(n_components=2, random_state=0).fit_transform(layer_dict["Layer 2"]["Data_transformed"])
# Only the first two components are plotted and the plot title reports the variance
# of a 2-component fit, so two components are computed here like for the other layers.
pca_layer2= PCA(n_components=2).fit_transform(layer_dict["Layer 2"]["Data_transformed"])
umap_layer2 = umap.UMAP(random_state=0).fit_transform(layer_dict["Layer 2"]["Data_transformed"])

In [None]:
# DBSCAN on the scaled Layer 2 features, shown in the original feature space and in
# the three 2-D embeddings; points are coloured by cluster label.
layer2_scaled = layer_dict["Layer 2"]["Data_transformed"]
db = DBSCAN(eps=0.15).fit(layer2_scaled)
layer2_colors = db.labels_.astype(str)
layer2_pca_title = "PCA (" +str(np.round(np.sum(PCA(n_components=2).fit(layer2_scaled).explained_variance_ratio_),2)) +")"

fig=px.scatter(Layer2,x="Nacelle position cos", y='Vane position 1+2 cos',color=layer2_colors,hover_data="week-turbine",title="Original")
fig.show()
for plot_title, embedding in (("TSNE", tsne_layer2), (layer2_pca_title, pca_layer2), ("UMAP", umap_layer2)):
    fig=px.scatter(x=embedding[:, 0],y=embedding[:, 1],color=layer2_colors,title=plot_title)
    fig.show()

In [None]:
# Store the DBSCAN label of every row in both references to the Layer 2 frame.
db = DBSCAN(eps=0.15).fit(layer_dict["Layer 2"]["Data_transformed"])
layer2_labels = db.labels_
Layer2["cluster"] = layer2_labels
layer_dict["Layer 2"]["Layer_data"]["cluster"] = layer2_labels

#### Layer3

In [None]:
Layer3.drop(['week-turbine'],axis=1).hist(figsize=(10,10))
plt.show()

In [None]:
# 2-D embeddings of the scaled Layer 3 features for the scatter plots of this layer.
# t-SNE and UMAP are stochastic, so their seeds are pinned (0, as for the other
# seeded steps of this notebook) to keep the figures reproducible across runs.
tsne_layer3= TSNE(n_components=2, random_state=0).fit_transform(layer_dict["Layer 3"]["Data_transformed"])
pca_layer3= PCA(n_components=2).fit_transform(layer_dict["Layer 3"]["Data_transformed"])
umap_layer3 = umap.UMAP(random_state=0).fit_transform(layer_dict["Layer 3"]["Data_transformed"])

In [None]:
# DBSCAN on the scaled Layer 3 features, shown in the original feature space and in
# the three 2-D embeddings; points are coloured by cluster label.
layer3_scaled = layer_dict["Layer 3"]["Data_transformed"]
db = DBSCAN(eps=0.1).fit(layer3_scaled)
layer3_colors = db.labels_.astype(str)
layer3_pca_title = "PCA (" +str(np.round(np.sum(PCA(n_components=2).fit(layer3_scaled).explained_variance_ratio_),2)) +")"

fig = px.scatter(Layer3,x="Generator RPM (RPM)", y='Generator bearing rear temperature (°C)',color=layer3_colors,title="Original")
fig.show()
for plot_title, embedding in (("TSNE", tsne_layer3), (layer3_pca_title, pca_layer3), ("UMAP", umap_layer3)):
    fig=px.scatter(x=embedding[:, 0],y=embedding[:, 1],color=layer3_colors,title=plot_title)
    fig.show()

In [None]:
# Store the DBSCAN label of every row in both references to the Layer 3 frame.
db = DBSCAN(eps=0.1).fit(layer_dict["Layer 3"]["Data_transformed"])
layer3_labels = db.labels_
Layer3["cluster"] = layer3_labels
layer_dict["Layer 3"]["Layer_data"]["cluster"] = layer3_labels

#### Layer4

In [None]:
Layer4.drop(['week-turbine'],axis=1).hist()
plt.show()

In [None]:
# 2-D embeddings of the scaled Layer 4 features for the scatter plots of this layer.
# t-SNE and UMAP are stochastic, so their seeds are pinned (0, as for the other
# seeded steps of this notebook) to keep the figures reproducible across runs.
tsne_layer4= TSNE(n_components=2, random_state=0).fit_transform(layer_dict["Layer 4"]["Data_transformed"])
pca_layer4= PCA(n_components=2).fit_transform(layer_dict["Layer 4"]["Data_transformed"])
umap_layer4 = umap.UMAP(random_state=0).fit_transform(layer_dict["Layer 4"]["Data_transformed"])

In [None]:
# DBSCAN on the scaled Layer 4 features, shown in the original feature space and in
# the three 2-D embeddings; points are coloured by cluster label.
layer4_scaled = layer_dict["Layer 4"]["Data_transformed"]
db = DBSCAN(eps=0.15).fit(layer4_scaled)
layer4_colors = db.labels_.astype(str)
layer4_pca_title = "PCA (" +str(np.round(np.sum(PCA(n_components=2).fit(layer4_scaled).explained_variance_ratio_),2)) +")"

fig = px.scatter(Layer4,x="Gear oil pump pressure (bar)", y='Gear oil inlet temperature (°C)',color=layer4_colors,hover_data="week-turbine",title="Original")
fig.show()
for plot_title, embedding in (("TSNE", tsne_layer4), (layer4_pca_title, pca_layer4), ("UMAP", umap_layer4)):
    fig=px.scatter(x=embedding[:, 0],y=embedding[:, 1],color=layer4_colors,title=plot_title)
    fig.show()

In [None]:
# Store the DBSCAN label of every row in both references to the Layer 4 frame.
db = DBSCAN(eps=0.15).fit(layer_dict["Layer 4"]["Data_transformed"])
layer4_labels = db.labels_
Layer4["cluster"] = layer4_labels
layer_dict["Layer 4"]["Layer_data"]["cluster"] = layer4_labels

#### Layer5

In [None]:
Layer5.drop(['week-turbine'],axis=1).hist()
plt.show()

In [None]:
# 2-D embeddings of the scaled Layer 5 features for the scatter plots of this layer.
# t-SNE and UMAP are stochastic, so their seeds are pinned (0, as for the other
# seeded steps of this notebook) to keep the figures reproducible across runs.
tsne_layer5 = TSNE(n_components=2, random_state=0).fit_transform(layer_dict["Layer 5"]["Data_transformed"])
pca_layer5 = PCA(n_components=2).fit_transform(layer_dict["Layer 5"]["Data_transformed"])
umap_layer5 = umap.UMAP(random_state=0).fit_transform(layer_dict["Layer 5"]["Data_transformed"])

In [None]:
# db = DBSCAN(eps=0.025).fit(layer_dict["Layer 5"]["Data_transformed"])
# fig=px.scatter(Layer5,x=Layer5["Motor temperature (°C)"],y=Layer5["Motor current (A)"],hover_data="week-turbine",color=db.labels_.astype(str)
#                ,width=800,height=800)
# fig.update_layout(legend=dict(
#         title=dict(
#             text='Cluster',
#             font=dict(size=20)
#         )
#     ), xaxis=dict(
#         titlefont=dict(size=20)
#     ),
#     yaxis=dict(
#         titlefont=dict(size=24) 
#     ))
# fig.update_xaxes(tickfont=dict(size=20))
# fig.update_yaxes(tickfont=dict(size=20))
# fig.show()

In [None]:
# DBSCAN on the scaled Layer 5 features, shown in the original feature space and in
# the three 2-D embeddings; points are coloured by cluster label.
db = DBSCAN(eps=0.1).fit(layer_dict["Layer 5"]["Data_transformed"])
layer5_colors = db.labels_.astype(str)

fig=px.scatter(Layer5,x=Layer5["Motor temperature (°C)"],y=Layer5["Motor current (A)"],hover_data="week-turbine",color=layer5_colors,title="Original")
fig.show()
for plot_title, embedding in (("TSNE", tsne_layer5), ("PCA(1.0)", pca_layer5), ("UMAP", umap_layer5)):
    fig=px.scatter(x=embedding[:, 0],y=embedding[:, 1],color=layer5_colors,title=plot_title)
    fig.show()

In [None]:
# Store the DBSCAN label of every row in both references to the Layer 5 frame.
db = DBSCAN(eps=0.1).fit(layer_dict["Layer 5"]["Data_transformed"])
layer5_labels = db.labels_
Layer5["cluster"] = layer5_labels
layer_dict["Layer 5"]["Layer_data"]["cluster"] = layer5_labels

#### Layer6

In [None]:
Layer6.drop(['week-turbine'],axis=1).hist(figsize=(7,2))
plt.show()

In [None]:
# 2-D embeddings of the scaled Layer 6 features for the scatter plots of this layer.
# t-SNE and UMAP are stochastic, so their seeds are pinned (0, as for the other
# seeded steps of this notebook) to keep the figures reproducible across runs.
tsne_layer6 = TSNE(n_components=2, random_state=0).fit_transform(layer_dict["Layer 6"]["Data_transformed"])
pca_layer6 = PCA(n_components=2).fit_transform(layer_dict["Layer 6"]["Data_transformed"])
umap_layer6 = umap.UMAP(random_state=0).fit_transform(layer_dict["Layer 6"]["Data_transformed"])

In [None]:
# DBSCAN on the scaled Layer 6 features, shown in the original feature space and in
# the three 2-D embeddings; points are coloured by cluster label.
db = DBSCAN(eps=0.05).fit(layer_dict["Layer 6"]["Data_transformed"])
layer6_colors = db.labels_.astype(str)

fig=px.scatter(Layer6,x=Layer6["Tower Acceleration X (mm/ss)"],y=Layer6["Tower Acceleration y (mm/ss)"],hover_data="week-turbine",color=layer6_colors,title="Original")
fig.show()
for plot_title, embedding in (("TSNE", tsne_layer6), ("PCA (1.0)", pca_layer6), ("UMAP", umap_layer6)):
    fig=px.scatter(x=embedding[:, 0],y=embedding[:, 1],color=layer6_colors,title=plot_title)
    fig.show()

In [None]:
# Store the DBSCAN label of every row in both references to the Layer 6 frame.
db = DBSCAN(eps=0.05).fit(layer_dict["Layer 6"]["Data_transformed"])
layer6_labels = db.labels_
Layer6["cluster"] = layer6_labels
layer_dict["Layer 6"]["Layer_data"]["cluster"] = layer6_labels

#### Drop outliers

In [None]:
# Rows labelled -1 are removed from every layer, in all three places that hold the
# layer frames: layer_dict, layer_data and the Layer1..Layer6 variables.
layer_data=[Layer1,Layer2,Layer3,Layer4,Layer5,Layer6]
for position, layer_key in enumerate(layer_dict):
    labelled_frame = layer_dict[layer_key]["Layer_data"]
    is_clustered = labelled_frame["cluster"] != -1
    layer_dict[layer_key]["Layer_data"] = labelled_frame[is_clustered]
    layer_data[position] = labelled_frame[is_clustered]

Layer1, Layer2, Layer3, Layer4, Layer5, Layer6 = (
    frame[frame["cluster"] != -1]
    for frame in (Layer1, Layer2, Layer3, Layer4, Layer5, Layer6)
)

In [None]:
# Number of distinct cluster labels left in every layer.
for layer_key, layer_entry in layer_dict.items():
    distinct_labels = np.unique(layer_entry["Layer_data"]["cluster"])
    print(layer_key+"--->"+str(len(distinct_labels))+" clusters")

### Hypergraph

In [None]:
# creating a nested list, where each inner list lists the ids in that cluster.
# Every cluster of every layer becomes one hyperedge, in layer order.
lst = []
for layer_frame in (Layer1, Layer2, Layer3, Layer4, Layer5, Layer6):
    lst.extend(layer_frame.groupby('cluster')['week-turbine'].apply(list).values)

print("Number of clusters in total", len(lst))
print("----------------------------------------------------")

# Running offsets into the flat pin list: the pins of hyperedge e are
# hyperedges[hyperedge_indices[e]:hyperedge_indices[e + 1]].
hyperedge_indices = [0]
cnt = 0
for each in lst:
    cnt = cnt+len(each)
    hyperedge_indices.append(cnt)

print("printing hyper edge indices", hyperedge_indices)
print("-----------------------------------------------------")

nodes_hyper = df_penmanshiel_per_week_median.index.tolist()

# Position of every node id, built once. Calling nodes_hyper.index() for each pin
# rescans the whole list every time; setdefault keeps the first occurrence, which is
# what list.index() returns.
node_position = {}
for position, node_id in enumerate(nodes_hyper):
    node_position.setdefault(node_id, position)

hyperedges_1 = [item for sublist in lst for item in sublist]
hyperedges = [node_position[i] for i in hyperedges_1]
print("Hyper edges: ",hyperedges)
print("-----------------------------------------------------")

num_nodes = len(nodes_hyper)
num_nets = len(hyperedge_indices)-1

print("Number of Nodes: ",num_nodes)
print("Number of Nets: ",num_nets)

In [None]:
k = 2
# Edge and node weights are not considered as they are not relevant in our case.
# The value of k has no significance here because the hypergraph is not partitioned
# with the KaHyPar algorithm.
hypergraph = kahypar.Hypergraph(num_nodes, num_nets, hyperedge_indices, hyperedges, k)
context = kahypar.Context()
# The path needs to be changed based on the location of the file.
context.loadINIconfiguration("cut_kKaHyPar_sea20.ini")

print("number of edges of hyper-graph", hypergraph.numEdges())
print("number of nodes of hyper-graph", hypergraph.numNodes())
print("number of pins of hyper-graph", hypergraph.numPins())

# for i in hypergraph.nodes():
#     print(hypergraph.nodeDegree(i)) # degree of each node.

#### Partitioning the hypergraph using NNS, initially identifying incident edges of each node.

In [None]:
# One sub-list per node, in node order, holding the ids of the hyperedges incident to it.
node_incident_edges = [
    list(hypergraph.incidentEdges(node_id))
    for node_id in hypergraph.nodes()
]

##### A cluster with a single element is not considered a hyperedge

In [None]:
# Clusters list: report how many clusters contain more than one element.
# cluster_list is a filtered copy of lst; lst itself is left unchanged.
print("original number of clusters: ",len(lst))
cluster_list = [sublist for sublist in lst if len(sublist) > 1]
print("Clusters with respect to edges: ",len(cluster_list))

In [None]:
#Obtaining the neighbourhood of each edge.
# The neighbourhood of edge e is the set of all edges incident to at least one node
# of e; it contains e itself whenever e has a node.
total_edges = hypergraph.numEdges()

# A single pass over the nodes: every edge incident to a node gains all edges of that
# node as neighbours. This yields the same sets as scanning all nodes once per edge.
neighbourhood = [set() for _ in range(total_edges)] # neighbourhood of each edge is presented in order.
for edges in node_incident_edges:
    for e in edges:
        neighbourhood[e].update(edges)
number_neighbours = [len(members) for members in neighbourhood]
# print("neighbourhood", neighbourhood)
print("number of neighbours", number_neighbours)

In [None]:
neighbourhood

In [None]:
# Display label of every hyperedge, "Layer<n>- cluster<m>", in layer order and with
# clusters numbered from 1 within each layer.
layer_list_temp = [
    "Layer"+str(layer_number)+"- cluster"+str(cluster_number)
    for layer_number, layer_frame in enumerate([Layer1,Layer2,Layer3,Layer4,Layer5,Layer6], start=1)
    for cluster_number in range(1, layer_frame["cluster"].nunique()+1)
]

In [None]:
# Square 0/1 matrix with one row and one column per hyperedge: entry [e, s] is 1 when
# hyperedge e is a member of the neighbourhood of hyperedge s.
n_hyperedges = len(neighbourhood)
presence_matrix = np.zeros((n_hyperedges, n_hyperedges))
for set_position, members in enumerate(neighbourhood):
    for member in members:
        presence_matrix[member, set_position] = 1

# Stacked bar chart with one trace per neighbourhood.
fig = go.Figure()
for set_position in range(n_hyperedges):
    neighbourhood_bar = go.Bar(
        x=layer_list_temp,
        y=presence_matrix[:, set_position],
        name=layer_list_temp[set_position],
    )
    fig.add_trace(neighbourhood_bar)

fig.update_layout(
    barmode='stack',
    title="Neighbours",
    yaxis_title='Presence Count',
    legend_title='HyperEdge',
)

fig.show()

In [None]:
from sklearn.metrics import adjusted_mutual_info_score,adjusted_rand_score
l_id=[1,2,3,4,5,6]
l_name=["External Wind","Wind positioning(internal)","Drive train","Gear","Motor","Tower"]
layer_data_new=[Layer1,Layer2,Layer3,Layer4,Layer5,Layer6]

# Adjusted mutual information between the clusterings of every pair of layers,
# computed on the week-turbine ids present in both layers.
df_ami=pd.DataFrame(index=l_name,columns=l_name,dtype=float)
# enumerate() is 0-based, so each position indexes layer_data_new directly. The
# previous `[id-1]` lookup shifted every label by one layer (the first name was
# paired with the last layer).
for row_pos,name in enumerate(l_name):
    row_clusters = layer_data_new[row_pos].set_index("week-turbine")["cluster"]
    for col_pos,subname in enumerate(l_name):
        col_clusters = layer_data_new[col_pos].set_index("week-turbine")["cluster"]
        common_points=np.intersect1d(row_clusters.index.values,col_clusters.index.values)
        # Select both label vectors by the same id order so the pairs are aligned explicitly.
        df_ami.loc[name,subname]=np.round(adjusted_mutual_info_score(row_clusters.loc[common_points],col_clusters.loc[common_points]),2)

fig=px.imshow(df_ami,text_auto=True)
fig.show()

In [None]:
# calculating Nearest Neighbourhood Similarity
# For every pair i < j: |N(i) ∩ N(j)| / |N(i) ∪ N(j)| when both edges appear in the
# shared part of their neighbourhoods, and 0 otherwise.
NNS = {}
for i in range(total_edges):
    for j in range(i + 1, total_edges):
        shared = neighbourhood[i] & neighbourhood[j]
        if i in shared and j in shared:
            combined = neighbourhood[i] | neighbourhood[j]
            NNS[(i,j)] = len(shared)/len(combined)
        else:
            NNS[(i,j)] = 0

print(len(NNS))

In [None]:
# Similarity table for display. NNS only holds the pairs (i, j) with i < j, so the
# diagonal and the lower triangle stay NaN. The stored pairs are written directly
# instead of probing every (i, j) combination, and the unused counter is gone.
df_snns=pd.DataFrame(index=np.arange(0,total_edges,1),columns=np.arange(0,total_edges,1))
for (i, j), similarity in NNS.items():
    df_snns.loc[i,j]=similarity

In [None]:
df_snns

- Similarity score
    - 0 means those edges do not overlap directly (they share no node)
    - 1 means those edges overlap completely (their neighbourhoods are identical)

In [None]:
# converting the similarity matrix into distance matrix.
NNS_dist = {k: 1-v for k, v in NNS.items()}

In [None]:
# obtaining the distance matrix
# Full symmetric matrix as nested lists with a zero diagonal; NNS_dist only stores
# the (smaller index, larger index) pairs, hence the min/max lookup.
array_dist = [
    [0 if i == j else NNS_dist[(min(i, j), max(i,j))] for j in range(total_edges)]
    for i in range(total_edges)
]
# print(array)   

In [None]:
# Distance matrix rounded to two decimals as a labelled frame. It is built in one
# call from the nested lists, which gives a float dtype; filling an empty frame cell
# by cell left it with dtype object.
edge_labels = np.arange(0,total_edges,1)
rounded_distances = np.round(np.array(array_dist, dtype=float).reshape(total_edges, total_edges),2)
df_dist=pd.DataFrame(rounded_distances,index=edge_labels,columns=edge_labels)

In [None]:
temp=df_dist.copy()
temp.index=temp.index.astype(str)
temp.columns=temp.columns.astype(str)
fig=px.imshow(temp,text_auto=True)
fig.update_layout(height=700,width=700)
fig.show()

- More similar (more overlapping) means less distant
- Distance metric
    - distance 1 means no overlap
    - distance 0 means full overlap

In [None]:
df_dist.sum(axis=1)

In [None]:
# Similarity matrix: complement of the rounded distance matrix.
df_sim=1-df_dist
df_sim

#### KMedoids

In [None]:
# Clustering using KMedoids, and calculated distance matrix based on NNS
from sklearn_extra.cluster import KMedoids

# Sweep the number of clusters and record the silhouette score and labels of every run.
silhouette_score = []
labels_kmedoids = []
for num_clusters in range(2, total_edges):
    kmedoids_ = KMedoids(n_clusters=num_clusters, metric='precomputed', method='pam', init='k-medoids++', random_state=0).fit_predict(np.array(array_dist))

    # Hyperedge ids grouped by cluster label, in ascending label order.
    hy_sorted_cluster = [
        edge_id
        for cluster_label in range(num_clusters)
        for edge_id in np.where(kmedoids_ == cluster_label)[0].tolist()
    ]

    # Distance matrix re-ordered so that members of the same cluster are adjacent.
    heat_map = [
        [0 if i == j else NNS_dist[(min(i, j), max(i,j))] for j in hy_sorted_cluster]
        for i in hy_sorted_cluster
    ]

    silhouette_score.append(metrics.silhouette_score(array_dist, kmedoids_, metric="precomputed"))
    labels_kmedoids.append(kmedoids_)

    # Only the frame of the last run survives the loop.
    index_heatmap = np.sort(kmedoids_)
    dataframe_heat_map = pd.DataFrame(heat_map, index=index_heatmap, columns=index_heatmap)

In [None]:
# Scores are listed next to their number of clusters; the sweep started at 2.
for i_, each in enumerate(silhouette_score, start=2):
    print(i_, each)

In [None]:
plt.plot(range(2,total_edges), silhouette_score)
plt.ylabel("silhouette score")
plt.xlabel("number of clusters")

#### KMeans

In [None]:
# Cluster-count search with KMeans on the hyperedge distance matrix.
# NOTE(review): array_dist is handed over as the data matrix, so presumably every row
# of distances is treated as a feature vector rather than as precomputed distances —
# confirm against cluster_validation_metrics.
cvm.find_optimal_number_of_clusters(array_dist, algorithm=KMeans, display= True, seed= 0, col_wrap=4 ,score_metrics = ['silhouette_score',
                             'calinski_harabasz_score',
                             'davies_bouldin_score',
                             'connectivity_score'], max_number_clusters=total_edges-1)

#### Agglomerative Clustering

In [None]:
from scipy.cluster.hierarchy import dendrogram, linkage
# NOTE(review): linkage() receives the square matrix here. SciPy reads a 2-D input as
# one observation vector per row; a precomputed distance matrix has to be passed in
# condensed form (e.g. scipy.spatial.distance.squareform). Confirm that clustering on
# the rows of the distance matrix is intended.
Z = linkage(np.array(array_dist), method='ward')

# Plot the dendrogram
plt.figure(figsize=(10, 5))
dendrogram(Z)
plt.xlabel('Sample Index')
plt.ylabel('Distance')
plt.show()

In [None]:
# Cluster-count search with AgglomerativeClustering on the hyperedge distance matrix.
# NOTE(review): array_dist is handed over as the data matrix, so presumably every row
# of distances is treated as a feature vector rather than as precomputed distances —
# confirm against cluster_validation_metrics.
cvm.find_optimal_number_of_clusters(array_dist, algorithm=AgglomerativeClustering, display= True, seed= 0, col_wrap=4 ,score_metrics = ['silhouette_score',
                             'calinski_harabasz_score',
                             'davies_bouldin_score',
                             'connectivity_score'], max_number_clusters=total_edges-1)

### KMedoids
Based on best silhouette score, KMedoids is chosen

In [None]:
# Final KMedoids model on the precomputed hyperedge distances. The number of clusters
# is set in one place and drives both the model and the result dictionary.
n_final_clusters = 5
# NOTE: despite its name, `kmeans` holds the KMedoids cluster label of every hyperedge.
kmeans = KMedoids(n_clusters=n_final_clusters, metric='precomputed', method='pam', init='k-medoids++', random_state=0).fit_predict(np.array(array_dist))
# cluster label -> list of hyperedge ids assigned to it
dict_clusters = {cluster_id: [] for cluster_id in range(n_final_clusters)}
for index, each in enumerate(kmeans):
    dict_clusters[each].append(index)
print(dict_clusters)

In [None]:
def generate_final_clusters(final_clusters1, hypergraph1, method):
    """Map clusters of hyperedges to clusters of data-object short ids.

    Parameters
    ----------
    final_clusters1 : dict
        Maps a cluster key to an iterable of hyperedge ids.
    hypergraph1 : object
        Must provide ``pins(edge)``, which yields the node indices of a hyperedge.
    method : str
        ``'donot_inc_key_in_cluster'``: a cluster holds only the pins of its
        member hyperedges.
        ``'inc_key_in_cluster'``: the pins of the hyperedge whose id equals the
        cluster key are included as well.

    Returns
    -------
    dict
        Cluster key -> set of short ids, where each node index is translated
        through the module-level ``nodes_hyper`` lookup.

    Raises
    ------
    ValueError
        If ``method`` is not one of the two supported values.

    Notes
    -----
    Intermediate results are printed when the module-level ``Debug`` flag is
    truthy.
    """
    valid_methods = ("donot_inc_key_in_cluster", "inc_key_in_cluster")
    if method not in valid_methods:
        # Previously an unknown method left the pin list unbound and failed
        # later with an unrelated-looking UnboundLocalError.
        raise ValueError(
            f"unknown method {method!r}; expected one of {valid_methods}"
        )
    include_key = method == "inc_key_in_cluster"

    # mapping hyperedges to data objects to obtain the clustering solution of data objects
    clustering_nodes = {}
    for key, edges in final_clusters1.items():
        pins_center = set(hypergraph1.pins(key)) if include_key else set()
        for edge in edges:
            pins_center.update(hypergraph1.pins(edge))
        clustering_nodes[key] = pins_center

    if Debug:
        print("clustering of data objects", clustering_nodes) # dict, key = center(hyperedge), values = data objects

    # replacing the index of the data object with its short id
    # (sets are unordered, so member order carries no meaning)
    clus_nodes_short_id = {
        key: {nodes_hyper[x] for x in val}
        for key, val in clustering_nodes.items()
    }

    if Debug:
        print("clustering solution, key = center (hyperedge), val = set of short_ids")
        print(clus_nodes_short_id)
    return clus_nodes_short_id

In [None]:
# Silence the diagnostic prints of generate_final_clusters.
Debug = False

# clus_sol is the final clustering solution of the k-medoids based method:
# key = cluster number, value = set of data objects in that cluster.
clus_sol = generate_final_clusters(
    final_clusters1=dict_clusters,
    hypergraph1=hypergraph,
    method='donot_inc_key_in_cluster',
)

In [None]:
# Members of every cluster as plain lists, in clus_sol order.
cluster_objects_list = list(map(list, clus_sol.values()))

In [None]:
# Number of data objects per cluster.
for key, members in clus_sol.items():
    print(f"{key} : {len(members)}")

In [None]:
# Box plot of weekly-median active power, one box per cluster.
weekly_medians = df_penmanshiel_per_week_median.reset_index()

fig = go.Figure()
for key, members in clus_sol.items():
    in_cluster = weekly_medians["week-turbine"].isin(list(members))
    temp = weekly_medians[in_cluster].copy()
    fig.add_trace(go.Box(y=temp['Power (kW)'], name='E' + str(key)))

fig.update_layout(title="Active power")
fig.update_yaxes(title="Power (kW)")
fig.show()

In [None]:
# One box-plot figure per feature, comparing the weekly-median values of the
# clusters in clus_sol.
feature_columns = [
    'Long Term Wind (m/s)',
    'Wind direction u',
    'Wind direction v',
    'Nacelle position cos',
    'Nacelle position sin',
    'Vane position 1+2 cos',
    'Vane position 1+2 sin',
    'Blade Angle (pitch position) cos',
    'Blade Angle (pitch position) sin',
    'Generator bearing rear temperature (°C)',
    'Generator bearing front temperature (°C)',
    'Generator RPM (RPM)',
    'Rotor bearing temp (°C)',
    'Drive train acceleration (mm/ss)',
    'Gear oil temperature (°C)',
    'Gear oil inlet temperature (°C)',
    'Gear oil pump pressure (bar)',
    'Motor temperature (°C)',
    'Motor current (A)',
    'Tower Acceleration X (mm/ss)',
    'Tower Acceleration y (mm/ss)',
]
weekly_medians = df_penmanshiel_per_week_median.reset_index()

for col in feature_columns:
    fig = go.Figure()
    for key, members in clus_sol.items():
        in_cluster = weekly_medians["week-turbine"].isin(list(members))
        temp = weekly_medians[in_cluster].copy()
        fig.add_trace(go.Box(y=temp[col], name=str(key)))

    fig.update_layout(title=col)
    fig.show()

### Cluster wise analysis

In [None]:
# Per cluster: size, number of distinct turbines and weeks, and a bar chart of
# the member turbines coloured by week.
for key, members in clus_sol.items():
    member_ids = list(members)
    print("Cluster: ", key)
    print(f"{len(member_ids)} turbine-week pairs")

    temp = df_penmanshiel_per_week_median.loc[member_ids]
    print("Number of turbines: ", temp["Turbine"].nunique())
    print("Number of unique weeks: ", temp["week"].nunique())

    turbines = temp["Turbine"].values
    week_labels = temp["week"].astype(str).values
    fig = px.bar(turbines, color=week_labels)
    fig.update_xaxes(title="Turbine")
    fig.update_layout(legend_title_text="Week")
    fig.show()

In [None]:
# Median of every listed feature over the members of each cluster
# (rows = clusters, columns = features).
stat_columns = [
    'Wind direction u',
    'Wind direction v',
    'Nacelle position cos',
    'Nacelle position sin',
    'Vane position 1+2 cos',
    'Vane position 1+2 sin',
    'Blade Angle (pitch position) cos',
    'Blade Angle (pitch position) sin',
    'Generator bearing rear temperature (°C)',
    'Generator bearing front temperature (°C)',
    'Generator RPM (RPM)',
    'Rotor bearing temp (°C)',
    'Drive train acceleration (mm/ss)',
    'Gear oil temperature (°C)',
    'Gear oil inlet temperature (°C)',
    'Gear oil pump pressure (bar)',
    'Motor temperature (°C)',
    'Motor current (A)',
    'Tower Acceleration X (mm/ss)',
    'Tower Acceleration y (mm/ss)',
    'Power (kW)',
]
df_clus_stats = pd.DataFrame(index=clus_sol.keys(), columns=stat_columns)

weekly_medians = df_penmanshiel_per_week_median.reset_index()
for ind in df_clus_stats.index:
    in_cluster = weekly_medians["week-turbine"].isin(list(clus_sol[ind]))
    temp = weekly_medians[in_cluster].copy()
    df_clus_stats.loc[ind] = temp[df_clus_stats.columns].median()

# String row labels so that they can be prefixed / shown as categories later.
df_clus_stats.index = df_clus_stats.index.astype(str)

In [None]:
# Heat map of the per-cluster feature medians, min-max scaled per column.
scaled_stats = MinMaxScaler().fit_transform(df_clus_stats)
temp = pd.DataFrame(
    scaled_stats,
    columns=df_clus_stats.columns,
    index=df_clus_stats.index,
)
# Round, then stringify, so the cell annotations show two decimals.
temp = temp.round(2).astype(str)
temp.index = "E" + temp.index
# The first row is left out of the figure.
temp = temp.iloc[1:]

sensor_view = temp.drop("Power (kW)", axis=1)
fig = px.imshow(sensor_view, text_auto=True, labels=dict(y="Elementary mode"))
fig.update_layout(height=800, width=1000, coloraxis_showscale=False)
fig.show()

#### KPI

In [None]:
# Columns kept for the KPI analysis: identifiers, sensor channels and the
# power / energy / grid indicators.
# 'Reactive power (kvar)' used to be listed twice, which produced a duplicate
# column label in the selection; it is now selected once.
kpi_columns = [
    'Datetime', 'Turbine',
    'Wind direction (°)', 'Wind speed (m/s)', 'Blade angle (pitch position) (°)',
    'Vane position 1+2 (°)', 'Nacelle position (°)',
    'Generator bearing rear temperature (°C)', 'Generator bearing front temperature (°C)',
    'Generator RPM (RPM)', 'Rotor bearing temp (°C)', 'Drive train acceleration (mm/ss)',
    'Gear oil temperature (°C)', 'Gear oil inlet temperature (°C)', 'Gear oil pump pressure (bar)',
    'Motor temperature (°C)', 'Motor current (A)',
    'Tower Acceleration X (mm/ss)', 'Tower Acceleration y (mm/ss)',
    'Power (kW)', 'Reactive power (kvar)', 'Apparent power (kVA)',
    'Potential power default PC (kW)', 'Potential power reference turbines (kW)',
    'Potential power estimated (kW)',
    'Energy Export (kWh)', 'Energy Import (kWh)', 'Energy Theoretical (kWh)',
    'Potential Power Energy Budget (kW)',
    'Grid voltage (V)', 'Grid current (A)', 'Grid frequency (Hz)',
    'Power factor (cosphi)', 'Capacity factor', 'Production Factor',
]
df_penmanshiel_kpi = df_penmanshiel_filtered[kpi_columns].copy()

In [None]:
# Median of every KPI column per 7-day bin and turbine.
weekly_by_turbine = [pd.Grouper(key='Datetime', freq='7D'), 'Turbine']
df_penmanshiel_kpi_per_week_median = df_penmanshiel_kpi.groupby(weekly_by_turbine).median()

In [None]:
# Expose the two group keys as columns and derive the "W<week>:<turbine>" label.
bin_start = df_penmanshiel_kpi_per_week_median.index.get_level_values(0)
turbine_name = df_penmanshiel_kpi_per_week_median.index.get_level_values(1)

df_penmanshiel_kpi_per_week_median["Datetime"] = bin_start
# Week number: 1-based rank of the 7-day bin in order of first appearance.
df_penmanshiel_kpi_per_week_median["week"] = pd.factorize(bin_start)[0] + 1
df_penmanshiel_kpi_per_week_median["Turbine"] = turbine_name

df_penmanshiel_kpi_per_week_median["week-turbine"] = (
    "W"
    + df_penmanshiel_kpi_per_week_median["week"].astype(str)
    + ":"
    + df_penmanshiel_kpi_per_week_median["Turbine"]
)

In [None]:
# Re-index the weekly KPI medians by their "W<week>:<turbine>" label, in place.
# The column is consumed by this call, so the cell cannot be run twice in a row.
df_penmanshiel_kpi_per_week_median.set_index(["week-turbine"],inplace=True)

In [None]:
# Flat copy of the weekly KPI medians with the index moved back into a column.
temp=df_penmanshiel_kpi_per_week_median.reset_index().copy()

In [None]:
# Median of every listed KPI over the members of each cluster
# (rows = clusters, columns = KPIs).
kpi_stat_columns = [
    'Power (kW)',
    'Apparent power (kVA)',
    'Potential power reference turbines (kW)',
    'Energy Export (kWh)',
    'Energy Import (kWh)',
    'Energy Theoretical (kWh)',
    'Grid voltage (V)',
    'Grid current (A)',
    'Grid frequency (Hz)',
    'Power factor (cosphi)',
    'Capacity factor',
    'Production Factor',
]
df_clus_stats_kpi = pd.DataFrame(index=clus_sol.keys(), columns=kpi_stat_columns)

weekly_kpi = df_penmanshiel_kpi_per_week_median.reset_index()
for ind in df_clus_stats_kpi.index:
    in_cluster = weekly_kpi["week-turbine"].isin(list(clus_sol[ind]))
    temp = weekly_kpi[in_cluster].copy()
    df_clus_stats_kpi.loc[ind] = temp[df_clus_stats_kpi.columns].median()

df_clus_stats_kpi.index = df_clus_stats_kpi.index.astype(str)

In [None]:
# Heat map of the per-cluster KPI medians, min-max scaled per column.
temp = pd.DataFrame(
    MinMaxScaler().fit_transform(df_clus_stats_kpi),
    columns=df_clus_stats_kpi.columns,
    index=df_clus_stats_kpi.index,
)
# Drop incomplete rows while the values are still numeric. Doing it after
# astype(str), as before, never removed anything because NaN had already
# become the string 'nan'.
temp = temp.dropna()
# Round, then stringify, so the cell annotations show two decimals.
temp = temp.round(2).astype(str)
fig = px.imshow(temp, text_auto=True, labels=dict(y="Cluster"))
fig.update_layout(height=800, width=1000, coloraxis_showscale=False)
fig.show()

### Week wise analysis

In [None]:
# For every week number 1..53: the clusters that contain at least one member
# whose label includes "W<week>:".
week_cluster = {}
for w_id in np.arange(1, 54, 1):
    week_tag = "W" + str(w_id) + ":"
    week_cluster[w_id] = [
        key
        for key, members in clus_sol.items()
        if any(week_tag in element for element in members)
    ]

# For every turbine: the clusters that contain at least one member whose label
# includes the turbine name.
turbine_cluster = {}
for turb in df_penmanshiel_per_week_median["Turbine"].unique():
    turbine_cluster[turb] = [
        key
        for key, members in clus_sol.items()
        if any(turb in element for element in members)
    ]

In [None]:
# Presence tables filled with the strings "0" / "1":
# rows = weeks (resp. turbines), columns = cluster numbers 0 .. k-1.
cluster_numbers = np.arange(0, len(clus_sol), 1)

df_week_cluster = pd.DataFrame("0", index=list(week_cluster), columns=cluster_numbers)
for ind, present in week_cluster.items():
    df_week_cluster.loc[ind, present] = "1"
df_week_cluster.index = df_week_cluster.index.astype(str)
df_week_cluster.columns = df_week_cluster.columns.astype(str)


df_turbine_cluster = pd.DataFrame("0", index=list(turbine_cluster), columns=cluster_numbers)
for ind, present in turbine_cluster.items():
    df_turbine_cluster.loc[ind, present] = "1"
df_turbine_cluster.index = df_turbine_cluster.index.astype(str)
df_turbine_cluster.columns = df_turbine_cluster.columns.astype(str)

In [None]:
# Presence heat maps: which cluster occurs in which week / on which turbine.
# `aspect` is an argument of px.imshow itself. It used to be placed inside the
# `labels` dict, where it is not one of the documented keys (x, y, color).
fig = px.imshow(
    df_week_cluster.T,
    labels=dict(x="Week", y="Cluster", color="Presence"),
    aspect="auto",
)
# fig.update_layout(height=700)
fig.show()

fig = px.imshow(
    df_turbine_cluster.T,
    labels=dict(x="Turbine", y="Cluster", color="Presence"),
    aspect="auto",
)
# fig.update_layout(height=500)
fig.show()

## Detection and evolution analysis of composite modes

### Formal Concept Analysis

In [None]:
# Formal context for FCA: one row per turbine-week object, one boolean column
# "E<cluster>" per cluster, True when the object belongs to that cluster.
# Membership is tested per column with Index.isin; the earlier version filled
# the table cell by cell and rebuilt list(clus_sol[clus]) for every cell.
fca_objects = df_penmanshiel_per_week_median.index.astype(str)
df_fca = pd.DataFrame(
    {
        "E" + str(clus): fca_objects.isin(list(members))
        for clus, members in clus_sol.items()
    },
    index=fca_objects,
)

In [None]:
# Number of rows (turbine-week objects) in the formal context table.
len(df_fca)

In [None]:
# Remove, in place, every row that has a missing membership flag.
df_fca.dropna(inplace=True)

In [None]:
from fcapy.context import FormalContext
from fcapy.lattice import ConceptLattice
from fcapy.visualizer import LineVizNx

# Build the formal context from the boolean membership table and derive its
# concept lattice.
K = FormalContext.from_pandas(pd.DataFrame(df_fca))
L = ConceptLattice.from_context(K)

# Draw the lattice and store the figure as fca.png.
fig, ax = plt.subplots(figsize=(10, 5))
vsl = LineVizNx()
vsl.draw_concept_lattice(
    L,
    ax=ax,
    flg_node_indices=True,
    flg_new_intent_count_prefix=False,
)
ax.set_title('')
plt.tight_layout()
plt.savefig("fca.png")
plt.show()

In [None]:
# Attribute set and number of objects of every concept in the lattice.
for concept in L:
    attributes = concept.intent
    instance_count = len(concept.extent)
    print(f'Concept with attributes {attributes}: {instance_count} instances')

In [None]:
# For every (week, turbine) cell: the list of clusters that contain the
# "W<week>:<turbine>" object. Every distinct list is also collected once, in
# order of first appearance.
df_week_turbine_plot = pd.DataFrame(
    index=df_week_cluster.index.astype(int),
    columns=df_turbine_cluster.index,
)
unique_combination = []
for w_id in np.arange(1, 54, 1):
    for t_id in df_turbine_cluster.index:
        week_turb = "W" + str(w_id) + ":" + t_id
        comb = [key for key in clus_sol if week_turb in clus_sol[key]]
        df_week_turbine_plot.loc[w_id, t_id] = comb
        if comb not in unique_combination:
            unique_combination.append(comb)

In [None]:
# Give every distinct cluster combination an integer id starting at 100, in
# order of first appearance: {tuple(combination): id}.
# Built in one step; the earlier arange upper bound was one too large and only
# worked because zip truncates to the shorter input.
unique_dict = {
    tuple(combination): 100 + offset
    for offset, combination in enumerate(unique_combination)
}
unique_dict

In [None]:
# Replace every cluster-combination list by its integer id from unique_dict.
df_week_turbine_plot_id = df_week_turbine_plot.copy()
for week_label in df_week_turbine_plot_id.index:
    for turbine in df_week_turbine_plot_id.columns:
        combination = df_week_turbine_plot_id.loc[week_label, turbine]
        df_week_turbine_plot_id.loc[week_label, turbine] = unique_dict[tuple(combination)]

In [None]:
# Turbine x week heat map of the combination ids. Index and values are cast to
# strings (the values via int, which normalises them first).
week_labels = df_week_turbine_plot_id.index.astype(str)
df_week_turbine_plot_id.index = week_labels
df_week_turbine_plot_id = df_week_turbine_plot_id.astype(int).astype(str)

combination_grid = df_week_turbine_plot_id.T
fig = px.imshow(combination_grid, labels={"x": "Week", "y": "Turbine"})
fig.update_layout(height=500)
fig.show()
unique_dict

In [None]:
# df_week_turbine_plot_id.replace("100",3,inplace=True)
# df_week_turbine_plot_id.replace("101",1,inplace=True)
# df_week_turbine_plot_id.replace("102",6,inplace=True)
# df_week_turbine_plot_id.replace("103",5,inplace=True)
# df_week_turbine_plot_id.replace("104",4,inplace=True)
# df_week_turbine_plot_id.replace("105",8,inplace=True)
# df_week_turbine_plot_id.replace("106",7,inplace=True)
# df_week_turbine_plot_id.replace("107",2,inplace=True)

In [None]:
# fig = px.imshow(df_week_turbine_plot_id.T,labels=dict(x="Week", y="Turbine"),text_auto=True)
# fig.update_layout(height=500)
# fig.show()
# unique_dict

In [None]:
# Stacked horizontal bars: for every elementary mode, how many of its data
# points belong to 1, 2, 3, ... elementary modes in total.
df_fca_temp = df_fca.copy()
df_fca_temp['Cluster_Count'] = df_fca_temp.sum(axis=1)

# Prepare data for plotting
cluster_data = []
for cluster in df_fca_temp.columns[:-1]:  # exclude 'Cluster_Count'
    cluster_members = df_fca_temp[df_fca[cluster].astype(bool)]
    for count in cluster_members['Cluster_Count'].unique():
        cluster_data.append({
            'Cluster': cluster,
            'Datapoints': (cluster_members['Cluster_Count'] == count).sum(),
            'Cluster Membership Count': count
        })

plot_data = pd.DataFrame(cluster_data)
# String labels make the membership count a discrete colour category.
plot_data["Cluster Membership Count"] = plot_data["Cluster Membership Count"].astype(str)
# The df_fca column names already carry the "E" prefix, so it is not added
# again here (that produced labels such as "EE0").

# Create the plot
fig = px.bar(plot_data, y='Cluster', x='Datapoints',
             color='Cluster Membership Count', orientation='h',
             labels={'Datapoints': 'Number of Datapoints', 'Cluster Membership Count': 'Number of elementary mode a datapoint belongs to'},
             color_continuous_scale=px.colors.sequential.Viridis,text='Datapoints',text_auto=True)
fig.update_layout(xaxis_title='Number of Datapoints', yaxis_title='Elementary mode',legend=dict(x=0.5, y=0.9))
fig.update_traces(texttemplate='%{text}',textposition='inside')
fig.show()

In [None]:
# Attach to every turbine-week row the combination id stored in
# df_week_turbine_plot_id.
# NOTE(review): `temp` is the same object as df_penmanshiel_per_week_median
# (no copy), so the new "cluster" column and the final reset_index are applied
# to that frame as well - confirm this is intended before re-running cells
# that index it by "week-turbine".
temp=df_penmanshiel_per_week_median
temp["cluster"]=None
for ind,row in temp.iterrows():
    # Row labels have the form "W<week>:<turbine>".
    wk=ind.split(":")[0].split("W")[1]
    tb=ind.split(":")[1]
    # wk is a string, so df_week_turbine_plot_id must already have its index
    # cast to str.
    temp.loc[ind,"cluster"]=df_week_turbine_plot_id.loc[wk,tb]
# Move the row labels back into a regular column.
temp.reset_index(inplace=True)

##### Adjusted mutual information score

In [None]:
from sklearn.metrics import adjusted_mutual_info_score,adjusted_rand_score

# Pairwise adjusted mutual information between the per-layer clusterings and
# the hypergraph-based clustering, computed on the turbine-week objects that
# both members of a pair have in common.
l_id=[1,2,3,4,5,6,7]
l_name=["External Wind","Wind positioning(internal)","Drive train","Gear","Motor","Tower","Hypercluster"]
layer_data_new=[Layer1,Layer2,Layer3,Layer4,Layer5,Layer6,temp[["cluster","week-turbine"]]]

# One label Series per layer, indexed by week-turbine, so that two layers can
# be compared object by object.
# Assumes week-turbine values are unique within a layer - TODO confirm.
layer_labels = [layer.set_index("week-turbine")["cluster"] for layer in layer_data_new]

df_ami=pd.DataFrame(index=l_name,columns=l_name)
# Names and layers are paired positionally. The earlier enumerate(...) with
# [id-1] indexing shifted every label by one layer (the first name was paired
# with the last layer) and shadowed the builtin `id`.
for name, row_labels in zip(l_name, layer_labels):
    for subname, col_labels in zip(l_name, layer_labels):
        common_points = np.intersect1d(row_labels.index.values, col_labels.index.values)
        # Select both label vectors in the same object order; filtering each
        # layer separately left their rows unaligned.
        score = adjusted_mutual_info_score(
            row_labels.loc[common_points],
            col_labels.loc[common_points],
        )
        df_ami.loc[name, subname] = np.round(score, 2)

fig=px.imshow(df_ami,text_auto=True)
fig.show()