The output in Looker Studio:
[LINK](https://lookerstudio.google.com/embed/reporting/2d94a92e-7881-4adf-bfd7-c32a2c5b77fb/page/axcID)

Clustering with KMeans and biclustering with SpectralBiclustering

In [18]:
import pandas as pd
import chardet
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import SpectralBiclustering

In [42]:
# Sniff the text encoding from the file's raw bytes so pandas can decode it.
with open('testHeatRawData1.csv', 'rb') as file:
    raw_data = file.read()
encoding = chardet.detect(raw_data)['encoding']

# Load the tab-separated file using the detected encoding.
data = pd.read_csv('testHeatRawData1.csv', sep='\t', encoding=encoding)

# Reshape to one row per 'Child Id' and one column per 'Name', holding 'Score'.
# Combinations that never occur in the input are filled with 0.
pivot_data = data.pivot_table(values='Score', index='Child Id', columns='Name')
pivot_data = pivot_data.fillna(0)

# Standardize each score column before clustering.
scaler = StandardScaler()
standardized_data = scaler.fit_transform(pivot_data)

# Group the children into 5 k-means clusters; the seed is fixed.
kmeans = KMeans(n_clusters=5, random_state=0).fit(standardized_data)
labels = kmeans.labels_

# Attach each child's cluster id, cast everything to int (fractional scores
# are truncated), and turn 'Child Id' back into a regular column.
pivot_data = pivot_data.assign(Cluster=labels).astype(int).reset_index()

# Unpivot to long format: one row per (child, cluster, score category).
score_columns = ['RIDELA', 'RSDQB', 'RSDQE', 'RSDQH', 'RSDQP']
melted_data = pivot_data.melt(
    id_vars=['Child Id', 'Cluster'],
    value_vars=score_columns,
    var_name='Category',
    value_name='Score',
)

# Write the long-format result, including the running row index, to disk.
melted_data.to_csv('clustering_data.csv', index=True)



In [45]:
# Detect the file encoding from the raw bytes so pandas can decode the file.
with open('testHeatRawData1.csv', 'rb') as file:
    raw_data = file.read()
encoding = chardet.detect(raw_data)['encoding']

# Read the tab-separated file with the detected encoding.
data = pd.read_csv('testHeatRawData1.csv', encoding=encoding, delimiter='\t')

# Prepare the data for biclustering: one row per 'Child Id', one column per
# 'Name', holding 'Score'; combinations missing from the input become 0.
pivot_data = data.pivot_table(index='Child Id', columns='Name', values='Score').fillna(0)

# Standardize the data.
# NOTE(review): spectral biclustering normalizes by row/column sums, and
# standardized columns contain negative values and sum to ~0 -- confirm that
# standardized input is appropriate here.
scaler = StandardScaler()
standardized_data = scaler.fit_transform(pivot_data)

# Perform biclustering: 5 row clusters, 3 column clusters, fixed seed.
biclustering = SpectralBiclustering(n_clusters=(5, 3), random_state=0)
biclustering.fit(standardized_data)

row_labels = biclustering.row_labels_
column_labels = biclustering.column_labels_

# Record the cluster of every row in an extra column, and the cluster of every
# column in an extra row. The extra row needs a value under 'Row Cluster' too,
# so a dummy -1 is appended there.
pivot_data['Row Cluster'] = row_labels
pivot_data.loc['Column Cluster'] = np.append(column_labels, -1)

# Rearrange the data according to the biclusters. A stable sort keeps the
# original relative order inside each cluster, so the output is reproducible.
# pivot_data now has one row more than row_labels (the 'Column Cluster' row);
# its position is appended explicitly so the reordering does not drop it.
row_order = np.append(np.argsort(row_labels, kind='stable'), len(row_labels))
# The dummy -1 sorts first, which places 'Row Cluster' as the leading column.
column_order = np.argsort(np.append(column_labels, -1), kind='stable')
biclustered_data = pivot_data.iloc[row_order, column_order]

# Cast the *rearranged* frame to int (fractional scores are truncated).
# Casting pivot_data here instead would throw away the reordering above.
biclustered_data = biclustered_data.astype(int)

# Save the biclustered data to a CSV file, keeping the row labels.
biclustered_data.to_csv('biclustered_data.csv', index=True)
