# Chapter 7

# 7.3.2. Identifying Patterns and Anomalies

# In [None]:
import numpy as np
import pandas as pd
from scipy.stats import zscore
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import seaborn as sns
import matplotlib.pyplot as plt

# Example data
# Load the CSV into the DataFrame that every analysis step below works on.
data = pd.read_csv(filepath_or_buffer='example_data/data.csv')

# Correlation Analysis
# Pairwise Pearson correlation (the DataFrame.corr default) between the
# columns of `data`, rendered as a heatmap with the coefficient in each cell.
correlation_matrix = data.corr(method='pearson')
sns.heatmap(data=correlation_matrix, annot=True)
plt.title(label='Correlation Analysis')
plt.show()

# Cluster Analysis
# Partition the rows of `data` into three k-means clusters and store the
# label of each row in a new 'Cluster' column (later steps drop this column
# again before working on the features).
# random_state is fixed so the cluster labels are reproducible between runs;
# n_init is set explicitly because its default differs across scikit-learn
# versions.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
clusters = kmeans.fit_predict(data)
data['Cluster'] = clusters

# pairplot builds a grid of subplots and returns a PairGrid; plt.title would
# only title the last subplot drawn, so the title is set on the figure.
pair_grid = sns.pairplot(data, hue='Cluster')
# y > 1 lifts the title above the top row of subplots so it does not overlap.
pair_grid.figure.suptitle('Cluster analysis', y=1.02)
plt.show()

# Principal Component Analysis (PCA)
# Project the feature columns (everything except the 'Cluster' label column)
# onto the first two principal components and scatter-plot the result.
feature_frame = data.drop(columns='Cluster')
pca = PCA(n_components=2)
principal_components = pca.fit_transform(feature_frame)
pca_df = pd.DataFrame(principal_components, columns=['PC1', 'PC2'])
sns.scatterplot(data=pca_df, x='PC1', y='PC2')
plt.title(label='PCA Result')
plt.show()

# Anomaly Detection using Z-scores
# Standardise every feature column (the 'Cluster' label column is excluded)
# and flag a row as an outlier when the absolute z-score of at least one of
# its features exceeds `threshold`.
z_scores = np.abs(zscore(data.drop('Cluster', axis=1)))
threshold = 3
# `outliers` is the (row_positions, column_positions) pair of every cell that
# exceeds the threshold.
outliers = np.where(z_scores > threshold)
# A row with several outlying features appears once per feature in
# outliers[0]; de-duplicate so each flagged row is selected exactly once.
outlier_rows = np.unique(outliers[0])
outlier_data = data.iloc[outlier_rows]

# Plot PLK1 against the row index and highlight the flagged rows in red.
# NOTE: rows are flagged on ANY feature, so a red point is not necessarily
# extreme in PLK1 itself.
plt.scatter(data.index, data['PLK1'], label='All samples')
plt.scatter(outlier_data.index, outlier_data['PLK1'], color='r',
            label='Outlier in any feature (|z| > 3)')
plt.title('Anomaly Detection')
plt.legend()
plt.show()