# 03_Clustering wells to specific patterns

This script clusters together wells with similar movement patterns.

## 1) Import packages

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from adjustText import adjust_text

## 2) Read the ViewPoint File

Use `pd.read_csv` instead if the file is a CSV.

In [None]:
# Load the ViewPoint export into a DataFrame.
# Replace the placeholder below with the location and name of your file.
data = pd.read_excel(r"LOCATIONANDNAMEOFTHEFILE")

## 3) Subset and aggregate the measurements to cluster

You can choose the aggregation method; it is currently set to the mean.

In [None]:
# Keep the grouping key ("aname") plus the measurement columns to cluster on.
measurement_columns = ["inadist", "smldist", "lardur", "lardist"]
sub_data = data.loc[:, ["aname", *measurement_columns]]

# Collapse the rows to one per "aname" by averaging each measurement.
aggregated_data = sub_data.groupby("aname").mean()
aggregated_data.head()

## 4) Standardize the data

In [None]:
# Put every measurement on a comparable scale before clustering:
# fit the scaler on the aggregated values, then apply it to the same matrix.
X = aggregated_data.to_numpy()
scaler = StandardScaler().fit(X)
X_standardized = scaler.transform(X)

## 4) Perform k-means clustering

You can choose the number of clusters

In [None]:
# Number of clusters to look for; change this to explore other groupings.
n_clusters = 3
# Fixed seed so that cluster assignments (and plot colors) are the same on
# every run; change or remove it to explore other initializations.
random_state = 42

# n_init is set explicitly because its default differs between scikit-learn
# releases; pinning it keeps results consistent across versions.
kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=random_state)
labels = kmeans.fit_predict(X_standardized)

## 5) Plot the results

In [None]:
# Scatter the rows of the aggregated table on the first two standardized
# features, colored by cluster label.
# NOTE: the clustering used every feature column; this 2-D view only shows
# the first two, so clusters may overlap visually.
x_coords, y_coords = X_standardized[:, 0], X_standardized[:, 1]
x_feature, y_feature = aggregated_data.columns[:2]

plt.figure(figsize=(10, 6))
plt.scatter(x_coords, y_coords, c=labels, cmap='rainbow')

# Annotate the data points with well names using the adjustText library
texts = []
for well_name, x_pos, y_pos in zip(aggregated_data.index, x_coords, y_coords):
    texts.append(plt.text(x_pos, y_pos, well_name, ha='center', va='bottom'))

# Automatically adjust label positions to avoid overlapping
adjust_text(texts, arrowprops=dict(arrowstyle='->', color='black'), force_text=0.2)

# Label the axes with the plotted column names rather than generic placeholders.
plt.xlabel(f'{x_feature} (standardized)')
plt.ylabel(f'{y_feature} (standardized)')
plt.title('Cluster Visualization with Well Names (Avoiding Overlapping)')
plt.show()