In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from torch.utils.tensorboard import SummaryWriter

In [None]:
# Load the Online Retail transactions and derive per-row features.
# Original dataset: https://archive.ics.uci.edu/ml/machine-learning-databases/00352/Online%20Retail.xlsx
df = (
    pd.read_csv(
        './Online_Retail.csv',
        # nrows=20000  # uncomment to work on a subset of rows
    )
    # Every missing value, in every column, is replaced by the sentinel -1.
    .fillna(-1)
    # assign() evaluates its keyword arguments in order, so the date-part
    # columns below read the InvoiceDate column after it has been parsed.
    .assign(
        TotalPrice=lambda d: d['Quantity'] * d['UnitPrice'],
        InvoiceDate=lambda d: pd.to_datetime(d['InvoiceDate']),
        Month=lambda d: d['InvoiceDate'].dt.month,
        Day=lambda d: d['InvoiceDate'].dt.day,
        Hour=lambda d: d['InvoiceDate'].dt.hour,
    )
)

# Optional, currently disabled: encode StockCode as integer category codes.
# df.StockCode = df.StockCode.astype('category')
# df.StockCode = df.StockCode.cat.codes
df.head()

In [None]:
# Number of rows per distinct StockCode value, most frequent first.
df['StockCode'].value_counts()

In [None]:
# Number of rows per distinct CustomerID value, most frequent first.
# Rows whose CustomerID was missing in the file show up here under -1,
# because missing values are filled with -1 when the data is loaded.
df['CustomerID'].value_counts()

In [None]:
# Collapse the transactions to one row per (CustomerID, Country) pair:
# summed TotalPrice and Quantity, plus the number of distinct Month, Day and
# Hour values seen for that pair.
# NOTE(review): missing CustomerIDs were filled with -1 at load time, so they
# are aggregated here as one pseudo-customer per country — confirm this is intended.
grouped_data = (
    df.groupby(by=['CustomerID', 'Country'])
    .agg(
        {
            'TotalPrice': 'sum',
            'Quantity': 'sum',
            'Month': 'nunique',
            'Day': 'nunique',
            'Hour': 'nunique',
        }
    )
    .reset_index()
)

In [None]:
# Standardise the five aggregated columns used as clustering features.
# The fitted scaler is kept in `scaler`; the transformed matrix in `scaled_data`.
feature_columns = ['TotalPrice', 'Quantity', 'Month', 'Day', 'Hour']
scaler = StandardScaler()
scaled_data = scaler.fit_transform(grouped_data[feature_columns])

In [None]:
# Dimensions of the scaled feature matrix.
np.shape(scaled_data)

In [None]:
# Cluster the scaled customer features into 3 groups.
# random_state pins the centroid initialisation so that Restart & Run All
# reproduces the same cluster ids and assignments. n_init is set explicitly
# because scikit-learn's default for it changed between versions.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
kmeans.fit(scaled_data)

# Cluster index assigned to each row of scaled_data (same row order).
labels = kmeans.labels_

In [None]:
# Attach each row's cluster label; `labels` is in the same row order as
# grouped_data because it was produced from features taken from that frame.
grouped_data['Cluster'] = labels

# Mean of every clustering feature within each cluster, one row per cluster.
cluster_summary = (
    grouped_data
    .groupby('Cluster')[['TotalPrice', 'Quantity', 'Month', 'Day', 'Hour']]
    .mean()
    .reset_index()
)

# Leave the frame as the cell's last expression so the notebook renders it as
# a rich table instead of the plain-text dump produced by print().
cluster_summary

In [None]:
# Total price vs. quantity for every (customer, country) row, coloured by cluster.
# matplotlib is imported with the other dependencies in the first cell.
fig, ax = plt.subplots()
points = ax.scatter(
    grouped_data['TotalPrice'],
    grouped_data['Quantity'],
    c=grouped_data['Cluster'],
)
ax.set_xlabel('Total Price')
ax.set_ylabel('Quantity')
ax.set_title('Customer clusters: total price vs. quantity')
# Map each colour back to its cluster id so the figure can be read on its own.
ax.legend(*points.legend_elements(), title='Cluster')
plt.show()

In [None]:
# 2x2 grid of scatter plots: Total Price on every x-axis, one other feature
# per panel on the y-axis, points coloured by cluster.
fig, axs = plt.subplots(2, 2, figsize=(10, 10))

# axs.flat walks the grid row by row: [0, 0], [0, 1], [1, 0], [1, 1].
for ax, column in zip(axs.flat, ['Quantity', 'Month', 'Day', 'Hour']):
    ax.scatter(grouped_data['TotalPrice'], grouped_data[column], c=grouped_data['Cluster'])
    ax.set_xlabel('Total Price')
    ax.set_ylabel(column)

plt.show()

In [1]:
# Import modules
import numpy as np
import torch
from sklearn.cluster import KMeans
from torch.utils.tensorboard import SummaryWriter

# Stand-alone demo: cluster random 2-D points and log the result to TensorBoard.
# NOTE(review): this cell rebinds `kmeans` and `labels`, which the retail
# analysis cells above also define — confirm nothing run afterwards expects
# the retail versions.

# Generate random data from a seeded generator so reruns give the same points
rng = np.random.default_rng(42)
X = rng.standard_normal((100, 2))

# Create a KMeans instance with 3 clusters; random_state and n_init are pinned
# so the fit is reproducible and independent of scikit-learn's version defaults
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)

# Fit the algorithm to the data
kmeans.fit(X)

# Obtain the cluster label for each data point
labels = kmeans.labels_

# Write to the "kmeans" log directory. Using the writer as a context manager
# closes it even if one of the logging calls raises.
with SummaryWriter(log_dir="kmeans") as writer:
    # One scalar per data point: step = point index, value = its cluster label
    for step, label in enumerate(labels):
        writer.add_scalar("cluster", int(label), step)

    # Log the points as an embedding, with the cluster label as per-point metadata
    writer.add_embedding(torch.from_numpy(X), metadata=labels.tolist(), tag="data")

  from .autonotebook import tqdm as notebook_tqdm
