In [3]:
import pandas as pd
import plotly.express as px
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

# Generate a reproducible toy dataset: 200 two-dimensional points drawn
# around 3 centers (the true blob assignments are discarded).
X, _ = make_blobs(n_samples=200, centers=3, n_features=2, random_state=42)

# Fit k-means. `random_state` pins the centroid initialisation so the cluster
# ids (and anything derived from `labels` in later cells) are identical on
# every run. `n_init` is spelled out so the number of restarts does not depend
# on the installed scikit-learn version's default.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
kmeans.fit(X)
labels = kmeans.labels_  # integer cluster id per sample; kept numeric for metric functions

# Build a plotting frame with one row per sample.
df = pd.DataFrame(X, columns=['Feature 1', 'Feature 2'])
# Cluster ids are categories, not magnitudes: store them as strings so Plotly
# Express draws a discrete legend instead of a continuous colour bar.
df['Cluster'] = labels.astype(str)

# Visualize clusters using Plotly Express; `category_orders` keeps the legend
# sorted by cluster id rather than by order of first appearance in the data.
fig = px.scatter(
    df,
    x='Feature 1',
    y='Feature 2',
    color='Cluster',
    category_orders={'Cluster': sorted(df['Cluster'].unique())},
)
fig.show()


In [4]:
# Cluster performance

from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score

# Relies on `X` (the feature matrix) and `labels` (the k-means cluster
# assignment per sample) already being defined by an earlier cell.

# Compute the three internal validation scores up front ...
silhouette_avg = silhouette_score(X, labels)
calinski_harabasz = calinski_harabasz_score(X, labels)
davies_bouldin = davies_bouldin_score(X, labels)

# ... then report them in one pass, one "<name>: <value>" line per metric.
for metric_name, metric_value in (
    ("Silhouette Score", silhouette_avg),
    ("Calinski-Harabasz Index", calinski_harabasz),
    ("Davies-Bouldin Index", davies_bouldin),
):
    print(f"{metric_name}: {metric_value}")


Silhouette Score: 0.8467003894636074
Calinski-Harabasz Index: 3453.183319092555
Davies-Bouldin Index: 0.22002745320431016


In [5]:
import plotly.express as px
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
import pandas as pd

# Create some data with 3 features for 3D visualization
X, _ = make_blobs(n_samples=200, centers=3, n_features=3, random_state=42)

# Apply k-means. `random_state` fixes the centroid initialisation so cluster
# ids and colours are the same on every run; `n_init` is explicit so the
# number of restarts does not depend on the scikit-learn version's default.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
kmeans.fit(X)
labels = kmeans.labels_  # integer cluster id per sample

# Create a DataFrame with labels and features
df = pd.DataFrame(X, columns=['Feature 1', 'Feature 2', 'Feature 3'])
# Cluster ids are categorical: strings make Plotly Express use a discrete
# legend rather than a continuous colour scale.
df['Cluster'] = labels.astype(str)

# Visualize clusters using Plotly Express in 3D; `category_orders` keeps the
# legend sorted by cluster id instead of order of first appearance.
fig = px.scatter_3d(
    df,
    x='Feature 1',
    y='Feature 2',
    z='Feature 3',
    color='Cluster',
    category_orders={'Cluster': sorted(df['Cluster'].unique())},
)
fig.show()

In [6]:
import plotly.graph_objects as go
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
import pandas as pd

# Create some data with 3 features for 3D visualization
X, _ = make_blobs(n_samples=200, centers=3, n_features=3, random_state=42)

# Apply k-means. `random_state` fixes the centroid initialisation so each
# trace below always corresponds to the same cluster; `n_init` is explicit so
# the number of restarts does not depend on the scikit-learn version's default.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
kmeans.fit(X)
labels = kmeans.labels_  # integer cluster id per sample

# Create a DataFrame with labels and features
df = pd.DataFrame(X, columns=['Feature 1', 'Feature 2', 'Feature 3'])
# String ids are used as the groupby key and interpolated into the trace names.
df['Cluster'] = labels.astype(str)

# Create a 3D scatter plot using Plotly Graph Objects for more customization
fig = go.Figure()

# One trace per cluster: each trace gets its own legend entry and its own
# colour from the figure's default colour cycle.
for cluster_id, cluster_df in df.groupby('Cluster'):
    fig.add_trace(
        go.Scatter3d(
            x=cluster_df['Feature 1'],
            y=cluster_df['Feature 2'],
            z=cluster_df['Feature 3'],
            mode='markers',
            marker=dict(
                size=5,
                # Thin dark outline so overlapping markers stay distinguishable.
                line=dict(
                    width=0.5,
                    color='DarkSlateGrey'
                )
            ),
            name=f'Cluster {cluster_id}'
        )
    )

# Customizing the layout of the plot: title, axis labels, and tight margins
# (a small top margin is kept so the title has room).
fig.update_layout(
    title="3D Scatter Plot of K-Means Clusters",
    scene=dict(
        xaxis_title='Feature 1',
        yaxis_title='Feature 2',
        zaxis_title='Feature 3'
    ),
    margin=dict(r=0, b=0, l=0, t=30)
)

fig.show()


In [9]:
import plotly.express as px
import pandas as pd
import numpy as np

# Seeded generator: without it the random samples (and therefore the box
# plot) would be different on every run of the notebook.
rng = np.random.default_rng(42)

# Three synthetic features, each made of 5 hand-picked values, 95 random
# draws, and 2 deliberately extreme values acting as outliers (102 rows total).
data_dict = {
    'Feature 1': [5, 10, 15, 20, 25] + rng.normal(20, 5, size=95).tolist() + [100, 105],  # Outliers added
    'Feature 2': [30, 35, 40, 45, 50] + rng.uniform(30, 55, size=95).tolist() + [2, 3],    # Outliers added
    'Feature 3': [55, 60, 65, 70, 75] + rng.normal(60, 5, size=95).tolist() + [120, 130],  # Outliers added
}

# Convert the dictionary to a DataFrame for easier plotting with Plotly
df = pd.DataFrame(data_dict)

# Melt to long format (one row per feature/value pair), which is the shape
# Plotly Express expects for a grouped box plot.
df_melted = df.melt(var_name='Features', value_name='Values')

# Create a notched box plot, one box per feature, with every individual
# point drawn alongside its box so the outliers are visible.
fig = px.box(df_melted, x='Features', y='Values', color='Features', notched=True, points='all')

# Show the plot
fig.show()


In [13]:
import plotly.express as px
import pandas as pd

# Small example frame: five points split across two groups.
df = pd.DataFrame({
    'x': [1, 2, 3, 4, 5],
    'y': [10, 11, 12, 13, 14],
    'group': ['A', 'B', 'A', 'B', 'A']
})

# Create a scatter plot coloured by group. `size_max` is intentionally not
# passed: it only scales markers when a `size=` column is mapped, so it had no
# effect here.
fig = px.scatter(df, x='x', y='y', color='group', title='Scatter Plot Example')

# Enlarge the markers: with no `size=` column, a fixed marker size is set
# directly on every trace.
fig.update_traces(marker=dict(size=12))

# Show the plot
fig.show()


In [14]:
import pandas as pd  # used below; imported here so the cell does not rely on earlier cells
import plotly.express as px

# Example data: the same four funnel stages measured for two segments.
stages = ["Visit", "Sign-up", "Selection", "Purchase"]
values_segment_1 = [1000, 600, 400, 300]  # Example values for segment 1
values_segment_2 = [900, 500, 300, 200]   # Example values for segment 2

# Long-format data: one row per (segment, stage) pair.
funnel_data = {
    "Stage": stages * 2,  # Repeat stages for each segment
    "Value": values_segment_1 + values_segment_2,  # Combine values for both segments
    "Segment": ["Segment 1"]*len(stages) + ["Segment 2"]*len(stages)  # Identify the segment
}

df_funnel = pd.DataFrame(funnel_data)

# Create the funnel chart: one bar per stage, split by segment via colour.
fig = px.funnel(df_funnel, x='Value', y='Stage', color='Segment')

# Show the plot
fig.show()
