In [None]:
import pandas as pd

# Read the treadmill dataset from a CSV file (path is relative to the
# notebook's working directory).
file_path = 'treadmill_data.csv'
df = pd.read_csv(file_path)

# Summary statistics of the numeric columns. The table is transposed so
# each dataset column becomes a row and the statistics run across.
descriptive_stats = df.describe().T
descriptive_stats

In [None]:
# Summary of the object-dtype (text) columns only; numeric columns are
# covered by `descriptive_stats`. Transposed so each column is one row.
descriptive_stats_non_numeric = df.describe(include=['object']).T
descriptive_stats_non_numeric

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Plot theme: seaborn's "darkgrid" first, then matplotlib's dark background
# applied on top of it.
sns.set(style="darkgrid")
plt.style.use("dark_background")

# Visualization 1: histogram of Age (15 bins) with a KDE curve overlaid.
plt.figure(figsize=(10, 6))
sns.histplot(df['Age'], bins=15, kde=True, color='skyblue').set(
    title='Distribution of Age',
    xlabel='Age',
    ylabel='Frequency',
)
plt.show()

# Visualization 2: Income against Miles, one colour per Gender.
plt.figure(figsize=(10, 6))
sns.scatterplot(data=df, x='Income', y='Miles', hue='Gender', palette='bright').set(
    title='Income vs Miles by Gender',
    xlabel='Income',
    ylabel='Miles',
)
plt.show()

# Visualization 3: number of rows per Product. value_counts() orders the
# products from most to least frequent, which is the bar order shown.
plt.figure(figsize=(10, 6))
product_counts = df['Product'].value_counts()
sns.barplot(x=product_counts.index, y=product_counts.values, palette='Blues_r').set(
    title='Product Popularity',
    xlabel='Product',
    ylabel='Count',
)
plt.show()

In [None]:
# Correlation heatmap of the numeric columns

# Set the style for the plots
sns.set(style="darkgrid")
plt.style.use("dark_background")

# Correlate the numeric columns only. `df` also carries object-dtype columns
# (they are summarised earlier in this notebook), and since pandas 2.0
# DataFrame.corr() no longer drops those silently -- it raises ValueError
# when it meets a non-numeric value. Selecting the numeric dtypes up front
# works on both old and new pandas versions.
correlation_matrix = df.select_dtypes(include='number').corr()

# Display the heatmap; annot=True prints each coefficient inside its cell.
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='Blues_r')
plt.title('Correlation Heatmap')
plt.show()

In [None]:
# Number of rows per Gender.
plt.figure(figsize=(8, 5))
sns.countplot(data=df, x='Gender', palette='bright').set(
    title='Gender Distribution',
    xlabel='Gender',
    ylabel='Count',
)
plt.show()

In [None]:
# Boxplots for Numeric Variables by Gender
#
# One figure per variable (Age, Income, Miles), each split by Gender. The
# three plots differ only in the y column, so they are produced in a loop.
for metric in ('Age', 'Income', 'Miles'):
    plt.figure(figsize=(10, 6))
    sns.boxplot(data=df, x='Gender', y=metric, palette='bright').set(
        title=f'{metric} Distribution by Gender',
        xlabel='Gender',
        ylabel=metric,
    )
    plt.show()

In [None]:
# Pairwise relationships between the numeric columns, coloured by Gender.
sns.pairplot(data=df, hue='Gender', palette='bright')
# y=1.02 places the figure title just above the grid of subplots.
plt.suptitle('Pair Plot of Numeric Variables by Gender', y=1.02)
plt.show()

In [None]:
# Number of rows per MaritalStatus value.
plt.figure(figsize=(8, 5))
sns.countplot(data=df, x='MaritalStatus', palette='bright').set(
    title='Marital Status Distribution',
    xlabel='Marital Status',
    ylabel='Count',
)
plt.show()

In [None]:
# Spread of Income within each Product, shown as one box per product.
plt.figure(figsize=(10, 6))
sns.boxplot(data=df, x='Product', y='Income', palette='Blues_r').set(
    title='Income Distribution by Product',
    xlabel='Product',
    ylabel='Income',
)
plt.show()

In [None]:
# Number of rows at each Fitness level.
plt.figure(figsize=(10, 6))
sns.countplot(data=df, x='Fitness', palette='Blues_r').set(
    title='Fitness Level Distribution',
    xlabel='Fitness Level',
    ylabel='Count',
)
plt.show()

In [None]:
# Cluster analysis, labelled by each cluster's most common Product instead
# of the raw cluster number.

# Imported here because nothing else in the notebook brings KMeans into scope.
from sklearn.cluster import KMeans

# Build the clustering input inside this cell so it runs on a fresh kernel;
# `scaled_df` is not defined anywhere else in the notebook.
# NOTE(review): the original feature set behind `scaled_df` is not visible.
# Z-scored numeric columns are assumed here -- confirm against the intended
# analysis. 'Cluster' is excluded so that re-running this cell does not feed
# the previous labels back in as a feature.
cluster_features = df.select_dtypes(include='number').drop(columns=['Cluster'], errors='ignore')
feature_std = cluster_features.std(ddof=0).replace(0, 1)  # constant column: avoid 0/0
scaled_df = (cluster_features - cluster_features.mean()) / feature_std

# KMeans Clustering. n_init is pinned because scikit-learn changed its default
# from 10 to 'auto'; an explicit value keeps results stable across versions
# and silences the transition warning.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=0)
kmeans.fit(scaled_df)

# Adding cluster labels to the dataframe (one label per row, same row order).
df['Cluster'] = kmeans.labels_

# Mapping cluster labels to products: each cluster is named after its most
# frequent Product (first mode on ties). The dict is keyed by the actual
# cluster label taken from the groupby index rather than by enumeration
# position, so the mapping stays correct even if a label has no rows.
# Several clusters can share the same dominant product, in which case they
# share one colour in the plot below.
cluster_to_product = (
    df.groupby('Cluster')['Product']
    .apply(lambda products: products.mode()[0])
    .to_dict()
)
df['ProductCluster'] = df['Cluster'].map(cluster_to_product)

# Visualizing the clusters with Product labels
plt.figure(figsize=(10, 8))
sns.scatterplot(x='Age', y='Income', hue='ProductCluster', data=df, palette='bright')
plt.title('Cluster Analysis - Age vs Income with Product Labels')
plt.xlabel('Age')
plt.ylabel('Income')
plt.show()

In [None]:
# Average Fitness level per Product. barplot aggregates each group with the
# mean by default; ci=None turns the error bars off.
plt.figure(figsize=(12, 8))
sns.barplot(data=df, x='Product', y='Fitness', palette='Blues_r', ci=None).set(
    title='Fitness Level by Product',
    xlabel='Product',
    ylabel='Average Fitness Level',
)
plt.show()