# Data Analysis

This notebook performs exploratory data analysis (EDA) on the processed therapist data and derives insights from it.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Location of the processed therapist dataset (path is relative to the
# directory the notebook kernel runs from)
data_path = '../data/processed/therapists_data.csv'

# Read the CSV into the DataFrame that every later cell works on
df = pd.read_csv(filepath_or_buffer=data_path)

# Preview the first five rows as a quick sanity check of the load
df.head(5)

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) using
# describe()'s default column selection
summary_stats = df.describe()
summary_stats

In [None]:
# Histogram of the 'Rating' column (20 bins) with a KDE curve overlaid
fig, ax = plt.subplots(figsize=(10, 6))
rating_values = df['Rating']
sns.histplot(rating_values, bins=20, kde=True, ax=ax)
ax.set(
    title='Distribution of Therapist Ratings',
    xlabel='Rating',
    ylabel='Frequency',
)
plt.show()

In [None]:
# Split the ', '-separated 'Specialty' strings into one indicator column
# per distinct specialty
specialties = df['Specialty'].str.get_dummies(sep=', ')

# Column totals = number of rows listing each specialty, most frequent first
specialty_counts = specialties.sum(axis=0).sort_values(ascending=False)

# Bar chart of the ten most frequent specialties
top_specialties = specialty_counts.head(10)
fig, ax = plt.subplots(figsize=(12, 8))
sns.barplot(x=top_specialties.values, y=top_specialties.index, ax=ax)
ax.set(title='Top 10 Specialties', xlabel='Count', ylabel='Specialty')
plt.show()

In [None]:
# Correlation heatmap
# Correlate numeric columns only. The frame also holds text columns
# (e.g. 'Specialty'), and since pandas 2.0 DataFrame.corr() no longer drops
# non-numeric columns silently -- it raises on values it cannot convert to
# float. Selecting by dtype first works on both old and new pandas versions.
numeric_df = df.select_dtypes(include='number')
corr_matrix = numeric_df.corr()

plt.figure(figsize=(10, 8))
# annot/fmt print each pairwise coefficient in its cell, to two decimals
sns.heatmap(corr_matrix, annot=True, fmt='.2f', cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.show()