<a href="https://colab.research.google.com/github/sharath1708/dav/blob/main/DAV.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install seaborn if not already installed
# !pip install seaborn

# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Use seaborn's white-grid theme for every figure drawn below
sns.set(style="whitegrid")

# Load Data
# The path points at the Colab session's /content directory; the CSV has to be
# present there before this cell runs, otherwise read_csv raises
# FileNotFoundError.
csv_path = '/content/Music_n_Mental_Health_Survey_Results.csv'
df = pd.read_csv(csv_path)

# Step 2: Data Inspection
# Show the first rows, the column dtypes / non-null counts, and the number of
# missing values per column.
first_rows = df.head()
print("Head of dataset:\n", first_rows)

print("\nInfo:\n")
df.info()

missing_per_column = df.isna().sum()
print("\nMissing values:\n", missing_per_column)

# Step 3: Data Cleaning

# Replace missing ages with the median of the ages that were reported.
median_age = np.nanmedian(df['Age'])
df['Age'] = df['Age'].fillna(median_age)

# Encode the yes/no answer as 1/0; any other value (including missing)
# becomes NaN because it has no entry in the mapping.
yes_no_codes = {'Yes': 1, 'No': 0}
df['While working'] = df['While working'].map(yes_no_codes)

# Keep rows with no stated music effect, under an explicit 'Unknown' label.
df['Music effects'] = df['Music effects'].fillna('Unknown')

# These two columns are not used anywhere in the analysis below.
df = df.drop(columns=['Permissions', 'Timestamp'])

# Frequency mapping: turn the ordinal listening-frequency answers into 0..3.
frequency_map = {'Never': 0, 'Rarely': 1, 'Sometimes': 2, 'Very frequently': 3}
frequency_columns = [name for name in df.columns if 'Frequency' in name]
for name in frequency_columns:
    df[name] = df[name].map(frequency_map)

# Convert to category
categorical_columns = ['Instrumentalist', 'Composer', 'Fav genre',
                       'Primary streaming service']
df = df.astype({name: 'category' for name in categorical_columns})

# Step 4: Basic Statistics
# Use the Series methods directly so all three statistics skip missing values.
# np.median() converts the Series to a plain array and would print nan as soon
# as a single 'Hours per day' value is missing, whereas np.mean()/np.std()
# already delegate to the NaN-skipping Series methods.
print("\nMean age:", df['Age'].mean())
print("Median hours/day:", df['Hours per day'].median())
# ddof=0 keeps the population standard deviation that np.std() reported.
print("Std Dev of anxiety:", df['Anxiety'].std(ddof=0))

# Groupby Streaming Service
# observed=False spells out the behaviour the implicit default has had so far
# (every category of a categorical key gets a row, even when it has no
# respondents), so the output does not depend on the default and newer pandas
# has no deprecation warning to raise.
mental_health_cols = ['Anxiety', 'Depression', 'Insomnia', 'OCD']
print("\nMental health by streaming service:\n",
      df.groupby('Primary streaming service', observed=False)[mental_health_cols].mean())

# Heavy listeners count
# Summing the boolean mask counts the matching rows without building a
# filtered copy of the frame.
heavy_listener_count = int((df['Hours per day'] > 8).sum())
print("\nHeavy listeners count (>8 hrs):", heavy_listener_count)

# Pivot Table: mean anxiety per favourite genre, split by instrumentalist flag
pivot = pd.pivot_table(df, index='Fav genre', columns='Instrumentalist',
                       values='Anxiety', aggfunc='mean', observed=False)
print("\nAnxiety by genre and instrumentalist:\n", pivot)

# Step 5: Visualizations
# Each chart gets its own figure and is drawn through an explicit Axes handle.

# Boxplot of mental health scores
score_columns = ['Anxiety', 'Depression', 'Insomnia', 'OCD']
_, box_ax = plt.subplots(figsize=(10, 5))
sns.boxplot(data=df[score_columns], ax=box_ax)
box_ax.set_title("Mental Health Score Distributions")
plt.show()

# Pie chart of music effects
effect_counts = df['Music effects'].value_counts()
_, pie_ax = plt.subplots(figsize=(8, 8))
effect_counts.plot.pie(autopct='%1.1f%%', ax=pie_ax)
pie_ax.set_title("Effect of Music on Mental Health")
pie_ax.set_ylabel('')  # hide the y-axis label pandas puts on pie charts
plt.show()

# Scatter: Hours vs Anxiety
_, hours_ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(data=df, x='Hours per day', y='Anxiety', hue='Fav genre',
                ax=hours_ax)
hours_ax.set_title("Listening Hours vs Anxiety")
plt.show()

# Heatmap of correlations
numeric_columns = ['Hours per day', 'Anxiety', 'Depression', 'Insomnia',
                   'OCD', 'BPM']
corr = df[numeric_columns].corr()
_, corr_ax = plt.subplots(figsize=(10, 6))
sns.heatmap(corr, annot=True, cmap='coolwarm', ax=corr_ax)
corr_ax.set_title("Correlation Heatmap")
plt.show()

# Heatmap: Genre vs Age Group
# Bin ages so that each label matches its interval. pd.cut closes bins on the
# right by default, which put 18-year-olds into '<18' instead of '18-25'.
# With right=False the intervals are [0, 18), [18, 26), [26, 36), [36, 51) and
# [51, inf), so an age of exactly 50 still falls in '36-50' and '50+' means
# older than 50. The open-ended last bin also keeps ages of 100 and above.
age_bin_edges = [0, 18, 26, 36, 51, np.inf]
age_bin_labels = ['<18', '18-25', '26-35', '36-50', '50+']
df['Age group'] = pd.cut(df['Age'], bins=age_bin_edges, labels=age_bin_labels,
                         right=False)

# Count respondents per (age group, favourite genre) cell. observed=False
# spells out the behaviour the implicit default has had so far: every age
# group and genre category is kept, and empty cells are filled with 0.
genre_age = pd.pivot_table(df, index='Age group', columns='Fav genre',
                           values='Age', aggfunc='count', fill_value=0,
                           observed=False)

plt.figure(figsize=(12, 6))
sns.heatmap(genre_age, annot=True, fmt='d', cmap='YlOrRd')
plt.title("Favorite Genre by Age Group")
plt.show()

# Bar: Working vs Mental Health
# Average each mental-health score for the two 'While working' groups.
working_means = (
    df.groupby('While working')[['Anxiety', 'Depression', 'Insomnia', 'OCD']]
    .mean()
)
bar_ax = working_means.plot(kind='bar', figsize=(10, 6))
bar_ax.set_title("Mental Health Scores: Listening While Working vs Not")
# Relabel the two bar positions with the original answers instead of codes.
bar_ax.set_xticks([0, 1])
bar_ax.set_xticklabels(['No', 'Yes'], rotation=0)
bar_ax.set_ylabel("Average Score")
plt.show()

# Scatter: BPM vs Anxiety
_, bpm_ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(x='BPM', y='Anxiety', hue='Fav genre', data=df, ax=bpm_ax)
bpm_ax.set_title("BPM vs Anxiety Level")
plt.show()

# Countplot: Streaming Service by Age
_, service_ax = plt.subplots(figsize=(12, 6))
sns.countplot(x='Age group', hue='Primary streaming service', data=df,
              ax=service_ax)
service_ax.set_title("Streaming Preferences by Age Group")
# Anchor the legend outside the plotting area so it does not cover the bars.
service_ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()

# Step 6: Summary Stats
print("\nDescriptive stats:\n", df.describe())

# Mean mental-health scores per favourite genre, most anxious genre first.
# observed=False spells out the behaviour the implicit default has had so far
# for categorical group keys (all categories are listed).
print("\nMental health by genre:\n",
      df.groupby('Fav genre', observed=False)[['Anxiety', 'Depression', 'Insomnia', 'OCD']]
      .mean().sort_values('Anxiety', ascending=False))

# Average listening hours per anxiety band (low / medium / high).
# pd.cut leaves the first bin open on the left by default, so a score of
# exactly 0 matched no bin and those respondents were silently dropped from
# the grouped mean; include_lowest=True closes the first bin at 0.
anxiety_bands = pd.cut(df['Anxiety'], [0, 3, 6, 10], include_lowest=True)
print("\nListening hours vs anxiety groups:\n",
      df.groupby(anxiety_bands, observed=False)['Hours per day'].mean())



FileNotFoundError: [Errno 2] No such file or directory: '/content/Music_n_Mental_Health_Survey_Results.csv'