# Indian Kids Screen Time — Visual Exploration
This notebook reproduces the visual analysis (univariate & bivariate) for the
`Indian_Kids_Screen_Time.csv` dataset. It is organized as a sequence of small cells
that are meant to be run in order, from top to bottom.

Place the CSV at `/mnt/data/Indian_Kids_Screen_Time.csv` (already provided).


In [None]:
# --- 1. Imports & setup ---
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Optional nicer visuals. Only a missing package is tolerated (ImportError);
# the styling call sits in `else` so a genuine error raised while applying the
# style is reported instead of being silently discarded.
try:
    import seaborn as sns
except ImportError:
    pass  # seaborn is optional; matplotlib defaults are used instead
else:
    sns.set(style="whitegrid")

# Optional: wordcloud. HAVE_WORDCLOUD records whether the import succeeded so
# later cells can skip the word cloud when the package is absent.
try:
    from wordcloud import WordCloud, STOPWORDS
except ImportError:
    HAVE_WORDCLOUD = False
else:
    HAVE_WORDCLOUD = True

# Where outputs (figures) will be saved; created on first run, reused afterwards.
out_dir = "figures"
os.makedirs(out_dir, exist_ok=True)

print('Libraries loaded. figures will be saved to', out_dir)

In [None]:
# --- 2. Load dataset ---
# Default CSV location. Set the SCREEN_TIME_CSV environment variable to read a
# copy stored elsewhere without editing this cell; when it is unset the original
# path is used.
FILEPATH = os.environ.get("SCREEN_TIME_CSV", "/mnt/data/Indian_Kids_Screen_Time.csv")
df = pd.read_csv(FILEPATH)

# Columns read by the cells below. Checking them here makes a schema mismatch
# fail immediately, with the file name in the message, rather than in the
# middle of a plotting cell.
REQUIRED_COLUMNS = [
    'Age',
    'Gender',
    'Avg_Daily_Screen_Time_hr',
    'Primary_Device',
    'Urban_or_Rural',
    'Health_Impacts',
]
missing_columns = [col for col in REQUIRED_COLUMNS if col not in df.columns]
if missing_columns:
    raise KeyError(f"{FILEPATH} is missing expected column(s): {missing_columns}")

df.head()

In [None]:
# Quick info about the dataset: DataFrame.info() prints a summary of the
# frame (columns, non-null counts, dtypes) so missing values and unexpected
# types can be spotted before plotting.
df.info()

In [None]:
# --- 3. Data preparation ---
# Bucket `Age` into ordered bands. With right=True every interval includes its
# right edge, so these edges produce (5, 10], (10, 15] and (15, 20].
bins = [5, 10, 15, 20]
labels = ['6-10', '11-15', '16-20']

# pd.cut with a list of labels already returns an ordered categorical whose
# categories are `labels` in this order, so no second conversion is needed.
df['Age_Band'] = pd.cut(df['Age'], bins=bins, labels=labels, right=True)

# An age outside (5, 20] gets no band (NaN) and would silently disappear from
# every per-band chart below, so report how many rows are affected.
unbanded = int((df['Age_Band'].isna() & df['Age'].notna()).sum())
if unbanded:
    print(f'Warning: {unbanded} row(s) have an Age outside ({bins[0]}, {bins[-1]}] and no Age_Band.')

df[['Age','Age_Band']].head()

## Univariate analysis

**Distribution of daily screen time**

In [None]:
# Histogram of average daily screen time over all rows (missing values dropped),
# shown inline and then written to the figures directory.
screen_hours = df['Avg_Daily_Screen_Time_hr'].dropna()

fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(screen_hours, bins=30, alpha=0.95, edgecolor='black')
ax.set_title('Distribution of Daily Screen Time (hrs)', fontsize=16)
ax.set(xlabel='Daily Screen Time (hrs)', ylabel='Frequency')
plt.show()

hist_path = os.path.join(out_dir, 'hist_screen_time.png')
fig.savefig(hist_path)

**Distribution of Age Bands**

In [None]:
# Bar chart: number of rows in each age band, listed in the order given by `labels`.
counts = df['Age_Band'].value_counts().reindex(labels)

fig, ax = plt.subplots(figsize=(8, 6))
counts.plot.bar(ax=ax)
ax.set(title='Distribution of Age Bands', xlabel='Age Band', ylabel='Count')
plt.show()

age_bar_path = os.path.join(out_dir, 'bar_age_bands.png')
fig.savefig(age_bar_path)

**Primary device usage**

In [None]:
# Bar chart: how many rows report each primary device (most frequent first,
# which is the order value_counts() returns).
dev_counts = df['Primary_Device'].value_counts()

fig, ax = plt.subplots(figsize=(10, 6))
dev_counts.plot.bar(ax=ax)
ax.set(title='Primary Device Usage', xlabel='Device', ylabel='Count')
plt.show()

device_bar_path = os.path.join(out_dir, 'bar_primary_device.png')
fig.savefig(device_bar_path)

## Bivariate analysis

**Boxplot: Screen time by Gender**

In [None]:
# Boxplot of daily screen time, one box per Gender value.
fig, ax = plt.subplots(figsize=(8, 6))
df.boxplot(column='Avg_Daily_Screen_Time_hr', by='Gender', grid=False, ax=ax)
fig.suptitle('')  # blank the figure-level title so only the axes title below is shown
ax.set(
    title='Screen Time by Gender',
    xlabel='Gender',
    ylabel='Daily Screen Time (hrs)',
)
plt.show()

gender_box_path = os.path.join(out_dir, 'box_gender.png')
fig.savefig(gender_box_path)

**Boxplot: Screen time by Age Band**

In [None]:
# Boxplot of daily screen time, one box per age band.
fig, ax = plt.subplots(figsize=(9, 6))
df.boxplot(column='Avg_Daily_Screen_Time_hr', by='Age_Band', grid=False, ax=ax)
fig.suptitle('')  # blank the figure-level title so only the axes title below is shown
ax.set(
    title='Screen Time by Age Band',
    xlabel='Age Band',
    ylabel='Daily Screen Time (hrs)',
)
plt.show()

age_box_path = os.path.join(out_dir, 'box_age_band.png')
fig.savefig(age_box_path)

**Boxplot: Screen time by Location Type (Urban/Rural)**

In [None]:
# Boxplot of daily screen time, one box per Urban_or_Rural value.
fig, ax = plt.subplots(figsize=(8, 6))
df.boxplot(column='Avg_Daily_Screen_Time_hr', by='Urban_or_Rural', grid=False, ax=ax)
fig.suptitle('')  # blank the figure-level title so only the axes title below is shown
ax.set(
    title='Screen Time by Location Type',
    xlabel='Urban or Rural',
    ylabel='Daily Screen Time (hrs)',
)
plt.show()

location_box_path = os.path.join(out_dir, 'box_location.png')
fig.savefig(location_box_path)

**Line plot: Average screen time by Age Band and Gender**

In [None]:
# Mean daily screen time for every (age band, gender) pair, reshaped so that
# rows are age bands and columns are genders.
# Age_Band is categorical, so `observed` is passed explicitly: leaving it to the
# default emits a FutureWarning on recent pandas releases, and the default is
# slated to change. observed=False keeps every band as a row even if it has no data.
grouped = (
    df.groupby(['Age_Band', 'Gender'], observed=False)['Avg_Daily_Screen_Time_hr']
    .mean()
    .unstack()
)

fig, ax = plt.subplots(figsize=(10, 6))
grouped.plot(ax=ax, marker='o')

# The bands are plotted at positions 0..n-1; pin exactly one labeled tick per
# band so there are no unlabeled intermediate ticks on the x axis.
ax.set_xticks(range(len(grouped.index)))
ax.set_xticklabels([str(band) for band in grouped.index])

ax.set_title('Average Screen Time by Age Band and Gender')
ax.set_xlabel('Age Band')
ax.set_ylabel('Average Daily Screen Time (hrs)')
ax.legend(title='Gender')
plt.show()
fig.savefig(os.path.join(out_dir, 'line_ageband_gender.png'))

**Overlay: Screen time distribution by Age Band**

In [None]:
# Overlaid histograms of daily screen time, one per age band.
# All bands share the same bin edges. Passing bins=25 to each hist() call would
# bin every band over its own min/max, giving bars of different widths that
# cannot be compared when drawn on top of each other.
banded_hours = df.loc[df['Age_Band'].notna(), 'Avg_Daily_Screen_Time_hr'].dropna()
bin_edges = np.histogram_bin_edges(banded_hours, bins=25)

fig, ax = plt.subplots(figsize=(10,6))
for band in labels:
    subset = df.loc[df['Age_Band'] == band, 'Avg_Daily_Screen_Time_hr'].dropna()
    # Skip bands with no rows so they do not add an empty legend entry.
    if len(subset) > 0:
        ax.hist(subset, bins=bin_edges, alpha=0.4, label=str(band), edgecolor='black')
ax.set_title('Screen Time Distribution by Age Band')
ax.set_xlabel('Daily Screen Time (hrs)')
ax.set_ylabel('Frequency')
ax.legend(title='Age Band')
plt.show()
fig.savefig(os.path.join(out_dir, 'hist_by_ageband.png'))

## Topic trends — Health Impacts

We split the `Health_Impacts` column by commas and count occurrences.

In [None]:
# Each Health_Impacts cell is treated as a comma-separated list: split it, trim
# whitespace around every entry and drop empty fragments, then count each impact.
# NOTE(review): rows with a missing Health_Impacts value are dropped here. Recent
# pandas versions parse the literal text "None" as missing by default, so if the
# CSV uses "None" for "no impact" those rows are excluded too - confirm intended.
impacts_series = df['Health_Impacts'].dropna().astype(str).str.split(',')
impacts_flat = [part.strip() for parts in impacts_series for part in parts if part.strip()]
impacts_counts = pd.Series(impacts_flat, dtype='object').value_counts()

# Bar chart of the most frequent impacts
top_n = 12
top_impacts = impacts_counts.head(top_n)
if top_impacts.empty:
    # Plotting an empty Series raises, so skip the chart with a clear message.
    print('No health impacts found; skipping bar chart.')
else:
    fig, ax = plt.subplots(figsize=(10,6))
    top_impacts.plot(kind='barh', ax=ax)
    # Use the number of bars actually drawn: there may be fewer than top_n impacts.
    ax.set_title(f'Top {len(top_impacts)} Reported Health Impacts')
    ax.set_xlabel('Count')
    ax.invert_yaxis()  # most frequent impact at the top
    plt.show()
    fig.savefig(os.path.join(out_dir, 'bar_health_impacts.png'))

# Word cloud
if not HAVE_WORDCLOUD:
    print('WordCloud not installed; to generate wordcloud install the package: pip install wordcloud')
elif impacts_counts.empty:
    print('No health impacts found; skipping word cloud.')
else:
    # Build the cloud from the counted impacts rather than from space-joined text:
    # tokenising joined text would break multi-word impacts into separate words,
    # whereas the frequency table keeps each impact as a single entry sized by
    # the same counts shown in the bar chart.
    wc = WordCloud(width=1200, height=600, background_color='white')
    wc = wc.generate_from_frequencies(impacts_counts.to_dict())
    fig, ax = plt.subplots(figsize=(12,6))
    ax.imshow(wc, interpolation='bilinear')
    ax.axis('off')
    plt.show()
    fig.savefig(os.path.join(out_dir, 'wordcloud_health_impacts.png'))

In [None]:
# Save aggregate tables for reporting
# Mean screen time per age band (rows) and gender (columns).
grouped.reset_index().to_csv(os.path.join(out_dir, 'avg_screen_by_ageband_gender.csv'), index=False)

# Health-impact counts. Name both columns explicitly: writing the bare Series
# leaves the label column without a header and gives the value column a header
# that depends on the installed pandas version.
impacts_table = impacts_counts.rename_axis('Health_Impact').reset_index(name='Count')
impacts_table.to_csv(os.path.join(out_dir, 'health_impacts_counts.csv'), index=False)

print('Saved aggregated CSVs in', out_dir)

### Next steps / customization

- Change color schemes or fonts (use seaborn/matplotlib rcParams).
- Export figures to a single PDF or PowerPoint (e.g., using `matplotlib.backends.backend_pdf.PdfPages` or python-pptx).
- To embed the rendered figures in the notebook itself, run every cell in order (Restart Kernel → Run All) and save the notebook with its outputs.