# The Need for The Feed 📱😵‍💫

Helin Yilmaz, Maya Alhashem, Nory Arroyo, Oviya Adhan, Vanessa Navarro

DATASCI 209 Data Visualization

Final Project Exploratory Data Visualization

# Data Import & Cleaning

### Import Libraries

In [62]:
import pandas as pd
import numpy as np
import altair as alt
import os

In [63]:
# Mount Google Drive at /content/drive so the project files can be read
# through the local filesystem (Colab-only import).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [64]:
# Build the path to the shared project folder on the mounted Drive.
# The folder is addressed through Drive's ".shortcut-targets-by-id/<folder id>"
# directory, so the id below identifies which shared folder is used.
# (`os` is already imported in the imports cell; it is not re-imported here.)
folder_id = "1UHJTiSennuDQQLeSaill-5Gzo3kPcJid"
project_path = f'/content/drive/.shortcut-targets-by-id/{folder_id}/209 Final Project'

### Import Data

In [65]:
# Read the two pre-cleaned CSV files from the project folder.
mental_health_csv = f'{project_path}/mental_health_clean.csv'
social_media_csv = f'{project_path}/social_media_clean.csv'

mental_df_clean = pd.read_csv(mental_health_csv)
social_df_clean = pd.read_csv(social_media_csv)

# Report (rows, columns) for each frame as a quick sanity check.
print(f"Mental health data: {mental_df_clean.shape}")
print(f"Social media data: {social_df_clean.shape}")

Mental health data: (24998, 17)
Social media data: (449, 35)


In [66]:
# Work on a copy of the loaded survey data and shorten the long column names.
# 'worried', 'interest_fluctuation', 'gender' and 'age' keep their existing
# names, so they need no entry in the mapping.
short_column_names = {
    'sm_validation_seeking_freq': 'validation_seeking',
    'sm_comparison_freq': 'comparison_freq',
    'sm_comparison_feelings': 'comparison_feelings',
    'depressed_freq': 'depressed',
}
social_df = social_df_clean.copy().rename(columns=short_column_names)

# Collapse gender responses into three groups.
def clean_gender(gender):
    """Return 'Male' or 'Female' for an exact match, else 'Non-binary/Other'.

    Matching is exact (case-sensitive, no whitespace trimming); every other
    value, including missing values, falls into 'Non-binary/Other'.
    """
    for label in ('Male', 'Female'):
        if gender == label:
            return label
    return 'Non-binary/Other'

# Three-way gender grouping produced by clean_gender.
social_df['gender_clean'] = social_df['gender'].apply(clean_gender)

# Text label for each validation-seeking level (1-5).
validation_labels = {
    1: 'Never/Rarely',
    2: 'Sometimes',
    3: 'Often',
    4: 'Very Often',
    5: 'Always',
}
social_df['validation_category'] = social_df['validation_seeking'].map(validation_labels)

# Composite mental health score: mean of the three indicators
# (higher = worse mental health).
indicator_total = social_df['worried'] + social_df['depressed'] + social_df['interest_fluctuation']
social_df['mental_health_score'] = indicator_total / 3

### Data Pre-Processing

In [67]:
# # Custom summary dataframe of social_df_clean

# # Initialize list to hold extracted information
# social_summary_rows = []

# # Iterate through columns and extract information
# for col in social_df_clean.columns:
#     row = {
#         'Column Name': col,                                 # Name of column
#         'Data Type': social_df_clean[col].dtype,                  # Check data type
#         'NaN Count': social_df_clean[col].isna().sum(),           # Check NaN counts
#         'Unique Values': social_df_clean[col].unique().tolist()   # Check unique values
#     }

#     social_summary_rows.append(row)

# # Convert list to DF and view
# social_summary = pd.DataFrame(social_summary_rows)
# print(f'Shape of Social Media dataframe: {social_df_clean.shape}') # Print shape
# print("\n📱 Social Media Dataset Summary:") # Print custom summary
# social_summary

# Hypothesis & Visualization

### Hypothesis:
Users who frequently seek validation on social media compare themselves to others more often and report more negative mental health outcomes.

In [68]:
# Start again from the loaded survey data so this cell does not depend on
# earlier edits to social_df.
social_df = social_df_clean.copy()

# Add short alias columns next to the original survey columns
# (the originals are kept; 'worried' and 'interest_fluctuation' are used as-is).
column_aliases = {
    'validation_seeking': 'sm_validation_seeking_freq',
    'comparison_freq': 'sm_comparison_freq',
    'comparison_feelings': 'sm_comparison_feelings',
    'depressed': 'depressed_freq',
}
for short_name, survey_column in column_aliases.items():
    social_df[short_name] = social_df[survey_column]
# Group gender responses: keep 'Male' and 'Female', pool everything else.
def clean_gender(gender):
    """Map a raw gender response to 'Male', 'Female' or 'Non-binary/Other'.

    Only exact 'Male' / 'Female' values are kept; any other response
    (non-binary, unclear, free-text, missing) becomes 'Non-binary/Other'.
    """
    if gender not in ('Male', 'Female'):
        return 'Non-binary/Other'
    return 'Male' if gender == 'Male' else 'Female'

# Three-way gender grouping produced by clean_gender.
social_df['gender_clean'] = social_df['gender'].apply(clean_gender)

# Create validation seeking categories for better analysis
social_df['validation_category'] = social_df['validation_seeking'].map({
    1: 'Never/Rarely', 2: 'Sometimes', 3: 'Often', 4: 'Very Often', 5: 'Always'
})

# Text label for how social comparisons feel. The "How Social Comparisons Feel"
# chart reads this column, so it has to exist before the charts are built.
# NOTE(review): assumes comparison_feelings is coded 1-5 with 1 = most positive
# and 5 = most negative (matching the "higher = worse" convention used for the
# mental health score below) -- confirm against the survey codebook and flip
# the mapping if the scale runs the other way.
social_df['comparison_feel_label'] = social_df['comparison_feelings'].map({
    1: 'Very Good', 2: 'Good', 3: 'Neutral', 4: 'Bad', 5: 'Very Bad'
})

# Create mental health composite score (higher = worse mental health)
social_df['mental_health_score'] = (social_df['worried'] + social_df['depressed'] + social_df['interest_fluctuation']) / 3

### Visualizations

In [69]:
# Hex color palettes shared by the charts below (nine colors each).
gemstone_colors = [
    '#B8A9D9', '#9B59B6', '#16A085',
    '#1ABC9C', '#F39C12', '#E67E22',
    '#95A5A6', '#8E44AD', '#27AE60',
]
meadow_colors = [
    '#FF7675', '#FD79A8', '#74B9FF',
    '#81ECEC', '#A29BFE', '#FDCB6E',
    '#6C5CE7', '#00B894', '#2D3436',
]

# Visualization 1: average social-comparison score per validation-seeking
# category, using Gemstone colors.

# Display order of the validation-seeking categories, least to most frequent.
# Shared by the x-axis sort and the color scale so both agree.
validation_order = ['Never/Rarely', 'Sometimes', 'Often', 'Very Often', 'Always']

viz1 = alt.Chart(social_df).mark_bar().encode(
    x=alt.X('validation_category:O',
            title='How Often People Seek Validation',
            sort=validation_order),
    y=alt.Y('mean(comparison_freq):Q',
            title='Average Social Comparison Score (1-5)',
            scale=alt.Scale(domain=[0, 4])),
    color=alt.Color('validation_category:N',
                   # Pin the domain: with only a range given, colors and legend
                   # entries are assigned in alphabetical category order rather
                   # than in the Never/Rarely -> Always order.
                   scale=alt.Scale(domain=validation_order, range=gemstone_colors[:5]),
                   legend=alt.Legend(title="Validation Seeking Level")),
    tooltip=['validation_category:N', 'mean(comparison_freq):Q', 'count():Q']
).properties(
    title='People Who Seek More Validation Also Compare Themselves More',
    width=500,
    height=300
)

# Respondent count for each category, drawn just above its bar
# (the text is positioned at the bar's mean value and shifted up by dy).
counts = alt.Chart(social_df).mark_text(
    align='center',
    baseline='bottom',
    fontSize=11,
    dy=-5,
    fontWeight='bold'
).encode(
    x=alt.X('validation_category:O', sort=validation_order),
    y=alt.Y('mean(comparison_freq):Q'),
    text=alt.Text('count():Q', format='d'),
    color=alt.value('black')
)

# Layer the count labels on top of the bars.
viz1_final = viz1 + counts

# Visualization 2: mental health indicators vs. validation-seeking level,
# shown as two panels side by side.

# Left panel: one line per indicator. The three indicator columns are folded
# into long form (mental_health_aspect, score) and averaged per level.
indicator_columns = ['worried', 'depressed', 'interest_fluctuation']
indicator_palette = [gemstone_colors[0], gemstone_colors[2], gemstone_colors[4]]
indicator_legend = alt.Legend(
    title="Mental Health Indicators",
    orient='top-right',
    symbolLimit=4
)

mental_health_trends = (
    alt.Chart(social_df)
    .transform_fold(indicator_columns, as_=['mental_health_aspect', 'score'])
    .mark_line(point=True, strokeWidth=3)
    .encode(
        x=alt.X('validation_seeking:O', title='Validation Seeking Level'),
        y=alt.Y(
            'mean(score):Q',
            title='Average Score (1-5 scale)',
            scale=alt.Scale(domain=[2.5, 4.5]),
        ),
        color=alt.Color(
            'mental_health_aspect:N',
            scale=alt.Scale(range=indicator_palette),
            legend=indicator_legend,
        ),
        tooltip=['validation_seeking:O', 'mental_health_aspect:N', 'mean(score):Q'],
    )
    .properties(
        title='All Mental Health Indicators Worsen with Validation Seeking',
        width=400,
        height=350,
    )
)

# Right panel: mean composite score per level, one bar per level.
# The legend is turned off because the x-axis already names each level.
level_palette = [meadow_colors[i] for i in (2, 3, 5, 0, 1)]

mental_health_overall = (
    alt.Chart(social_df)
    .mark_bar()
    .encode(
        x=alt.X('validation_seeking:O', title='Validation Level'),
        y=alt.Y(
            'mean(mental_health_score):Q',
            title='Overall Mental Health Score (Higher = Worse)',
            scale=alt.Scale(domain=[2.8, 4.2]),
        ),
        color=alt.Color(
            'validation_seeking:O',
            scale=alt.Scale(range=level_palette),
            legend=None,
        ),
        tooltip=['validation_seeking:O', 'mean(mental_health_score):Q', 'count():Q'],
    )
    .properties(
        title='Overall Mental Health Score Gets Progressively Worse',
        width=400,
        height=350,
    )
)

# Place the line chart and the bar chart next to each other.
viz2_final = alt.hconcat(mental_health_trends, mental_health_overall, spacing=40)

# Visualization 3: depression by validation-seeking level (Gemstone colors)
# next to the distribution of how social comparisons feel (Meadow colors).

# Left: mean depression score per validation-seeking level.
depression_chart = alt.Chart(social_df).mark_bar().encode(
    x=alt.X('validation_seeking:O', title='Validation Seeking Level'),
    y=alt.Y('mean(depressed):Q',
            title='Average Depression Level',
            scale=alt.Scale(domain=[3, 4.5])),
    color=alt.Color('validation_seeking:O',
                   scale=alt.Scale(range=gemstone_colors[:5]),
                   legend=None),
    tooltip=['validation_seeking:O', 'mean(depressed):Q', 'count():Q']
).properties(
    title='Depression Increases with Validation Seeking',
    width=280,
    height=250
)

# Right: number of respondents per comparison-feeling label.
# NOTE(review): this chart reads a `comparison_feel_label` column from
# social_df; make sure the data-preparation step creates it.

# Label order from most positive to most negative, shared by the axis sort and
# the color scale.
comparison_feel_order = ['Very Good', 'Good', 'Neutral', 'Bad', 'Very Bad']

comparison_chart = alt.Chart(social_df).mark_bar().encode(
    y=alt.Y('comparison_feel_label:O',
            title='How Social Comparisons Feel',
            sort=comparison_feel_order),
    x=alt.X('count():Q', title='Number of People'),
    color=alt.Color('comparison_feel_label:N',
                   # Pin the domain so the palette (listed from 'Very Good' to
                   # 'Very Bad') lines up with the labels; with only a range
                   # given, colors are assigned in alphabetical label order.
                   scale=alt.Scale(
                       domain=comparison_feel_order,
                       range=[meadow_colors[7], meadow_colors[5], meadow_colors[3], meadow_colors[1], meadow_colors[0]]),
                   legend=None),
    tooltip=['comparison_feel_label:O', 'count():Q']
).properties(
    title='Most People Feel Bad About Social Comparisons',
    width=280,
    height=250
)

# Place the two bar charts side by side.
viz3_final = alt.hconcat(depression_chart, comparison_chart, spacing=40)

### Display Visualizations

***Visualization 1:*** Basic Relationship (Initial Exploration)

In [70]:
# Render the layered bar chart with its count labels.
display(viz1_final)

In [71]:
# Render each Visualization 2 panel on its own, then the combined layout,
# printing a label before each one.
viz2_panels = (
    ("Line chart:", mental_health_trends),
    ("Bar chart:", mental_health_overall),
    ("Combined:", viz2_final),
)
for panel_label, panel_chart in viz2_panels:
    print(panel_label)
    display(panel_chart)

Line chart:


Bar chart:


Combined:


**Visualization 1 takeaways:**

- Bar heights show the average social comparison score for each validation-seeking group.
- People who "Always" seek validation have comparison scores of around 3.8 out of 5.
- People who "Never/Rarely" seek validation average only about 2.1 out of 5.
- Clear progression: more validation seeking → higher comparison behavior.

***Visualization 2:*** Multi-dimensional Analysis (Refined)

In [72]:
# Render the side-by-side line chart and bar chart.
display(viz2_final)

***Visualization 3:*** Depression Levels and How Social Comparisons Feel

In [73]:
# Render the depression chart next to the comparison-feelings chart.
display(viz3_final)

In [75]:
# Export Visualization 2 as JSON into the shared project folder.
# The path is built once so the file written and the message printed below
# cannot drift apart.
viz2_json_path = f'{project_path}/viz2_final.json'

json_string = viz2_final.to_json()
# Explicit UTF-8 so the output does not depend on the platform default encoding.
with open(viz2_json_path, 'w', encoding='utf-8') as f:
    f.write(json_string)

print(f"Visualization saved to: {viz2_json_path}")

Visualization saved to: /content/drive/.shortcut-targets-by-id/1UHJTiSennuDQQLeSaill-5Gzo3kPcJid/209 Final Project/viz2_final.json
