In [9]:
import json
import os

import altair as alt
import geopandas as gpd
import numpy as np
import pandas as pd
import plotly
import plotnine as pt

In [10]:
# Make the raw-data folder the working directory so that relative paths such as
# '2024/100_enr1.csv' in the following cells resolve.
# NOTE(review): machine-specific absolute path — consider a configurable DATA_DIR.
os.chdir('/Users/trishapunamiya/Desktop/LSE/Data Viz/Project/Raw Data')

In [16]:
# 2024 enrolment table and school profile table, read relative to the working directory.
df_enr = pd.read_csv('2024/100_enr1.csv')
df_profile = pd.read_csv('2024/100_prof1.csv')

# Attach state / district / rural_urban to every enrolment row via the shared
# 'pseudocode' key. The left join keeps enrolment rows that have no profile match.
df_merged = pd.merge(
    df_enr,
    df_profile.loc[:, ['pseudocode', 'state', 'district', 'rural_urban']],
    how='left',
    on='pseudocode',
)

Gender Gap by State

In [None]:
# Class columns for boys and girls ('cpp' plus classes 1-12)
boy_cols = ['cpp_b', 'c1_b', 'c2_b', 'c3_b', 'c4_b', 'c5_b', 'c6_b',
            'c7_b', 'c8_b', 'c9_b', 'c10_b', 'c11_b', 'c12_b']
girl_cols = ['cpp_g', 'c1_g', 'c2_g', 'c3_g', 'c4_g', 'c5_g', 'c6_g',
             'c7_g', 'c8_g', 'c9_g', 'c10_g', 'c11_g', 'c12_g']

# Row-level totals. `total_students` is also read by the student/teacher cell below.
df_merged['total_boys'] = df_merged[boy_cols].sum(axis=1)
df_merged['total_girls'] = df_merged[girl_cols].sum(axis=1)
df_merged['total_students'] = df_merged['total_boys'] + df_merged['total_girls']

# Group on a title-cased copy of the state name instead of overwriting
# df_merged['state']. The student/teacher cell joins df_merged aggregates to
# df_profile-based aggregates on the raw `state` values, so rewriting the column
# here would make that join find no matching states.
state_key = df_merged['state'].str.title()
state_gender = (
    df_merged
    .groupby(state_key)[['total_boys', 'total_girls']]
    .sum()
    .reset_index()
)

# Shares of total enrolment, in percent
state_gender['total_enrollment'] = state_gender['total_boys'] + state_gender['total_girls']
state_gender['girls_percentage'] = (state_gender['total_girls'] / state_gender['total_enrollment']) * 100
state_gender['boys_percentage'] = (state_gender['total_boys'] / state_gender['total_enrollment']) * 100

# Gender gap: positive means more girls, negative means more boys
state_gender['gender_gap'] = state_gender['girls_percentage'] - state_gender['boys_percentage']

# Most boy-heavy states first
state_gender = state_gender.sort_values('gender_gap').reset_index(drop=True)

# Long format for stacked percentage bars: two rows per state (Boys, then Girls),
# in the same state order as `state_gender`. The stable sort on the retained index
# interleaves the Boys/Girls rows that melt() emits as two consecutive runs.
df_gender_pct = (
    state_gender
    .rename(columns={'boys_percentage': 'Boys', 'girls_percentage': 'Girls'})
    .melt(
        id_vars=['state', 'gender_gap'],
        value_vars=['Boys', 'Girls'],
        var_name='Gender',
        value_name='Percentage',
        ignore_index=False,
    )
    .sort_index(kind='mergesort')
    .reset_index(drop=True)
    [['state', 'Gender', 'Percentage', 'gender_gap']]
)

# Save
df_gender_pct.to_json('/Users/trishapunamiya/tpunamiya.github.io-4/charts/gender_gap_percentage_stacked.json', orient='records', indent=2)
print("✓ Percentage stacked data created")
print(df_gender_pct.head(10))

state_gender.to_json('/Users/trishapunamiya/tpunamiya.github.io-4/charts/gender_gap_by_state.json', orient='records', indent=2)

✓ Percentage stacked data created
        state Gender  Percentage  gender_gap
0     Haryana   Boys   53.736568   -7.473137
1     Haryana  Girls   46.263432   -7.473137
2      Punjab   Boys   53.731404   -7.462807
3      Punjab  Girls   46.268596   -7.462807
4   Rajasthan   Boys   52.941510   -5.883021
5   Rajasthan  Girls   47.058490   -5.883021
6     Gujarat   Boys   52.749229   -5.498459
7     Gujarat  Girls   47.250771   -5.498459
8  Chandigarh   Boys   52.597331   -5.194663
9  Chandigarh  Girls   47.402669   -5.194663


# Bubble Chart - Student/Teacher

In [11]:
# Teacher table for 2024, tagged with the state of each row's school (joined on pseudocode)
df_teacher = pd.read_csv('2024/100_tch.csv')
df_teacher_merged = pd.merge(
    df_teacher,
    df_profile[['pseudocode', 'state']],
    how='left',
    on='pseudocode',
)

# Total teachers per state
state_teachers = df_teacher_merged.groupby('state', as_index=False)['total_tch'].sum()

# Distinct schools and total enrolment per state (named aggregation gives the
# school count its final column name directly)
schools_per_state = (
    df_merged
    .groupby('state')
    .agg(
        num_schools=('pseudocode', 'nunique'),
        total_students=('total_students', 'sum'),
    )
    .reset_index()
)

# Average school size
schools_per_state['avg_students_per_school'] = schools_per_state['total_students'].div(
    schools_per_state['num_schools']
)

# Combine enrolment and teacher aggregates; only states present in both survive the inner join
state_data = pd.merge(schools_per_state, state_teachers, on='state')

# Students per teacher
state_data['student_teacher_ratio'] = state_data['total_students'].div(state_data['total_tch'])

# Add region classification
# Region membership, keyed by normalised name: upper-case, single-spaced, with
# '&' spelled out as 'AND' (the form classify_region reduces its input to).
_STATES_BY_REGION = {
    'North': ['JAMMU AND KASHMIR', 'LADAKH', 'HIMACHAL PRADESH', 'PUNJAB', 'CHANDIGARH',
              'UTTARAKHAND', 'HARYANA', 'DELHI', 'RAJASTHAN'],
    'East/Central': ['UTTAR PRADESH', 'BIHAR', 'JHARKHAND', 'WEST BENGAL',
                     'ODISHA', 'CHHATTISGARH'],
    'West': ['MADHYA PRADESH', 'GUJARAT', 'MAHARASHTRA', 'GOA',
             'DADRA AND NAGAR HAVELI AND DAMAN AND DIU', 'DAMAN AND DIU',
             'DADRA AND NAGAR HAVELI'],
    'South': ['ANDHRA PRADESH', 'KARNATAKA', 'KERALA', 'TAMIL NADU', 'TAMILNADU',
              'TELANGANA', 'PUDUCHERRY', 'LAKSHADWEEP',
              'ANDAMAN AND NICOBAR ISLANDS'],
    'Northeast': ['ARUNACHAL PRADESH', 'ASSAM', 'MANIPUR', 'MEGHALAYA',
                  'MIZORAM', 'NAGALAND', 'SIKKIM', 'TRIPURA'],
}
_REGION_BY_STATE = {
    name: region
    for region, names in _STATES_BY_REGION.items()
    for name in names
}


def classify_region(state):
    """Classify an Indian state / union territory name into a region.

    Matching ignores case, surrounding or repeated whitespace, and whether
    "and" is written as '&' or 'AND'.

    Parameters
    ----------
    state : str
        State or union-territory name in any letter case.

    Returns
    -------
    str
        One of 'North', 'East/Central', 'West', 'South', 'Northeast', or
        'Other' when the name is not recognised or is not a string
        (e.g. NaN / None).
    """
    if not isinstance(state, str):
        return 'Other'
    # 'Jammu & Kashmir' -> 'JAMMU AND KASHMIR'; split/join collapses extra spaces
    key = ' '.join(state.upper().replace('&', ' AND ').split())
    return _REGION_BY_STATE.get(key, 'Other')

# Tag every state with its region
state_data['region'] = state_data['state'].apply(classify_region)

print(state_data[['state', 'num_schools', 'avg_students_per_school', 'student_teacher_ratio', 'region']])

# JSON export is currently switched off; uncomment the next line to write the file.
#state_data.to_json('student_teacher_bubble.json', orient='records', indent=2)

# The old message claimed the data had been saved even though the export above is disabled.
print("\n✓ Data ready (JSON export is currently disabled)")
print(f"\nAvg students per school range: {state_data['avg_students_per_school'].min():.0f} to {state_data['avg_students_per_school'].max():.0f}")

                                   state  num_schools  \
0              ANDAMAN & NICOBAR ISLANDS          408   
1                         ANDHRA PRADESH        61316   
2                      ARUNACHAL PRADESH         3208   
3                                  ASSAM        55283   
4                                  BIHAR        94334   
5                             CHANDIGARH          207   
6                           CHHATTISGARH        56802   
7   DADRA & NAGAR HAVELI AND DAMAN & DIU          433   
8                                  DELHI         5556   
9                                    GOA         1479   
10                               GUJARAT        53292   
11                               HARYANA        23494   
12                      HIMACHAL PRADESH        17330   
13                       JAMMU & KASHMIR        24046   
14                             JHARKHAND        44269   
15                             KARNATAKA        74589   
16                             

In [12]:
import pandas as pd
import altair as alt

# Assuming df_merged has enrollment data

# Column prefixes and display labels, Pre-Primary through Class 12
class_levels = ['cpp'] + [f'c{grade}' for grade in range(1, 13)]
class_labels = ['Pre-Primary'] + [f'Class {grade}' for grade in range(1, 13)]

# One record per (class, gender) holding the enrolment summed over all rows of df_merged.
# Records are ordered class by class, Boys before Girls.
dropout_data = [
    {
        'Class': label,
        'Gender': gender,
        'Enrollment': df_merged[f'{level}_{suffix}'].sum(),
        'Class_Order': position,
    }
    for position, (level, label) in enumerate(zip(class_levels, class_labels))
    for gender, suffix in (('Boys', 'b'), ('Girls', 'g'))
]
df_dropout = pd.DataFrame(dropout_data)

# Encodings for the line chart
class_axis = alt.X(
    'Class:N',
    axis=alt.Axis(title=None, labelAngle=-45),
    sort=class_labels,
)
enrollment_axis = alt.Y(
    'Enrollment:Q',
    axis=alt.Axis(title='Number of Students Enrolled', format='~s'),
)
gender_color = alt.Color(
    'Gender:N',
    scale=alt.Scale(domain=['Boys', 'Girls'], range=['#5499C7', '#E74C3C']),
    legend=alt.Legend(title='Gender'),
)
chart_title = {
    "text": "Student Enrollment Drop-off by Class Level",
    "subtitle": "All-India enrollment from Pre-Primary through Class 12 | Data: UDISE+ India",
    "fontSize": 16,
    "subtitleFontSize": 12
}

# One line per gender across the class levels
chart = (
    alt.Chart(df_dropout)
    .mark_line(point=True, strokeWidth=3)
    .encode(
        x=class_axis,
        y=enrollment_axis,
        color=gender_color,
        tooltip=['Class', 'Gender', alt.Tooltip('Enrollment:Q', format=',')],
    )
    .properties(width=650, height=400, title=chart_title)
)

# Display the chart
chart.show()

# Save as JSON for Vega-Lite

In [13]:
# Build one combined enrolment frame for the listed years, with a `state` column
# taken from that year's profile file and a `year` column added per frame.
# NOTE(review): the next loading cell repeats this loop for 2022-2024 and builds
# `df_all`; `df_all_years` is not referenced again in the visible part of the
# notebook — confirm whether this cell is still needed.
years = [2022, 2023]

# One merged frame per year, concatenated after the loop
all_years_data = []

for year in years:
    print(f"Processing {year}...")
    
    # Enrolment table for this year.
    # Rebinds the global `df_enr` that the 2024 cells above also use.
    enr_file = f'/Users/trishapunamiya/Desktop/LSE/Data Viz/Project/Raw Data/{year}/100_enr1.csv'
    df_enr = pd.read_csv(enr_file)
    
    # Profile table for this year (source of `state`).
    # Rebinds the global `df_profile` as well.
    profile_file = f'/Users/trishapunamiya/Desktop/LSE/Data Viz/Project/Raw Data/{year}/100_prof1.csv'
    df_profile = pd.read_csv(profile_file)
    
    # Left join on pseudocode keeps every enrolment row
    df_merged_year = df_enr.merge(df_profile[['pseudocode', 'state']], on='pseudocode', how='left')
    
    # Tag rows with their source year
    df_merged_year['year'] = year
    
    # Collect for the final concat
    all_years_data.append(df_merged_year)
    
    print(f"  ✓ Loaded {len(df_merged_year):,} rows for {year}")

# Stack all years into one frame with a fresh 0..n-1 index
df_all_years = pd.concat(all_years_data, ignore_index=True)

Processing 2022...
  ✓ Loaded 7,695,344 rows for 2022
Processing 2023...
  ✓ Loaded 8,234,734 rows for 2023


### Loading 2021-2024 Enrolment and School Profile Data

In [14]:
# Build `df_all`: enrolment rows for each listed year, each tagged with `state`
# (from that year's profile file) and `year`.
years = [2022, 2023, 2024]
all_years_data = []

for year in years:
    print(f"\n=== Processing {year} ===")
    
    # Enrolment table for this year (rebinds the global `df_enr`)
    enr_file = f'/Users/trishapunamiya/Desktop/LSE/Data Viz/Project/Raw Data/{year}/100_enr1.csv'
    df_enr = pd.read_csv(enr_file)
    print(f"Loaded enrollment: {len(df_enr):,} rows")
    
    # Profile table for this year (rebinds the global `df_profile`)
    profile_file = f'/Users/trishapunamiya/Desktop/LSE/Data Viz/Project/Raw Data/{year}/100_prof1.csv'
    df_profile = pd.read_csv(profile_file)
    print(f"Loaded profile: {len(df_profile):,} rows")
    
    # Left join on pseudocode keeps every enrolment row
    df_merged_year = df_enr.merge(df_profile[['pseudocode', 'state']], on='pseudocode', how='left')
    print(f"After merge: {len(df_merged_year):,} rows")
    
    # Tag rows with their source year
    df_merged_year['year'] = year
    
    # Collect for the final concat
    all_years_data.append(df_merged_year)
    print(f"✓ Added {year} to list (list now has {len(all_years_data)} dataframes)")

# 2021 data: different file names, and the key column is spelled 'psuedocode' in these files.
# NOTE(review): the 2021 frame is read and merged but NOT appended (the append below is
# commented out), so `df_all` covers only the years in `years`. If 2021 stays excluded,
# these two reads are unused work.
profile_2021 = pd.read_csv('/Users/trishapunamiya/Desktop/LSE/Data Viz/Project/Raw Data/2021/nationalProfile_1.csv')
enroll_2021 = pd.read_csv('/Users/trishapunamiya/Desktop/LSE/Data Viz/Project/Raw Data/2021/22-100_enr1/nationalEnrol1.csv')
df_merged_2021 = enroll_2021.merge(profile_2021[['psuedocode', 'state']], on='psuedocode', how='left')
df_merged_2021['year'] = 2021
#all_years_data.append(df_merged_2021)

# Stack all collected years into one frame with a fresh 0..n-1 index
df_all = pd.concat(all_years_data, ignore_index=True)

print(f"\n=== FINAL RESULT ===")
print(f"Total rows: {len(df_all):,}")
print(f"Years in combined data: {df_all['year'].unique()}")



=== Processing 2022 ===


ParserError: Error tokenizing data. C error: Calling read(nbytes) on source failed. Try engine='python'.

In [None]:
# Standardise state names in the combined frame: cast to text, trim whitespace,
# title-case, then expand the one-word spelling 'Tamilnadu' to 'Tamil Nadu'.
# NOTE(review): astype(str) turns a missing state into the text 'nan' (title-cased
# to 'Nan'), which then behaves like a state name downstream — confirm no rows
# lack a state.
df_all['state'] = (
    df_all['state']
    .astype(str)
    .str.strip()
    .str.title()
    .replace('Tamilnadu', 'Tamil Nadu')
)


In [None]:
# -------------------------------------------------------
# STEP 1 — Prepare funnel data for all years and states
# -------------------------------------------------------
class_levels = ['c1','c2','c3','c4','c5','c6','c7','c8','c9','c10','c11','c12']
class_labels = ['Class 1','Class 2','Class 3','Class 4','Class 5','Class 6',
                'Class 7','Class 8','Class 9','Class 10','Class 11','Class 12']

# Every '<class>_b' / '<class>_g' column the funnel needs
funnel_cols = [f'{level}_{suffix}' for level in class_levels for suffix in ('b', 'g')]


def _funnel_rows(totals, year, state):
    """Return the Boys/Girls funnel records for one (year, state) slice.

    `totals` maps each '<class>_b' / '<class>_g' column name to its summed
    enrolment. Boys are stored as negative values so their bars extend to the
    left of zero on the shared x axis; girls stay positive.
    """
    rows = []
    for label, level in zip(class_labels, class_levels):
        rows.append({'Class': label, 'Gender': 'Boys', 'Enrollment': -totals[f'{level}_b'],
                     'Year': year, 'State': state})
        rows.append({'Class': label, 'Gender': 'Girls', 'Enrollment': totals[f'{level}_g'],
                     'Year': year, 'State': state})
    return rows


records = []

for year in sorted(df_all['year'].unique()):
    df_year = df_all[df_all['year'] == year]

    # All-India totals for this year
    records.extend(_funnel_rows(df_year[funnel_cols].sum().to_dict(), year, 'All India'))

    # Per-state totals in a single pass instead of re-filtering the full year
    # frame once per state. sort=False keeps states in first-appearance order.
    # Rows whose state is missing are left out rather than emitted as all-zero
    # records.
    state_totals = df_year.groupby('state', sort=False)[funnel_cols].sum()
    for state, totals in state_totals.to_dict(orient='index').items():
        records.extend(_funnel_rows(totals, year, state))

df_dropout = pd.DataFrame(records)
# Ordered categorical with Class 12 first and Class 1 last
df_dropout['Class'] = pd.Categorical(df_dropout['Class'], categories=class_labels[::-1], ordered=True)

# -------------------------------------------------------
# STEP 2 — Largest absolute enrollment per state (all years, both genders)
# -------------------------------------------------------
# Boys are stored as negatives, so take absolute values before the per-state max.
state_max = (
    df_dropout['Enrollment']
    .abs()
    .groupby(df_dropout['State'])
    .max()
    .rename('max_enrollment')
    .reset_index()
)
# Broadcast the per-state maximum back onto every funnel row
df_dropout = pd.merge(df_dropout, state_max, how='left', on='State')

# -------------------------------------------------------
# STEP 3 — Altair interactive funnel chart (MATCHED THEME)
# -------------------------------------------------------

# Year slider spanning the years present in the funnel data
first_year = int(df_dropout['Year'].min())
last_year = int(df_dropout['Year'].max())
year_param = alt.param(
    name='YearParam',
    value=first_year,
    bind=alt.binding_range(min=first_year, max=last_year, step=1, name='Year: ')
)

# State dropdown, defaulting to the national aggregate
state_options = sorted(df_dropout['State'].unique())
state_param = alt.param(
    name='StateParam',
    value='All India',
    bind=alt.binding_select(options=state_options, name='State: ')
)

# Two anchor rows per state at -max and +max enrollment, taken from the first
# row seen for each state
state_peaks = df_dropout.drop_duplicates('State')[['State', 'max_enrollment']]
scale_anchors = [
    {'State': state, 'anchor_value': sign * peak}
    for state, peak in zip(state_peaks['State'], state_peaks['max_enrollment'])
    for sign in (-1, 1)
]
df_anchors = pd.DataFrame(scale_anchors)

# Filter predicates driven by the two widgets
matches_state = alt.datum.State == state_param
matches_year = alt.datum.Year == year_param

# Base layer: fully transparent points at the selected state's +/- anchors, so the
# shared x scale covers a symmetric range for that state
base = (
    alt.Chart(df_anchors)
    .transform_filter(matches_state)
    .mark_point(opacity=0)
    .encode(x='anchor_value:Q')
    .add_params(state_param)
)

# Encodings for the bar layer
class_axis = alt.Y(
    'Class:N',
    sort=class_labels[::-1],
    title=None,
    axis=alt.Axis(labelPadding=10, labelFontSize=12)
)
enrollment_axis = alt.X(
    'Enrollment:Q',
    axis=alt.Axis(
        title='Number of Students (thousands)',
        labelExpr="abs(datum.value)/1000",  # show magnitudes, in thousands
        grid=False,
        tickCount=8,
        labelFontSize=12,
        titleFontSize=12
    )
)
gender_color = alt.Color(
    'Gender:N',
    scale=alt.Scale(domain=['Boys','Girls'], range=['#77ba99','#ba7798']),
    legend=alt.Legend(
        orient='right',
        titleFontSize=12,
        labelFontSize=12,
        symbolSize=200
    )
)

# Main layer: one bar per class and gender for the selected state and year
bars = (
    alt.Chart(df_dropout)
    .transform_filter(matches_state)
    .transform_filter(matches_year)
    .mark_bar()
    .encode(y=class_axis, x=enrollment_axis, color=gender_color)
    .add_params(year_param)
)

# Dashed vertical rule at zero, between the boys (negative) and girls (positive) bars
rule = (
    alt.Chart(pd.DataFrame({'x': [0]}))
    .mark_rule(color='gray', strokeWidth=1.5, strokeDash=[5, 5])
    .encode(x='x:Q')
)

funnel_title = {
    'text': 'Education Pipeline: Enrollment by Class and Gender',
    'subtitle': 'Note: Since 2021 data is only available for 25 of 37 states, it is excluded from this chart',
    'fontSize': 16,
    'subtitleFontSize': 12,
    'subtitleColor': '#666',
    'anchor': 'start'
}

# Stack the three layers on one shared x scale and apply the view/axis styling
chart = (
    alt.layer(base, bars, rule)
    .resolve_scale(x='shared')
    .properties(width=500, height=350, title=funnel_title)
    .configure_view(strokeWidth=1, stroke='gray')
    .configure_axis(domainWidth=1)
)

chart

In [None]:
# Write the current `chart` object (the layered funnel chart from the cell above) to a .json file.
# NOTE(review): machine-specific absolute path.
chart.save('/Users/trishapunamiya/tpunamiya.github.io-4/education_dropout_byStateGender.json')

In [None]:
# Check states with data in 2021
df_2021 = df_dropout[df_dropout['Year'] == 2021]

print(f"Total states in dataset: {df_dropout['State'].nunique()}")
print(f"States with data in 2021: {df_2021['State'].nunique()}")
print(f"\nTotal rows for 2021: {len(df_2021)}")

# See which states have data in 2021.
# The loop uses its own variable names so it does not overwrite the `state_data`
# DataFrame built in the student/teacher cell.
states_2021 = sorted(df_2021['State'].unique())
print(f"\nStates with 2021 data:")
for state_name in states_2021:
    state_rows_2021 = df_2021[df_2021['State'] == state_name]
    # Boys are stored as negative values, so sum absolute values
    enrollment_2021 = state_rows_2021['Enrollment'].abs().sum()
    print(f"  {state_name}: {len(state_rows_2021)} rows, {enrollment_2021:,.0f} total enrollment")

# Check which states are missing 2021 data
all_states = set(df_dropout['State'].unique())
states_with_2021 = set(df_2021['State'].unique())
missing_2021 = sorted(all_states - states_with_2021)

if missing_2021:
    print(f"\nStates MISSING 2021 data ({len(missing_2021)}):")
    for state_name in missing_2021:
        print(f"  {state_name}")

# Also check what years are available
print(f"\nAvailable years: {sorted(df_dropout['Year'].unique())}")

# Print list of states with data in 2024 (previously computed but never printed)
df_2024 = df_dropout[df_dropout['Year'] == 2024]
states_2024 = sorted(df_2024['State'].unique())
print(f"\nStates with 2024 data ({len(states_2024)}):")
for state_name in states_2024:
    print(f"  {state_name}")

Total states in dataset: 37
States with data in 2021: 25

Total rows for 2021: 600

States with 2021 data:
  All India: 24 rows, 126,765,763 total enrollment
  Andaman & Nicobar Islands: 24 rows, 151,705 total enrollment
  Arunachal Pradesh: 24 rows, 733,159 total enrollment
  Chandigarh: 24 rows, 525,753 total enrollment
  Chhattisgarh: 24 rows, 14,387,235 total enrollment
  Dadra & Nagar Haveli And Daman & Diu: 24 rows, 261,430 total enrollment
  Delhi: 24 rows, 8,829,947 total enrollment
  Goa: 24 rows, 639,529 total enrollment
  Gujarat: 24 rows, 22,608,909 total enrollment
  Haryana: 24 rows, 11,098,194 total enrollment
  Himachal Pradesh: 24 rows, 2,450,679 total enrollment
  Jammu & Kashmir: 24 rows, 6,716,730 total enrollment
  Jharkhand: 24 rows, 17,403,082 total enrollment
  Kerala: 24 rows, 15,472,681 total enrollment
  Ladakh: 24 rows, 142,935 total enrollment
  Lakshadweep: 24 rows, 36,275 total enrollment
  Manipur: 24 rows, 1,461,657 total enrollment
  Meghalaya: 24 rows

In [None]:
# Total enrollment per year (all classes, boys + girls) for the years present in
# df_all. This cell only builds the table; it does not draw a chart.
all_class_cols = [
    f'{level}_{suffix}'
    for level in ['cpp'] + [f'c{grade}' for grade in range(1, 13)]
    for suffix in ('b', 'g')
]

# Sum each enrollment column within a year, then add the columns together.
# Only enrollment columns are summed, so `year` never enters the total and does
# not have to be subtracted back out.
line_data = (
    df_all
    .groupby('year')[all_class_cols]
    .sum()
    .sum(axis=1)
    .rename('total_enrollment')
    .reset_index()
)

## Choropleth Map

In [None]:
import geopandas as gpd

# Convert the state-boundary shapefile to a GeoJSON file in EPSG:4326 for the web charts.

# Read the shapefile
gdf = gpd.read_file('/Users/trishapunamiya/Desktop/LSE/Data Viz/Project/Raw Data/India_maps-master/Survey-of-India-Index-Maps/StateBoundary/StateBoundary.shp')

# Report the coordinate system the file was stored in
print("Current CRS:", gdf.crs)

# List the attribute columns
print("\nAll columns in the shapefile:")
print(gdf.columns.tolist())

# Show the first feature as a sample
print("\nFirst row:")
print(gdf.head(1))

# Reproject to EPSG:4326 (WGS84 latitude/longitude)
gdf = gdf.to_crs("EPSG:4326")

print("\nNew CRS:", gdf.crs)

# Save as GeoJSON without renaming any column.
# NOTE(review): the recorded output lists the columns as ['state', 'geometry'],
# so the state column appears to need no rename — confirm.
gdf.to_file('/Users/trishapunamiya/tpunamiya.github.io-4/charts/india_states.json', driver='GeoJSON')

print("\n✓ Converted to GeoJSON with WGS84 projection!")

Current CRS: EPSG:3857

All columns in the shapefile:
['state', 'geometry']

First row:
               state                                           geometry
0  ANDAMAN & NICOBAR  MULTIPOLYGON (((10341718.474 1449533.161, 1034...

New CRS: EPSG:4326

✓ Converted to GeoJSON with WGS84 projection!


In [None]:
# 2024 inputs: the facility table plus both profile tables.
# `df_profile2_2024` is loaded here but not used in this cell.
df_fac_2024 = pd.read_csv('/Users/trishapunamiya/Desktop/LSE/Data Viz/Project/Raw Data/2024/100_fac.csv')
df_profile_2024 = pd.read_csv('/Users/trishapunamiya/Desktop/LSE/Data Viz/Project/Raw Data/2024/100_prof1.csv')
df_profile2_2024 = pd.read_csv('/Users/trishapunamiya/Desktop/LSE/Data Viz/Project/Raw Data/2024/100_prof2.csv')

# Add each school's state to its facility row; the left join keeps every facility row
df_sch_2024 = pd.merge(
    df_fac_2024,
    df_profile_2024[['pseudocode', 'state']],
    how='left',
    on='pseudocode',
)

# Quick check of the merged result
print(f"Merged facility data rows: {len(df_sch_2024)}")
print(f"States in facility data: {df_sch_2024['state'].nunique()}")

Merged facility data rows: 1471473
States in facility data: 36


In [None]:
# Per-state infrastructure shares for 2024, as a percentage of ALL rows in the state
# (rows with a missing value count as "not available").
# Per the original notes, code 1 means "yes" (for electricity: "yes and functional"),
# and a school counts for girls' toilets when it reports at least one functional toilet.
state_infrastructure = (
    df_sch_2024
    .groupby('state')
    .agg(
        pct_electricity_functional=('electricity_availability', lambda x: (x == 1).sum() / len(x) * 100),
        pct_internet=('internet', lambda x: (x == 1).sum() / len(x) * 100),
        pct_library=('library_availability', lambda x: (x == 1).sum() / len(x) * 100),
        pct_girls_toilets=('total_girls_func_toilet', lambda x: (x > 0).sum() / len(x) * 100),
        num_schools=('pseudocode', 'count'),
    )
    .reset_index()
)

# Composite infrastructure index: unweighted mean of the four percentages
state_infrastructure['infrastructure_index'] = state_infrastructure[[
    'pct_electricity_functional', 'pct_internet', 'pct_library', 'pct_girls_toilets'
]].mean(axis=1)

# Mapping from the upper-case names in the data to the names used in the GeoJSON
name_mapping = {
    "ANDAMAN & NICOBAR ISLANDS": "Andaman and Nicobar",
    "ANDHRA PRADESH": "Andhra Pradesh",
    "ARUNACHAL PRADESH": "Arunachal Pradesh",
    "ASSAM": "Assam",
    "BIHAR": "Bihar",
    "CHANDIGARH": "Chandigarh",
    "CHHATTISGARH": "Chhattisgarh",
    "DADRA & NAGAR HAVELI AND DAMAN & DIU": "Dadra and Nagar Haveli and Daman and Diu",
    "DELHI": "Delhi",
    "GOA": "Goa",
    "GUJARAT": "Gujarat",
    "HARYANA": "Haryana",
    "HIMACHAL PRADESH": "Himachal Pradesh",
    "JAMMU & KASHMIR": "Jammu and Kashmir",
    "JHARKHAND": "Jharkhand",
    "KARNATAKA": "Karnataka",
    "KERALA": "Kerala",
    "LADAKH": "Ladakh",
    "LAKSHADWEEP": "Lakshadweep",
    "MADHYA PRADESH": "Madhya Pradesh",
    "MAHARASHTRA": "Maharashtra",
    "MANIPUR": "Manipur",
    "MEGHALAYA": "Meghalaya",
    "MIZORAM": "Mizoram",
    "NAGALAND": "Nagaland",
    "ODISHA": "Orissa",
    "PUDUCHERRY": "Puducherry",
    "PUNJAB": "Punjab",
    "RAJASTHAN": "Rajasthan",
    "SIKKIM": "Sikkim",
    "TAMIL NADU": "Tamil Nadu",
    "TELANGANA": "Telangana",
    "TRIPURA": "Tripura",
    "UTTAR PRADESH": "Uttar Pradesh",
    "UTTARAKHAND": "Uttarakhand",
    "WEST BENGAL": "West Bengal"
}

# Update state names. A name that is not a key of name_mapping maps to NaN and is
# exported as null, so report any such names instead of losing them silently.
mapped_names = state_infrastructure['state'].map(name_mapping)
unmapped_names = sorted(
    state_infrastructure.loc[mapped_names.isna(), 'state'].astype(str).unique()
)
if unmapped_names:
    print(f"WARNING: {len(unmapped_names)} state name(s) missing from name_mapping (exported as null): {unmapped_names}")
state_infrastructure['state'] = mapped_names

# Sort by infrastructure index (lowest first)
state_infrastructure = state_infrastructure.sort_values('infrastructure_index')

print(state_infrastructure[['state', 'pct_electricity_functional', 'pct_internet', 'pct_library', 'pct_girls_toilets', 'infrastructure_index']])

# Save
state_infrastructure.to_json('/Users/trishapunamiya/tpunamiya.github.io-4/charts/state_infrastructure_index.json', orient='records', indent=2)

                                       state  pct_electricity_functional  \
22                                 Meghalaya                   28.072942   
21                                   Manipur                   63.266181   
2                          Arunachal Pradesh                   62.341282   
32                                   Tripura                   79.951446   
13                         Jammu and Kashmir                   87.189980   
23                                   Mizoram                   80.347257   
35                               West Bengal                   97.100784   
24                                  Nagaland                   78.181818   
17                                    Ladakh                   76.378772   
33                             Uttar Pradesh                   86.031682   
29                                    Sikkim                   97.028112   
19                            Madhya Pradesh                   87.785784   
28          

In [None]:
# Load India GeoJSON (`json` is imported in the notebook's top import cell).
# The file is read explicitly as UTF-8 rather than with the platform's default encoding.
# NOTE(review): this relative path resolves against the working directory, while the
# shapefile-conversion cell writes india_states.json to an absolute .../charts/ path —
# confirm that a copy exists where this cell looks.
with open('india_states.json', 'r', encoding='utf-8') as f:
    india_geo = json.load(f)

# Normalization function
def normalize_state_name(name):
    """Return an upper-cased, trimmed state name with known variants unified.

    Three spellings are rewritten to the names used in the enrolment data:
    'ANDAMAN & NICOBAR' gains the 'ISLANDS' suffix, and both
    'DADAR & NAGAR HAVELI' and 'DAMAN & DIU' become the merged union-territory
    name. Any other string is returned upper-cased and stripped.

    Non-string input (e.g. NaN or None left behind by an unmatched `.map`) is
    returned unchanged instead of raising AttributeError on `.upper()`.
    """
    if not isinstance(name, str):
        return name
    name = name.upper().strip()
    replacements = {
        "ANDAMAN & NICOBAR": "ANDAMAN & NICOBAR ISLANDS",
        "DADAR & NAGAR HAVELI": "DADRA & NAGAR HAVELI AND DAMAN & DIU",
        "DAMAN & DIU": "DADRA & NAGAR HAVELI AND DAMAN & DIU"
    }
    return replacements.get(name, name)

# Add a normalised join key to every GeoJSON feature
for feature in india_geo["features"]:
    props = feature["properties"]
    props["state_norm"] = normalize_state_name(props["state"])

# Add the same kind of key to the state-level table.
# NOTE(review): if `state` already holds the mapped display names (e.g. "Andaman and
# Nicobar"), its key becomes "ANDAMAN AND NICOBAR" while the GeoJSON key is
# "ANDAMAN & NICOBAR ISLANDS" — verify that the two key sets actually line up.
state_infrastructure["state_norm"] = state_infrastructure["state"].map(normalize_state_name)

# Write the annotated GeoJSON (relative to the working directory)
with open('india_states_normalized.json', 'w') as f:
    json.dump(india_geo, f)

# Write the annotated state table
state_infrastructure.to_json('/Users/trishapunamiya/tpunamiya.github.io-4/charts/state_infrastructure_normalized.json', orient='records', indent=2)

## Infrastructure index across multiple years

In [6]:
import pandas as pd
import json

# Lookup used by normalize_and_map_state: upper-cased source spelling -> display name.
# Names in this list are matched by their own upper-cased form.
_DIRECT_STATE_NAMES = [
    "Andhra Pradesh", "Arunachal Pradesh", "Assam", "Bihar", "Chandigarh",
    "Chhattisgarh", "Dadra and Nagar Haveli and Daman and Diu", "Delhi", "Goa",
    "Gujarat", "Haryana", "Himachal Pradesh", "Jammu and Kashmir", "Jharkhand",
    "Karnataka", "Kerala", "Ladakh", "Lakshadweep", "Madhya Pradesh",
    "Maharashtra", "Manipur", "Meghalaya", "Mizoram", "Nagaland", "Puducherry",
    "Punjab", "Rajasthan", "Sikkim", "Tamil Nadu", "Telangana", "Tripura",
    "Uttar Pradesh", "Uttarakhand", "West Bengal",
]
# Source spellings that differ from the upper-cased display name
_STATE_ALIASES = {
    "ANDAMAN & NICOBAR ISLANDS": "Andaman and Nicobar",
    "ANDAMAN AND NICOBAR ISLANDS": "Andaman and Nicobar",
    "DADRA & NAGAR HAVELI AND DAMAN & DIU": "Dadra and Nagar Haveli and Daman and Diu",
    "JAMMU & KASHMIR": "Jammu and Kashmir",
    "ODISHA": "Orissa",
    "TAMILNADU": "Tamil Nadu",  # Handle variant spelling
}
_STATE_LOOKUP = {
    **{display.upper(): display for display in _DIRECT_STATE_NAMES},
    **_STATE_ALIASES,
}


def normalize_and_map_state(state_name):
    """Map a raw state name to its standardised display name.

    The input is converted to text, stripped and upper-cased before lookup.
    Returns None for missing input (per pd.isna) and for any name that is
    not in the lookup table.
    """
    if pd.isna(state_name):
        return None
    lookup_key = str(state_name).strip().upper()
    return _STATE_LOOKUP.get(lookup_key)

def process_year_data(year, data_root='/Users/trishapunamiya/Desktop/LSE/Data Viz/Project/Raw Data'):
    """Compute state-level school-infrastructure coverage for one year.

    Reads ``<data_root>/<year>/100_fac.csv`` (facility rows) and
    ``<data_root>/<year>/100_prof1.csv`` (profile rows), attaches each
    facility row's state via ``pseudocode``, standardizes the state name with
    ``normalize_and_map_state`` and aggregates per standardized state.

    State names are standardized BEFORE grouping, so spelling variants of the
    same state within a year are combined into a single row. Rows whose state
    is missing or not recognized are excluded from the result (``groupby``
    drops missing keys); unrecognized names are reported on stdout.

    Parameters
    ----------
    year : int
        Year sub-directory to read; also written to the ``year`` column.
    data_root : str or path-like, optional
        Directory that contains one sub-directory per year. Defaults to the
        location originally hard-coded in this notebook.

    Returns
    -------
    pandas.DataFrame
        One row per standardized state with columns ``state``,
        ``pct_electricity_functional``, ``pct_internet``, ``pct_library``,
        ``pct_girls_toilets`` (all 0-100), ``num_schools``,
        ``infrastructure_index`` (plain mean of the four percentages) and
        ``year``.

    Raises
    ------
    FileNotFoundError
        Propagated from ``pd.read_csv`` when either input file is missing.
    """
    base_path = f"{str(data_root).rstrip('/')}/{year}/"

    # Read facility and profile data
    df_fac = pd.read_csv(f'{base_path}100_fac.csv')
    df_profile = pd.read_csv(f'{base_path}100_prof1.csv')

    # Left merge keeps every facility row; rows without a profile get state NaN
    df_sch = df_fac.merge(
        df_profile[['pseudocode', 'state']],
        on='pseudocode',
        how='left'
    )

    print(f"\nYear {year}:")
    print(f"  Merged rows: {len(df_sch)}")
    print(f"  Unique states: {df_sch['state'].nunique()}")
    print(f"  Sample state names: {df_sch['state'].unique()[:5]}")

    # Resolve each distinct raw name once (a handful of values) rather than
    # calling the Python-level mapper on every row.
    raw_names = df_sch['state'].dropna().unique()
    name_lookup = {raw: normalize_and_map_state(raw) for raw in raw_names}

    # Report raw names that could not be standardized (by name, not by index)
    unmapped = [raw for raw, clean in name_lookup.items() if clean is None]
    if unmapped:
        print(f"  WARNING: {len(unmapped)} states not mapped!")
        print(f"  Unmapped state names: {unmapped}")

    # Unrecognized names become missing and are dropped by the groupby below
    df_sch['state'] = df_sch['state'].map(name_lookup)

    def pct_equal_one(col):
        # Share of rows whose flag equals 1 (presumably the "available" code
        # -- confirm against the data dictionary). Missing values count in
        # the denominator.
        return (col == 1).sum() / len(col) * 100

    def pct_positive(col):
        # Share of rows with a strictly positive count
        return (col > 0).sum() / len(col) * 100

    state_infrastructure = (
        df_sch.groupby('state')
        .agg({
            'electricity_availability': pct_equal_one,
            'internet': pct_equal_one,
            'library_availability': pct_equal_one,
            'total_girls_func_toilet': pct_positive,
            'pseudocode': 'count'
        })
        .reset_index()
        .rename(columns={
            'electricity_availability': 'pct_electricity_functional',
            'internet': 'pct_internet',
            'library_availability': 'pct_library',
            'total_girls_func_toilet': 'pct_girls_toilets',
            'pseudocode': 'num_schools'
        })
    )

    # Composite index: unweighted mean of the four coverage percentages
    state_infrastructure['infrastructure_index'] = state_infrastructure[[
        'pct_electricity_functional', 'pct_internet', 'pct_library', 'pct_girls_toilets'
    ]].mean(axis=1)

    print(f"  States after mapping: {len(state_infrastructure)}")

    # Add year column
    state_infrastructure['year'] = year

    return state_infrastructure

# Build one frame per year. A year whose files are missing (or that fails for
# any other reason) is reported and skipped so the remaining years still export.
all_years_data = []
for year in (2021, 2022, 2023, 2024):
    try:
        year_data = process_year_data(year)
    except FileNotFoundError:
        print(f"Warning: Data files for {year} not found. Skipping...")
    except Exception as e:
        print(f"Error processing {year}: {str(e)}")
    else:
        all_years_data.append(year_data)

if not all_years_data:
    print("No data was processed successfully.")
else:
    # Stack the yearly frames and order them by year, then state
    combined_data = pd.concat(all_years_data, ignore_index=True).sort_values(['year', 'state'])

    print("\n=== Combined Data Summary ===")
    print(f"Total rows: {len(combined_data)}")
    print(f"Years: {sorted(combined_data['year'].unique())}")
    print(f"States per year: {combined_data.groupby('year')['state'].count()}")

    # Show every year for whichever state sorts first
    print("\nSample data (first state, all years):")
    sample_state = combined_data['state'].iloc[0]
    sample_columns = [
        'year', 'state', 'infrastructure_index', 'pct_electricity_functional',
        'pct_internet', 'pct_library', 'pct_girls_toilets'
    ]
    print(combined_data.loc[combined_data['state'] == sample_state, sample_columns])

    # Export 1: flat list of records (one per state-year)
    output_path = '/Users/trishapunamiya/tpunamiya.github.io-4/charts/state_infrastructure_2021_2024.json'
    combined_data.to_json(output_path, orient='records', indent=2)
    print(f"\nData saved to: {output_path}")

    # Export 2: the same records keyed by year (as a string) for easy filtering
    year_indexed = {}
    for year in combined_data['year'].unique():
        is_this_year = combined_data['year'] == year
        year_data = combined_data[is_this_year].to_dict('records')
        year_indexed[str(year)] = year_data

    output_path_indexed = '/Users/trishapunamiya/tpunamiya.github.io-4/charts/state_infrastructure_by_year.json'
    with open(output_path_indexed, 'w') as f:
        json.dump(year_indexed, f, indent=2)
    print(f"Year-indexed data saved to: {output_path_indexed}")


Year 2022:
  Merged rows: 1466109
  Unique states: 36
  Sample state names: ['Uttar Pradesh' 'Bihar' 'Sikkim' 'Arunachal Pradesh' 'Nagaland']
  States after mapping: 36

Year 2023:
  Merged rows: 1471891
  Unique states: 36
  Sample state names: ['JHARKHAND' 'KARNATAKA' 'MADHYA PRADESH' 'BIHAR' 'HARYANA']
  States after mapping: 36

Year 2024:
  Merged rows: 1471473
  Unique states: 36
  Sample state names: ['TAMIL NADU' 'ASSAM' 'KERALA' 'BIHAR' 'RAJASTHAN']
  States after mapping: 36

=== Combined Data Summary ===
Total rows: 108
Years: [2022, 2023, 2024]
States per year: year
2022    36
2023    36
2024    36
Name: state, dtype: int64

Sample data (first state, all years):
    year                state  infrastructure_index  \
0   2022  Andaman and Nicobar             84.963768   
36  2023  Andaman and Nicobar             87.135922   
72  2024  Andaman and Nicobar             88.786765   

    pct_electricity_functional  pct_internet  pct_library  pct_girls_toilets  
0               

## Stacked Area Chart - Rural vs Urban School Type

In [None]:
# ------------------------------------------------------------
# Load enrollment + school profile for every year and stack them
# ------------------------------------------------------------

years = [2022, 2023, 2024]
all_years_data = []

# --- 2022-2024: same file names and column names in every year ---
for year in years:
    print(f"\n=== Processing {year} ===")

    enr_file = f'/Users/trishapunamiya/Desktop/LSE/Data Viz/Project/Raw Data/{year}/100_enr1.csv'
    profile_file = f'/Users/trishapunamiya/Desktop/LSE/Data Viz/Project/Raw Data/{year}/100_prof1.csv'

    # Enrollment rows for this year
    df_enr = pd.read_csv(enr_file)
    print(f"Loaded enrollment: {len(df_enr):,} rows")

    # School profile rows for this year
    df_profile = pd.read_csv(profile_file)
    print(f"Loaded profile: {len(df_profile):,} rows")

    # Attach state, management and rural/urban to each enrollment row.
    # The management column is spelled 'managment' in the source files.
    profile_subset = df_profile[['pseudocode', 'state', 'managment', 'rural_urban']]
    df_merged_year = pd.merge(df_enr, profile_subset, on='pseudocode', how='left')
    print(f"After merge: {len(df_merged_year):,} rows")

    # Tag rows with their year and collect the frame
    df_merged_year['year'] = year
    all_years_data.append(df_merged_year)
    print(f"✓ Added {year} to list")

# --- 2021: different file names, and the school id is spelled 'psuedocode' ---
profile_2021 = pd.read_csv('/Users/trishapunamiya/Desktop/LSE/Data Viz/Project/Raw Data/2021/nationalProfile_1.csv')
enroll_2021 = pd.read_csv('/Users/trishapunamiya/Desktop/LSE/Data Viz/Project/Raw Data/2021/22-100_enr1/nationalEnrol1.csv')

# Print the 2021 profile columns so the names used below can be checked
print("\n=== 2021 Profile Columns ===")
print(profile_2021.columns.tolist())

profile_2021_subset = profile_2021[['psuedocode', 'state', 'managment', 'rural_urban']]
df_merged_2021 = pd.merge(enroll_2021, profile_2021_subset, on='psuedocode', how='left')
df_merged_2021['year'] = 2021

# Bring the id column in line with the other years before stacking
df_merged_2021.rename(columns={'psuedocode': 'pseudocode'}, inplace=True)

all_years_data.append(df_merged_2021)

# --- Stack all years into one frame ---
df_all = pd.concat(all_years_data, ignore_index=True)

# Correct the misspelled column name; rename ignores labels that are absent,
# so this is a no-op when the column is already called 'management'.
df_all.rename(columns={'managment': 'management'}, inplace=True)


=== Processing 2022 ===
Loaded enrollment: 7,695,344 rows
Loaded profile: 1,466,109 rows
After merge: 7,695,344 rows
✓ Added 2022 to list

=== Processing 2023 ===
Loaded enrollment: 8,234,734 rows
Loaded profile: 1,471,891 rows
After merge: 8,234,734 rows
✓ Added 2023 to list

=== Processing 2024 ===
Loaded enrollment: 8,580,816 rows
Loaded profile: 1,471,473 rows
After merge: 8,580,816 rows
✓ Added 2024 to list

=== 2021 Profile Columns ===
['psuedocode', 'state', 'district', 'rural_urban', 'assembly', 'parliamentary', 'lgd_urban_local_body_name', 'lgd_ward_name', 'lgd_vill_name', 'lgd_vill_panchayat_name', 'school_category', 'school_type', 'lowclass', 'highclass', 'managment', 'pincode', 'year_of_establishment', 'year_of_recognition_pr', 'year_of_recognition_up', 'year_of_recognition_sec', 'year_of_recognition_hsec', 'special_school_for_cwsn', 'shift_school', 'resi_school', 'resi_type', 'minority_school', 'medium_instr1', 'medium_of_instr2', 'medium_of_instr3', 'medium_of_instr4', '

In [None]:
# ============================================
# PROCESS DATA: totals, labels, filter, shares
# ============================================

# Boys/girls enrollment columns: pre-primary ('cpp') and classes 1-12,
# i.e. cpp_b, cpp_g, c1_b, c1_g, ..., c12_b, c12_g
class_columns = [
    f'c{grade}_{sex}'
    for grade in ['pp', *range(1, 13)]
    for sex in ('b', 'g')
]

# Total enrollment per row (missing class counts are treated as 0).
# NOTE(review): the load step reports several enrollment rows per school
# (e.g. 7,695,344 enrollment rows vs 1,466,109 profile rows for 2022).
# Confirm those rows are disjoint student groups -- if they overlap, summing
# every row over-counts students.
df_all[class_columns] = df_all[class_columns].fillna(0)
df_all['total_enrollment'] = df_all[class_columns].sum(axis=1)

# Clean state names: trim, title-case, and fix the 'Tamilnadu' spelling.
# Missing states are kept missing: astype(str) would otherwise turn NaN into
# the text 'nan' -> 'Nan', which then shows up as a bogus state. Rows with a
# missing state are left out of the per-state groups below (groupby drops
# missing keys) but still count towards 'All India'.
# NOTE(review): title-casing leaves '&' vs 'And' spellings as different
# states; consider reusing normalize_and_map_state if the years disagree.
state_raw = df_all['state']
state_clean = (
    state_raw.astype(str)
    .str.strip()
    .str.title()
    .replace('Tamilnadu', 'Tamil Nadu')
)
df_all['state'] = state_clean.where(state_raw.notna())

# Management code -> school category shown in the chart
management_mapping = {
    1: 'State Government',
    2: 'State Government',
    3: 'State Government',
    6: 'State Government',
    7: 'State Government',
    90: 'State Government',
    91: 'State Government',
    92: 'Central Government',
    93: 'Central Government',
    94: 'Central Government',
    95: 'Central Government',
    96: 'Central Government',
    101: 'Central Government',
    4: 'Government Aided',
    5: 'Private Unaided',
    8: 'Private Unaided',
    97: 'Other',
    98: 'Other',
    99: 'Other',
    89: 'Other',
    102: 'Other'
}

# Tolerate the misspelled source column so this cell also works on its own
if 'managment' in df_all.columns:
    df_all.rename(columns={'managment': 'management'}, inplace=True)

# Codes that are not in the mapping become NaN and are filtered out below
df_all['school_category'] = df_all['management'].map(management_mapping)

# Rural/urban code -> label
rural_urban_mapping = {
    1: 'Rural',
    2: 'Urban',
    3: 'Not Known'
}

df_all['location_type'] = df_all['rural_urban'].map(rural_urban_mapping)

# Keep rows with students, a known school category and a known location
keep_row = (
    (df_all['total_enrollment'] > 0)
    & df_all['school_category'].notna()
    & df_all['location_type'].isin(['Rural', 'Urban'])
)
df_all = df_all[keep_row]

# ============================================
# AGGREGATE AND CALCULATE PERCENTAGES
# ============================================

def _enrollment_shares(frame, group_keys):
    """Sum enrollment per (group_keys + school_category) and add `percentage`,
    each category's share (0-100) of the enrollment within its group_keys."""
    totals = (
        frame.groupby(group_keys + ['school_category'])
        .agg({'total_enrollment': 'sum'})
        .reset_index()
    )
    group_total = totals.groupby(group_keys)['total_enrollment'].transform('sum')
    totals['percentage'] = totals['total_enrollment'] / group_total * 100
    return totals


# Shares within each year, state and location
df_aggregated = _enrollment_shares(df_all, ['year', 'state', 'location_type'])

# National shares within each year and location, labelled as a pseudo-state
df_all_india = _enrollment_shares(df_all, ['year', 'location_type'])
df_all_india['state'] = 'All India'

# Combine state-level and national rows (columns are aligned by name)
df_aggregated = pd.concat([df_aggregated, df_all_india], ignore_index=True)

# Save to json
df_aggregated.to_json('/Users/trishapunamiya/tpunamiya.github.io-4/Project_JSON/rural_urban_school_type.json', orient='records', indent=2)

In [None]:
# ------------------------------------------------------------
# Stacked area chart: enrollment share by school type,
# Rural and Urban panels side by side, filtered by a state dropdown
# ------------------------------------------------------------

alt.data_transformers.disable_max_rows()

# Size used for both panels
PANEL_WIDTH = 250
PANEL_HEIGHT = 280

# Dropdown options: 'All India' first, then the states alphabetically
other_states = sorted(s for s in df_aggregated['state'].unique() if s != 'All India')
state_list = ['All India'] + other_states

state_dropdown = alt.binding_select(options=state_list, name='Select State: ')
state_selection = alt.selection_point(
    fields=['state'],
    bind=state_dropdown,
    value='All India'
)

# One fixed colour per school category (all 5 categories)
color_scale_extended = alt.Scale(
    domain=['State Government', 'Central Government', 'Government Aided', 'Private Unaided', 'Other'],
    range=['#d5adc1', '#25171e', '#82536a', '#77ba99', '#E0E0E0']
)


def _share_axis(title):
    """Y encoding shared by every panel: stacked percentage on a fixed 0-100 scale."""
    return alt.Y(
        'sum(percentage):Q',
        title=title,
        axis=alt.Axis(labelFontSize=11),
        stack='zero',
        scale=alt.Scale(domain=[0, 100])
    )


year_axis = alt.X(
    'year:O',
    title='Year',
    axis=alt.Axis(
        labelAngle=0,
        labelFontSize=11,
        labelPadding=5,
        domain=True,
        domainWidth=1
    ),
    scale=alt.Scale(padding=0)
)

category_color = alt.Color(
    'school_category:N',
    title='School Type',
    scale=color_scale_extended,
    legend=alt.Legend(
        orient='right',
        titleFontSize=11,
        labelFontSize=10,
        symbolSize=80,
        labelLimit=150,
        columns=1
    )
)

hover_fields = [
    alt.Tooltip('year:O', title='Year'),
    alt.Tooltip('state:N', title='State'),
    alt.Tooltip('location_type:N', title='Location'),
    alt.Tooltip('school_category:N', title='School Type'),
    alt.Tooltip('sum(percentage):Q', title='Percentage', format='.1f'),
    alt.Tooltip('sum(total_enrollment):Q', title='Total Students', format=',')
]

# Base layer: everything the two panels have in common, already filtered to
# the state chosen in the dropdown
base = (
    alt.Chart(df_aggregated)
    .mark_area(line=True, opacity=0.85, interpolate='monotone')
    .encode(
        x=year_axis,
        y=_share_axis('Percentage of Total Enrollment (%)'),
        color=category_color,
        tooltip=hover_fields,
        order=alt.Order('school_category:N', sort='ascending')
    )
    .transform_filter(state_selection)
    .add_params(state_selection)
)


def _location_panel(location, y_title):
    """One panel of the base chart restricted to a single location type."""
    return (
        base.transform_filter(alt.datum.location_type == location)
        .encode(y=_share_axis(y_title))
        .properties(width=PANEL_WIDTH, height=PANEL_HEIGHT, title=location)
    )


# Only the left-hand panel carries the y-axis title
chart_rural = _location_panel('Rural', 'Percentage of Total Enrollment (%)')
chart_urban = _location_panel('Urban', '')

# Place the panels side by side and apply chart-wide styling
chart_combined = alt.hconcat(chart_rural, chart_urban).properties(
    title={
        "text": "Enrollment Distribution by School Type: Rural vs Urban (2021-2024)",
        "subtitle": "Select a state from the dropdown",
        "fontSize": 16,
        "subtitleFontSize": 11,
        "anchor": "start"
    }
).configure_view(
    strokeWidth=0,
    continuousHeight=PANEL_HEIGHT,
    continuousWidth=PANEL_WIDTH
).configure_axis(
    labelFontSize=11,
    titleFontSize=12,
    gridColor='#e5e7eb',
    domainColor='#333333'
).configure_concat(
    spacing=10
).resolve_scale(
    y='independent'
)

# Round-trip through the dict spec to pin each panel's size explicitly
chart_spec = chart_combined.to_dict()
for panel_spec in chart_spec.get('hconcat', []):
    panel_spec['height'] = PANEL_HEIGHT
    panel_spec['width'] = PANEL_WIDTH

chart_combined = alt.Chart.from_dict(chart_spec)

# Save the Vega-Lite JSON spec.
# NOTE(review): this path is relative to the current working directory,
# unlike the absolute paths used for the data exports -- confirm the target.
chart_combined.save('Project_JSON/enrollment_chart.json')
print("\n✅ Chart saved as: enrollment_chart.json")

# Last expression of the cell: renders the chart
chart_combined


✅ Chart saved as: enrollment_chart.json
