## User Statistics Analysis (users.csv)

Analyzing user engagement across CloudBank and ICOR institutions for Spring 2026.

In [30]:
# Calculate totals by filtering the "where" column
# Current semester (spring_2026) is the last column
# Load users.csv file
users_df = pd.read_csv('users.csv')
print("Users CSV loaded successfully")
print(f"Shape: {users_df.shape}")
current_semester_col = users_df.columns[-1]
print(f"\nCurrent semester column: {users_df.columns[-1]}")
print(f"Previous semester column: {users_df.columns[-2]}")
print(f"Year ago semester column: {users_df.columns[-6]}")
# Calculate totals - overall should equal CloudBank + ICOR
cloudbank_total = users_df[users_df['where'] == 'cloudbank'][current_semester_col].sum()
icor_total = users_df[users_df['where'] == 'icor'][current_semester_col].sum()
overall_total = cloudbank_total + icor_total

print("=" * 60)
print(f"SPRING 2026 USER STATISTICS ({current_semester_col})")
print("=" * 60)
print(f"CloudBank Total Users:           {cloudbank_total:,.0f}")
print(f"ICOR Total Users:                {icor_total:,.0f}")
print(f"Overall Total Users:             {overall_total:,.0f}")
print("=" * 60)

# Compare across semesters
# Get the last few semester columns for comparison
fall_2025_col = users_df.columns[-2]
spring_2025_col = users_df.columns[-6]


cloudbank_semesters = {
    'spring_2026': users_df[users_df['where'] == 'cloudbank'][current_semester_col].sum(),
    'fall_2025': users_df[users_df['where'] == 'cloudbank'][fall_2025_col].sum(),
    'spring_2025': users_df[users_df['where'] == 'cloudbank'][spring_2025_col].sum()
}

icor_semesters = {
    'spring_2026': users_df[users_df['where'] == 'icor'][current_semester_col].sum(),
    'fall_2025': users_df[users_df['where'] == 'icor'][fall_2025_col].sum(),
    'spring_2025': users_df[users_df['where'] == 'icor'][spring_2025_col].sum()
}

print("\nSemester Comparison:")
print(f"\nCloudBank Users:")
for semester, count in cloudbank_semesters.items():
    print(f"  {semester}: {count:,.0f}")

print(f"\nICOR Users:")
for semester, count in icor_semesters.items():
    print(f"  {semester}: {count:,.0f}")


Users CSV loaded successfully
Shape: (72, 16)

Current semester column: spring_2026
Previous semester column: fall_2025
Year ago semester column: summer_2024
SPRING 2026 USER STATISTICS (spring_2026)
CloudBank Total Users:           1,273
ICOR Total Users:                1,419
Overall Total Users:             2,692

Semester Comparison:

CloudBank Users:
  spring_2026: 1,273
  fall_2025: 1,389
  spring_2025: 89

ICOR Users:
  spring_2026: 1,419
  fall_2025: 1,098
  spring_2025: 0


In [38]:
# Calculate summary statistics
otter_df = pd.read_csv('otter_standalone_use.csv', skiprows=1)
# Spring 2026 runs from January 1 - June 14, 2026
spring_2026_months = ['2026-01', '2026-02', '2026-03', '2026-04', '2026-05', '2026-06']
spring_2026_data = otter_df[otter_df['Year-Month'].isin(spring_2026_months)]
total_submissions = spring_2026_data[' Number of Users'].sum()
total_notebooks = spring_2026_data[' Number of Notebooks'].sum()
avg_submissions_per_week = spring_2026_data[' Number of Users'].mean()
avg_notebooks_per_week = spring_2026_data[' Number of Notebooks'].mean()

print("=" * 60)
print("SPRING 2026 SUMMARY (January - June 2026)")
print("=" * 60)
print(f"Total Submissions (Unique Users):  {total_submissions:,}")
print(f"Total Notebooks Used:              {total_notebooks:,}")
print(f"Average Submissions per Week:      {avg_submissions_per_week:.1f}")
print(f"Average Notebooks per Week:        {avg_notebooks_per_week:.1f}")
print("=" * 60)

SPRING 2026 SUMMARY (January - June 2026)
Total Submissions (Unique Users):  667
Total Notebooks Used:              3,760
Average Submissions per Week:      83.4
Average Notebooks per Week:        470.0
