<a href="https://colab.research.google.com/github/meredith224/Code-Quiz/blob/main/Pandas_Method_Chaining_8_15_25.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np

# Seed NumPy's legacy global RNG so the synthetic dataset is reproducible.
np.random.seed(42)

n = 100

# NOTE: the random draws below must stay in this order (major, GPA, credits,
# full-time flag); each call consumes the shared RNG stream, so reordering
# them would change every generated column.
student_ids = ["S%03d" % idx for idx in range(n)]

major_options = ['Biology', 'Engineering', 'Psychology', 'History']
majors = np.random.choice(major_options, size=n)

# GPA ~ Normal(3.0, 0.4), rounded to two decimals, then limited to [0, 4].
raw_gpas = np.random.normal(3.0, 0.4, size=n)
gpas = np.clip(np.round(raw_gpas, 2), 0, 4)

credit_counts = np.random.randint(30, 140, size=n)

# Roughly 70% of the students are flagged as full time.
full_time_flags = np.random.choice([True, False], size=n, p=[0.7, 0.3])

df = pd.DataFrame({
    'student_id': student_ids,
    'major': majors,
    'GPA': gpas,
    'credits_completed': credit_counts,
    'enrolled_full_time': full_time_flags,
})

print("✅ Sample of dataset:")
print(df.head(5))

✅ Sample of dataset:
  student_id       major   GPA  credits_completed  enrolled_full_time
0       S000  Psychology  3.30                119               False
1       S001     History  3.07                 89                True
2       S002     Biology  2.95                 31                True
3       S003  Psychology  2.88                 30                True
4       S004  Psychology  2.41                 77                True


In [None]:
# Goal: list the five highest-GPA Engineering students who are enrolled
#       full time and have completed at least 80 credits, keeping only
#       the identifying and ranking columns.

print("\n🔗 Top 5 full-time Engineering students (credits >= 80) by GPA:")

# One boolean mask per row condition; they are combined in the .loc below.
is_engineering = df['major'].eq('Engineering')
is_full_time = df['enrolled_full_time'].eq(True)
has_enough_credits = df['credits_completed'].ge(80)

top_engineers = (
    df.loc[
        is_engineering & is_full_time & has_enough_credits,   # row filter
        ['student_id', 'GPA', 'credits_completed'],           # column subset
    ]
    .sort_values(by='GPA', ascending=False)                   # best GPA first
    .iloc[:5]                                                 # keep the top five
)
print(top_engineers)



🔗 Top 5 full-time Engineering students (credits >= 80) by GPA:
   student_id   GPA  credits_completed
33       S033  3.63                139
95       S095  3.62                122
25       S025  3.54                133
27       S027  3.40                128
65       S065  3.16                 83


In [None]:
print("\n📊 Average GPA by major (full-time students only):")

# Restrict to full-time students, then average GPA within each major.
full_time_students = df[df['enrolled_full_time'].eq(True)]
mean_gpa_by_major = full_time_students.groupby('major')['GPA'].mean()

# Rank on the unrounded means first, then round for display.
print(mean_gpa_by_major.sort_values(ascending=False).round(2))


In [None]:
print("\n🏅 High performers (GPA >= 3.5) count by major:")

# Keep rows at or above the 3.5 GPA cutoff and count them per major.
high_performers = df[df['GPA'].ge(3.5)]
counts_by_major = high_performers.groupby('major').size()

print(counts_by_major.sort_values(ascending=False))

In [None]:
print("\n📈 Percent of full-time students per major:")

# The mean of a boolean column is the fraction of True values, so the
# per-major mean of `enrolled_full_time` is the full-time share.
full_time_share = df.groupby('major')['enrolled_full_time'].mean()
full_time_pct = (full_time_share * 100).round(1)

print(full_time_pct.sort_values(ascending=False))


📈 Percent of full-time students per major:
major
Biology        85.0
History        80.0
Engineering    76.9
Psychology     66.7
Name: enrolled_full_time, dtype: float64


In [None]:
print("\n🎯 Average credits completed by GPA brackets:")

# Bucket GPA into four labelled, right-closed brackets:
#   Low [0, 2.5], Average (2.5, 3.0], Good (3.0, 3.5], Excellent (3.5, 4.0].
# include_lowest=True closes the first bracket on the left. GPA is clipped to
# [0, 4] when the data is generated, so 0.0 is a legal value; without this flag
# it would be binned as NaN and silently dropped by the groupby below.
# The column is still stored on `df` (as before) so later cells can reuse it.
df['GPA_bracket'] = pd.cut(
    df['GPA'],
    bins=[0, 2.5, 3.0, 3.5, 4.0],
    labels=['Low', 'Average', 'Good', 'Excellent'],
    include_lowest=True,
)
(
    df
    # observed=False is spelled out: it keeps every bracket in the result
    # (empty ones show NaN) and avoids the FutureWarning newer pandas emits
    # when a categorical key is grouped without an explicit `observed`.
    .groupby('GPA_bracket', observed=False)['credits_completed']
    .mean()
    .round(1)
    .pipe(print)
)