In [6]:
import pandas as pd

# Build a small stand-in for the Courses table: one row per enrollment.
students = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I']
classes = ['Math', 'English', 'Math', 'Biology', 'Math', 'Computer', 'Math', 'Math', 'Math']
data = {'student': students, 'class': classes}
course_df = pd.DataFrame(data)

print("Original DataFrame:")
print(course_df)

# Count the rows (enrollments) per class.
# groupby('class').size() yields a Series indexed by class name.
enrollment_sizes = course_df.groupby('class').size()

# Name the Series 'student_count', then reset_index() turns the Series
# into a two-column DataFrame: 'class' and 'student_count'.
class_counts = enrollment_sizes.rename('student_count').reset_index()

print("\nClass counts DataFrame:")
print(class_counts)

# Keep only the classes whose enrollment reaches the threshold.
MIN_STUDENTS = 5
is_large_enough = class_counts['student_count'] >= MIN_STUDENTS
class_with_min_5_students = class_counts.loc[is_large_enough]

print("\nClasses with at least 5 students:")
print(class_with_min_5_students)

# Project down to the 'class' column only, keeping a DataFrame (not a
# Series) and the original row labels.
result = class_with_min_5_students.loc[:, ['class']]

print("\nFinal result with classes having at least 5 students:")
print(result)


Original DataFrame:
  student     class
0       A      Math
1       B   English
2       C      Math
3       D   Biology
4       E      Math
5       F  Computer
6       G      Math
7       H      Math
8       I      Math

Class counts DataFrame:
      class  student_count
0   Biology              1
1  Computer              1
2   English              1
3      Math              6

Classes with at least 5 students:
  class  student_count
3  Math              6

Final result with classes having at least 5 students:
  class
3  Math
