In [17]:
import pandas as pd

import plotly.graph_objects as go

In [18]:
# Load the student records from the CSV file into the notebook-wide frame.
df = pd.read_csv("Math-Students.csv")

- school - student's school (binary: 'GP' - Gabriel Pereira or 'MS' - Mousinho da Silveira)
- sex - student's sex (binary: 'F' - female or 'M' - male)
- age - student's age (numeric: from 15 to 22)
- address - student's home address type (binary: 'U' - urban or 'R' - rural)
- famsize - family size (binary: 'LE3' - less or equal to 3 or 'GT3' - greater than 3)
- Pstatus - parent's cohabitation status (binary: 'T' - living together or 'A' - apart)
- Medu - mother's education (numeric: 0 - none, 1 - primary education (4th grade), 2 - 5th to 9th grade, 3 - secondary education or 4 - higher education)
- Fedu - father's education (numeric: 0 - none, 1 - primary education (4th grade), 2 - 5th to 9th grade, 3 - secondary education or 4 - higher education)
- Mjob - mother's job (nominal: 'teacher', 'health' care related, civil 'services' (e.g., administrative or police), 'at_home' or 'other')
- Fjob - father's job (nominal: 'teacher', 'health' care related, civil 'services' (e.g., administrative or police), 'at_home' or 'other')
- reason - reason to choose this school (nominal: close to 'home', school 'reputation', 'course' preference or 'other')
- guardian - student's guardian (nominal: 'mother', 'father' or 'other')
- traveltime - home to school travel time (numeric: 1 - 4 hours)
- studytime - weekly study time (numeric: 1 - 10 hours)
- failures - number of past class failures (numeric: n if 1 or more)
- schoolsup - extra educational support (binary: yes or no)
- famsup - family educational support (binary: yes or no)
- paid - extra paid classes within the course subject (Math or Portuguese) (binary: yes or no)
- activities - extra-curricular activities (binary: yes or no)
- nursery - attended nursery school (binary: yes or no)
- higher - wants to take higher education (binary: yes or no)
- internet - Internet access at home (binary: yes or no)
- romantic - with a romantic relationship (binary: yes or no)
- famrel - quality of family relationships (numeric: from 1 - very bad to 5 - excellent)
- freetime - free time after school (numeric: from 1 - very low to 5 - very high)
- goout - going out with friends (numeric: from 1 - very low to 5 - very high)
- Dalc - workday alcohol consumption (numeric: from 1 - very low to 5 - very high)
- Walc - weekend alcohol consumption (numeric: from 1 - very low to 5 - very high)
- health - current health status (numeric: from 1 - very bad to 5 - very good)
- absences - number of school absences (numeric: from 0 to 93)

These grades are related to the course subject, Math:

- G1 - first-period grade (numeric: from 0 to 20)
- G2 - second-period grade (numeric: from 0 to 20)
- G3 - final grade (numeric: from 0 to 20, output target)

In [19]:
# Expand the two-letter school codes into the schools' full names.
school_mapper = dict(GP="Gabriel Pereira", MS="Mousinho da Silveira")

# Values that are not keys of the mapping are left untouched by replace().
df["school"] = df["school"].replace(to_replace=school_mapper)

In [20]:
# Expand the one-letter sex codes into readable labels.
sex_mapper = dict(F="Female", M="Male")

# Values that are not keys of the mapping are left untouched by replace().
df["sex"] = df["sex"].replace(to_replace=sex_mapper)

In [21]:
# Per-school summary: non-null school entries, plus how many rows are labelled
# "Female" and "Male" in the sex column.
(
    df
    .groupby("school")
    .agg(
        number_of_students=pd.NamedAgg(column="school", aggfunc="count"),
        female_students=pd.NamedAgg(
            column="sex",
            aggfunc=lambda labels: labels.eq("Female").sum(),
        ),
        male_students=pd.NamedAgg(
            column="sex",
            aggfunc=lambda labels: labels.eq("Male").sum(),
        ),
    )
)

Unnamed: 0_level_0,number_of_students,female_students,male_students
school,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Gabriel Pereira,349,183,166
Mousinho da Silveira,50,25,25


In [22]:
# Show the frame after the school and sex codes were expanded to full names.
df

Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,...,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
0,Gabriel Pereira,Female,18,U,GT3,A,4,4,at_home,teacher,...,4,3,4,1,1,3,6,5,6,6
1,Gabriel Pereira,Female,17,U,GT3,T,1,1,at_home,other,...,5,3,3,1,1,3,4,5,5,6
2,Gabriel Pereira,Female,15,U,LE3,T,1,1,at_home,other,...,4,3,2,2,3,3,10,7,8,10
3,Gabriel Pereira,Female,15,U,GT3,T,4,2,health,services,...,3,2,2,1,1,5,2,15,14,15
4,Gabriel Pereira,Female,16,U,GT3,T,3,3,other,other,...,4,3,2,1,2,5,4,6,10,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
394,Mousinho da Silveira,Male,19,U,LE3,T,1,1,other,at_home,...,3,2,3,3,3,5,5,8,9,9
395,Mousinho da Silveira,Male,18,U,GT3,T,4,4,teacher,services,...,5,3,2,1,2,4,0,8,7,7
396,Mousinho da Silveira,Male,17,U,GT3,T,4,4,teacher,services,...,5,3,2,1,2,4,0,8,7,7
397,Mousinho da Silveira,Male,19,U,GT3,T,4,4,teacher,other,...,5,3,2,1,2,4,0,8,7,7


### Why Did They Choose This School?

In [23]:
# Overall tally of the stated reasons for choosing a school, most common first.
df.loc[:, "reason"].value_counts()

reason
course        147
home          109
reputation    107
other          36
Name: count, dtype: int64

In [24]:
# Break the reason tally down by school: one row per school, one column per reason.
(
    df
    .groupby("school")
    .agg(
        course=("reason", lambda answers: answers.eq("course").sum()),
        home=("reason", lambda answers: answers.eq("home").sum()),
        reputation=("reason", lambda answers: answers.eq("reputation").sum()),
        other=("reason", lambda answers: answers.eq("other").sum()),
    )
)

Unnamed: 0_level_0,course,home,reputation,other
school,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Gabriel Pereira,124,98,100,27
Mousinho da Silveira,23,11,7,9


In [25]:
# Count, for each school, how many students gave each reason; reset_index()
# turns "school" back into a regular column so it can feed the x-axis.
df_grouped = (
    df
    .groupby("school")
    .agg(
        course=("reason", lambda answers: answers.eq("course").sum()),
        home=("reason", lambda answers: answers.eq("home").sum()),
        reputation=("reason", lambda answers: answers.eq("reputation").sum()),
        other=("reason", lambda answers: answers.eq("other").sum()),
    )
    .reset_index()
)

# One bar trace per reason; the trace name is also the df_grouped column it plots.
fig = go.Figure(
    data=[
        go.Bar(
            x=df_grouped["school"],
            y=df_grouped[reason],
            name=reason,
        )
        for reason in ("course", "home", "reputation", "other")
    ]
)

# Stack the per-reason bars on top of each other for every school.
fig.update_layout(
    barmode="stack",
    title="Reasons by School",
    xaxis_title="School",
    yaxis_title="Count",
    xaxis_tickangle=-45,
    width=800,
    height=500,
)

# Apply the same bar width to every trace.
fig.update_traces(width=0.6)
fig.show()

In [26]:
# Display df again; none of the cells shown since the previous display reassign it.
df

Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,...,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
0,Gabriel Pereira,Female,18,U,GT3,A,4,4,at_home,teacher,...,4,3,4,1,1,3,6,5,6,6
1,Gabriel Pereira,Female,17,U,GT3,T,1,1,at_home,other,...,5,3,3,1,1,3,4,5,5,6
2,Gabriel Pereira,Female,15,U,LE3,T,1,1,at_home,other,...,4,3,2,2,3,3,10,7,8,10
3,Gabriel Pereira,Female,15,U,GT3,T,4,2,health,services,...,3,2,2,1,1,5,2,15,14,15
4,Gabriel Pereira,Female,16,U,GT3,T,3,3,other,other,...,4,3,2,1,2,5,4,6,10,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
394,Mousinho da Silveira,Male,19,U,LE3,T,1,1,other,at_home,...,3,2,3,3,3,5,5,8,9,9
395,Mousinho da Silveira,Male,18,U,GT3,T,4,4,teacher,services,...,5,3,2,1,2,4,0,8,7,7
396,Mousinho da Silveira,Male,17,U,GT3,T,4,4,teacher,services,...,5,3,2,1,2,4,0,8,7,7
397,Mousinho da Silveira,Male,19,U,GT3,T,4,4,teacher,other,...,5,3,2,1,2,4,0,8,7,7
