In [1]:
import polars as pl
import numpy as np  # noqa: F401
import plotly.express as px  # noqa: F401
import plotly.graph_objects as go
import plotly.io as pio
import plotly.offline as po
pio.renderers.default='notebook'
po.init_notebook_mode(connected=True) 

## Read in the data

data source: https://www.kaggle.com/datasets/uciml/student-alcohol-consumption

In [2]:
stu_math = pl.read_csv("student-mat.csv", has_header=True)
stu_por = pl.read_csv('student-por.csv', has_header=True)
print(stu_math.head(5))
print(stu_por.head(5))

shape: (5, 33)
┌────────┬─────┬─────┬─────────┬───┬──────────┬─────┬─────┬─────┐
│ school ┆ sex ┆ age ┆ address ┆ … ┆ absences ┆ G1  ┆ G2  ┆ G3  │
│ ---    ┆ --- ┆ --- ┆ ---     ┆   ┆ ---      ┆ --- ┆ --- ┆ --- │
│ str    ┆ str ┆ i64 ┆ str     ┆   ┆ i64      ┆ i64 ┆ i64 ┆ i64 │
╞════════╪═════╪═════╪═════════╪═══╪══════════╪═════╪═════╪═════╡
│ GP     ┆ F   ┆ 18  ┆ U       ┆ … ┆ 6        ┆ 5   ┆ 6   ┆ 6   │
│ GP     ┆ F   ┆ 17  ┆ U       ┆ … ┆ 4        ┆ 5   ┆ 5   ┆ 6   │
│ GP     ┆ F   ┆ 15  ┆ U       ┆ … ┆ 10       ┆ 7   ┆ 8   ┆ 10  │
│ GP     ┆ F   ┆ 15  ┆ U       ┆ … ┆ 2        ┆ 15  ┆ 14  ┆ 15  │
│ GP     ┆ F   ┆ 16  ┆ U       ┆ … ┆ 4        ┆ 6   ┆ 10  ┆ 10  │
└────────┴─────┴─────┴─────────┴───┴──────────┴─────┴─────┴─────┘
shape: (5, 33)
┌────────┬─────┬─────┬─────────┬───┬──────────┬─────┬─────┬─────┐
│ school ┆ sex ┆ age ┆ address ┆ … ┆ absences ┆ G1  ┆ G2  ┆ G3  │
│ ---    ┆ --- ┆ --- ┆ ---     ┆   ┆ ---      ┆ --- ┆ --- ┆ --- │
│ str    ┆ str ┆ i64 ┆ str     ┆   ┆ i64      

In [3]:
print(stu_math.describe())

shape: (9, 34)
┌────────────┬────────┬──────┬───────────┬───┬──────────┬───────────┬───────────┬──────────┐
│ statistic  ┆ school ┆ sex  ┆ age       ┆ … ┆ absences ┆ G1        ┆ G2        ┆ G3       │
│ ---        ┆ ---    ┆ ---  ┆ ---       ┆   ┆ ---      ┆ ---       ┆ ---       ┆ ---      │
│ str        ┆ str    ┆ str  ┆ f64       ┆   ┆ f64      ┆ f64       ┆ f64       ┆ f64      │
╞════════════╪════════╪══════╪═══════════╪═══╪══════════╪═══════════╪═══════════╪══════════╡
│ count      ┆ 395    ┆ 395  ┆ 395.0     ┆ … ┆ 395.0    ┆ 395.0     ┆ 395.0     ┆ 395.0    │
│ null_count ┆ 0      ┆ 0    ┆ 0.0       ┆ … ┆ 0.0      ┆ 0.0       ┆ 0.0       ┆ 0.0      │
│ mean       ┆ null   ┆ null ┆ 16.696203 ┆ … ┆ 5.708861 ┆ 10.908861 ┆ 10.713924 ┆ 10.41519 │
│ std        ┆ null   ┆ null ┆ 1.276043  ┆ … ┆ 8.003096 ┆ 3.319195  ┆ 3.761505  ┆ 4.581443 │
│ min        ┆ GP     ┆ F    ┆ 15.0      ┆ … ┆ 0.0      ┆ 3.0       ┆ 0.0       ┆ 0.0      │
│ 25%        ┆ null   ┆ null ┆ 16.0      ┆ … ┆ 0.0     

In [4]:
by_sex = stu_math.group_by("sex").agg(
    pl.col("sex").count().alias('count')
)

print(by_sex)

shape: (2, 2)
┌─────┬───────┐
│ sex ┆ count │
│ --- ┆ ---   │
│ str ┆ u32   │
╞═════╪═══════╡
│ M   ┆ 187   │
│ F   ┆ 208   │
└─────┴───────┘


In [5]:
by_school = stu_math.group_by('school').agg(
    pl.col("school").count().alias('count_school')
)

print(by_school)

shape: (2, 2)
┌────────┬──────────────┐
│ school ┆ count_school │
│ ---    ┆ ---          │
│ str    ┆ u32          │
╞════════╪══════════════╡
│ MS     ┆ 46           │
│ GP     ┆ 349          │
└────────┴──────────────┘


In [6]:
fig1 = go.Figure()

fig1.add_trace(go.Bar(
    x = by_school['school'],
    y = by_school['count_school'],
    width = 0.4
))

fig1.update_layout(
    title_text = 'Number of Students by School',
    xaxis_title = 'School',
    yaxis_title = 'Count',
    width = 500,
    height = 500,
    template = 'plotly_white'
)

fig1.show()

In [7]:
fig2 = go.Figure()

fig2.add_trace(go.Bar(
    x = by_sex['sex'],
    y = by_sex['count'],
    width = .4
))

fig2.update_layout(
    title_text = 'Number of Students by Sex',
    xaxis_title = 'Sex',
    yaxis_title = 'Count',
    width = 650,
    height = 650,
    template = 'plotly_dark'
)

fig2.show()

In [None]:
fig_age = go.Figure()

fig_age.add_trace(go.(
    x = stu_math['age'],
    y = stu_math['studytime']
))

fig_age.update_layout(
    width = 650,
    height = 650,
)

fig_age.show()