In [112]:
import pandas as pd
from datetime import datetime
import pytz

In [113]:
import anvil.server

anvil.server.connect("client_KYOM4YFDIE4TMEO3UAOTEPRC-55C7JZ62MGB2UXA6")

In [114]:
from anvil.tables import app_tables

answers = app_tables.answers.search()

In [115]:
answers_list = [
    {
        "created_at": r["created_at"],
        "question_data": r["question"],
        "question_txt": r["question"]["question"],
        "question_title": r["question"]["title"],
        "question_type": r["question"]["type"],
        "question_level": r["question"]["level"],
        "got_it_right": r["got_it_right"],
        "session": r["session"],
        "user": r["user"]["email"] if r["user"] else "",
    }
    for r in answers
]

answers_list

[{'created_at': datetime.datetime(2024, 8, 15, 16, 44, 24, 571000, tzinfo=<anvil.tz.tzoffset (-3.0 hour offset)>),
  'question_data': <LiveObject: anvil.tables.Row>,
  'question_txt': 'Entropy is a measure of the average uncertainty in a set of outcomes, while cross-entropy measures the difference between two probability distributions.',
  'question_title': 'Probability - LLMs',
  'question_type': 'true_or_false',
  'question_level': 'easy',
  'got_it_right': False,
  'session': '10071df2-2c28-4c47-896d-61ac89d3883a',
  'user': 'teste@teste.com'},
 {'created_at': datetime.datetime(2024, 8, 15, 16, 44, 29, 268000, tzinfo=<anvil.tz.tzoffset (-3.0 hour offset)>),
  'question_data': <LiveObject: anvil.tables.Row>,
  'question_txt': 'Cross-entropy loss measures the difference between predicted probabilities and actual outcomes, while entropy quantifies the uncertainty in a probability distribution.',
  'question_title': 'Probability - LLMs',
  'question_type': 'true_or_false',
  'question_l

In [116]:
len(answers_list)

74

In [117]:
aq = {}
total_wrong = 0
total_right = 0

for ans in answers_list:
    key = ans["question_txt"]
    got_it_right = ans["got_it_right"]

    if key not in aq:
        aq[key] = {
            "question_data": ans["question_data"],
            "last_answered": ans["created_at"],
            "wrong": 0,
            "right": 1,
        }
    
    aq[key]["last_answered"] = max(ans["created_at"], aq[key]["last_answered"])

    if got_it_right:
        aq[key]["right"] += 1
        total_wrong += 1
    else:
        aq[key]["wrong"] += 1
        total_right += 1



In [118]:
now_aware = datetime.now(pytz.UTC)
total_time = 0
for key in aq:
    aq[key]["wrong_normalized"] = aq[key]["wrong"] / total_wrong
    aq[key]["right_normalized"] = aq[key]["right"] / total_right
    aq[key]["time_diff"] = int( (now_aware - aq[key]["last_answered"]).total_seconds() )
    total_time += aq[key]["time_diff"]

In [119]:
for key in aq:
    aq[key]["time_diff"] = aq[key]["time_diff"] / total_time

In [120]:
def score(row):
    weights = {"wrong_normalized": 1, "time_diff": 1}

    return (
        row["wrong_normalized"] * weights["wrong_normalized"]
        + row["time_diff"] * weights["time_diff"]
    )

In [121]:
for key in aq:
    aq[key]["score"] = score(aq[key])

In [122]:
aq_list = [(v["question_data"], v["score"], k) for k, v in aq.items()]
aq_list

[(<LiveObject: anvil.tables.Row>,
  0.11118388538344105,
  'Entropy is a measure of the average uncertainty in a set of outcomes, while cross-entropy measures the difference between two probability distributions.'),
 (<LiveObject: anvil.tables.Row>,
  0.07943757018892975,
  'Cross-entropy loss measures the difference between predicted probabilities and actual outcomes, while entropy quantifies the uncertainty in a probability distribution.'),
 (<LiveObject: anvil.tables.Row>,
  0.07944144398481699,
  'Entropy is a measure of uncertainty in a probability distribution, while cross-entropy measures the difference between two probability distributions.'),
 (<LiveObject: anvil.tables.Row>,
  0.07944116053633743,
  'Cross-entropy loss is the same as perplexity, used to evaluate language models.'),
 (<LiveObject: anvil.tables.Row>,
  0.0793533859905022,
  'A linear transformation preserves linear combinations of vectors, but not their magnitudes.'),
 (<LiveObject: anvil.tables.Row>,
  0.09522

In [123]:
questions_ranking = sorted( aq_list, key=lambda row: row[1], reverse=True )
questions_ranking

[(<LiveObject: anvil.tables.Row>,
  0.11118388538344105,
  'Entropy is a measure of the average uncertainty in a set of outcomes, while cross-entropy measures the difference between two probability distributions.'),
 (<LiveObject: anvil.tables.Row>,
  0.09522403979285513,
  'Cross-Entropy is always less than or equal to Entropy for a given probability distribution.'),
 (<LiveObject: anvil.tables.Row>,
  0.07944144398481699,
  'Entropy is a measure of uncertainty in a probability distribution, while cross-entropy measures the difference between two probability distributions.'),
 (<LiveObject: anvil.tables.Row>,
  0.07944116053633743,
  'Cross-entropy loss is the same as perplexity, used to evaluate language models.'),
 (<LiveObject: anvil.tables.Row>,
  0.07943757018892975,
  'Cross-entropy loss measures the difference between predicted probabilities and actual outcomes, while entropy quantifies the uncertainty in a probability distribution.'),
 (<LiveObject: anvil.tables.Row>,
  0.0793

In [124]:
questions_ranking_ = [ q[0] for q in questions_ranking]
questions_ranking_

[<LiveObject: anvil.tables.Row>,
 <LiveObject: anvil.tables.Row>,
 <LiveObject: anvil.tables.Row>,
 <LiveObject: anvil.tables.Row>,
 <LiveObject: anvil.tables.Row>,
 <LiveObject: anvil.tables.Row>,
 <LiveObject: anvil.tables.Row>,
 <LiveObject: anvil.tables.Row>,
 <LiveObject: anvil.tables.Row>,
 <LiveObject: anvil.tables.Row>,
 <LiveObject: anvil.tables.Row>,
 <LiveObject: anvil.tables.Row>,
 <LiveObject: anvil.tables.Row>,
 <LiveObject: anvil.tables.Row>,
 <LiveObject: anvil.tables.Row>,
 <LiveObject: anvil.tables.Row>,
 <LiveObject: anvil.tables.Row>,
 <LiveObject: anvil.tables.Row>,
 <LiveObject: anvil.tables.Row>,
 <LiveObject: anvil.tables.Row>,
 <LiveObject: anvil.tables.Row>,
 <LiveObject: anvil.tables.Row>,
 <LiveObject: anvil.tables.Row>,
 <LiveObject: anvil.tables.Row>,
 <LiveObject: anvil.tables.Row>,
 <LiveObject: anvil.tables.Row>,
 <LiveObject: anvil.tables.Row>,
 <LiveObject: anvil.tables.Row>,
 <LiveObject: anvil.tables.Row>,
 <LiveObject: anvil.tables.Row>,
 <LiveObje

In [125]:
aq_list_2 = [(v["score"], v["wrong_normalized"], v["time_diff"] , k) for k, v in aq.items()]
questions_ranking_2 = sorted( aq_list_2, key=lambda row: row[1], reverse=True )

df = pd.DataFrame(questions_ranking_2)
df.columns = [ "score", "wrong_normalized", "time_diff", "question_txt"]
df.head()


Unnamed: 0,score,wrong_normalized,time_diff,question_txt
0,0.111184,0.031746,0.079438,Entropy is a measure of the average uncertaint...
1,0.045618,0.031746,0.013872,Precision is the ratio of true positives to th...
2,0.095224,0.015873,0.079351,Cross-Entropy is always less than or equal to ...
3,0.029851,0.015873,0.013978,PCA is a supervised learning technique.
4,0.029853,0.015873,0.013979,Regularization is a technique used to encourag...


In [126]:
df

Unnamed: 0,score,wrong_normalized,time_diff,question_txt
0,0.111184,0.031746,0.079438,Entropy is a measure of the average uncertaint...
1,0.045618,0.031746,0.013872,Precision is the ratio of true positives to th...
2,0.095224,0.015873,0.079351,Cross-Entropy is always less than or equal to ...
3,0.029851,0.015873,0.013978,PCA is a supervised learning technique.
4,0.029853,0.015873,0.013979,Regularization is a technique used to encourag...
5,0.029738,0.015873,0.013865,A forged image perturbation of an unauthorized...
6,0.016354,0.015873,0.000481,A matrix and its transpose have the same deter...
7,0.016351,0.015873,0.000478,If a vector \(\mathbf{v}\) can be expressed as...
8,0.016342,0.015873,0.000469,The zero vector cannot be part of a basis for ...
9,0.079438,0.0,0.079438,Cross-entropy loss measures the difference bet...
