# Kahoot Ranking

Este Notebook filtra os vencedores do Kahoot, retornando os primeiros n colocados.

## ▶ Pontuação
### 1. Pontos de pódio
Para cada Kahoot:

🥇1 lugar : 3 pontos

🥈2 lugar : 2 pontos

🥉3 lugar : 1 ponto

### 2. Pontos do Kahoot
O desempate é feito pela pontuação acumulada dos Kahoots. 

In [52]:
import os
import pandas as pd
from unidecode import unidecode
import re

import logging

# Configure logging
logging.basicConfig(level=logging.DEBUG, format='[%(levelname)s] %(message)s')

In [53]:
# Set folder path
folder_path = "2024_2-CONFLITO"

In [54]:
def clean_string(input_string):
    # Use a regular expression to remove spaces and numbers
    cleaned_string = re.sub(r'[\s\d]', '', input_string)

    # Remove accents using unidecode
    no_accents_string = unidecode(cleaned_string)

    # Convert the cleaned string to lowercase
    lowercased_string = no_accents_string.lower()

    return lowercased_string

### Name exception dict 
Define name aliases to substitute (check which names have more than 1 alias)

'alias': 'originalname'

In [55]:
name_alias = {
    'marbinoso': 'mardabi', 
    'bine': 'bidomar', 
    'sabrine': 'bidomar',
    'mardomar': 'johann',
    'quadra': 'johann',
    'quadrado': 'johann',
    'bigxand': 'xandao',
    'shaquilleomeal': 'johann',
    'bobberkurwa': 'johann',
    'paracetamal': 'johann',
    'quadroh': 'johann',
    'vaixco ': 'johann',
    'bagriel': 'johann',
    'mardomar': 'johann',
    'quadrado': 'johann',
    'luaraara': 'luara',
    'kakerlake': 'johann',
    'fmr': 'johann',
    'pirarucu': 'johann',
    'bambu': 'johann',
    'yej!b': 'yej!n',
    'tirisco': 'johann',
    'luuu': 'luara',
    'lua':'luara',
}

In [56]:
# Get file info
file_list = [f for f in os.listdir(folder_path)]

df_dict = {}
for file_name in file_list:
    file_path = os.path.join(folder_path, file_name)
    key = file_name
    df_dict[key] = pd.read_excel(file_path, sheet_name='Final Scores', usecols="A:C", header=2)

# Create dataframe for podium
general_podium = pd.DataFrame()

# Join the files
for key in df_dict:
    podium = df_dict[key].rename(columns={df_dict[key].columns[0]:'Podium'})
    general_podium = pd.concat([general_podium,podium])

# Rename columns
general_podium.rename(columns={'Total Score (points)':'Points'},inplace=True)
# Change data type
general_podium['Podium'] = general_podium['Podium'].astype(int)
general_podium['Points'] = general_podium['Points'].astype(int)

# Clean names
general_podium['Player'] = general_podium['Player'].apply(lambda x: clean_string(x))

# Substitute name aliases
general_podium['Player'] = general_podium['Player'].replace(name_alias)

# Assign Podium points
point_mapping = {1: 3, 2: 2, 3: 1}
general_podium['Podium_Points'] = general_podium['Podium'].map(point_mapping)



## Resolve names

In [57]:
from fuzzywuzzy import process, fuzz

# List of known names
known_names = ['einstein', 'natal', 'natalmatheus', 'bidomar', 'veruska', 'brenda', 'trentin', 'luara', 'marcell', 'angelina', 'babi', 'rws', 'yejin', 'aguilar', 'bombom', 'gabriel', 'mardabi', 'evelyn', 'otavio', 'nath', 'leleo', 'rafa', 'ali', 'xandao', 'amanda', 'machado', 'debruyne', 'jb', 'lucas', 'lilian', 'migueleli', 'maycon', 'matos', 'levis', 'amelia', 'giu', 'kim', 'louis', 'geraldo', 'pallu', 'tefi', 'da', 'tijolinho', 'r&c', 'mari', 'henrique', 'rs', 'eduardo', 'gustavo', 'thobah']

# Mapping dictionary for known variations
mapping = {
    'luaraara': 'luara',
    'babii': 'babi',
    'trentinpaola': 'trentin',
    'yej!n': 'yejin',
    'yej!b': 'yejin',
    'sabrine': 'bidomar'
    # Add more mappings as needed
}

# Threshold for similarity score to consider names completely different
similarity_threshold = 30

# Function to normalize names
def normalize_name(name, known_names, mapping, threshold):
    if name in mapping:
        return mapping[name]
    match = process.extractOne(name, known_names)
    if match and match[1] >= threshold:
        return match[0]
    return 'johann'

# Normalize names
general_podium['Player'] = general_podium['Player'].apply(lambda x: normalize_name(x, known_names, mapping, similarity_threshold))

# Display the DataFrame to verify
logging.debug(f'Unique Players:\n{general_podium['Player'].unique()}')

[DEBUG] Unique Players:
['trentin' 'natalmatheus' 'natal' 'rws' 'gabriel' 'johann' 'luara'
 'maycon' 'rs' 'einstein' 'leleo' 'levis' 'veruska' 'angelina' 'tijolinho'
 'evelyn' 'yejin' 'jb' 'nath' 'bidomar' 'babi' 'matos' 'mardabi' 'kim'
 'louis' 'aguilar' 'pallu' 'amanda' 'machado' 'brenda' 'bombom' 'mari'
 'xandao' 'lucas' 'rafa' 'otavio' 'da' 'gustavo' 'marcell' 'migueleli'
 'amelia' 'tefi' 'thobah' 'ali' 'debruyne' 'lilian' 'giu' 'geraldo' 'r&c'
 'henrique' 'eduardo']


In [58]:
# Create general podium
rank = (general_podium.loc[:,['Player','Podium_Points','Points']]
        .groupby(['Player'])
        .sum()
        .reset_index())

# Index starts at 1
rank.index = rank.index + 1

## Podium Variations

In [59]:
logging.info('Ranking por Podium Points e desempate por Points')
rank.sort_values(['Podium_Points', 'Points'], ascending=[False, False]).head(5).reset_index(drop=True)

[INFO] Ranking por Podium Points e desempate por Points


Unnamed: 0,Player,Podium_Points,Points
0,jb,10.0,26520
1,luara,9.0,31608
2,einstein,7.0,24518
3,natal,5.0,38939
4,bidomar,5.0,33409


In [60]:
# TOP 5 PODIUM PTS AND THEN BY ACC PTS
logging.info('Ranking por top 5 Podium Points e ordenado por Points')
rank.sort_values(['Podium_Points'], ascending=False).head(5).sort_values(['Points'], ascending=False).reset_index(drop=True)

[INFO] Ranking por top 5 Podium Points e ordenado por Points


Unnamed: 0,Player,Podium_Points,Points
0,natal,5.0,38939
1,luara,9.0,31608
2,natalmatheus,5.0,28621
3,jb,10.0,26520
4,einstein,7.0,24518


In [61]:
logging.info('Ranking por Points')
rank.sort_values(['Points'], ascending=False).head(5).reset_index(drop=True)

[INFO] Ranking por Points


Unnamed: 0,Player,Podium_Points,Points
0,natal,5.0,38939
1,bidomar,5.0,33409
2,rws,1.0,32947
3,luara,9.0,31608
4,gabriel,0.0,31417


In [62]:
general_podium.Player.unique()

array(['trentin', 'natalmatheus', 'natal', 'rws', 'gabriel', 'johann',
       'luara', 'maycon', 'rs', 'einstein', 'leleo', 'levis', 'veruska',
       'angelina', 'tijolinho', 'evelyn', 'yejin', 'jb', 'nath',
       'bidomar', 'babi', 'matos', 'mardabi', 'kim', 'louis', 'aguilar',
       'pallu', 'amanda', 'machado', 'brenda', 'bombom', 'mari', 'xandao',
       'lucas', 'rafa', 'otavio', 'da', 'gustavo', 'marcell', 'migueleli',
       'amelia', 'tefi', 'thobah', 'ali', 'debruyne', 'lilian', 'giu',
       'geraldo', 'r&c', 'henrique', 'eduardo'], dtype=object)

In [64]:
# [DEBUG] Assiduidade
general_podium.groupby('Player').count()

Unnamed: 0_level_0,Podium,Points,Podium_Points
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
aguilar,4,4,1
ali,1,1,0
amanda,1,1,0
amelia,1,1,0
angelina,8,8,2
babi,3,3,1
bidomar,9,9,3
bombom,2,2,1
brenda,5,5,2
da,2,2,0
