# Kahoot Ranking

Este Notebook filtra os vencedores do Kahoot, retornando os primeiros n colocados.

## ▶ Pontuação
### 1. Pontos de pódio
Para cada Kahoot:

🥇1 lugar : 3 pontos

🥈2 lugar : 2 pontos

🥉3 lugar : 1 ponto

### 2. Pontos do Kahoot
O desempate é feito pela pontuação acumulada dos Kahoots. 

In [1]:
import os
import pandas as pd
from unidecode import unidecode
import re

In [2]:
# Set folder path
folder_path = "2024_2-CONFLITO"

In [3]:
def clean_string(input_string):
    # Use a regular expression to remove spaces and numbers
    cleaned_string = re.sub(r'[\s\d]', '', input_string)

    # Remove accents using unidecode
    no_accents_string = unidecode(cleaned_string)

    # Convert the cleaned string to lowercase
    lowercased_string = no_accents_string.lower()

    return lowercased_string

In [40]:
# Get file info
file_list = [f for f in os.listdir(folder_path)]

df_dict = {}
for file_name in file_list:
    file_path = os.path.join(folder_path, file_name)
    key = file_name
    df_dict[key] = pd.read_excel(file_path, sheet_name='Final Scores', usecols="A:C", header=2)

# Create dataframe for podium
general_podium = pd.DataFrame()

# Join the files
for key in df_dict:
    podium = df_dict[key].rename(columns={df_dict[key].columns[0]:'Podium'})
    general_podium = pd.concat([general_podium,podium])

# Rename columns
general_podium.rename(columns={'Total Score (points)':'Points'},inplace=True)
# Change data type
general_podium['Podium'] = general_podium['Podium'].astype(int)
general_podium['Points'] = general_podium['Points'].astype(int)

# Clean names
general_podium['Player'] = general_podium['Player'].apply(lambda x: clean_string(x))

# Substitute name aliases
general_podium['Player'] = general_podium['Player'].replace(name_alias)

# Assign Podium points
point_mapping = {1: 3, 2: 2, 3: 1}
general_podium['Podium_Points'] = general_podium['Podium'].map(point_mapping)



## Resolve names

### Name exception dict 
Define name aliases to substitute (check which names have more than 1 alias)

'alias': 'originalname'

In [None]:
name_alias = {
    'marbinoso': 'mardabi', 
    'bine': 'bidomar', 
    'mardomar': 'johann',
    'quadra': 'johann',
    'quadrado': 'johann',
    'bigxand': 'xandao',
    'shaquilleomeal': 'johann',
    'bobberkurwa': 'johann',
    'paracetamal': 'johann',
    'quadroh': 'johann',
    'vaixco ': 'johann',
    'bagriel': 'johann',
    'mardomar': 'johann',
    'quadrado': 'johann',
    'luaraara': 'luara',
    'kakerlake': 'johann',
    'fmr': 'johann',
    'pirarucu': 'johann',
    'bambu': 'johann',
    'yej!b': 'yej!n',
    'tirisco': 'johann',
    'luuu': 'luara',
    'lua':'luara',
}

In [None]:
from fuzzywuzzy import process, fuzz

# List of known names
known_names = ['einstein', 'natal', 'sabrine', 'veruska', 'brenda', 'trentin', 'luara', 'marcell', 'angelina', 'babi', 'rws', 'yejin', 'aguilar', 'bombom', 'gabriel', 'mardabi', 'evelyn', 'otavio', 'nath', 'leleo', 'rafa', 'ali', 'xandao', 'amanda', 'machado', 'debruyne', 'jb', 'lucas', 'lilian', 'migueleli', 'bidomar', 'maycon', 'matos', 'levis', 'amelia', 'giu', 'kim', 'louis', 'geraldo', 'pallu', 'tefi', 'da', 'tijolinho', 'r&c', 'mari', 'henrique', 'rs', 'eduardo', 'gustavo', 'thobah']

# Mapping dictionary for known variations
mapping = {
    'natal': 'natal',
    'natalmatheus': 'natal',
    'luara': 'luara',
    'luaraara': 'luara',
    'babi': 'babi',
    'babii': 'babi',
    'trentin': 'trentin',
    'trentinpaola': 'trentin',
    'yej!n': 'yejin',
    'yej!b': 'yejin',
    # Add more mappings as needed
}

# Threshold for similarity score to consider names completely different
similarity_threshold = 30

# Function to normalize names
def normalize_name(name, known_names, mapping, threshold):
    if name in mapping:
        return mapping[name]
    match = process.extractOne(name, known_names)
    if match and match[1] >= threshold:
        return match[0]
    return 'johann'

# Normalize names
general_podium['Player'] = general_podium['Player'].apply(lambda x: normalize_name(x, known_names, mapping, similarity_threshold))

# Display the DataFrame to verify
print(general_podium['Player'].unique())




['johann' 'machado' 'aguilar' 'ali' 'amanda' 'amelia' 'angelina' 'babi'
 'bidomar' 'bombom' 'brenda' 'da' 'debruyne' 'eduardo' 'einstein' 'evelyn'
 'rafa' 'marcell' 'gabriel' 'geraldo' 'giu' 'gustavo' 'henrique' 'nath'
 'jb' 'kim' 'rs' 'luara' 'leleo' 'levis' 'lilian' 'louis' 'lucas' 'pallu'
 'mari' 'mardabi' 'matos' 'maycon' 'migueleli' 'natal' 'otavio' 'r&c'
 'rws' 'sabrine' 'tefi' 'thobah' 'tijolinho' 'trentin' 'veruska' 'xandao'
 'yejin']


In [None]:
# Create general podium
general_podium = (general_podium.loc[:,['Player','Podium_Points','Points']]
                  .groupby(['Player'])
                  .sum())

# Index starts at 1
general_podium.index = general_podium.index + 1

In [75]:
general_podium = general_podium.groupby('Player').sum().reset_index()

## Podium Variations

In [76]:
general_podium.sort_values(['Podium_Points', 'Points'], ascending=[False, False]).head(5).reset_index(drop=True)

Unnamed: 0,Player,Podium_Points,Points
0,natal,10.0,67560
1,luara,9.0,31608
2,einstein,7.0,24518
3,sabrine,5.0,28353
4,angelina,5.0,21617


In [79]:
# TOP 5 PODIUM PTS AND THEN BY ACC PTS
general_podium.sort_values(['Podium_Points'], ascending=False).head(5).sort_values(['Points'], ascending=False).reset_index(drop=True)

Unnamed: 0,Player,Podium_Points,Points
0,natal,10.0,67560
1,luara,9.0,31608
2,sabrine,5.0,28353
3,einstein,7.0,24518
4,marcell,5.0,11047


In [80]:
general_podium.sort_values(['Points'], ascending=False).head(5).reset_index(drop=True)

Unnamed: 0,Player,Podium_Points,Points
0,natal,10.0,67560
1,rws,1.0,36676
2,luara,9.0,31608
3,gabriel,0.0,31417
4,sabrine,5.0,28353


In [69]:
general_podium.Player.unique()

array(['johann', 'machado', 'aguilar', 'ali', 'amanda', 'amelia',
       'angelina', 'babi', 'bidomar', 'bombom', 'brenda', 'da',
       'debruyne', 'eduardo', 'einstein', 'evelyn', 'rafa', 'marcell',
       'gabriel', 'geraldo', 'giu', 'gustavo', 'henrique', 'nath', 'jb',
       'kim', 'rs', 'luara', 'leleo', 'levis', 'lilian', 'louis', 'lucas',
       'pallu', 'mari', 'mardabi', 'matos', 'maycon', 'migueleli',
       'natal', 'otavio', 'r&c', 'rws', 'sabrine', 'tefi', 'thobah',
       'tijolinho', 'trentin', 'veruska', 'xandao', 'yejin'], dtype=object)