In [43]:
import os, sys, pandas as pd, numpy as np, matplotlib.pyplot as plt, seaborn as sns
import hvplot.pandas
from bokeh.io import curdoc

curdoc().theme = "dark_minimal"

In [25]:
grade_conversion = {
    "6B+": 4,
    "6C" : 5,
    "6C+": 5.5,
    "7A" : 6,
    "7A+": 7,
    "7B" : 8,
    "7B+": 8.5
}

In [84]:
df = pd.read_csv("full_formatted_data_231213.csv")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 491 entries, 0 to 490
Data columns (total 10 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   date       491 non-null    object 
 1   name       491 non-null    object 
 2   grade      491 non-null    object 
 3   setter     491 non-null    object 
 4   mygrade    491 non-null    object 
 5   attempts   491 non-null    int64  
 6   ticked     491 non-null    bool   
 7   comment    3 non-null      object 
 8   grade_v    491 non-null    float64
 9   mygrade_v  491 non-null    float64
dtypes: bool(1), float64(2), int64(1), object(6)
memory usage: 35.1+ KB


In [86]:
# Group by problem and report number of tries, if it is ticked or not, number of days, setter
from collections import namedtuple

def _groupby(raw_df):

    def count_days(dt_series):
        return len(set(dt_series))
    
    df = raw_df.groupby(["name","grade_v", "setter"], as_index=False).aggregate(
        total_attempts = ("attempts", np.sum),
        total_days = ("date", count_days),
        ticked = ("ticked", np.any),
        last_tried = ("date", max),
        first_tried = ("date", min)
    )

    # TODO: Not sure how to do aggregating functions on groupby using multiple columns
    # e.g. passing in both 'date' and 'ticked' as inputs. until then.. using a loop and merge

    detailed_stats = []

    DateStats = namedtuple(
        "DateStats", 
        "name \
            date_first_ticked \
                attempts_to_send \
                    days_to_send \
                        num_repeats"
    )

    for n in df.name:
        first_ticked = np.nan
        tdf = raw_df[raw_df.name == n]
        ticked_sessions = tdf[tdf.ticked]
        
        try:
            first_ticked = ticked_sessions.date.min()
            attempts_to_send = tdf.loc[tdf.date<=first_ticked, "attempts"].sum()
            days_to_send = tdf.loc[tdf.date<=first_ticked].__len__()
        
        except Exception as e:
            print(e)
            first_ticked = np.nan
            attempts_to_send = np.nan
            days_to_send = np.nan

        num_repeats = ticked_sessions.__len__()
        
        detailed_stats.append(
            DateStats(n, first_ticked, attempts_to_send, days_to_send, num_repeats)
        )

    detailed_stats_df = pd.DataFrame(detailed_stats, columns=DateStats._fields)

    merged = pd.merge(left=df, right=detailed_stats_df, how='left', on='name')

    return merged


def current_projects_ranked(raw_df, as_table=True):
    df = _groupby(raw_df)
    df = df[~df.ticked].sort_values(
        ["total_days", "total_attempts"], 
        ascending=False
    )

    if as_table:
        return df.hvplot.table(sortable=True, selectable=True)

    return df

def _calc_num_attempts_to_send(t: pd.Series, d: pd.Series):
    return d[0]

def ticked_climbs_ranked(raw_df, topk=10, by='attempts'):
    


    # Get top-k list of climbs by difficulty
    topk_climbs = raw_df.groupby(["name"], as_index=False).aggregate(
        difficulty = ("grade_v", np.max)
    ).loc[:topk, "name"]

    topk_climbs = set(topk_climbs.tolist())
    return topk_climbs

    return df
    # return df[df.ticked].hvplot.table(sortable=True, selectable=True)



# df2[~df2.ticked].groupby("setter")




In [87]:
grp = _groupby(df)

In [88]:
grp

Unnamed: 0,name,grade_v,setter,total_attempts,total_days,ticked,last_tried,first_tried,date_first_ticked,attempts_to_send,days_to_send,num_repeats
0,#2 KH,5.5,Kyle Hilton,3,2,True,2022-10-06,2022-09-01,2022-09-01,2,1,2
1,(*)(*)FAKE,4.0,Mystery Frog Man,1,1,True,2023-05-30,2023-05-30,2023-05-30,1,1,1
2,1,4.0,Hugo Hoyer,1,1,True,2023-11-26,2023-11-26,2023-11-26,1,1,1
3,1817,5.0,Randall Baum,3,1,False,2023-06-13,2023-06-13,,0,0,0
4,1K,4.0,Bryant Noble,1,1,True,2023-11-28,2023-11-28,2023-11-28,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...
232,¡APUNT****!,4.0,AJ Rimes,1,1,True,2023-12-12,2023-12-12,2023-12-12,1,1,1
233,紙一重 -KAMI HITOE-,5.5,seishirou shirai,6,1,True,2022-09-01,2022-09-01,2022-09-01,6,2,2
234,課題2,5.5,atsushi takizawa,3,1,True,2023-02-16,2023-02-16,2023-02-16,3,1,1
235,課題24,4.0,atsushi takizawa,1,1,True,2023-01-24,2023-01-24,2023-01-24,1,1,1


In [76]:
tempdf

Unnamed: 0,date,name,grade,setter,mygrade,attempts,ticked,comment,grade_v,mygrade_v,date_first_ticked,attempts_to_send,days_to_send,num_repeats
0,2023-12-14,POPIFRESCO,6B+,JAVIERMENG,6B+,2,False,,4.0,4.0,2023-05-07,2,1,1
1,2023-12-14,THE SENDTRAIN,6C,Ben Rankin,6C,2,True,,5.0,5.0,2023-04-18,3,1,2
2,2023-12-14,BORNEO,7A,Stuart Ivory,6C+,1,True,,6.0,5.5,2023-12-12,6,1,2
3,2023-12-14,BUTOLOGIA,6C,nicholas farenzena,6C,1,True,,5.0,5.0,2023-11-21,6,2,2
4,2023-12-14,FINGER LICKIN EASY,6B+,Kyle Knapp,6C,1,True,,4.0,5.0,2023-12-14,4,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
486,2022-07-17,AN EASY PROBLEM,6B+,Brett Duffield,6B+,1,True,,4.0,4.0,2022-07-17,1,1,3
487,2022-07-17,WUTHERING HEIGHTS,6B+,Ben Moon,6B+,1,True,,4.0,4.0,2022-07-17,1,1,7
488,2022-07-17,BITTER,6B+,Riccardo Caprasecca,6B+,2,True,,4.0,4.0,2022-07-17,2,1,3
489,2022-07-17,KLINGON EASY,6B+,Ben Moon,6B+,1,True,,4.0,4.0,2022-07-17,1,1,4


In [44]:
proj = current_projects_ranked(df)

In [45]:
proj

In [54]:
ticked = ticked_climbs_ranked(df)

In [55]:
ticked

{'#2 KH',
 '(*)(*)FAKE',
 '1',
 '1817',
 '1K',
 '2',
 '272966',
 '7A TOO MUCH',
 '7STÖCKE',
 'A LITTLE CROSSING',
 'AB1'}