Feature Extractor:

    input: dataframe with 'Moves' column 
    output: dataframe with features
            

In [1]:
import pandas as pd
import numpy as np
import ast

## raw problems dataframe

In [2]:
# Load the raw problems table from "problems.csv" into a pandas DataFrame.
# The "Unnamed: 0" column is discarded (presumably a row index written by an
# earlier to_csv call -- confirm against the script that produced the file).
problems = pd.read_csv("problems.csv")
problems = problems.drop(columns=["Unnamed: 0"])
problems.head()

Unnamed: 0,Name,Grade,Moves,Holdsets,Id
0,BLACK BEAUTY,8B,"[{'Id': 1542554, 'Description': 'J5', 'IsStart...","[{'Id': 0, 'Description': 'Original School Hol...",82224
1,PROJECT 2,8A+,"[{'Id': 1535606, 'Description': 'K6', 'IsStart...","[{'Id': 0, 'Description': 'Original School Hol...",62575
2,SHATTERED MERCY,8A+,"[{'Id': 1596473, 'Description': 'D3', 'IsStart...","[{'Id': 0, 'Description': 'Original School Hol...",241045
3,ATTIC ADDICT,8A,"[{'Id': 1986894, 'Description': 'E6', 'IsStart...","[{'Id': 0, 'Description': 'Original School Hol...",350783
4,BUNNY,8A,"[{'Id': 1521147, 'Description': 'B4', 'IsStart...","[{'Id': 0, 'Description': 'Original School Hol...",23184


In [3]:
# Convert every element of the "Moves" column from its string form (as read
# from the CSV) into a Python list of move dictionaries.
# Values that are already lists are passed through unchanged, so re-running
# this cell does not fail in ast.literal_eval.

problem_list = [
    raw_moves if isinstance(raw_moves, list) else ast.literal_eval(raw_moves)
    for raw_moves in problems["Moves"].values
]
problems["Moves"] = problem_list

## route feature extractor

In [4]:
# feature 1: number of holds
# Count the holds of every route and write the counts into the
# "numberOfHolds" column.

number_of_holds = [len(route_moves) for route_moves in problems["Moves"].values]
problems["numberOfHolds"] = number_of_holds
problems.head()


Unnamed: 0,Name,Grade,Moves,Holdsets,Id,numberOfHolds
0,BLACK BEAUTY,8B,"[{'Id': 1542554, 'Description': 'J5', 'IsStart...","[{'Id': 0, 'Description': 'Original School Hol...",82224,4
1,PROJECT 2,8A+,"[{'Id': 1535606, 'Description': 'K6', 'IsStart...","[{'Id': 0, 'Description': 'Original School Hol...",62575,6
2,SHATTERED MERCY,8A+,"[{'Id': 1596473, 'Description': 'D3', 'IsStart...","[{'Id': 0, 'Description': 'Original School Hol...",241045,8
3,ATTIC ADDICT,8A,"[{'Id': 1986894, 'Description': 'E6', 'IsStart...","[{'Id': 0, 'Description': 'Original School Hol...",350783,8
4,BUNNY,8A,"[{'Id': 1521147, 'Description': 'B4', 'IsStart...","[{'Id': 0, 'Description': 'Original School Hol...",23184,8


In [5]:
# Show the parsed move list of the route labelled 0 (single .loc lookup).
problems.loc[0, "Moves"]

[{'Id': 1542554, 'Description': 'J5', 'IsStart': True, 'IsEnd': False},
 {'Id': 1542555, 'Description': 'I9', 'IsStart': False, 'IsEnd': False},
 {'Id': 1542556, 'Description': 'E14', 'IsStart': False, 'IsEnd': False},
 {'Id': 1542557, 'Description': 'H18', 'IsStart': False, 'IsEnd': True}]

In [6]:
# feature 2: longest Euclidean distance between two consecutive holds of a route
# extracts the longest move in a route


# returns number of start holds in a route (counting stops at two)
def areTwoStartHolds(moves):
    """Count the start holds in a route's move list, stopping at two.

    Parameters
    ----------
    moves : iterable of dict
        Moves of one route; each dict must have a boolean "IsStart" key.

    Returns
    -------
    int
        0, 1 or 2. The scan stops as soon as a second start hold is found,
        so the result is never larger than 2.
    """
    number_of_start_holds = 0
    # Iterate the argument itself, not a notebook-level variable.
    for move in moves:
        if move["IsStart"]:
            number_of_start_holds += 1
            if number_of_start_holds == 2:
                break
    return number_of_start_holds

# Upper-case Latin letters "A".."Z" as a list; a letter's position in this
# list is used as the x coordinate of a hold's column.
alphabet = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")


# returns euclidean distance between two holds:
def eucl_dist(hold1, hold2):
    """Return the Euclidean distance between two holds.

    Each hold is a string such as "J5": one column letter (case-insensitive)
    followed by an integer row number. The column letter is mapped to its
    position in ``alphabet`` and the row number is used as-is.
    """

    def as_point(hold):
        # (column position, row number) as a numpy vector
        column = alphabet.index(hold[0].upper())
        row = int(hold[1:])
        return np.array((column, row))

    start_point = as_point(hold1)
    end_point = as_point(hold2)
    return np.linalg.norm(start_point - end_point)


# returns list of move lengths in a route
def len_of_moves(problem):
    """Return the distances between consecutive holds of one route.

    ``problem`` is a row-like object whose "Moves" entry is the list of move
    dictionaries; each move's "Description" is the hold name. The result has
    one entry fewer than the number of holds (empty for 0 or 1 holds).
    """
    moves = problem["Moves"]
    consecutive_pairs = zip(moves, moves[1:])
    return [
        eucl_dist(current["Description"], following["Description"])
        for current, following in consecutive_pairs
    ]


# adds the "lengthOfMoves" feature column

def len_feature(DataFrame):
    """Compute the move lengths of every route and store them on the frame.

    The per-route lists produced by ``len_of_moves`` are written into the
    "lengthOfMoves" column of ``DataFrame`` (modified in place) and the same
    list of lists is returned.
    """
    row_count = len(DataFrame)
    lst_of_len_of_moves = [
        len_of_moves(DataFrame.iloc[position]) for position in range(row_count)
    ]
    DataFrame["lengthOfMoves"] = lst_of_len_of_moves
    return lst_of_len_of_moves

def longestMove(DataFrame):
    """Return the longest move of every route in ``DataFrame``.

    The per-route move lengths are computed with ``len_feature`` and stored
    in the "lengthOfMoves" column of ``DataFrame`` (modified in place).

    Parameters
    ----------
    DataFrame : pandas.DataFrame
        Routes frame with a "Moves" column of parsed move lists.

    Returns
    -------
    list of float
        One value per route. A route with fewer than two holds has no moves
        and gets 0.0 instead of raising ValueError from max() on an empty list.
    """
    lengths_per_route = len_feature(DataFrame)
    DataFrame["lengthOfMoves"] = lengths_per_route

    # Work on the frame that was passed in, not on a notebook-level variable.
    return [max(lengths, default=0.0) for lengths in lengths_per_route]

# Store the longest move of every route in the "longestMove" column.
problems["longestMove"] = longestMove(problems)


In [7]:
# feature 3
# longest move

def longestMove(DataFrame):
    """Return the longest move of every route in ``DataFrame``.

    Parameters
    ----------
    DataFrame : pandas.DataFrame
        Routes frame that already has a "lengthOfMoves" column holding, per
        route, the list of distances between consecutive holds.

    Returns
    -------
    list
        One value per route: the maximum of its move lengths. A route with
        no moves gets 0.0 instead of raising ValueError from max() on an
        empty list.
    """
    # Read from the frame that was passed in, not from a notebook-level variable.
    return [max(lengths, default=0.0) for lengths in DataFrame["lengthOfMoves"].values]

# Overwrite the "longestMove" column using the longestMove defined in this cell.
problems["longestMove"] = longestMove(problems)

In [8]:
# feature 4
# holds mean difficulty over all problems

# first obtain the difficulty (grade) distribution of each hold
# then obtain, for each route, the mean of the mean difficulties of its holds

def holdsOfProblems(DataFrame):
    """Return, for every route, the list of its hold names.

    Parameters
    ----------
    DataFrame : pandas.DataFrame
        Routes frame with a "Moves" column; each entry is a list of move
        dictionaries that have a "Description" key (the hold name).

    Returns
    -------
    list of list
        One list of hold names per route, in move order.
    """
    # Read from the frame that was passed in, not from a notebook-level variable.
    return [
        [move["Description"] for move in moves]
        for moves in DataFrame["Moves"]
    ]

# Store each route's hold names (as returned by holdsOfProblems) in the "holds" column.
problems["holds"] = holdsOfProblems(problems)


# hold set
def holdSet(DataFrame):
    """Collect the distinct hold names used by the routes in ``DataFrame``.

    Parameters
    ----------
    DataFrame : pandas.DataFrame
        Routes frame with a "Moves" column of move-dictionary lists; each
        move's "Description" is the hold name.

    Returns
    -------
    set of str
        All hold names found, with two fixed adjustments: "H13" and "J5" are
        always included, and the lower-case spelling "j5" is dropped
        (presumably a typo in the raw data -- confirm).
    """
    # Read from the frame that was passed in, not from a notebook-level variable.
    holdNames = {
        move["Description"]
        for moves in DataFrame["Moves"]
        for move in moves
    }
    holdNames.add("H13")
    # discard() instead of remove(): no KeyError when "j5" is not in the data.
    holdNames.discard("j5")
    holdNames.add("J5")
    return holdNames


# input: hold name
# returns a (string, dictionary) tuple:
#   string     = name of the hold
#   dictionary = empty grade distribution of the hold,
#                e.g. {"6B+": 0, "6C": 0, ..., "8B": 0}

def holdTuple(hold):
    """Pair a hold name with a fresh grade-count dictionary, all counts zero."""
    grade_names = ("6B+", "6C", "6C+", "7A", "7A+", "7B", "7B+", "7C", "7C+", "8A", "8A+", "8B")
    empty_distribution = dict.fromkeys(grade_names, 0)
    return (hold, empty_distribution)



def holdGradeDistribution(hold, DataFrame):
    """Count, per grade, the routes in ``DataFrame`` that use ``hold``.

    Parameters
    ----------
    hold : str
        Hold name, e.g. "F5".
    DataFrame : pandas.DataFrame
        Routes frame with a "holds" column (list of hold names per route)
        and a "Grade" column.

    Returns
    -------
    tuple
        ``(hold, counts)`` as created by ``holdTuple``, with ``counts[grade]``
        incremented once for every route containing the hold.

    Raises
    ------
    KeyError
        If a matching route has a grade that is not a key of the dictionary
        created by ``holdTuple``.
    """
    holdGrades = holdTuple(hold)
    gradeCounts = holdGrades[1]
    # Walk the two columns positionally; label-based DataFrame.loc[i] over
    # range(len(DataFrame)) only works when the index is exactly 0..n-1.
    for routeHolds, grade in zip(DataFrame["holds"], DataFrame["Grade"]):
        if hold in routeHolds:
            gradeCounts[grade] += 1
    return holdGrades

# feature 5 
# TODO: establish a relationship between move length and hold difficulty.

## hold feature extractor

In [9]:
# define hold DataFrame
# first sorts holds (holds data taken from raw problems dataframe)
# then define holds DataFrame


# hold set
# input dataframe
# return holdSet
def holdSet(DataFrame):
    """Collect the distinct hold names used by the routes in ``DataFrame``.

    Parameters
    ----------
    DataFrame : pandas.DataFrame
        Routes frame with a "Moves" column of move-dictionary lists; each
        move's "Description" is the hold name.

    Returns
    -------
    set of str
        All hold names found, with two fixed adjustments: "H13" and "J5" are
        always included, and the lower-case spelling "j5" is dropped
        (presumably a typo in the raw data -- confirm).
    """
    # Read from the frame that was passed in, not from a notebook-level variable.
    holdNames = {
        move["Description"]
        for moves in DataFrame["Moves"]
        for move in moves
    }
    holdNames.add("H13")
    # discard() instead of remove(): no KeyError when "j5" is not in the data.
    holdNames.discard("j5")
    holdNames.add("J5")
    return holdNames



# holdListSorter
# input DataFrame
# return sorted hold list: sortedHoldList

def holdListSorter(problems):
    """Return the hold names ordered by column letter, then by row number.

    The names come from ``holdSet(problems)``. Rows are compared as integers,
    so "A9" sorts before "A10". Only holds whose first character is one of
    the upper-case letters A..K are kept; any other hold is silently left out.
    Each returned name is rebuilt as column letter + integer row number.
    """
    columnLetters = "ABCDEFGHIJK"

    # (column position, row number) for every hold that lies in a known column
    keyedHolds = [
        (columnLetters.index(hold[0]), int(hold[1:]))
        for hold in holdSet(problems)
        if hold[0] in columnLetters
    ]

    sortedHoldList = [
        columnLetters[column] + str(row)
        for column, row in sorted(keyedHolds)
    ]
    return sortedHoldList




# holds DataFrame
# One row per hold, in the order produced by holdListSorter; the only column
# at this point is "Name".

sortedHoldDict = {"Name": holdListSorter(problems)}

holds = pd.DataFrame(sortedHoldDict)

In [10]:
# feature 1
# grade distribution of hold





# holdsOfProblems
# input dataframe
# return holdsColumn list

def holdsOfProblems(DataFrame):
    """Return, for every route, the list of its hold names.

    Parameters
    ----------
    DataFrame : pandas.DataFrame
        Routes frame with a "Moves" column; each entry is a list of move
        dictionaries that have a "Description" key (the hold name).

    Returns
    -------
    list of list
        One list of hold names per route, in move order.
    """
    # Read from the frame that was passed in, not from a notebook-level variable.
    return [
        [move["Description"] for move in moves]
        for moves in DataFrame["Moves"]
    ]

# Store each route's hold names (as returned by holdsOfProblems) in the "holds" column.
problems["holds"] = holdsOfProblems(problems)

# ------------------------ #

# holdTuple
# input: hold name
# returns a (string, dictionary) tuple:
#   string     = name of the hold
#   dictionary = empty grade distribution of the hold,
#                e.g. {"6B+": 0, "6C": 0, ..., "8B": 0}

def holdTuple(hold):
    """Pair a hold name with a fresh grade-count dictionary, all counts zero."""
    grade_names = ("6B+", "6C", "6C+", "7A", "7A+", "7B", "7B+", "7C", "7C+", "8A", "8A+", "8B")
    empty_distribution = dict.fromkeys(grade_names, 0)
    return (hold, empty_distribution)

# ------------------------ #

# holdGradeDistribution
# input hold, DataFrame
# return holdGrades tuple, e.g.
# "F5", {"6B+": 0, "6C":0, "6C+":0, "7A":0, "7A+":0, "7B":0,"7B+":0, "7C":0, "7C+":0, "8A":0, "8A+":0, "8B":0}

def holdGradeDistribution(hold, DataFrame):
    """Count, per grade, the routes in ``DataFrame`` that use ``hold``.

    Parameters
    ----------
    hold : str
        Hold name, e.g. "F5".
    DataFrame : pandas.DataFrame
        Routes frame with a "holds" column (list of hold names per route)
        and a "Grade" column.

    Returns
    -------
    tuple
        ``(hold, counts)`` as created by ``holdTuple``, with ``counts[grade]``
        incremented once for every route containing the hold.

    Raises
    ------
    KeyError
        If a matching route has a grade that is not a key of the dictionary
        created by ``holdTuple``.
    """
    holdGrades = holdTuple(hold)
    gradeCounts = holdGrades[1]
    # Walk the two columns positionally; label-based DataFrame.loc[i] over
    # range(len(DataFrame)) only works when the index is exactly 0..n-1.
    for routeHolds, grade in zip(DataFrame["holds"], DataFrame["Grade"]):
        if hold in routeHolds:
            gradeCounts[grade] += 1
    return holdGrades


# input holds dataframe (optionally the routes dataframe)
# return holds dataframe with grade distribution column

def gradeDistributionFeature(holds, DataFrame=None):
    """Add a "GradeDistribution" column to the holds frame.

    Parameters
    ----------
    holds : pandas.DataFrame
        Holds frame with a "Name" column; modified in place.
    DataFrame : pandas.DataFrame, optional
        Routes frame passed on to ``holdGradeDistribution``. When omitted,
        the notebook-level ``problems`` frame is used, as before.

    Returns
    -------
    pandas.DataFrame
        The same ``holds`` object, with one grade-count dictionary per hold
        in the new "GradeDistribution" column.
    """
    if DataFrame is None:
        DataFrame = problems

    # Iterate the names positionally; label-based holds.loc[i] over
    # range(len(holds)) only works when the index is exactly 0..n-1.
    holdsGradeDistributionList = [
        holdGradeDistribution(holdName, DataFrame)[1]
        for holdName in holds["Name"]
    ]

    holds["GradeDistribution"] = holdsGradeDistributionList

    return holds

# Add the "GradeDistribution" column to the holds frame.
holds = gradeDistributionFeature(holds)

In [15]:
# FIXME: holdColorDictionary is only defined in a later cell, so this lookup
# raises NameError when the notebook is run top to bottom on a fresh kernel.
holdColorDictionary["A5"]

'white'

In [25]:
# feature 2
# hold color

# column Color: string
# columns; black: 0, 1; white: 0, 1; yellow: 0, 1  


# Color ("white", "black" or "yellow") of every hold, keyed by hold name.
# One line per board column, rows in ascending order.
holdColorDictionary = {
    "A5": "white", "A9": "black", "A10": "yellow", "A11": "yellow", "A12": "white", "A13": "yellow", "A14": "black", "A15": "white", "A16": "yellow", "A18": "black",
    "B3": "white", "B4": "yellow", "B6": "black", "B7": "yellow", "B8": "white", "B9": "white", "B10": "black", "B11": "white", "B12": "black", "B13": "white", "B15": "black", "B16": "white", "B18": "white",
    "C5": "black", "C6": "white", "C7": "yellow", "C8": "black", "C9": "yellow", "C10": "white", "C11": "white", "C12": "yellow", "C13": "black", "C14": "white", "C15": "yellow", "C16": "black", "C18": "yellow",
    "D3": "yellow", "D5": "white", "D6": "yellow", "D7": "black", "D8": "yellow", "D9": "white", "D10": "yellow", "D11": "black", "D12": "white", "D13": "yellow", "D14": "white", "D15": "black", "D16": "yellow", "D17": "white", "D18": "black",
    "E6": "black", "E7": "white", "E8": "black", "E9": "black", "E10": "white", "E11": "white", "E12": "black", "E13": "white", "E14": "black", "E15": "white", "E16": "black", "E18": "white",
    "F5": "white", "F6": "yellow", "F7": "white", "F8": "yellow", "F9": "yellow", "F10": "white", "F11": "black", "F12": "white", "F13": "black", "F14": "white", "F15": "yellow", "F16": "white",
    "G2": "white", "G4": "black", "G6": "black", "G7": "yellow", "G8": "white", "G9": "black", "G10": "black", "G11": "yellow", "G12": "white", "G13": "white", "G14": "black", "G15": "black", "G16": "yellow", "G17": "black", "G18": "white",
    "H5": "black", "H7": "yellow", "H8": "black", "H9": "yellow", "H10": "black", "H11": "white", "H12": "black", "H13": "black", "H14": "yellow", "H15": "yellow", "H16": "black", "H18": "yellow",
    "I4": "white", "I5": "yellow", "I6": "white", "I7": "black", "I8": "yellow", "I9": "black", "I10": "black", "I11": "white", "I12": "yellow", "I13": "white", "I14": "black", "I15": "black", "I16": "white", "I18": "black",
    "J2": "white", "J5": "black", "J6": "white", "J7": "black", "J8": "white", "J9": "white", "J10": "white", "J11": "yellow", "J12": "black", "J13": "black", "J14": "yellow", "J16": "black",
    "K5": "white", "K6": "yellow", "K7": "yellow", "K8": "yellow", "K9": "black", "K10": "yellow", "K11": "white", "K12": "yellow", "K13": "yellow", "K14": "black", "K16": "black", "K18": "white",
}


# adds Color column
# input holds dataframe
# return holds dataframe with column Color

def holdColor(holds):
    """Look up every hold's color and store it in the "Color" column.

    Each value of the "Name" column is looked up in ``holdColorDictionary``;
    a name that is not a key of that dictionary raises KeyError. ``holds``
    is modified in place and returned.
    """
    holdsColorList = [holdColorDictionary[holdName] for holdName in holds["Name"]]

    holds["Color"] = holdsColorList

    return holds

# Add the "Color" column to the holds frame.
holds = holdColor(holds)


# add 3 color columns
# black white yellow
# 0 or 1

def holdColorNumeric(holds):
    """Add 0/1 indicator columns "Black", "White" and "Yellow" to ``holds``.

    The indicators are derived from the "Color" column. Any color other than
    "black" or "white" is flagged as yellow. ``holds`` is modified in place
    and returned.
    """
    # (black, white, yellow) flags per known color; everything else is yellow
    flagsByColor = {"black": (1, 0, 0), "white": (0, 1, 0)}
    yellowFlags = (0, 0, 1)

    blackColorList, whiteColorList, yellowColorList = [], [], []
    for color in holds["Color"]:
        blackFlag, whiteFlag, yellowFlag = flagsByColor.get(color, yellowFlags)
        blackColorList.append(blackFlag)
        whiteColorList.append(whiteFlag)
        yellowColorList.append(yellowFlag)

    holds["Black"] = blackColorList
    holds["White"] = whiteColorList
    holds["Yellow"] = yellowColorList

    return holds

# Add the "Black", "White" and "Yellow" indicator columns to the holds frame.
holds = holdColorNumeric(holds)

In [27]:
# Write the holds feature table to "holds.csv".
# NOTE(review): to_csv is called with its defaults, which presumably also
# writes the row index as an unnamed first column -- confirm this is wanted.
holds.to_csv("holds.csv")