# Imports

In [1]:
import pandas as pd
import re
import numpy as np
import pandas as pd
import requests
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import time
from selenium.common.exceptions import NoSuchElementException

pd.options.mode.chained_assignment = None

# Data Cleaning

We've scraped raw data from workouts, we now need to put this data in a useable form. We start by breaking the workouts up into individual movements.

In [2]:
rawLoc = '../data/raw/namesAndText.csv'
rawWod = pd.read_csv(rawLoc)
rawWod.head()

Unnamed: 0,Name,Text
0,MURPH,MURPH\nCrossFit Hero WOD\nFor Time\n1 mile Run...
1,DT,DT\nCrossFit Hero WOD\n5 Rounds For Time\n12 D...
2,CINDY,"CINDY\nCrossFit Benchmark ""Girl"" WOD\nAMRAP in..."
3,BEAR COMPLEX,BEAR COMPLEX\nCrossFit Benchmark WOD\n5 Rounds...
4,FRAN,"FRAN\nCrossFit ""Girl"" Benchmark WOD\n21-15-9 R..."


In [3]:
moveWod = {}
for desc in rawWod['Text']:
    lines = desc.split('\n')
    for i in range(1,len(lines)):
        if lines[i] in moveWod.keys():
            moveWod[lines[i]].append(lines[0])
        else:
            moveWod[lines[i]] = [lines[0]]
movementDF = pd.DataFrame(moveWod.keys(),columns=['Movement'])
workouts = []
for move in movementDF['Movement']:
    workouts.append(moveWod[move])
movementDF['Workouts'] = workouts
movementDF

Unnamed: 0,Movement,Workouts
0,CrossFit Hero WOD,"[MURPH, DT, LOREDO, DEL, BERT, MANION, HOLLEYM..."
1,For Time,"[MURPH, ZACHARY TELLIER, GRACE, DEL, JACKIE, F..."
2,1 mile Run,"[MURPH, MURPH, BULL, JERRY, JERRY, ABBATE, ABB..."
3,100 Pull-Ups,"[MURPH, ANGIE, CAROL SWANSON, TRIPLE-G CHIPPER..."
4,200 Push-Ups,[MURPH]
...,...,...
14845,9 Toes to Bar,[HOCUS POCUS – THE SANDERSON SISTERS]
14846,3 Man Makers,[HOCUS POCUS – THE SANDERSON SISTERS]
14847,Finisher,[HOCUS POCUS – THE SANDERSON SISTERS]
14848,5 Minute Cardio of Choice,[HOCUS POCUS – THE SANDERSON SISTERS]


In [4]:
weights = []
new_moves = []
for move in movementDF['Movement']:
    match = re.search('\(.*/.*\)',move)
    if match:
        weights.append(match.group(0))
        new_moves.append(re.sub('\(.*/.*\)','',move))
    else:
        weights.append('0')
        new_moves.append(move)
movementDF['Movement'] = new_moves
movementDF['Weight'] = weights
movementDF

Unnamed: 0,Movement,Workouts,Weight
0,CrossFit Hero WOD,"[MURPH, DT, LOREDO, DEL, BERT, MANION, HOLLEYM...",0
1,For Time,"[MURPH, ZACHARY TELLIER, GRACE, DEL, JACKIE, F...",0
2,1 mile Run,"[MURPH, MURPH, BULL, JERRY, JERRY, ABBATE, ABB...",0
3,100 Pull-Ups,"[MURPH, ANGIE, CAROL SWANSON, TRIPLE-G CHIPPER...",0
4,200 Push-Ups,[MURPH],0
...,...,...,...
14845,9 Toes to Bar,[HOCUS POCUS – THE SANDERSON SISTERS],0
14846,3 Man Makers,[HOCUS POCUS – THE SANDERSON SISTERS],0
14847,Finisher,[HOCUS POCUS – THE SANDERSON SISTERS],0
14848,5 Minute Cardio of Choice,[HOCUS POCUS – THE SANDERSON SISTERS],0


In [5]:
volume = []
movements = []
timeDistMeasures = ['mile','miles','minute','minutes','meter','meters','calorie','calories','min','mins','(calories)','(calories)','(meters)','(miles)']
for move in movementDF['Movement']:
    temp_vol = ''
    temp_mov = ''
    splits = move.split(' ')
    for split in splits:
        if split.isdigit():
            temp_vol += split
        elif split.lower() in timeDistMeasures:
            temp_vol += ' ' + split.lower()
        else:
            temp_mov += split.lower() + ' '
    volume.append(temp_vol)
    movements.append(temp_mov)
movementDF['Movement'] = movements
movementDF['Volume'] = volume
movementDF

Unnamed: 0,Movement,Workouts,Weight,Volume
0,crossfit hero wod,"[MURPH, DT, LOREDO, DEL, BERT, MANION, HOLLEYM...",0,
1,for time,"[MURPH, ZACHARY TELLIER, GRACE, DEL, JACKIE, F...",0,
2,run,"[MURPH, MURPH, BULL, JERRY, JERRY, ABBATE, ABB...",0,1 mile
3,pull-ups,"[MURPH, ANGIE, CAROL SWANSON, TRIPLE-G CHIPPER...",0,100
4,push-ups,[MURPH],0,200
...,...,...,...,...
14845,toes to bar,[HOCUS POCUS – THE SANDERSON SISTERS],0,9
14846,man makers,[HOCUS POCUS – THE SANDERSON SISTERS],0,3
14847,finisher,[HOCUS POCUS – THE SANDERSON SISTERS],0,
14848,cardio of choice,[HOCUS POCUS – THE SANDERSON SISTERS],0,5 minute


In [6]:
movementDF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14850 entries, 0 to 14849
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Movement  14850 non-null  object
 1   Workouts  14850 non-null  object
 2   Weight    14850 non-null  object
 3   Volume    14850 non-null  object
dtypes: object(4)
memory usage: 464.2+ KB


In [7]:
new_vol = movementDF['Volume'].replace('',np.nan)
movementDF['Volume'] = new_vol

In [8]:
movementDF['Volume'].isnull().sum()

3822

In [9]:
dashDF = movementDF[movementDF.Movement.str.match('.*-.*-.*')]
repSchemeDF = dashDF[dashDF["Volume"].isnull()]
repWodPairs = {}
for move, wod in zip(repSchemeDF['Movement'],repSchemeDF['Workouts']):
    repWodPairs[move] = wod

In [10]:
wodVol = {}
for move in repWodPairs.keys():
    spaceMove = move.replace('-',' ')
    spaceMove = spaceMove.replace('(','')
    spaceMove = spaceMove.replace(')','')
    repCount = 0
    splitMove = spaceMove.split()
    if 'reps' in spaceMove:
        for word in splitMove:
            if word.isnumeric():
                repCount += int(word)
        for wod in repWodPairs[move]:
            if wod in wodVol.keys() and repCount != 0:
                wodVol[wod] += repCount
            elif repCount != 0:
                wodVol[wod] = repCount
    else:
        tempMove = ''
        for word in splitMove:
            if word.isnumeric():
                repCount += int(word)
            else:
                tempMove += word + ' '
        tempMove = tempMove.strip()
        movementDF.loc[movementDF.Movement == move, 'Volume'] = repCount
        movementDF.loc[movementDF.Movement == move, 'Movement'] = tempMove

In [11]:
movementDFNoNull = movementDF.dropna()

In [12]:
movementDFNoNull.reset_index(drop=True, inplace=True)

In [13]:
new_vol = []
for vol in movementDFNoNull['Volume']:
    if str(vol).isnumeric():
        new_vol.append(int(vol))
    else:
        brokenVol = vol.split(' ')
        if str(brokenVol[0]).isnumeric():
            if 'minute' in brokenVol or 'minutes' in brokenVol or 'min' in brokenVol or 'mins' in brokenVol:
                new_vol.append(int(brokenVol[0]))
            elif 'meter' in brokenVol or 'meters' in brokenVol:
                new_vol.append(int(brokenVol[0]))
            elif 'mile' in brokenVol or 'miles' in brokenVol:
                new_vol.append(int(brokenVol[0])*1600)
            elif 'calorie' in brokenVol or 'calories' in brokenVol:
                new_vol.append(int(brokenVol[0])*16)
            else:
                new_vol.append(int(brokenVol[0]))
        else:
            new_vol.append(np.nan)
movementDFNoNull['Volume'] = new_vol
movementDFNoNull.dropna(inplace=True)

In [14]:
#create a visited list to prevent duplicates and a dictionary for reconstructing workouts
visited = []
reconstructDict = {}
#loop through each workout name, skipping duplicates
for name in rawWod['Name']:
    if name not in visited:
        visited.append(name)
        visMove = []
        #for each movement with a volume, if that workout appears in its workout list, add that movement as a key,
        #paired with its weight and volume to the key of the workout name
        for i in range(len(movementDFNoNull)):
            if movementDFNoNull['Movement'].iloc[i] not in visMove:
                visMove.append(movementDFNoNull['Movement'].iloc[i])
                if name in movementDFNoNull['Workouts'].iloc[i]:
                    if name in reconstructDict.keys():
                        reconstructDict[name][movementDFNoNull['Movement'].iloc[i]] = [movementDFNoNull['Weight'].iloc[i],movementDFNoNull['Volume'].iloc[i]]
                    else:
                        reconstructDict[name] = {movementDFNoNull['Movement'].iloc[i]:[movementDFNoNull['Weight'].iloc[i],movementDFNoNull['Volume'].iloc[i]]}
print(len(reconstructDict.keys()))

2604


In [15]:
uniMoves = []
for wod in reconstructDict.keys():
    for move in reconstructDict[wod].keys():
        if move not in uniMoves:
            uniMoves.append(move)
print(len(uniMoves))

4119


In [16]:
for move in uniMoves:
    if (re.search('.*wod.*',move) or re.search('.*round.*',move) or re.search('.*amrap.*',move) or re.search('.*tabata.*',move)) and not re.search('.*ground.*',move):
        uniMoves.remove(move)
        
len(uniMoves)

3662

In [17]:
col_names = ['Name','href']
col_names += uniMoves
wodsDF = pd.DataFrame(columns=col_names)
wodsDF

Unnamed: 0,Name,href,run,pull-ups,push-ups,air squats,deadlifts,hang power cleans,push jerks,amrap in,...,swings,bear crawl kb pull through,bear crawl shoulder tap,- row cal,kettlebell box step-ups,"run (add ""0""...so meters-200-100, etc)",db deadlifts,curtis p,db overhead lunge (alternating lunge),cardio of choice


In [18]:
def textClean(text):
        text = text.replace('-',' ')
        text = text.replace(':',' ')
        text = text.replace('/',' ')
        text = text.replace('\n',' ')
        text = text.replace('–',' ')
        text = text.replace('*',' ')
        text = text.replace('=',' ')
        text = text.replace('.',' ')
        text = text.replace(',',' ')
        text = text.replace('…',' ')
        text = text.strip()
        return text

In [19]:
def concatLikeCols(base,cols,searched,mappingDict):
    toRet = []
    for col in cols:
        if base not in mappingDict.keys():
            mappingDict[base] = []
        newCol = textClean(col)
        regex = '.*'+base+'.*'
        if base == newCol:
            mappingDict[base].append(col)
            if base not in toRet:
                toRet.append(base)
        elif not re.search(regex,newCol):
            toRet.append(col)
        else:
            if col in searched and col not in toRet:
                toRet.append(col)
            else:
                mappingDict[base].append(col)
                if base not in toRet:
                    toRet.append(base)
    if base not in toRet:
        print(base + ' not added')
    return toRet

In [20]:
expectedMoves = ['burpee pull up','pull up','handstand push up','push up','air squat','back squat','front squat',\
                 'overhead squat','clean and jerk','clean','snatch','squat','sumo deadlift high pull','deadlift','strict press','push press'\
                 ,'jerk','shoulder press','bench press','run','row cal','row','ski','bike','swim','rest','overhead lunge'\
                ,'lunge','thruster','muscle up','handstand walk','double under','wall ball','box jump','burpee',\
                'rope climb','kettlebell swing','ghd sit up','sit up','turkish get up','side plank','plank','toes to bar',\
                'pistol','shoulder to overhead','ground to overhead','ring dip','dip','step up','wall sit','broad jump'\
                ,'farmer carry','triple under','v up','hollow rock','hollow hold','mountain climber','box step over',\
                 'hip extension','back extension','bear crawl','cluster','devil press','jumping jack','complex','situp'\
                ,'pullup','pushup','single under','knees to elbow','good morning','slam ball','sled drag','devils press',\
                 'makers','kettlebell high pull','waiter','suitcase','sprint','farmer\'s carry','up down','sled pull',\
                 'sled push','kb swing','ttb','curl','curtis p','farmer\'s walk','wall walk','rmu','bmu']
searched = []
cleanedMoves = {}
for move in expectedMoves:
    col_names = concatLikeCols(move,col_names,searched,cleanedMoves)
    if move in col_names:
        searched.append(move)
repeat = True
while repeat == True:
    repeat = False
    for move in col_names:
        cleanMove = textClean(move)
        if (re.search('.*wod.*',cleanMove) or (re.search('.*round.*',cleanMove) and not re.search('.*ground.*',cleanMove)) or re.search('.*amrap.*',cleanMove)\
            or re.search('.*tabata.*',cleanMove) or re.search('.*workout.*',cleanMove) or re.search('.*rep.*',cleanMove)\
            or re.search('.*emom.*',cleanMove) or re.search('.*min.*',cleanMove) or re.search('.*time cap.*',cleanMove)\
            or re.search('.*interval.*',cleanMove) or re.search('.*time.*',cleanMove) or (move == '') or re.search('.*male.*',cleanMove)\
            or re.search('.*female.*',move) or re.search('.*men.*',move) or re.search('.*women.*',move) or re.search('.*complete.*',move) or re.search('.*scored.*',move)\
            or re.search('.*perform.*',cleanMove) or cleanMove.isnumeric() or re.search('.*set.*',cleanMove) or re.search('.*start.*',cleanMove) or re.search('.*cash out.*',cleanMove)\
            or re.search('.*weight vest.*',cleanMove) or re.search('.*part.*',cleanMove) or col_names.count(move)>1):
            repeat = True
            while move in col_names:
                i+=1
                col_names.remove(move)
    print(len(col_names))
for move in col_names:
    if move not in cleanedMoves:
        cleanedMoves[move] = [move]

676
599
583
580
580


In [21]:
wodsDF = pd.DataFrame(columns=col_names)
wodsDF

Unnamed: 0,Name,href,run,pull up,push up,air squat,deadlift,clean,jerk,complex,...,chin ups,second pelvic tilt (while holding baby above chest),- russian twists,kettlebell russian swings,second recovery jog,bar carry,spull -ups / banded,– – – 100m,swings,cardio of choice


In [22]:
wodsDF['Name'] = reconstructDict.keys()
hrefBase = 'https://wodwell.com/wod/'
new_href = []
for wod in wodsDF['Name']:
    hrefEnd = wod.lower().replace(' ','-')
    new_href.append(hrefBase + hrefEnd)
wodsDF['href'] = new_href
wodsDF.head()

Unnamed: 0,Name,href,run,pull up,push up,air squat,deadlift,clean,jerk,complex,...,chin ups,second pelvic tilt (while holding baby above chest),- russian twists,kettlebell russian swings,second recovery jog,bar carry,spull -ups / banded,– – – 100m,swings,cardio of choice
0,MURPH,https://wodwell.com/wod/murph,,,,,,,,,...,,,,,,,,,,
1,DT,https://wodwell.com/wod/dt,,,,,,,,,...,,,,,,,,,,
2,CINDY,https://wodwell.com/wod/cindy,,,,,,,,,...,,,,,,,,,,
3,BEAR COMPLEX,https://wodwell.com/wod/bear-complex,,,,,,,,,...,,,,,,,,,,
4,FIGHT GONE BAD,https://wodwell.com/wod/fight-gone-bad,,,,,,,,,...,,,,,,,,,,


In [23]:
for index in range(len(wodsDF['Name'])):
    wodName = wodsDF['Name'].iloc[index]
    for dirtyMove in reconstructDict[wodName].keys():
        if dirtyMove in cleanedMoves.keys():
            try:
                wodsDF[cleanedMoves[dirtyMove]].iloc[index] = reconstructDict[wodName][dirtyMove][1]
            except:
                print('error on '+dirtyMove)
        else:
            for cleanMove in col_names:
                if dirtyMove in cleanedMoves[cleanMove]:
                    wodsDF[cleanMove].iloc[index] = reconstructDict[wodName][dirtyMove][1]
wodsDF.head()

error on toes to bar
error on overhead squat
error on deadlift


Unnamed: 0,Name,href,run,pull up,push up,air squat,deadlift,clean,jerk,complex,...,chin ups,second pelvic tilt (while holding baby above chest),- russian twists,kettlebell russian swings,second recovery jog,bar carry,spull -ups / banded,– – – 100m,swings,cardio of choice
0,MURPH,https://wodwell.com/wod/murph,1600.0,100.0,200.0,300.0,,,,,...,,,,,,,,,,
1,DT,https://wodwell.com/wod/dt,,,,,12.0,9.0,6.0,,...,,,,,,,,,,
2,CINDY,https://wodwell.com/wod/cindy,,,,,,,,,...,,,,,,,,,,
3,BEAR COMPLEX,https://wodwell.com/wod/bear-complex,,,,,,1.0,,7.0,...,,,,,,,,,,
4,FIGHT GONE BAD,https://wodwell.com/wod/fight-gone-bad,,,,,,,,,...,,,,,,,,,,


In [24]:
wodsDF['pull up'].iloc[2] = 5
wodsDF['push up'].iloc[2] = 10
wodsDF['air squat'].iloc[2] = 15
wodsDF

Unnamed: 0,Name,href,run,pull up,push up,air squat,deadlift,clean,jerk,complex,...,chin ups,second pelvic tilt (while holding baby above chest),- russian twists,kettlebell russian swings,second recovery jog,bar carry,spull -ups / banded,– – – 100m,swings,cardio of choice
0,MURPH,https://wodwell.com/wod/murph,1600,100,200,300,,,,,...,,,,,,,,,,
1,DT,https://wodwell.com/wod/dt,,,,,12,9,6,,...,,,,,,,,,,
2,CINDY,https://wodwell.com/wod/cindy,,5,10,15,,,,,...,,,,,,,,,,
3,BEAR COMPLEX,https://wodwell.com/wod/bear-complex,,,,,,1,,7,...,,,,,,,,,,
4,FIGHT GONE BAD,https://wodwell.com/wod/fight-gone-bad,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2599,WEIGHTS FOR WARRIORS,https://wodwell.com/wod/weights-for-warriors,,,,,,,,,...,,,,,,,,,,
2600,BABY WHALE,https://wodwell.com/wod/baby-whale,300,,,,,,,,...,,,,,,,,,,
2601,BARE COVE TRAVEL WOD 3,https://wodwell.com/wod/bare-cove-travel-wod-3,,,,,,,,,...,,,,,,,,,,
2602,BARE COVE TRAVEL WOD 7,https://wodwell.com/wod/bare-cove-travel-wod-7,,,,,,,,,...,,,,,,,,,,


In [25]:
wodsDF.fillna(0,inplace=True)
wodsDF

Unnamed: 0,Name,href,run,pull up,push up,air squat,deadlift,clean,jerk,complex,...,chin ups,second pelvic tilt (while holding baby above chest),- russian twists,kettlebell russian swings,second recovery jog,bar carry,spull -ups / banded,– – – 100m,swings,cardio of choice
0,MURPH,https://wodwell.com/wod/murph,1600.0,100.0,200.0,300.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
1,DT,https://wodwell.com/wod/dt,0.0,0.0,0.0,0.0,12.0,9.0,6.0,0.0,...,0,0,0,0,0,0,0,0,0,0
2,CINDY,https://wodwell.com/wod/cindy,0.0,5.0,10.0,15.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
3,BEAR COMPLEX,https://wodwell.com/wod/bear-complex,0.0,0.0,0.0,0.0,0.0,1.0,0.0,7.0,...,0,0,0,0,0,0,0,0,0,0
4,FIGHT GONE BAD,https://wodwell.com/wod/fight-gone-bad,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2599,WEIGHTS FOR WARRIORS,https://wodwell.com/wod/weights-for-warriors,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
2600,BABY WHALE,https://wodwell.com/wod/baby-whale,300.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
2601,BARE COVE TRAVEL WOD 3,https://wodwell.com/wod/bare-cove-travel-wod-3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
2602,BARE COVE TRAVEL WOD 7,https://wodwell.com/wod/bare-cove-travel-wod-7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0


In [26]:
wodsDF[wodsDF['Name'] == '(HOME)WORK #21']

Unnamed: 0,Name,href,run,pull up,push up,air squat,deadlift,clean,jerk,complex,...,chin ups,second pelvic tilt (while holding baby above chest),- russian twists,kettlebell russian swings,second recovery jog,bar carry,spull -ups / banded,– – – 100m,swings,cardio of choice
2304,(HOME)WORK #21,https://wodwell.com/wod/(home)work-#21,0.0,0.0,804020.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0


In [27]:
roundsDF = movementDFNoNull[movementDFNoNull.Movement.str.match('round.*for time')]

In [28]:
roundsDF.reset_index(inplace=True,drop=True)
moves = list(wodsDF)[2:]
removing=True
while removing:
    visited = []
    removing = False
    for ind in range(len(roundsDF)):
        for wod in roundsDF['Workouts'].iloc[ind]:
            if wod in visited:
                roundsDF['Workouts'].iloc[ind].remove(wod)
                removing=True
            else:
                visited.append(wod)

In [29]:
nameRefDF = wodsDF[['Name','href']]
volsDF = wodsDF[moves]
for loc in range(len(roundsDF)):
    numRounds = roundsDF['Volume'].iloc[loc]
    if numRounds > 1:
        inds = nameRefDF.index[nameRefDF['Name'].isin(roundsDF['Workouts'].iloc[loc])]
        volsDF.iloc[inds] *= numRounds
    else:
        print('outside of block')
wodsDF = pd.concat([nameRefDF,volsDF],axis=1)

outside of block


In [30]:
newCols = []
for col in wodsDF.columns:
    tempCol = textClean(col)
    newCols.append(tempCol)
wodsDF.columns = newCols
wodsDF

Unnamed: 0,Name,href,run,pull up,push up,air squat,deadlift,clean,jerk,complex,...,chin ups,second pelvic tilt (while holding baby above chest),russian twists,kettlebell russian swings,second recovery jog,bar carry,spull ups banded,100m,swings,cardio of choice
0,MURPH,https://wodwell.com/wod/murph,1600.0,100.0,200.0,300.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,DT,https://wodwell.com/wod/dt,0.0,0.0,0.0,0.0,60.0,45.0,30.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,CINDY,https://wodwell.com/wod/cindy,0.0,5.0,10.0,15.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,BEAR COMPLEX,https://wodwell.com/wod/bear-complex,0.0,0.0,0.0,0.0,0.0,1.0,0.0,7.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,FIGHT GONE BAD,https://wodwell.com/wod/fight-gone-bad,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2599,WEIGHTS FOR WARRIORS,https://wodwell.com/wod/weights-for-warriors,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2600,BABY WHALE,https://wodwell.com/wod/baby-whale,300.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2601,BARE COVE TRAVEL WOD 3,https://wodwell.com/wod/bare-cove-travel-wod-3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2602,BARE COVE TRAVEL WOD 7,https://wodwell.com/wod/bare-cove-travel-wod-7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [31]:
wodsDF[wodsDF['Name'] == '(HOME)WORK #21']

Unnamed: 0,Name,href,run,pull up,push up,air squat,deadlift,clean,jerk,complex,...,chin ups,second pelvic tilt (while holding baby above chest),russian twists,kettlebell russian swings,second recovery jog,bar carry,spull ups banded,100m,swings,cardio of choice
2304,(HOME)WORK #21,https://wodwell.com/wod/(home)work-#21,0.0,0.0,804020.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [32]:
colsToRemove = ['100m','every for','by lemmon box','second silence','accumulate as fast as possible',\
                'buy out  to relax in the sun and reflect on the day','crossfit games quarterfinals test','16   etc',\
                'jack','queen','every on the for','amcap','random act of kindness','max distance in',\
                'every on the hour in','weight  x 15 10kg dbs','d0 10  right leg +   left leg','of',\
                '(break  wall climb)','every in','e2mom for','for max total weight in','then  in establish',\
                'buy in  second silence','every on the for as long as possible','team','then  seconds of silence',\
                'in teams of','e2mom x','every for as long as possible','rft','buy in  secs of silence','then',\
                '(you only have attempts and you can’t go down in weight)','','cap','amraap in','6  etc','60   etc',\
                '600   etc','20\'s etc','every x','each','from 0 00 15 00  etc','from 20 00 35 00  etc',\
                'from 40 00 55 00  etc','rft  ea  on the','for total distance in',\
                'if bodyweight is over lb  use bodyweight','into','day','then  to establish','seconds of silence',\
                'seconds clapping']
wodsDF.drop(labels=colsToRemove,axis=1,inplace=True)

In [33]:
notUnique = []
searched=[]
for col in wodsDF.columns:
    if col in searched and col not in notUnique:
        notUnique.append(col)
    else:
        searched.append(col)

In [34]:
def mergeDups(df,colName):
    numMerged = 0
    ind = 0
    for bol in df.columns.get_loc(colName):
        if bol and numMerged == 0:
            tempCol = df.iloc[:,ind]
            numMerged+=1
        elif bol:
            tempCol += df.iloc[:,ind]
            numMerged +=1
        ind += 1
    df.drop(colName,axis=1,inplace=True)
    df[colName] = tempCol

In [35]:
for col in notUnique:
    mergeDups(wodsDF,col)

In [36]:
def mergeCols(df,keep,drop):
    if keep == drop:
        return
    df[keep] += df[drop]
    df.drop(drop,axis=1,inplace=True)

In [37]:
t2b = []
for col in wodsDF.columns:
    if (re.search('.*t2b.*',col) or re.search('.*ttb.*',col) or re.search('.*toe.*to.*',col)) and\
    not re.search('.*touch.*',col):
        t2b.append(col)
t2b.append('toes through rings')
for toes in t2b:
    mergeCols(wodsDF,t2b[0],toes)

In [38]:
pullup = []
for col in wodsDF.columns:
    if re.search('.*pull.*up.*',col) and not re.search('.*burpee.*',col):
        pullup.append(col)
for pull in pullup:
    mergeCols(wodsDF,pullup[0],pull)

In [39]:
du = []
for col in wodsDF.columns:
    if col == 'du' or re.search('.*doub.*und.*',col):
        du.append(col)
for dub in du:
    mergeCols(wodsDF,du[0],dub)

In [40]:
carry = []
for col in wodsDF.columns:
    if re.search('.*carry.*',col) or re.search('.*farmer.*',col):
        carry.append(col)
for car in carry:
    mergeCols(wodsDF,'burden carry',car)

In [41]:
kb = []
for col in wodsDF.columns:
    if re.search('.*kb.*swing.*',col) or re.search('.*kettlebell.*swing.*',col):
        kb.append(col)
for bell in kb:
    mergeCols(wodsDF,kb[0],bell)

In [42]:
ohs = []
for col in wodsDF.columns:
    if re.search('.*ohs.*',col) or re.search('.*over.*head.*squat.*',col):
        ohs.append(col)
for oh in ohs:
    mergeCols(wodsDF,ohs[0],oh)

In [43]:
twist = []
for col in wodsDF.columns:
    if re.search('.*twist.*',col):
        twist.append(col)
for tw in twist:
    mergeCols(wodsDF,'russian twists',tw)

In [44]:
g2o = []
for col in wodsDF.columns:
    if re.search('.*ground.*over.*',col):
        g2o.append(col)
for g in g2o:
    mergeCols(wodsDF,g2o[0],g)

In [45]:
overShoulder = []
for col in wodsDF.columns:
    if re.search('.*over.*shoulder.*',col):
        overShoulder.append(col)
for os in overShoulder:
    mergeCols(wodsDF,'d ball over shoulders',os)

In [46]:
bridge = []
for col in wodsDF.columns:
    if re.search('.*glut.*bridge.*',col):
        bridge.append(col)
for br in bridge:
    mergeCols(wodsDF,'glute bridges',br)

In [47]:
s2o = []
for col in wodsDF.columns:
    if re.search('.*stoh.*',col) or re.search('.*shoulder.*over.*head.*',col):
        s2o.append(col)
for s in s2o:
    mergeCols(wodsDF,s2o[0],s)

In [48]:
hspu = []
for col in wodsDF.columns:
    if re.search('.*hspu.*',col) or re.search('.*hand.*stand.*push.*up.*',col):
        hspu.append(col)
for hs in hspu:
    mergeCols(wodsDF,hspu[0],hs)

In [49]:
dl = []
for col in wodsDF.columns:
    if (re.search('.*dl.*',col) or re.search('.*dead.*lift.*',col)) and not (re.search('.*candle.*',col) or re.search('.*high pull.*',col) or col == 'paddle'):
        dl.append(col)
for d in dl:
    mergeCols(wodsDF,dl[0],d)

In [50]:
mergeCols(wodsDF,'pull up','chest to bars')
mergeCols(wodsDF,'push up','pushup')
mergeCols(wodsDF,'shoulder press','strict press')
mergeCols(wodsDF,'double under','du   tc')
mergeCols(wodsDF,'double under','du (2 tc)')
mergeCols(wodsDF,'ground to overhead','gtoh')
mergeCols(wodsDF,'single under','jump ropes')
wodsDF.drop('etc',axis=1,inplace=True)

In [51]:
wodsDF[wodsDF['Name'] == '(HOME)WORK #21']

Unnamed: 0,Name,href,run,pull up,push up,air squat,deadlift,clean,jerk,complex,...,flutter kicks,kb headcutters,kb taters,shoulder taps,arch ups,dumbbell floor presses,a jumps,crab walk,tuck ups,chin ups
2304,(HOME)WORK #21,https://wodwell.com/wod/(home)work-#21,0.0,0.0,804020.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [52]:
newRef = []
for href in wodsDF['href']:
    newRef.append(href.replace('\'',''))
wodsDF['href'] = newRef

In [53]:
views = []
likes = []
missedWod = []
#create the driver and store it in cache
c = webdriver.ChromeOptions()
c.add_argument("--incognito")
c.add_argument('--headless')
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()),options=c)
for href in wodsDF['href']:
    driver.get(href)
    time.sleep(2)
    try:
        views.append(driver.find_element(By.XPATH, '//*[@id="single-wod-wrapper"]/div/div[1]/div[2]/div[2]/div[1]/div[1]').text)
        likes.append(driver.find_element(By.XPATH, '//*[@id="single-wod-wrapper"]/div/div[1]/div[2]/div[2]/div[1]/div[2]').text)
    except:
        views.append(np.nan)
        likes.append(np.nan)
        missedWod.append(href)
driver.close()
print(len(missedWod))




[WDM] - Current google-chrome version is 103.0.5060
[WDM] - Get LATEST chromedriver version for 103.0.5060 google-chrome
[WDM] - Driver [/Users/jake/.wdm/drivers/chromedriver/mac64/103.0.5060.53/chromedriver] found in cache


178


In [54]:
print(len(wodsDF))

2604


In [55]:
wodsDF[wodsDF['href']=='https://wodwell.com/wod/1.-mistle-toad']

Unnamed: 0,Name,href,run,pull up,push up,air squat,deadlift,clean,jerk,complex,...,flutter kicks,kb headcutters,kb taters,shoulder taps,arch ups,dumbbell floor presses,a jumps,crab walk,tuck ups,chin ups
2156,1. MISTLE-TOAD,https://wodwell.com/wod/1.-mistle-toad,0.0,0.0,0.0,20.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [56]:
intViews = []
for view in views:
    if isinstance(view,str):
        if ('K' in view or 'k' in view):
            tempView=view.replace('K','')
            tempView = float(tempView) * 1000
            intViews.append(tempView)
        else:
            intViews.append(float(view))
    elif pd.isna(view):
        intViews.append(view)
    else:
        intViews.append(float(view))
intLikes = []
for like in likes:
    if isinstance(like,str):
        if ('K' in like or 'k' in like):
            tempLike=like.replace('K','')
            tempLike = float(tempLike) * 1000
            intLikes.append(tempLike)
        else:
            intLikes.append(float(like))
    elif pd.isna(like):
        intLikes.append(view)
    else:
        intLikes.append(float(like))

In [57]:
wodsDF['Views'] = intViews
wodsDF['Likes'] = intLikes

In [58]:
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()),options=c)
fixNANS = {}
for name in wodsDF[wodsDF['Views'].isnull()].Name:
    driver.get('https://wodwell.com/wods/?q='+name)
    time.sleep(2)
    links = driver.find_elements(By.XPATH, '//a[@class="wod-filter-item__link"]')
    for itemLink in links:
        if itemLink.find_element(By.TAG_NAME,'h1').text.upper() == name:
            fixNANS[name] = itemLink.get_attribute("href")
driver.close()




[WDM] - Current google-chrome version is 103.0.5060
[WDM] - Get LATEST chromedriver version for 103.0.5060 google-chrome
[WDM] - Driver [/Users/jake/.wdm/drivers/chromedriver/mac64/103.0.5060.53/chromedriver] found in cache


In [59]:
for name in fixNANS.keys():
    index = wodsDF.index[wodsDF['Name'] == name]
    wodsDF['href'].iloc[index] = fixNANS[name]

In [60]:
secViews = []
secLikes = []
missedWod = []
#create the driver and store it in cache
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()),options=c)
for href in wodsDF[wodsDF['Views'].isnull()].href:
    driver.get(href)
    time.sleep(2)
    try:
        secViews.append(driver.find_element(By.XPATH, '//*[@id="single-wod-wrapper"]/div/div[1]/div[2]/div[2]/div[1]/div[1]').text)
        secLikes.append(driver.find_element(By.XPATH, '//*[@id="single-wod-wrapper"]/div/div[1]/div[2]/div[2]/div[1]/div[2]').text)
    except:
        secViews.append(np.nan)
        secLikes.append(np.nan)
        missedWod.append(href)
driver.close()
print(len(missedWod))




[WDM] - Current google-chrome version is 103.0.5060
[WDM] - Get LATEST chromedriver version for 103.0.5060 google-chrome
[WDM] - Driver [/Users/jake/.wdm/drivers/chromedriver/mac64/103.0.5060.53/chromedriver] found in cache


18


In [61]:
missedWod

['https://wodwell.com/wod/emom-100’s-(handstand-push-ups)',
 'https://wodwell.com/wod/nish',
 'https://wodwell.com/wod/#bylerstrong',
 'https://wodwell.com/wod/i’mma-tired',
 'https://wodwell.com/wod/100%',
 'https://wodwell.com/wod/80’s-child',
 'https://wodwell.com/wod/19’s-heroes',
 'https://wodwell.com/wod/andré',
 'https://wodwell.com/wod/gren’s',
 'https://wodwell.com/wod/2×2',
 'https://wodwell.com/wod/50’s-chipper',
 'https://wodwell.com/wod/light-&-heavy',
 'https://wodwell.com/wod/three-two-one',
 'https://wodwell.com/wod/func’y',
 'https://wodwell.com/wod/swim-&-bike',
 'https://wodwell.com/wod/the-repeater',
 'https://wodwell.com/wod/the-sprint',
 'https://wodwell.com/wod/finals']

In [62]:
secIntViews = []
for view in secViews:
    if isinstance(view,str):
        if ('K' in view or 'k' in view):
            tempView=view.replace('K','')
            tempView = float(tempView) * 1000
            secIntViews.append(tempView)
        else:
            secIntViews.append(float(view))
    elif pd.isna(view):
        secIntViews.append(view)
    else:
        secIntViews.append(float(view))
secIntLikes = []
for like in secLikes:
    if isinstance(like,str):
        if ('K' in like or 'k' in like):
            tempLike=like.replace('K','')
            tempLike = float(tempLike) * 1000
            secIntLikes.append(tempLike)
        else:
            secIntLikes.append(float(like))
    elif pd.isna(like):
        secIntLikes.append(view)
    else:
        secIntLikes.append(float(like))

In [63]:
i=0
for name in wodsDF[wodsDF['Views'].isnull()].Name:
    index = wodsDF.index[wodsDF['Name'] == name]
    wodsDF['Views'].iloc[index] = intViews[i]
    wodsDF['Likes'].iloc[index] = intLikes[i]
    i+=1

In [64]:
missingHrefs = ['https://wodwell.com/wod/emom-100s-handstand-push-ups/','https://wodwell.com/wod/the-john-nish-nishimura/','https://wodwell.com/wod/bylerstrong/','https://wodwell.com/wod/imma-tired/','https://wodwell.com/wod/100/','https://wodwell.com/wod/80s-child/','https://wodwell.com/wod/19s-heroes/','https://wodwell.com/wod/andre/','https://wodwell.com/wod/2x2/','https://wodwell.com/wod/50s-chipper/','https://wodwell.com/wod/dxb-finals-17-9/','https://wodwell.com/wod/321-lift/','https://wodwell.com/wod/funcy/','https://wodwell.com/wod/dxb-finals-17-3-17-4/','https://wodwell.com/wod/dxb-finals-17-8/','https://wodwell.com/wod/sab-17-3/','https://wodwell.com/wod/rcc-sanctional-19-9/']
i=0
for name in wodsDF[wodsDF['Views'].isnull()].Name:
    index = wodsDF.index[wodsDF['Name'] == name]
    wodsDF['href'].iloc[index] = missingHrefs[i]
    i+=1



In [65]:
hardcodeViews = []
hardcodeLikes = []
missedWod = []
#create the driver and store it in cache
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()),options=c)
for href in wodsDF[wodsDF['Views'].isnull()].href:
    driver.get(href)
    time.sleep(2)
    try:
        hardcodeViews.append(driver.find_element(By.XPATH, '//*[@id="single-wod-wrapper"]/div/div[1]/div[2]/div[2]/div[1]/div[1]').text)
        hardcodeLikes.append(driver.find_element(By.XPATH, '//*[@id="single-wod-wrapper"]/div/div[1]/div[2]/div[2]/div[1]/div[2]').text)
    except:
        hardcodeViews.append(np.nan)
        hardcodeLikes.append(np.nan)
        missedWod.append(href)
driver.close()
print(len(missedWod))




[WDM] - Current google-chrome version is 103.0.5060
[WDM] - Get LATEST chromedriver version for 103.0.5060 google-chrome
[WDM] - Driver [/Users/jake/.wdm/drivers/chromedriver/mac64/103.0.5060.53/chromedriver] found in cache


0


In [66]:
fixedViews = []
for view in hardcodeViews:
    if isinstance(view,str):
        if ('K' in view or 'k' in view):
            tempView=view.replace('K','')
            tempView = float(tempView) * 1000
            fixedViews.append(tempView)
        else:
            fixedViews.append(float(view))
    elif pd.isna(view):
        fixedViews.append(view)
    else:
        fixedViews.append(float(view))
fixedLikes = []
for like in hardcodeLikes:
    if isinstance(like,str):
        if ('K' in like or 'k' in like):
            tempLike=like.replace('K','')
            tempLike = float(tempLike) * 1000
            fixedLikes.append(tempLike)
        else:
            fixedLikes.append(float(like))
    elif pd.isna(like):
        fixedLikes.append(view)
    else:
        fixedLikes.append(float(like))

In [67]:
i=0
for name in wodsDF[wodsDF['Views'].isnull()].Name:
    index = wodsDF.index[wodsDF['Name'] == name]
    wodsDF['Views'].iloc[index] = intViews[i]
    wodsDF['Likes'].iloc[index] = intLikes[i]
    i+=1

In [68]:
wodsDF[wodsDF['Views'].isnull()]

Unnamed: 0,Name,href,run,pull up,push up,air squat,deadlift,clean,jerk,complex,...,kb taters,shoulder taps,arch ups,dumbbell floor presses,a jumps,crab walk,tuck ups,chin ups,Views,Likes


In [69]:
forTime = []
for loc in range(len(roundsDF)):
    for wod in roundsDF['Workouts'].iloc[loc]:
        if wod not in forTime:
            forTime.append(wod)

In [70]:
forTimeBool = []
for wod in wodsDF['Name']:
    if wod in forTime:
        forTimeBool.append(True)
    else:
        forTimeBool.append(False)

In [71]:
wodsDF['ForTime'] = forTimeBool

In [72]:
emomDF = movementDFNoNull[movementDFNoNull.Movement.str.match('.*e.*mom.*')]

In [73]:
emomDF

Unnamed: 0,Movement,Workouts,Weight,Volume
102,emom for,"[CHELSEA, HEAT WAVE, BARE COVE TRAVEL WOD 46, ...",0,30.0
120,emom for,[MIKKO’S TRIANGLE],0,39.0
251,emom for,"[BKG, ORBISON, MICHAEL KIEFER, DEATH ROW, SAFF...",0,20.0
607,emom for,"[EVIL EMOM, DENNIS O’BERG, FULL BODY #28]",0,14.0
992,emom in,[POWER CINDY],0,30.0
...,...,...,...,...
10311,emom,[DOUBLE DIP],0,10.0
10555,emom,[CURFEW SOLUTION (HOME WORKOUT)],0,30.0
10708,e2:30mom x rounds,[I WANT TO PLAY A GAME],0,10.0
10711,e2:30mom x rounds,[I WANT TO PLAY A GAME],0,6.0


In [74]:
byHand = {}
emomDict = {}
for loc in range(len(emomDF)):
    if re.search('.*emom.*',emomDF['Movement'].iloc[loc]):
        for wod in emomDF['Workouts'].iloc[loc]:
            if wod not in emomDict.keys():
                emomDict[wod] = emomDF['Volume'].iloc[loc]
    else:
        byHand[emomDF['Movement'].iloc[loc]] = emomDF['Workouts'].iloc[loc]
        for wod in emomDF['Workouts'].iloc[loc]:
            if wod not in emomDict.keys():
                emomDict[wod] = emomDF['Volume'].iloc[loc]


In [75]:
for loc in range(len(emomDF)):
    if emomDF['Movement'].iloc[loc] in byHand.keys():
        print("EMOM: "+emomDF['Movement'].iloc[loc] + 'volume: '+str(emomDF['Volume'].iloc[loc]))

EMOM: buy-in: second moment of silence volume: 65.0
EMOM: e3mom (every minutes) volume: 3.0
EMOM: e2mom for volume: 10.0
EMOM: e2mom x rounds volume: 5.0
EMOM: *e3mom single unders volume: 100.0
EMOM: e2mom x volume: 40.0
EMOM: rest seconds (moment of silence) volume: 9.0
EMOM: e2:30mom x rounds volume: 10.0
EMOM: e2:30mom x rounds volume: 6.0


In [76]:
for wod in byHand['buy-in: second moment of silence ']:
    emomDict[wod] = -1
for wod in byHand['e2mom for ']:
    emomDict[wod] = 5.0
for wod in byHand['e2mom x ']:
    emomDict[wod] = 20.0
for wod in byHand['rest seconds (moment of silence) ']:
    emomDict[wod] = -1

In [77]:
emomList = []
for loc in range(len(wodsDF)):
    wod = wodsDF['Name'].iloc[loc]
    forTime = wodsDF['ForTime'].iloc[loc]
    if not forTime and wod in emomDict.keys():
        if emomDict[wod] > 0:
            emomList.append(True)
        else:
            emomList.append(False)
    else:
        emomList.append(False)


In [78]:
wodsDF['EMOM'] = emomList

In [79]:
amrapDF = movementDFNoNull[movementDFNoNull.Movement.str.match('.*amrap.*')]
asManyRoundsDF = movementDFNoNull[movementDFNoNull.Movement.str.match('.*many.*round.*')]

In [80]:
amrapDF

Unnamed: 0,Movement,Workouts,Weight,Volume
8,amrap in,"[CINDY, JACK, NICOLE, INCREDIBLE HULK, MARY, N...",0,20.0
73,five 3-minute amraps in,"[THE CHIEF, GUADALUPE, STRONGBOW, ANTIPOLIS HO...",0,19.0
74,amrap in,"[THE CHIEF, THE END, GUADALUPE, FREEDOM SAUCE,...",0,3.0
147,amrap in,"[OPEN 22.1, BASIC B*TCH BOOTY BLASTER, ANTIPOL...",0,15.0
221,amrap (with a partner) in,"[YOU GO I GO, CUPID SHUFFLE]",0,12.0
...,...,...,...,...
11035,amrap,[SILLY PUTTY ENEMIES],0,25.0
11053,amrap in,[HACKSAW],0,75.0
11104,"in minutes, amrap of:",[211206-MAYHEM],0,4.0
11114,amrap:,[11. LET US ROW LET US ROW LET US ROW],0,15.0


In [81]:
asManyRoundsDF

Unnamed: 0,Movement,Workouts,Weight,Volume
281,as many repetitions as possible in rounds of:,[LYNNE],0,5.0
1994,"if you fall behind the clock, keep going for a...",[HALF CHELSEA],0,15.0
10637,as many rounds as possible in,[WOD2012001],0,20.0


In [82]:
amrapDict = {}
for loc in range(len(amrapDF)):
    for wod in amrapDF['Workouts'].iloc[loc]:
        if wod not in amrapDict.keys():
            amrapDict[wod] = amrapDF['Volume'].iloc[loc]


In [83]:
amrapDict['WOD2012001'] = 20.0

In [84]:
amrapList = []
for loc in range(len(wodsDF)):
    wod = wodsDF['Name'].iloc[loc]
    forTime = wodsDF['ForTime'].iloc[loc]
    emom = wodsDF['EMOM'].iloc[loc]
    if not forTime and not emom and wod in amrapDict.keys():
        amrapList.append(True)
    else:
        amrapList.append(False)


In [85]:
wodsDF['AMRAP'] = amrapList

In [86]:
for loc in range(len(wodsDF)):
    if not wodsDF['ForTime'].iloc[loc] and not wodsDF['EMOM'].iloc[loc] and not wodsDF['AMRAP'].iloc[loc]:
        wodsDF['ForTime'].iloc[loc] = True

In [87]:
wodsDF[wodsDF['Name'] == '(HOME)WORK #21']

Unnamed: 0,Name,href,run,pull up,push up,air squat,deadlift,clean,jerk,complex,...,dumbbell floor presses,a jumps,crab walk,tuck ups,chin ups,Views,Likes,ForTime,EMOM,AMRAP
2304,(HOME)WORK #21,https://wodwell.com/wod/(home)work-#21,0.0,0.0,804020.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,8400.0,72.0,True,False,False


In [88]:
nonMoveDF = wodsDF[['Name','href','Views','Likes','ForTime','EMOM','AMRAP']]
movementColumns = list(wodsDF.columns)
movementColumns = movementColumns[2:-5]
moveDF = wodsDF[movementColumns]

In [89]:
for loc in range(len(wodsDF)):
    wod = nonMoveDF['Name'].iloc[loc]
    if not nonMoveDF['ForTime'].iloc[loc]:
        if nonMoveDF['EMOM'].iloc[loc]:
            moveDF.iloc[loc] *= emomDict[wod]
        else:
            moveDF.iloc[loc] *= amrapDict[wod]//3

In [90]:
wodsDF = pd.concat([nonMoveDF,moveDF],axis=1)

In [91]:
wodsDF[wodsDF['Name'] == '(HOME)WORK #21']

Unnamed: 0,Name,href,Views,Likes,ForTime,EMOM,AMRAP,run,pull up,push up,...,flutter kicks,kb headcutters,kb taters,shoulder taps,arch ups,dumbbell floor presses,a jumps,crab walk,tuck ups,chin ups
2304,(HOME)WORK #21,https://wodwell.com/wod/(home)work-#21,8400.0,72.0,True,False,False,0.0,0.0,804020.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [92]:
wodsDF.to_csv('../data/processed/wodsDF.csv',index=False)