# START

In [5]:
import pickle
from collections import Counter
from datetime import time, datetime

from tqdm import tqdm 

import attr
import pandas as pd

from pprint import pprint

import wrestling
from wrestling.base import Mark

This notebook is for exploring the data stored in the pickle files to uncover trends needed in parsing.

## Events Section

In [None]:
# exploring events
# NOTE: pickle.load can execute arbitrary code; only open pickle files you trust.
with open('all_events.p', mode='rb') as events_file:
    events = pickle.load(events_file)
print(len(events))

In [None]:
# Wrap every raw event record in a wrestling.Event, keeping the input order.
ALLEVENTS = [
    wrestling.Event(name=record['name'], kind=Mark(record['kind']))
    for record in events
]

ALLEVENTS[0]

In [None]:
# Inspect what to_dict() returns for the first event.
ALLEVENTS[0].to_dict()

In [None]:
# Tally events by their `kind`, most frequent first.
Counter(parsed_event.kind for parsed_event in ALLEVENTS).most_common()

In [None]:
# Drop the raw `events` records; ALLEVENTS was already built from them above.
del events

### Events finished!  :)

## Roster Section

In [None]:
# NOTE: pickle.load can execute arbitrary code; only open pickle files you trust.
with open('all_rosters.p', mode='rb') as rosters_file:
    rosters = pickle.load(rosters_file)
print(len(rosters))

In [None]:
# Peek at the first raw roster record to see its fields.
rosters[0]

In [None]:
# Wrap every raw roster record in a wrestling.Wrestler, keeping the input order.
# The team is a fixed placeholder string here; only name and grade come from the record.
ALLWRESTLERS = [
    wrestling.Wrestler(
        name=record['name'],
        team='PLACEHOLDER',
        grade=Mark(record['grade']),
    )
    for record in rosters
]

ALLWRESTLERS[0]

In [None]:
# Inspect what to_dict() returns for the first wrestler.
ALLWRESTLERS[0].to_dict()

In [None]:
# Tally grades, counting only wrestlers whose private `_grade` mark reports isvalid.
Counter(
    wrestler_obj.grade
    for wrestler_obj in ALLWRESTLERS
    if wrestler_obj._grade.isvalid
).most_common()

In [None]:
# Drop the raw `rosters` records; ALLWRESTLERS was already built from them above.
del rosters

### Rosters finished! :)

## Matches Section

In [6]:
# NOTE: pickle.load can execute arbitrary code; only open pickle files you trust.
with open('all_matches.p', mode='rb') as matches_file:
    matches = pickle.load(matches_file)
print(len(matches))

815758


In [9]:
# Tally the first whitespace-separated token of every match's `result` string.
Counter(record['result'].split()[0] for record in matches)

Counter({'Decision': 214926,
         'Tech': 25422,
         'Maj': 57025,
         'Fall': 430649,
         'Forfeit': 73630,
         'Default': 10482,
         'Double': 2635,
         'Disq': 810,
         'Dec': 170,
         'Decision:': 5,
         'default': 1,
         'forfeit': 2,
         'DEFAULT': 1})

In [None]:
# Tally matches by their `level` string, most frequent first.
Counter(record['level'] for record in matches).most_common()

In [17]:
# Peek at one raw match record (index 1000) to see its fields.
matches[1000]

{'id': '1567603412:2019-2020:0:1576851265.755613:1567603412.1838334878321:2019-12-20 14:16:56 +0000',
 'date': '2019-12-20',
 'weight': '132',
 'scoring_events': '1*11*0*T2*Green#2*0*0*R2*Red#2*30*0*P1*Red#2*50*0*N2*Red#5*17*0*R2*Green#5*17*0*P1*Green#5*48*0*P1*Red#6*9*0*E1*Red',
 'winner': 1,
 'result': 'Decision 7-5',
 'red_wrestler': '1567603412.1838334878321#Ivy, Tre#(null)#Ada#(null)',
 'green_wrestler': '(null)#Doe, John#(null)#Niceville#(null)#',
 'level': 'High School Boys'}

### TimeSeries first

In [3]:
# proprietary function
def _clock_component(text):
    """Return ``text`` as an int in 0..59, or 0 when it is not a usable number."""
    if not text.isdigit():
        return 0
    try:
        value = int(text)
    except ValueError:
        # str.isdigit() accepts characters (e.g. superscript digits) that int() rejects.
        return 0
    return value if 0 <= value < 60 else 0


def parse_api_scoring_events(event_string):
    """Parse a raw scoring-events string into a tuple of action tuples.

    ``event_string`` holds actions separated by '#'; each action holds fields
    separated by '*', read positionally as minute, second, period, label, color
    (e.g. ``'1*11*0*T2*Green'``).

    Args:
        event_string: The raw '#'/'*' delimited string.

    Returns:
        A tuple of ``(minute, second, period, label, color)`` tuples, in input
        order, where:
          * minute / second are ints in 0..59, or 0 when the field is not a
            number in that range;
          * period is ``int(field) + 1``, or 0 when the field is blank or not
            an integer;
          * label is the raw fourth field;
          * color is 'red' or 'green'; anything other than 'Red'/'Green'
            falls back to 'green'.

        Empty actions, the '****' placeholder and truncated actions with fewer
        than five fields are skipped instead of raising IndexError.
    """
    parsed = []
    for action in event_string.split('#'):
        fields = action.split('*')
        # Skip blanks, the all-empty placeholder and records too short to index.
        if action == '****' or len(fields) < 5:
            continue
        minute = _clock_component(fields[0])
        second = _clock_component(fields[1])
        try:
            period = int(fields[2]) + 1
        except ValueError:
            # Blank or non-numeric period field.
            period = 0
        label = fields[3]
        color = fields[4].lower() if fields[4] in {'Red', 'Green'} else 'green'
        parsed.append((minute, second, period, label, color))
    return tuple(parsed)

In [None]:
# Try the parser on the scoring-events string of the sample match (index 1000).
parse_api_scoring_events(matches[1000]['scoring_events'])

In [6]:
# Flatten every match's scoring-events string into one list of '*'-split field lists.
MASTER = [
    raw_action.split('*')
    for record in matches
    for raw_action in record['scoring_events'].split('#')
]
len(MASTER)

4823676

In [7]:
# Peek at one split action (index 801) to see the field layout.
MASTER[801]

['5', '52', '2', 'E1', 'Red']

In [9]:
# Tally the raw third field (index 2) over all actions that have exactly five fields.
Counter(
    fields[2]
    for fields in MASTER
    if len(fields) == 5
).most_common()

[('0', 1937167),
 ('1', 1762223),
 ('2', 1072160),
 ('3', 10782),
 ('4', 5957),
 ('5', 4689),
 ('6', 1321),
 ('', 514),
 ('7', 123),
 ('8', 97),
 ('9', 5),
 ('10', 2),
 ('11', 2)]

In [12]:
# Build one scoring object per parsed action, across all matches.
ALLTIMESERIES = []

for match in tqdm(matches):
    # Pick the scoring/label classes from the level prefix. Matches at any other
    # level are skipped: previously they fell through both branches and re-appended
    # the stale `sc` from an earlier match (or raised NameError if none existed yet).
    level = match['level']
    if level.startswith('High School'):
        scoring_cls = wrestling.HSScoring
        label_cls = wrestling.base.HSLabel
    elif level.startswith('College'):
        scoring_cls = wrestling.CollegeScoring
        label_cls = wrestling.base.CollegeLabel
    else:
        continue

    # The focus side is the one whose wrestler string starts with a digit.
    # It is the same for every action of a match, so decide it once per match.
    # `[:1]` (rather than `[0]`) keeps an empty string from raising IndexError.
    # todo: compare to ourWrestlerID instead of 'isdigit'
    if match['red_wrestler'][:1].isdigit():
        focus = 'red'
    elif match['green_wrestler'][:1].isdigit():
        focus = 'green'
    else:
        continue

    for minute, second, period, label, color in parse_api_scoring_events(match['scoring_events']):
        sc = scoring_cls(
            initiator=color,
            focus_color=focus,
            period=period,
            time_stamp=time(minute=minute, second=second),
            label=label_cls(label),
        )
        ALLTIMESERIES.append(sc)

ALLTIMESERIES[0]

100%|██████████| 815758/815758 [00:59<00:00, 13802.93it/s]


In [14]:
# Inspect what to_dict() returns for the first scoring event.
ALLTIMESERIES[0].to_dict()

{'time': '00:20',
 'period': 1,
 'str_label': 'fT2',
 'label': HSLabel(tag='T2', isvalid=True)}

In [15]:
# Drop the exploratory list built above. The original line named
# `ALLTIMESERIESSERIES`, which is never defined and would raise NameError.
del ALLTIMESERIES

### Scoring Events done!
Reuse the code above and add match creation on top of it!

### Matches Section combining everything together

In [4]:
def _focus_color(match):
    """Return 'red' or 'green' for the side whose wrestler string starts with a digit, else None."""
    # todo: compare to ourWrestlerID instead of 'isdigit'
    # `[:1]` (rather than `[0]`) keeps an empty string from raising IndexError.
    if match['red_wrestler'][:1].isdigit():
        return 'red'
    if match['green_wrestler'][:1].isdigit():
        return 'green'
    return None


def _build_time_series(match, focus, scoring_cls, label_cls):
    """Return the match's parsed scoring events as a sorted tuple of `scoring_cls` objects."""
    return tuple(sorted(
        scoring_cls(
            initiator=color,
            focus_color=focus,
            period=period,
            time_stamp=time(minute=minute, second=second),
            label=label_cls(label),
        )
        for minute, second, period, label, color
        in parse_api_scoring_events(match['scoring_events'])
    ))


# Build one match object per raw record. Event, date, result and both wrestlers
# are still hard-coded placeholders; only id, weight and the time series come
# from the record itself.
ALLMATCHES = []

for match in tqdm(matches):
    level = match['level']
    is_high_school = level.startswith('High School')
    # Levels other than High School / College are not handled yet.
    if not is_high_school and not level.startswith('College'):
        continue

    focus = _focus_color(match)
    if focus is None:
        continue

    if is_high_school:
        TS = _build_time_series(
            match, focus, wrestling.HSScoring, wrestling.base.HSLabel
        )
        m = wrestling.HSMatch(
            id=match['id'],
            event=wrestling.Event(
                name='Testing',
                kind=wrestling.base.Mark('Tournament'),
            ),
            date=datetime.now(),  # need date parser here
            result=wrestling.base.Result.WD,  # need result parser here
            focus=wrestling.Wrestler(  # need to link to established roster here
                name='Anthony, Nick',
                team='Eagles',
                grade=wrestling.base.Mark('RS Sr.')
            ),
            opponent=wrestling.Wrestler(
                name='Duong, Michelle',
                team='Hawks',
                grade=wrestling.base.Mark('RS Sr')
            ),
            weight=wrestling.base.Mark(match['weight']),
            time_series=TS,
        )
    else:
        TS = _build_time_series(
            match, focus, wrestling.CollegeScoring, wrestling.base.CollegeLabel
        )
        m = wrestling.CollegeMatch(
            id=match['id'],
            event=wrestling.Event(
                name='Testing',
                kind=wrestling.base.Mark('Dual Meet'),
            ),
            date=datetime.now(),  # need date parser here
            result=wrestling.base.Result.LT,  # need result parser here
            focus=wrestling.Wrestler(  # need to link to established roster here
                name='Vicic, Kingsley',
                team='Eagles',
                grade=wrestling.base.Mark('RS Sr.')
            ),
            opponent=wrestling.Wrestler(
                name='Ironman, Timothy',
                team='Hawks',
                grade=wrestling.base.Mark('RS Sr')
            ),
            weight=wrestling.base.Mark(match['weight']),
            time_series=TS,
        )
    ALLMATCHES.append(m)

ALLMATCHES[1000]

100%|██████████| 815758/815758 [01:39<00:00, 8158.05it/s] 


In [9]:
# Read `isvalid` on the private `_kind` mark of the sample match's placeholder event.
ALLMATCHES[1000].event._kind.isvalid

True

In [11]:
# NOTE(review): in the recorded run this call raised
# AttributeError: 'HSMatch' object has no attribute 'date_'.
# Presumably a bug inside HSMatch.to_dict — TODO confirm and fix in the wrestling package.
ALLMATCHES[1000].to_dict()

AttributeError: 'HSMatch' object has no attribute 'date_'

In [12]:
# With ts_only=True the recorded output is a tuple of per-action dicts
# (time, period, label, focus/opponent names, event name).
ALLMATCHES[1000].to_dict(ts_only=True)

({'time': '00:52',
  'period': 1,
  'str_label': 'fT2',
  'label': HSLabel(tag='T2', isvalid=True),
  'focus_name': 'Anthony, Nick',
  'opp_name': 'Duong, Michelle',
  'event_name': 'Testing'},
 {'time': '01:28',
  'period': 1,
  'str_label': 'oE1',
  'label': HSLabel(tag='E1', isvalid=True),
  'focus_name': 'Anthony, Nick',
  'opp_name': 'Duong, Michelle',
  'event_name': 'Testing'},
 {'time': '01:44',
  'period': 1,
  'str_label': 'fT2',
  'label': HSLabel(tag='T2', isvalid=True),
  'focus_name': 'Anthony, Nick',
  'opp_name': 'Duong, Michelle',
  'event_name': 'Testing'},
 {'time': '01:45',
  'period': 1,
  'str_label': 'fFall',
  'label': HSLabel(tag='Fall', isvalid=True),
  'focus_name': 'Anthony, Nick',
  'opp_name': 'Duong, Michelle',
  'event_name': 'Testing'})

In [14]:
# With results_only=True the recorded output is a dict with 'binary' and 'method' keys.
ALLMATCHES[1000].to_dict(results_only=True)

{'binary': 'Win', 'method': 'Dec'}

In [15]:
# Drop the raw `matches` records; ALLMATCHES was already built from them above.
del matches

### Matches section done!

Now we need to modularize these functions and assemble them, in order, inside the Django project.