In [1]:
import os
import re
import glob
import json
from pathlib import Path
import pandas as pd

def convert_to_list_of_lists(input_list, chunk_size):
    """Split ``input_list`` into consecutive slices of ``chunk_size`` items.

    The last slice is shorter when the length of ``input_list`` is not an
    exact multiple of ``chunk_size``. An empty input gives an empty list.
    """
    chunks = []
    for start in range(0, len(input_list), chunk_size):
        chunks.append(input_list[start:start + chunk_size])
    return chunks

def natural_sort(l):
    """Return a new list with the strings of ``l`` in natural order.

    Each string is split into runs of ASCII digits and the text between
    them. Digit runs compare by numeric value and text compares
    case-insensitively, so ``'week2'`` sorts before ``'week10'``.
    """
    def _natural_key(value):
        parts = []
        for piece in re.split('([0-9]+)', value):
            parts.append(int(piece) if piece.isdigit() else piece.lower())
        return parts

    return sorted(l, key=_natural_key)

# BEFORE RUNNING, DO THE FOLLOWING:
- Make sure to adjust headers in standings.txt
- Make sure 'vs' and 'Bye' are on separate lines: the parser only recognises
  a bye when a line is exactly 'Bye'. This usually needs fixing in the week
  where the first round of playoffs occurs.
- Delete special characters for medals in standings.txt

In [2]:
# Load the team -> manager lookup from the 'Names' sheet. The weekly and
# standings tables are later joined to it on ['team', 'year'].
df_managers = pd.read_excel('team_names.xlsx', sheet_name='Names')
df_managers

Unnamed: 0,team,manager,year
0,AK shine squad,AK,2014
1,Ashley's Pouncey,Burg,2014
2,Down Set Smut,Ciggy,2014
3,Can I Get Some Reps?,Het,2014
4,WE DEM BOYZ,Kyle,2014
...,...,...,...
107,Sergeant Friendly,Masi,2024
108,AngelLikeMoniBoloni,Moni,2024
109,Auto draft,Shoey,2024
110,Mollywhoppers,Stew,2024


# Weekly stats

In [3]:
def parse_weekly_matchups(filename):
    """Parse one weekly matchup text file into per-team score rows.

    The season year is the name of the file's parent directory. The week is
    the file name without its extension and without its first four
    characters (the ``week`` prefix). Tabs and the optional bracket header
    lines are removed, then the remaining lines are read in blocks of 13,
    one block per matchup.

    Args:
        filename: Path to a weekly file, e.g. ``./seasons/2014/week1.txt``.

    Returns:
        A list of ``[year, week, team, points, estimated points]`` rows with
        every value still a string. A matchup against a bye contributes a
        single row; all other matchups contribute two.

    Raises:
        IndexError: If a non-blank matchup block has fewer than 12 lines.
    """
    print(filename)

    year = Path(filename).parent.name
    week = os.path.splitext(os.path.basename(filename))[0][4:]

    with open(filename) as file_in:
        content = file_in.read().replace('\t', '').split('\n')

    # Remove the bracket headers if they exist (first occurrence only).
    for header in ('Championship Bracket', 'Consolation Bracket'):
        if header in content:
            content.remove(header)

    # A bye matchup is five lines shorter than a regular one. Pad it with
    # dummy lines so every matchup spans 13 lines and the generic chunking
    # helper can be used unchanged. The whole line must equal 'Bye': a
    # substring test would also match team names that contain "Bye" and
    # shift every following block. Walk backwards so earlier indices stay
    # valid while inserting.
    bye_positions = [i for i, line in enumerate(content) if line == 'Bye']
    for index in reversed(bye_positions):
        content[index + 1:index + 1] = ['DUMMY'] * 5

    # Playoff-week files start with an empty line.
    if content and content[0] == '':
        content.pop(0)

    weekly_stats = []
    for matchup in convert_to_list_of_lists(content, chunk_size=13):
        # A trailing newline leaves an all-blank leftover block; skip it
        # instead of failing on the missing fields.
        if not any(matchup):
            continue

        # Positions used within a block: 0 = first team, 4-5 = its points
        # and estimated points, 7-8 = the opponent's points and estimated
        # points (7 is 'Bye' for a bye), 11 = the opponent's name.
        fields = [matchup[0]] + matchup[4:9] + [matchup[11]]

        weekly_stats.append([year, week] + fields[:3])

        opponent = fields[4:]
        if 'Bye' not in opponent:
            # Reorder to (team, points, estimated points) like the first row.
            weekly_stats.append([year, week, opponent[2], opponent[0], opponent[1]])

    return weekly_stats


# Parse every weekly file, visiting seasons and weeks in natural order.
weekly_files = natural_sort(glob.glob('./seasons/*/week*.txt'))

scores = []
for f in weekly_files:
    # The parser derives year and week from the path, so nothing else needs
    # to be computed here.
    scores.extend(parse_weekly_matchups(f))

# The parser returns strings; convert to numeric dtypes in one step.
df = pd.DataFrame(
    scores,
    columns=['year', 'week', 'team', 'points', 'estimated points'],
).astype({
    'year': int,
    'week': int,
    'points': float,
    'estimated points': float,
})

# merge owners
# NOTE: this is an inner join, so a (team, year) pair that is missing from
# df_managers silently drops out of the weekly table.
df_weekly = df.merge(df_managers, on=['team', 'year'])
df_weekly = df_weekly.sort_values(['manager', 'year', 'week'], ascending=[True, True, True], ignore_index=True)
df_weekly

./seasons/2014/week1.txt
./seasons/2014/week2.txt
./seasons/2014/week3.txt
./seasons/2014/week4.txt
./seasons/2014/week5.txt
./seasons/2014/week6.txt
./seasons/2014/week7.txt
./seasons/2014/week8.txt
./seasons/2014/week9.txt
./seasons/2014/week10.txt
./seasons/2014/week11.txt
./seasons/2014/week12.txt
./seasons/2014/week13.txt
./seasons/2014/week14.txt
./seasons/2014/week15.txt
./seasons/2014/week16.txt
./seasons/2015/week1.txt
./seasons/2015/week2.txt
./seasons/2015/week3.txt
./seasons/2015/week4.txt
./seasons/2015/week5.txt
./seasons/2015/week6.txt
./seasons/2015/week7.txt
./seasons/2015/week8.txt
./seasons/2015/week9.txt
./seasons/2015/week10.txt
./seasons/2015/week11.txt
./seasons/2015/week12.txt
./seasons/2015/week13.txt
./seasons/2015/week14.txt
./seasons/2015/week15.txt
./seasons/2015/week16.txt
./seasons/2016/week1.txt
./seasons/2016/week2.txt
./seasons/2016/week3.txt
./seasons/2016/week4.txt
./seasons/2016/week5.txt
./seasons/2016/week6.txt
./seasons/2016/week7.txt
./seasons/2

Unnamed: 0,year,week,team,points,estimated points,manager
0,2015,1,BALLZ DEEP,128.94,103.77,?
1,2015,2,BALLZ DEEP,112.54,107.53,?
2,2015,3,BALLZ DEEP,91.90,98.70,?
3,2015,4,BALLZ DEEP,89.68,110.94,?
4,2015,5,BALLZ DEEP,116.82,92.21,?
...,...,...,...,...,...,...
1803,2024,13,Shmo Stars,111.02,123.88,Zach
1804,2024,14,Shmo Stars,117.42,118.81,Zach
1805,2024,15,Shmo Stars,134.00,124.06,Zach
1806,2024,16,Shmo Stars,148.04,126.63,Zach


In [4]:
def process_standings(filename):
    """Parse one season's ``standings.txt`` into a typed DataFrame.

    The first non-blank line is the header. Every following non-blank line
    describes one team as whitespace-separated tokens: rank, an optional
    ``logo`` token, the team name (one or more tokens), the W-L-T record and
    the remaining columns. An asterisk in the rank marks a team that
    clinched the playoffs. The season year is the name of the file's parent
    directory.

    Args:
        filename: Path to a standings file, e.g.
            ``./seasons/2014/standings.txt``.

    Returns:
        A DataFrame with the columns ``year``, ``rank``, ``team``, ``wins``,
        ``losses``, ``ties``, ``points for``, ``points against``,
        ``clinched playoffs`` (``'Y'``/``'N'``), ``waiver`` and ``moves``.

    Raises:
        ValueError: If the file has no content, a team line has no W-L-T
            token, or a numeric column cannot be converted.
    """
    print(filename)

    year = Path(filename).parent.name

    rows = []
    with open(filename, 'r') as file_in:
        for raw_line in file_in:
            tokens = raw_line.split()
            if not tokens:
                # Blank lines (e.g. a trailing newline) carry no data.
                continue
            if len(tokens) > 1 and tokens[1] == 'logo':
                del tokens[1]
            rows.append(tokens)

    if not rows:
        raise ValueError(f'{filename}: standings file is empty')

    header, team_rows = rows[0], rows[1:]

    # Re-join the two-word header names ('Pts For', 'Pts Agnst') that the
    # whitespace split broke apart, and add the derived playoff column.
    columns = (
        header[0:2]
        + [header[2]]
        + [' '.join(header[3:5])]
        + [' '.join(header[5:7])]
        + header[-3:]
        + ['Clinched Playoffs']
    )

    records = []
    for tokens in team_rows:
        # The team name may contain spaces, so locate the W-L-T token (the
        # last token with at least two hyphens) and treat everything
        # between the rank and that token as the name.
        record_at = 0
        for position, token in enumerate(tokens):
            if token.count('-') >= 2:
                record_at = position
        if record_at == 0:
            raise ValueError(
                f'{filename}: no W-L-T record found in line: {" ".join(tokens)}'
            )

        rank = tokens[0]
        clinched = 'Y' if '*' in rank else 'N'
        records.append(
            [rank.replace('*', ''), ' '.join(tokens[1:record_at])]
            + tokens[record_at:]
            + [clinched]
        )

    df = pd.DataFrame(records, columns=columns)

    df['Year'] = year
    df[['W', 'L', 'T']] = df['W-L-T'].str.split('-', expand=True)
    # A lone '-' means no moves. Replace it with the string '0' so the
    # column stays homogeneous until the numeric conversion below.
    df['Moves'] = df['Moves'].replace('-', '0')

    # Convert all numeric columns before selecting the final subset, so no
    # assignment is ever made on a slice of another frame.
    df = df.astype({
        'Rank': int,
        'W': int,
        'L': int,
        'T': int,
        'Waiver': int,
        'Moves': int,
        'Year': int,
        'Pts For': float,
        'Pts Agnst': float,
    })

    df = df.rename(columns={
        'Rank': 'rank',
        'Team': 'team',
        'Pts For': 'points for',
        'Pts Agnst': 'points against',
        'Streak': 'streak',
        'Waiver': 'waiver',
        'Moves': 'moves',
        'Clinched Playoffs': 'clinched playoffs',
        'Year': 'year',
        'W': 'wins',
        'L': 'losses',
        'T': 'ties'
    })

    # 'streak' and the raw 'W-L-T' column are intentionally left out.
    return df[[
        'year',
        'rank',
        'team',
        'wins',
        'losses',
        'ties',
        'points for',
        'points against',
        'clinched playoffs',
        'waiver',
        'moves'
    ]]

# One standings file per season, visited in natural (chronological) order.
standings_files = natural_sort(glob.glob('./seasons/*/standings.txt'))

frames = [process_standings(path) for path in standings_files]

# Stack all seasons, attach the manager for each (team, year) and order the
# result by season and final rank.
df_standings = (
    pd.concat(frames)
    .merge(df_managers, on=['team', 'year'])
    .sort_values(by=['year', 'rank'], ascending=[True, True])
)
df_standings

./seasons/2014/standings.txt
./seasons/2015/standings.txt
./seasons/2016/standings.txt
./seasons/2017/standings.txt
./seasons/2018/standings.txt
./seasons/2019/standings.txt
./seasons/2020/standings.txt
./seasons/2021/standings.txt
./seasons/2022/standings.txt
./seasons/2023/standings.txt
./seasons/2024/standings.txt


Unnamed: 0,year,rank,team,wins,losses,ties,points for,points against,clinched playoffs,waiver,moves,manager
0,2014,1,Shmo Stars,9,4,0,1739.14,1628.26,Y,9,20,Zach
1,2014,2,Officer Friendly,8,5,0,1726.60,1618.60,Y,10,17,Masi
2,2014,3,Down Set Smut,10,3,0,1870.04,1556.32,Y,4,12,Ciggy
3,2014,4,mollywhoppin,9,4,0,1854.68,1666.06,Y,8,17,Stew
4,2014,5,AK shine squad,7,6,0,1597.66,1612.82,Y,7,22,AK
...,...,...,...,...,...,...,...,...,...,...,...,...
107,2024,6,Natural Athlete,8,6,0,1677.28,1756.82,Y,4,27,Burg
108,2024,7,Head Coach,6,8,0,1709.26,1639.86,N,3,32,Ciggy
109,2024,8,Mollywhoppers,6,8,0,1757.16,1783.24,N,8,29,Stew
110,2024,9,Trash sandwich,6,8,0,1690.86,1731.92,N,1,11,Jake


# Statistics

## Number of seasons in the league

In [5]:
# Each standings row is one season for one manager, so counting rows per
# manager gives the number of seasons played.
df_seasons_played = (
    df_standings['manager']
    .value_counts()
    .rename('total seasons')
    .reset_index()
)
df_seasons_played

Unnamed: 0,manager,total seasons
0,Zach,11
1,Masi,11
2,Ciggy,11
3,Stew,11
4,AK,11
5,Burg,11
6,Marsh,11
7,Moni,11
8,Jake,7
9,Shoey,7


## Average rank

In [6]:
# Mean final rank per manager, rounded to two decimals.
df_average_rank = (
    df_standings.groupby('manager')['rank']
    .mean()
    .round(2)
    .reset_index()
)
df_average_rank

Unnamed: 0,manager,rank
0,?,12.0
1,AK,5.27
2,Burg,5.82
3,Ciggy,7.36
4,Dane,10.0
5,Het,9.25
6,Jake,5.43
7,Kyle,4.0
8,Marsh,4.36
9,Masi,2.91


## Most top 3 finishes

In [7]:
# Count how often each manager finished first, second and third.
df_top_three = (
    df_standings[df_standings['rank'] <= 3]
    .groupby(['manager', 'rank'])
    .size()
    .unstack(fill_value=0)
    # Guarantee that all three place columns exist even when no row in the
    # data has one of these ranks; otherwise the lookups below raise
    # KeyError.
    .reindex(columns=[1, 2, 3], fill_value=0)
    .rename(columns={1: 'first', 2: 'second', 3: 'third'})
    .reset_index()
)
df_top_three['total'] = df_top_three[['first', 'second', 'third']].sum(axis=1)
df_top_three = df_top_three.merge(df_seasons_played, on='manager')

# NOTE(review): 600/300/100 look like the payouts for 1st/2nd/3rd place and
# the trailing 100 like the per-season buy-in -- confirm against the league
# rules.
df_top_three['profit'] = (
    df_top_three['first'] * 600
    + df_top_three['second'] * 300
    + df_top_three['third'] * 100
    - df_top_three['total seasons'] * 100
)
df_top_three

Unnamed: 0,manager,first,second,third,total,total seasons,profit
0,AK,0,4,0,4,11,100
1,Burg,1,0,2,3,11,-300
2,Ciggy,0,0,1,1,11,-1000
3,Jake,2,0,0,2,7,500
4,Kyle,0,1,0,1,3,0
5,Marsh,2,0,3,5,11,400
6,Masi,3,3,2,8,11,1800
7,Moni,2,1,0,3,11,400
8,Shoey,0,1,2,3,7,-200
9,Stew,0,0,1,1,11,-1000


## Most playoff appearances

In [8]:
# Seasons in which each manager clinched the playoffs, as a count and as a
# percentage of the seasons they played.
df_clinched_playoffs = (
    df_standings[df_standings['clinched playoffs'] == 'Y']
    .groupby('manager')
    .size()
    .reset_index(name='count')
    .merge(df_seasons_played, on='manager')
)
df_clinched_playoffs['percentage'] = (
    df_clinched_playoffs['count'] / df_clinched_playoffs['total seasons'] * 100
).round(1)
df_clinched_playoffs

Unnamed: 0,manager,count,total seasons,percentage
0,AK,7,11,63.6
1,Burg,7,11,63.6
2,Ciggy,3,11,27.3
3,Jake,4,7,57.1
4,Kyle,3,3,100.0
5,Marsh,9,11,81.8
6,Masi,10,11,90.9
7,Moni,7,11,63.6
8,Shoey,5,7,71.4
9,Stew,5,11,45.5


## Highest points for season total

In [9]:
# For every manager, pick the standings row with their highest season
# 'points for' and keep only the columns needed for the report.
df_pts_total_season_high = df_standings.loc[
    df_standings.groupby('manager')['points for'].idxmax(),
    ['year', 'rank', 'manager', 'points for'],
]
df_pts_total_season_high

Unnamed: 0,year,rank,manager,points for
21,2015,12,?,1450.74
93,2023,2,AK,1901.92
85,2022,4,Burg,1829.92
2,2014,3,Ciggy,1870.04
31,2016,10,Dane,429.84
20,2015,11,Het,1602.22
52,2019,1,Jake,1720.46
13,2015,4,Kyle,1563.28
44,2018,3,Marsh,1962.32
82,2022,1,Masi,2030.34


## Highest weekly score

In [10]:
# For each manager, idxmax returns the row label of their highest-scoring
# week; .loc then pulls those full rows out of df_weekly.
df_weekly_high_score = df_weekly.loc[df_weekly.groupby('manager')['points'].idxmax()]
df_weekly_high_score

Unnamed: 0,year,week,team,points,estimated points,manager
5,2015,6,BALLZ DEEP,150.14,107.14,?
76,2017,15,Handy Man Kanaan,188.0,130.75,AK
301,2020,16,CeeDeezNutsAsh,184.62,136.57,Burg
382,2014,14,Down Set Smut,198.2,146.36,Ciggy
548,2016,1,Arken rens,134.38,130.42,Dane
580,2015,1,Chew...PILL!,177.94,128.24,Het
657,2019,15,My team,192.02,129.67,Jake
760,2015,5,kyle's Team,161.34,103.92,Kyle
860,2018,11,The Environmentalist,204.22,146.48,Marsh
1013,2016,16,Officer Friendly,208.48,132.15,Masi


## Lowest weekly score

In [11]:
# For each manager, idxmin returns the row label of their lowest-scoring
# week; .loc then pulls those full rows out of df_weekly. Weeks recorded
# with 0.0 points are not filtered out, so they are reported as the minimum.
df_weekly_low_score = df_weekly.loc[df_weekly.groupby('manager')['points'].idxmin()]
df_weekly_low_score

Unnamed: 0,year,week,team,points,estimated points,manager
3,2015,4,BALLZ DEEP,89.68,110.94,?
146,2022,5,Waiver Wire Wieners,63.04,122.46,AK
317,2021,16,Burg’sHugeDitkaInAsh,49.14,97.45,Burg
400,2015,16,Down Set Smut,54.74,91.13,Ciggy
552,2016,5,Arken rens,0.0,0.0,Dane
608,2016,14,SWOLE Team Six,67.4,121.8,Het
711,2023,5,Trash sandwich,61.22,105.13,Jake
752,2014,12,WE DEM BOYZ,62.5,101.29,Kyle
921,2022,7,Your Coach,70.9,128.99,Marsh
971,2014,6,Officer Friendly,68.88,134.19,Masi


# Save files

In [12]:
# Collect every table as a list of row dicts and write them to the JSON
# file at ../src/data/data.json.
final = {
    'weekly': df_weekly.to_dict(orient='records'),
    'standings': df_standings.to_dict(orient='records'),
    'statistics': {
        'average_rank': df_average_rank.to_dict(orient='records'),
        'top_three_finishes': df_top_three.to_dict(orient='records'),
        'playoff_appearances': df_clinched_playoffs.to_dict(orient='records'),
        'highest_season_total': df_pts_total_season_high.to_dict(orient='records'),
        'highest_weekly_total': df_weekly_high_score.to_dict(orient='records'),
        'lowest_weekly_total': df_weekly_low_score.to_dict(orient='records'),
    },
}

with open("../src/data/data.json", "w") as fp:
    json.dump(final, fp, indent=2)

final

{'weekly': [{'year': 2015,
   'week': 1,
   'team': 'BALLZ DEEP',
   'points': 128.94,
   'estimated points': 103.77,
   'manager': '?'},
  {'year': 2015,
   'week': 2,
   'team': 'BALLZ DEEP',
   'points': 112.54,
   'estimated points': 107.53,
   'manager': '?'},
  {'year': 2015,
   'week': 3,
   'team': 'BALLZ DEEP',
   'points': 91.9,
   'estimated points': 98.7,
   'manager': '?'},
  {'year': 2015,
   'week': 4,
   'team': 'BALLZ DEEP',
   'points': 89.68,
   'estimated points': 110.94,
   'manager': '?'},
  {'year': 2015,
   'week': 5,
   'team': 'BALLZ DEEP',
   'points': 116.82,
   'estimated points': 92.21,
   'manager': '?'},
  {'year': 2015,
   'week': 6,
   'team': 'BALLZ DEEP',
   'points': 150.14,
   'estimated points': 107.14,
   'manager': '?'},
  {'year': 2015,
   'week': 7,
   'team': 'BALLZ DEEP',
   'points': 110.4,
   'estimated points': 117.58,
   'manager': '?'},
  {'year': 2015,
   'week': 8,
   'team': 'BALLZ DEEP',
   'points': 101.2,
   'estimated points': 10