In [1]:
import glob
from pathlib import Path
import pandas as pd

In [2]:
# Collect every season's standings file; sorting keeps the processing order
# deterministic regardless of how the filesystem lists the directory.
files = glob.glob('seasons/*.txt')
files.sort()
files

['seasons/2014.txt',
 'seasons/2015.txt',
 'seasons/2016.txt',
 'seasons/2017.txt',
 'seasons/2018.txt',
 'seasons/2019.txt',
 'seasons/2020.txt',
 'seasons/2021.txt',
 'seasons/2022.txt']

In [3]:
def _tokenize_standings(filename):
    """Return one whitespace-split token list per non-blank line of ``filename``.

    A literal ``'logo'`` token in the second position of a line is dropped.
    """
    rows = []
    with open(filename, 'r') as file_in:
        for raw_line in file_in:
            tokens = raw_line.split()
            if not tokens:
                # Blank lines (e.g. a trailing newline) carry no data.
                continue
            if len(tokens) > 1 and tokens[1] == 'logo':
                del tokens[1]
            rows.append(tokens)
    return rows


def _build_header(tokens):
    """Rebuild the header row, re-joining the two-word column names split by whitespace."""
    return (
        tokens[0:2]
        + [tokens[2]]
        + [' '.join(tokens[3:5])]
        + [' '.join(tokens[5:7])]
        + tokens[-3:]
        + ['Clinched Playoffs']
    )


def _build_team_row(tokens):
    """Collapse a data row's multi-word team name and append the playoff flag.

    The team name is everything between the rank token and the record token,
    where the record is the last token containing at least two hyphens.

    Raises:
        ValueError: if no such record token exists after the rank token.
    """
    record_at = next(
        (j for j in range(len(tokens) - 1, 0, -1) if tokens[j].count('-') >= 2),
        None,
    )
    if record_at is None:
        raise ValueError(
            f"no W-L-T record found in standings row: {' '.join(tokens)!r}"
        )

    rank = tokens[0]
    # A '*' on the rank token marks the row as having clinched.
    clinched = 'Y' if '*' in rank else 'N'
    team = ' '.join(tokens[1:record_at])
    return [rank.replace('*', ''), team] + tokens[record_at:] + [clinched]


def process_data(filename):
    """Parse one season's standings text file into a typed DataFrame.

    The first non-blank line is treated as the header; every later non-blank
    line is a team row made of a rank token (optionally marked with ``*``),
    an optional ``logo`` token, the team name (one or more words), a
    hyphen-separated W-L-T record and the remaining columns.

    Args:
        filename: Path to the standings file. The file stem must be the
            season year (e.g. ``seasons/2022.txt``).

    Returns:
        A DataFrame with the header's columns minus ``W-L-T``, plus
        ``Clinched Playoffs`` ('Y'/'N'), ``Year``, ``W``, ``L`` and ``T``.
        ``Rank``, ``W``, ``L``, ``T``, ``Waiver`` and ``Moves`` are integers;
        ``Pts For`` and ``Pts Agnst`` are floats.

    Raises:
        ValueError: if the file has no non-blank lines, if a team row has no
            W-L-T record, or if the file stem is not an integer.
    """
    token_rows = _tokenize_standings(filename)
    if not token_rows:
        raise ValueError(f"standings file is empty: {filename}")

    header = _build_header(token_rows[0])
    records = [_build_team_row(tokens) for tokens in token_rows[1:]]

    df = pd.DataFrame(records, columns=header)

    df['Year'] = int(Path(filename).stem)
    df[['W', 'L', 'T']] = df['W-L-T'].str.split('-', expand=True)
    # A bare '-' in Moves means "none"; keep it a string so the column stays
    # homogeneous until the integer cast below.
    df['Moves'] = df['Moves'].replace('-', '0')

    df = df.drop(columns=['W-L-T'])

    int_columns = ['Rank', 'W', 'L', 'T', 'Waiver', 'Moves']
    float_columns = ['Pts For', 'Pts Agnst']
    dtypes = {name: int for name in int_columns}
    dtypes.update({name: float for name in float_columns})
    return df.astype(dtypes)

# Parse each season file into its own frame, then stack them into one table
# ordered by season (newest first) and by rank within each season.
frames = [process_data(filename) for filename in files]

df = pd.concat(frames).sort_values(by=['Year', 'Rank'], ascending=[False, True])
df

Unnamed: 0,Rank,Team,Pts For,Pts Agnst,Streak,Waiver,Moves,Clinched Playoffs,Year,W,L,T
0,1,Officer Friendly,2030.34,1678.48,W-1,9,17,Y,2022,11,3,0
1,2,Highlands Hooligan,1830.16,1650.04,W-2,10,19,Y,2022,10,4,0
2,3,Your Coach,1848.00,1708.72,L-3,4,18,Y,2022,7,7,0
3,4,Natural Athlete,1829.92,1800.84,W-2,7,25,Y,2022,8,6,0
4,5,Head Coach,1767.10,1704.20,L-1,3,41,Y,2022,8,6,0
...,...,...,...,...,...,...,...,...,...,...,...,...
5,6,WE DEM BOYZ,1521.54,1590.54,L-4,5,25,Y,2014,6,7,0
6,7,Ashley's Pouncey,1604.58,1684.96,L-2,1,11,N,2014,5,8,0
7,8,More Than You,1478.38,1749.38,L-7,2,12,N,2014,2,11,0
8,9,Can I Get Some Reps?,1508.00,1649.48,W-3,3,16,N,2014,4,9,0


In [4]:
# Load the Team / Manager / Year lookup and order it by season (newest first),
# then by manager within each season.
names = (
    pd.read_excel('team_names.xlsx')
    .sort_values(['Year', 'Manager'], ascending=[False, True])
)
names


Unnamed: 0,Team,Manager,Year
89,Waiver Wire Weiners,AK,2022
85,Natural Athlete,Burg,2022
86,Head Coach,Ciggy,2022
87,Trash sandwich,Jake,2022
84,Your Coach,Marsh,2022
...,...,...,...
37,More Than You,Marsh,2014
45,Officer Friendly,Masi,2014
53,Shine Squad,Moni,2014
66,mollywhoppin,Stew,2014


In [5]:
# Output layout: the keys give the column order to keep, the values give the
# exported column names.
column_labels = {
    'Year': 'YEAR',
    'Rank': 'RANK',
    'Team': 'TEAM',
    'Manager': 'MANAGER',
    'W': 'W',
    'L': 'L',
    'T': 'T',
    'Clinched Playoffs': 'CLINCHED_PLAYOFFS',
    'Pts For': 'PTS_FOR',
    'Pts Agnst': 'PTS_AGNST',
    'Waiver': 'WAIVER',
    'Moves': 'MOVES',
    'Streak': 'STREAK',
}

# Attach each team's manager by (Team, Year). merge defaults to an inner join,
# so standings rows with no matching entry in `names` are dropped.
merged = df.merge(names, on=['Team', 'Year'])
df = merged[list(column_labels)].rename(columns=column_labels)

In [6]:
# Write the final table to an Excel workbook and to a JSON records file.
# The context manager saves and closes the workbook even if to_excel raises,
# so no half-written file handle is left open.
with pd.ExcelWriter('data.xlsx', engine='openpyxl') as writer:
    df.to_excel(writer, index=False)

df.to_json('../src/data/historical_data.json', orient='records', indent=2)