## Load in data and remove extra columns ##

In [1]:
import pandas as pd

from datetime import datetime

# Read the readiness export and keep only the five columns the later cells use.
df = pd.read_csv("WSoccerReadiness.csv")[['Date', 'Last', 'First', 'Score', 'Sleep Hours']]
df.head()

Unnamed: 0,Date,Last,First,Score,Sleep Hours
0,2023-02-20,Allen,Emmie,95.75,8.0
1,2023-02-20,Cheesman,Sydney,70.4,8.0
2,2023-02-20,Colton,Emily,86.25,9.0
3,2023-02-20,Dahlien,Maddie,70.37,9.0
4,2023-02-20,Dellaperuta,Talia,94.0,8.0


## Add rows with week averages ##

In [2]:
dates = df['Date'].unique()
weeks = []
week = []

# Group days into full weeks. Dates are walked in the order they appear in the
# data: a week opens on a Sunday (weekday() == 6), collects every following
# date, and closes on the next Monday (weekday() == 0).
# NOTE(review): this assumes the file is sorted newest-first, so that week[0]
# is the Sunday and week[-1] the Monday of the same week -- confirm.
for day in dates:
    weekday = datetime.strptime(day, "%Y-%m-%d").weekday()
    if weekday == 6:
        # A Sunday always starts a fresh week (any unfinished one is dropped).
        week = [day]
    elif week:
        week.append(day)
    if week and weekday == 0:
        weeks.append(week)
        # Clear the buffer so that, if the next Sunday has no data, later days
        # are not appended to the week that was just stored.
        week = []

# Build one "Team Average" row per full week. Each row gets an index half a
# step below the first row of that week's first date, so sorting the index
# places it directly above that row without relying on how duplicate labels
# would be ordered.
summary_rows = []
summary_positions = []
for week in weeks:
    in_week = df['Date'].isin(week)
    summary_rows.append({
        'Date': f"Week of {week[-1][5:10]} to {week[0][5:10]}",
        'Last': 'Team',
        'First': 'Average',
        'Score': df.loc[in_week, 'Score'].mean(),
        'Sleep Hours': df.loc[in_week, 'Sleep Hours'].mean()
    })
    first_row = df.loc[df['Date'] == week[0]].index[0]
    summary_positions.append(first_row - 0.5)

# Insert all summary rows in a single concat instead of rebuilding the frame
# once per week; skip entirely when no full week was found.
if summary_rows:
    df = (
        pd.concat([df, pd.DataFrame(summary_rows, index=summary_positions)])
        .sort_index()
        .reset_index(drop=True)
    )
df.head(50)
    



Unnamed: 0,Date,Last,First,Score,Sleep Hours
0,2023-02-20,Allen,Emmie,95.75,8.0
1,2023-02-20,Cheesman,Sydney,70.4,8.0
2,2023-02-20,Colton,Emily,86.25,9.0
3,2023-02-20,Dahlien,Maddie,70.37,9.0
4,2023-02-20,Dellaperuta,Talia,94.0,8.0
5,2023-02-20,Dellaperuta,Tori,61.34,10.0
6,2023-02-20,Dellarose,Tessa,89.0,8.5
7,2023-02-20,Elgin,Emerson,88.75,9.0
8,2023-02-20,Faasse,Kate,60.35,7.0
9,2023-02-20,Hayes,Kai,71.52,9.0


## Add conditional formatting and export to Excel ##

In [3]:
# Date conditional formatting
def date(df):
    """Return the one-element style list for a row's 'Date' cell.

    The shade is chosen from the parity of the final character of the date
    text (even -> #D59CFF, odd -> #EFDAFF). Any value whose first character
    is 'W' (the weekly summary rows are labelled "Week of ...") is then
    recoloured #7BAFD4.

    Raises ValueError if the final character is not a digit.
    """
    label = df['Date']
    ends_even = int(label[-1]) % 2 == 0
    shade = 'background-color: #D59CFF' if ends_even else 'background-color: #EFDAFF'

    # Summary rows override the alternating day shade.
    if label[0] == 'W':
        shade = 'background-color: #7BAFD4'
    return [shade]

# Score conditional formatting
def score(df):
    """Return the one-element style list for a row's 'Score' cell.

    Bands: below 60 red, 60 up to 70 orange, 70 up to 85 yellow, 85 and
    above green (#15DE00).

    A missing score (NaN/None) is left unstyled. Without this guard every
    comparison against NaN is False, so a missing value would fall through
    to the final branch and be coloured green.

    df: the row being styled; only its 'Score' entry is read.
    """
    value = df['Score']
    if pd.isna(value):
        return ['']

    if value < 60:
        highlight = 'background-color: red'
    elif value < 70:
        highlight = 'background-color: orange'
    elif value < 85:
        highlight = 'background-color: yellow'
    else:
        highlight = 'background-color: #15DE00'
    return [highlight]

# Sleep conditional formatting
def sleep(df):
    """Return the one-element style list for a row's 'Sleep Hours' cell.

    Bands: below 6 red, 6 up to 8 yellow, 8 and above green (#15DE00).

    A missing value (NaN/None) is left unstyled. Without this guard every
    comparison against NaN is False, so a missing value would fall through
    to the final branch and be coloured green.

    df: the row being styled; only its 'Sleep Hours' entry is read.
    """
    hours = df['Sleep Hours']
    if pd.isna(hours):
        return ['']

    if hours < 6:
        highlight = 'background-color: red'
    elif hours < 8:
        highlight = 'background-color: yellow'
    else:
        highlight = 'background-color: #15DE00'
    return [highlight]

# Build the Styler one step at a time: each apply() colours a single column,
# calling its highlighter once per row (axis=1).
new_df = df.style
new_df = new_df.apply(date, subset=['Date'], axis=1)
new_df = new_df.apply(score, subset=['Score'], axis=1)
new_df = new_df.apply(sleep, subset=['Sleep Hours'], axis=1)

# Write the styled table to an Excel workbook, leaving out the index column.
new_df.to_excel('WSoccer_Readiness.xlsx', index=False)

In [4]:
# Record the Python/IPython versions, machine details and the versions of the
# listed packages for this run.
%load_ext watermark
%watermark -v -m -p pandas,numpy,seaborn,matplotlib,openpyxl

Python implementation: CPython
Python version       : 3.9.6
IPython version      : 7.28.0

pandas    : 1.3.4
numpy     : 1.21.3
seaborn   : 0.11.2
matplotlib: 3.4.3
openpyxl  : 3.0.10

Compiler    : Clang 6.0 (clang-600.0.57)
OS          : Darwin
Release     : 22.2.0
Machine     : x86_64
Processor   : i386
CPU cores   : 10
Architecture: 64bit

