# Notebook for exploring and testing the data 

In [4]:
import sqlwrapper
import pandas as pd 
from datetime import datetime 
from dateutil.relativedelta import relativedelta, MO
import plotly_express as px

def connect_to_database(path: str) -> sqlwrapper.SQLConnection: 
    """Establishes a connection with the local database to allow querying of the data

    Args:
        path (str): the path to the database

    Returns:
        sqlwrapper.SQLConnection: the connection established with the database

    Raises:
        Exception: whatever ``sqlwrapper.SQLConnection`` raised while
            connecting, re-raised unchanged after the failure notice is printed
    """
    try:
        return sqlwrapper.SQLConnection(path)
    except Exception:
        # Re-raise the real error. Falling through to a `return` here would hit
        # an unbound local and mask the cause with an UnboundLocalError.
        print("Connection failed")
        raise

# Shared connection used by the later cells (db.append / db.select).
# NOTE(review): the path is absolute and specific to one machine - adjust it when running elsewhere.
db = connect_to_database('/Users/seb/Desktop/passion_projects/fantasy_tennis/players_points.db')

## Backfilling the database with the previous week's data

In [29]:
from main import get_html_soup, parse_tags_from_soup

def create_dataframe_of_this_weeks_total_points(player_names: list, player_points: list, week_begin: str) -> pd.DataFrame:
    """Builds a dataframe with one row per player for a single week

    Names and points are paired by position. If the two lists differ in
    length, the extra entries of the longer one are ignored.

    Args:
        player_names (list): the player names
        player_points (list): the total points of each player, in the same order as player_names
        week_begin (str): the value stored in the week_begin column of every row

    Returns:
        pd.DataFrame: a dataframe with the columns player_name, player_total_points and week_begin
    """
    # Stamp week_begin onto each row directly. A fixed-length list of 250
    # dates would cap the frame at 250 rows regardless of the input size.
    rows = [
        (name, points, week_begin)
        for name, points in zip(player_names, player_points)
    ]

    return pd.DataFrame(rows, columns=['player_name', 'player_total_points', 'week_begin'])


In [6]:
# Backfill one week of race rankings: scrape the five ranking pages for
# week_begin and append the resulting rows to the database.
week_begin = '2023-01-30'

# The ranking date in every URL comes from week_begin, so the scraped pages
# and the week_begin value stored with them cannot drift apart.
URLS = {
    "0-50": f"https://www.tennisexplorer.com/ranking/atp-men/?date={week_begin}&t=race",
    "51-100": f"https://www.tennisexplorer.com/ranking/atp-men/?t=race&date={week_begin}&page=2",
    "101-150": f"https://www.tennisexplorer.com/ranking/atp-men/?t=race&date={week_begin}&page=3",
    "151-200": f"https://www.tennisexplorer.com/ranking/atp-men/?t=race&date={week_begin}&page=4",
    "201-250": f"https://www.tennisexplorer.com/ranking/atp-men/?t=race&date={week_begin}&page=5",
}

weeks_names = []
weeks_points = []
for url in URLS.values():
    soup = get_html_soup(url)
    # The running lists are passed in and rebound to the returned value, so
    # results accumulate across pages.
    weeks_names = parse_tags_from_soup(soup, 't-name', weeks_names)
    weeks_points = parse_tags_from_soup(soup, 'long-point', weeks_points)
    # Drop the literal 'Points' entry picked up from each page
    # (presumably the column header cell - TODO confirm).
    weeks_points.remove('Points')

df = create_dataframe_of_this_weeks_total_points(weeks_names, weeks_points, week_begin)
print(db.append(df))

New points already uploaded this week


## Setting the top 250 at the end of the Australian Open (AO) to 0 points at the start of the calendar year

In [31]:
# initial_250 = db.select("""SELECT * FROM players_points WHERE week_begin == '2023-01-30'""")
# names = list(initial_250['player_name'])
# points = [0]*250
# week_begin = '2023-01-02'
# df_first = create_dataframe_of_this_weeks_total_points(names, points, week_begin)
# print(db.append(df_first))

['Novak Djokovic', 'Stefanos Tsitsipas', 'Karen Khachanov', 'Tommy Paul', 'Ben Shelton', 'Sebastian Korda', 'Jiri Lehecka', 'Cameron Norrie', 'Taylor Fritz', 'Andrey Rublev', 'Roberto Bautista-Agut', 'Frances Tiafoe', 'Tallon Griekspoor', 'Marton Fucsovics', 'Woo Kwon Soon', 'Yoshihito Nishioka', 'Yosuke Watanuki', 'Richard Gasquet', 'Hubert Hurkacz', 'Leandro Riedi', 'Benjamin Bonzi', 'Alex De Minaur', 'Jannik Sinner', 'Michael Mmoh', 'John Wolf Jeffrey', 'Alexei Popyrin', 'Felix Auger Aliassime', 'Holger Rune', 'Jenson Brooksby', 'Daniil Medvedev', 'Arthur Fils', 'Matteo Berrettini', 'David Goffin', 'Gregoire Barrere', 'Luca Van Assche', 'Aleksandar Vukic', 'Christopher Eubanks', 'Thanasi Kokkinakis', 'Mikael Ymer', 'Jason Kubler', 'Denis Shapovalov', 'Botic Van De Zandschulp', 'Constant Lestienne', 'Oleksii Krutykh', 'Mackenzie McDonald', 'Manuel Cerundolo Juan', 'Tomas Machac', 'Daniel Evans', 'Enzo Couacaud', 'Lloyd Harris', 'Jack Draper', 'Joris De Loore', 'Ferreira Silva Frederi

## Current Top 10 Players

In [39]:
# "Current" means the most recent week_begin stored in the table. Filtering on
# that week explicitly keeps rows from older weeks out of the result when the
# latest week holds fewer than 10 rows.
current_top_10 = db.select("""
    SELECT * FROM players_points
    WHERE week_begin = (SELECT MAX(week_begin) FROM players_points)
    ORDER BY player_total_points DESC
    LIMIT 10
""")

print(current_top_10)

          player_name  player_total_points  week_begin
0      Novak Djokovic                 2430  2023-03-06
1  Stefanos Tsitsipas                 1470  2023-03-06
2     Daniil Medvedev                 1430  2023-03-06
3          Tommy Paul                 1085  2023-03-06
4      Cameron Norrie                 1065  2023-03-06
5        Taylor Fritz                  915  2023-03-06
6      Alex De Minaur                  870  2023-03-06
7     Karen Khachanov                  810  2023-03-06
8       Jannik Sinner                  775  2023-03-06
9       Andrey Rublev                  705  2023-03-06


In [51]:
# Gather the full points history of every player in the current top 10.
# Each player's rows are collected in a list and concatenated once, so no
# hard-coded seed player is needed and the result follows whoever is
# actually in current_top_10.
top_10 = list(current_top_10['player_name'])

player_histories = []
for name in top_10:
    # Double any single quote so a name such as O'Connell does not end the
    # SQL string literal early.
    escaped_name = name.replace("'", "''")
    player_histories.append(db.select(f"""
        SELECT * FROM players_points
        WHERE player_name = '{escaped_name}'
    """))

all_top_10_points = pd.concat(player_histories)


In [54]:
# Line chart of total points per week for the top 10, one coloured line per player
fig = px.line(
    all_top_10_points,
    x='week_begin',
    y='player_total_points',
    color='player_name',
    markers=True,
    labels=dict(
        week_begin='Week Beginning',
        player_total_points='Total Points',
        player_name='Player',
    ),
)
fig