# Notebook for exploring and testing the data 

In [20]:
import sqlwrapper
import pandas as pd 
from datetime import datetime 
from dateutil.relativedelta import relativedelta, MO

def connect_to_database(path: str) -> sqlwrapper.SQLConnection: 
    """Establishes a connection with the local database to allow querying of the data  

    Args:
        path (str): the path to the database

    Returns:
        sqlwrapper.SQLConnection: the connection established with the database

    Raises:
        Exception: whatever error sqlwrapper.SQLConnection raised while
            connecting; it is re-raised after "Connection failed" is printed.
    """

    try:
        return sqlwrapper.SQLConnection(path)
    except Exception:
        # Re-raise instead of falling through: the previous version reached
        # `return db` with `db` never assigned, so a failed connection showed
        # up as an UnboundLocalError that hid the real cause.
        print("Connection failed")
        raise

# Location of the local database file holding the players_points table.
DATABASE_PATH = '/Users/seb/Desktop/passion_projects/fantasy_tennis/players_points.db'
db = connect_to_database(DATABASE_PATH)

## Backfilling the database with previous weeks' data

In [6]:
from main import get_html_soup, parse_tags_from_soup

def create_dataframe_of_this_weeks_total_points(player_names: list, player_points: list, week_begin: str) -> pd.DataFrame:
    """Builds a table of player points totals for a single week

    Each scraped name has the player's first name as its last word
    (e.g. "Baez Sebastian"); it is reordered to put the first name first
    ("Sebastian Baez"). A single-word name is kept unchanged.

    Args:
        player_names (list): player names with the first name as the last word
        player_points (list): points totals, matched to player_names by position
        week_begin (str): value written to the week_begin column of every row,
            e.g. '2023-01-30'

    Returns:
        pd.DataFrame: one row per (name, points) pair with the columns
            player_name, player_total_points and week_begin. If the two input
            lists differ in length, the extra items of the longer one are ignored.
    """
    new_player_names = []
    for name in player_names:
        # Split on whitespace and move only the final word to the front. This
        # avoids str.replace, which removed every occurrence of the first name
        # (corrupting names such as "De Alba Al") and duplicated one-word names.
        name_parts = name.split()
        new_player_names.append(" ".join(name_parts[-1:] + name_parts[:-1]))

    # Attach week_begin to each row directly rather than zipping against a
    # fixed 250-element list, which silently dropped every player past the 250th.
    rows = [
        (player_name, total_points, week_begin)
        for player_name, total_points in zip(new_player_names, player_points)
    ]
    df = pd.DataFrame(rows, columns=['player_name', 'player_total_points', 'week_begin'])

    return df


In [15]:
# Backfill one week of points totals: scrape the ranking pages dated
# week_begin, build the weekly table and append it to the database.
week_begin = '2023-01-30'
# Every URL takes its date from week_begin so the scraped snapshot and the
# week label stored in the database cannot drift apart when the date changes.
URLS = {
    "0-50": f"https://www.tennisexplorer.com/ranking/atp-men/?date={week_begin}&t=race",
    "51-100": f"https://www.tennisexplorer.com/ranking/atp-men/?t=race&date={week_begin}&page=2",
    "101-150": f"https://www.tennisexplorer.com/ranking/atp-men/?t=race&date={week_begin}&page=3",
    "151-200": f"https://www.tennisexplorer.com/ranking/atp-men/?t=race&date={week_begin}&page=4",
    "201-250": f"https://www.tennisexplorer.com/ranking/atp-men/?t=race&date={week_begin}&page=5"
}
weeks_names = []
weeks_points = []
for page_url in URLS.values():
    soup = get_html_soup(page_url)
    # The running lists are passed in and the returned lists carried forward,
    # so results accumulate across pages.
    weeks_names = parse_tags_from_soup(soup, 't-name', weeks_names)
    weeks_points = parse_tags_from_soup(soup, 'long-point', weeks_points)
    # Presumably 'Points' is the column header picked up by the 'long-point'
    # lookup on each page - TODO confirm against parse_tags_from_soup.
    weeks_points.remove('Points')
df = create_dataframe_of_this_weeks_total_points(weeks_names, weeks_points, week_begin)
# NOTE(review): re-running this cell calls db.append again for the same week;
# confirm whether that stores duplicate rows.
print(db.append(df))

Successfully appended new points totals


## Querying data from database

In [21]:
# Fetch every stored row for one player from the players_points table.
player_history_query = """
    SELECT * FROM players_points
    WHERE player_name == 'Sebastian Baez'
"""
res = db.select(player_history_query)

print(res)

      player_name  player_total_points  week_begin
0  Sebastian Baez                  260  2023-02-13
1  Sebastian Baez                  260  2023-02-20
2  Sebastian Baez                  350  2023-02-27
3  Sebastian Baez                  440  2023-03-06
