In [1]:
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
import pandas as pd
import numpy as np
import os
import psycopg2
from dotenv import load_dotenv

from models import Player, GameStats, KtcValue
from datetime import datetime
import requests
from bs4 import BeautifulSoup
import time
import random
import re
import json
from tqdm.notebook import tqdm

In [2]:
load_dotenv()
DB_USERNAME = os.environ['DB_USERNAME']
DB_PASSWORD = os.environ['DB_PASSWORD']
DB_HOST = os.environ['DB_HOST']
DB_PORT = os.environ['DB_PORT']
DB_NAME = os.environ['DB_NAME']

DB_URL = f"postgresql://{DB_USERNAME}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"
engine = create_engine(DB_URL, echo=False)

Session = sessionmaker(bind=engine)
session = Session()

In [9]:
def fetch_soup(url) -> BeautifulSoup:
    time.sleep(random.uniform(1,5))
    resp = requests.get(url)
    soup = BeautifulSoup(resp.text, 'html.parser')
    return soup

def find_value_data(soup) -> list[dict]:
    pattern = re.compile(r'var playerSuperflex', re.MULTILINE | re.DOTALL)
    
    content = soup.find('script', text=pattern)
    content = str(content)
    bottomHalf = content.split("var playerSuperflex = ",1)[1]
    fullCut = bottomHalf.split('var playerOneQB = ',1)[0].strip()
    json_text= fullCut[:-1]
    js = json.loads(json_text)
    data = js['overallValue']
    return data

def scrape(player: Player):
    url = f'https://keeptradecut.com/dynasty-rankings/players/{player.ktc_id}'

    if player:
        player_soup = fetch_soup(url)
        ktc_value_history = find_value_data(player_soup)
        
        for ktc_value in ktc_value_history:
            date = datetime.strptime(ktc_value['d'], '%Y-%m-%d')
            value = int(ktc_value['v'])
            ktc_value_obj = KtcValue(
                player_id = player.id,
                date = date,
                value = value
            )
            session.merge(ktc_value_obj)
        session.commit()
        print(f'{player.display_name} KTC data commited')
        return
    print(f"{player.display_name} not found.")

In [10]:
ktc_players = session.query(Player).filter(Player.ktc_id.is_not(None)).all()
errors =[]
for player in tqdm(ktc_players, desc='Scraping...'):
    
    try:
        scrape(player)
    except Exception as error:
        print(f"Error with {player.display_name}: {error}")
        errors.append(player)
errors

Scraping...:   0%|          | 0/657 [00:00<?, ?it/s]

  content = soup.find('script', text=pattern)


Joe Burrow KTC data commited
Jalen Hurts KTC data commited
A.T. Perry KTC data commited
Justin Herbert KTC data commited
Justin Jefferson KTC data commited
Bijan Robinson KTC data commited
Adam Trautman KTC data commited
Albert Okwuegbunam KTC data commited
Amari Rodgers KTC data commited
Anthony Schwartz KTC data commited
Anthony Richardson KTC data commited
Bailey Zappe KTC data commited
Benny Snell KTC data commited
Bo Melton KTC data commited
Brenton Strange KTC data commited
Brock Wright KTC data commited
Bryan Edwards KTC data commited
Brycen Hopkins KTC data commited
C.J. Stroud KTC data commited
Jordan Love KTC data commited
Caleb Huntley KTC data commited
Calvin Austin KTC data commited
Cam Akers KTC data commited
Cameron Brate KTC data commited
Cedrick Wilson KTC data commited
Charlie Jones KTC data commited
Chase Claypool KTC data commited
Chuba Hubbard KTC data commited
Cole Turner KTC data commited
Craig Reynolds KTC data commited
Puka Nacua KTC data commited
D'Andre Swift

[<models.Player at 0x1b75cf5a020>, <models.Player at 0x1b75cfa2200>]