In [8]:
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
import pandas as pd
import numpy as np
import os
import psycopg2
from dotenv import load_dotenv

from models import Player, GameStats, KtcValue
from datetime import datetime
import requests
from bs4 import BeautifulSoup
import time
import random
import re
import json

In [9]:
# Populate os.environ via python-dotenv before the settings are read.
load_dotenv()

# Indexing os.environ (rather than .get) makes a missing setting fail right here.
DB_USERNAME, DB_PASSWORD, DB_HOST, DB_PORT, DB_NAME = (
    os.environ[var_name]
    for var_name in ('DB_USERNAME', 'DB_PASSWORD', 'DB_HOST', 'DB_PORT', 'DB_NAME')
)

# NOTE(review): the credentials are interpolated verbatim; a password containing
# URL-reserved characters (e.g. '@' or '/') presumably needs escaping first - confirm.
DB_URL = f"postgresql://{DB_USERNAME}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"

# A single engine and a single session, used by the cells below.
engine = create_engine(DB_URL, echo=False)
Session = sessionmaker(bind=engine)
session = Session()

In [10]:
def fetch_soup(url, timeout: float = 30) -> BeautifulSoup:
    """Download ``url`` and return its body parsed with BeautifulSoup.

    A random pause of 1-5 seconds precedes every request so that consecutive
    calls are spaced out.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait on the server before giving up.

    Returns:
        The response text parsed with the ``'html.parser'`` backend.

    Raises:
        requests.exceptions.RequestException: On a connection failure, a
            timeout, or a 4xx/5xx response status.
    """
    time.sleep(random.uniform(1, 5))
    # Without an explicit timeout, requests may wait forever on a stalled connection.
    resp = requests.get(url, timeout=timeout)
    # Fail here on an HTTP error instead of passing an error page on to the parsers.
    resp.raise_for_status()
    return BeautifulSoup(resp.text, 'html.parser')

In [11]:
def get_player_page(page_n: str) -> BeautifulSoup:
    """Fetch one page of the KeepTradeCut dynasty rankings.

    Args:
        page_n: Page index, interpolated into the ``page`` query parameter.

    Returns:
        The parsed rankings page, as returned by ``fetch_soup``.
    """
    return fetch_soup(
        f'https://keeptradecut.com/dynasty-rankings?page={page_n}&filters=QB|WR|RB|TE|RDP&format=2'
    )


# Walk ranking pages 0-9 and keep the href of the first link inside every
# 'onePlayer' div.
player_urls = []
for page in range(10):
    player_cards = get_player_page(str(page)).find_all('div', {'class': 'onePlayer'})
    player_urls.extend(card.find('a')['href'] for card in player_cards)


In [12]:
# KTC ids whose derived name did not match exactly one Player row.
not_found = []

for url in player_urls:
    # The id is the last path segment of the URL.
    player_id = url.rsplit('/', 1)[-1]
    # Drop the part after the final '-' and title-case the remaining words.
    slug_words = player_id.rsplit('-', 1)[0].split('-')
    player_name = ' '.join(slug_words).title()

    matches = session.query(Player).filter_by(display_name=player_name).all()
    if len(matches) != 1:
        # Zero or several candidates: record the id instead of guessing.
        not_found.append(player_id)
        continue

    matches[0].ktc_id = player_id
    session.commit()


In [17]:
def find_value_data(soup) -> list[dict]:
    """Extract the superflex value history embedded in a player page.

    The page defines ``var playerSuperflex = {...};`` inside an inline
    ``<script>``. This locates that script, parses the object that follows the
    assignment as JSON and returns its ``overallValue`` entry.

    Args:
        soup: Parsed player page; only its BeautifulSoup-style ``find`` method
            is used.

    Returns:
        The value stored under the ``overallValue`` key of the parsed object.

    Raises:
        IndexError: If no script contains the ``playerSuperflex`` assignment
            (same exception type the previous split-based lookup produced).
        json.JSONDecodeError: If the assigned object is not valid JSON.
        KeyError: If the parsed object has no ``overallValue`` key.
    """
    marker = 'var playerSuperflex = '

    # `string=` is the current name of BeautifulSoup's deprecated `text=` filter.
    script = soup.find('script', string=re.compile(r'var playerSuperflex'))
    script_text = str(script)
    if script is None or marker not in script_text:
        raise IndexError('no <script> defining "var playerSuperflex" was found in the page')

    after_marker = script_text.split(marker, 1)[1].lstrip()
    # raw_decode parses the first JSON value and ignores whatever follows it
    # (the trailing ';' and later variables), so no manual trimming is needed.
    payload, _end = json.JSONDecoder().raw_decode(after_marker)
    return payload['overallValue']

def scrape(player_url: str) -> None:
    """Fetch one player's KeepTradeCut page and store its value history.

    The player is looked up by ``ktc_id``, taken from the last path segment of
    ``player_url``; a URL with no matching Player row is skipped without any
    request being made. Every history entry is merged as a ``KtcValue`` row and
    all of them are committed together.

    Args:
        player_url: Path appended to ``'https://keeptradecut.com/'`` to form
            the page address.

    Raises:
        Exception: Anything raised while fetching, parsing or writing is
            propagated. If it happens while rows are being merged or
            committed, the session is rolled back first.
    """
    player_id = player_url.rsplit('/', 1)[-1]
    player = session.query(Player).filter_by(ktc_id=player_id).first()
    if not player:
        return

    player_soup = fetch_soup('https://keeptradecut.com/' + player_url)
    ktc_value_history = find_value_data(player_soup)

    try:
        for ktc_value in ktc_value_history:
            session.merge(
                KtcValue(
                    player_id=player.id,
                    date=datetime.strptime(ktc_value['d'], '%Y-%m-%d'),
                    value=int(ktc_value['v']),
                )
            )
        session.commit()
    except Exception:
        # Discard this player's partially merged rows so the shared session stays
        # usable and a later commit cannot persist an incomplete history.
        session.rollback()
        raise

    print(f'{player.display_name} KTC data commited')


# Scrape and store the value history for every collected URL, in listing order.
for url in player_urls:
    scrape(url)

  content = soup.find('script', text=pattern)


Patrick Mahomes KTC data commited
Josh Allen KTC data commited
C.J. Stroud KTC data commited
Justin Jefferson KTC data commited
Ja'Marr Chase KTC data commited
CeeDee Lamb KTC data commited
Lamar Jackson KTC data commited
Jalen Hurts KTC data commited
Amon-Ra St. Brown KTC data commited
Joe Burrow KTC data commited
Justin Herbert KTC data commited
Bijan Robinson KTC data commited
Anthony Richardson KTC data commited
Jordan Love KTC data commited
Puka Nacua KTC data commited
Breece Hall KTC data commited
Jahmyr Gibbs KTC data commited
Garrett Wilson KTC data commited
Sam LaPorta KTC data commited
Kyler Murray KTC data commited
A.J. Brown KTC data commited
Christian McCaffrey KTC data commited
Trevor Lawrence KTC data commited
Chris Olave KTC data commited
