In [3]:
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
import pandas as pd
import numpy as np
import os
import psycopg2
from dotenv import load_dotenv

from models import Player, GameStats, KtcValue
from datetime import datetime
import requests
from bs4 import BeautifulSoup
import time
import random
import re
import json


In [4]:
# Load environment variables (python-dotenv; presumably from a local .env file),
# then read the database connection settings. A missing key raises KeyError.
load_dotenv()
DB_USERNAME, DB_PASSWORD, DB_HOST, DB_PORT, DB_NAME = (
    os.environ[setting]
    for setting in ('DB_USERNAME', 'DB_PASSWORD', 'DB_HOST', 'DB_PORT', 'DB_NAME')
)

# Connection URL for the PostgreSQL database assembled from the settings above.
DB_URL = f"postgresql://{DB_USERNAME}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"

# Engine with SQL statement echoing switched off.
engine = create_engine(DB_URL, echo=False)

# Session factory bound to the engine, and the single session object that the
# functions defined later in this notebook use as a global.
Session = sessionmaker(bind=engine)
session = Session()

In [6]:
def fetch_soup(url, timeout: float = 30) -> BeautifulSoup:
    """Download ``url`` and return its body parsed with ``html.parser``.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        A ``BeautifulSoup`` tree built from the response text.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: For connection errors and timeouts.
    """
    # Random 1-5 second pause before every request (presumably to space out
    # successive calls to the same site).
    time.sleep(random.uniform(1, 5))

    resp = requests.get(url, timeout=timeout)
    # Fail here on an error status instead of parsing an error page and
    # failing later with an unrelated parsing exception.
    resp.raise_for_status()
    return BeautifulSoup(resp.text, 'html.parser')

def find_value_data(soup) -> list[dict]:
    """Extract the ``overallValue`` list from the page's ``playerSuperflex`` script.

    Looks for the ``<script>`` tag whose content contains
    ``var playerSuperflex``, decodes the JSON object assigned to that
    variable and returns the value stored under its ``'overallValue'`` key.

    Args:
        soup: Parsed page; only ``soup.find('script', string=...)`` is used.

    Returns:
        The value of ``'overallValue'`` in the decoded object.

    Raises:
        ValueError: If no matching script/assignment is found, or the
            assigned value is not valid JSON (``json.JSONDecodeError`` is a
            ``ValueError`` subclass).
        KeyError: If the decoded object has no ``'overallValue'`` key.
    """
    marker = 'var playerSuperflex = '
    pattern = re.compile(r'var playerSuperflex')

    # `string=` is the current name of Beautiful Soup's deprecated `text=`
    # keyword (the old spelling emits a DeprecationWarning).
    script = soup.find('script', string=pattern)
    if script is None:
        raise ValueError("no <script> tag containing 'var playerSuperflex' was found")

    _, found, remainder = str(script).partition(marker)
    if not found:
        raise ValueError(f"assignment {marker!r} not found in the matched script")

    # raw_decode parses exactly one JSON value from the start of the text and
    # ignores whatever follows (the trailing ';', later variables, '</script>'),
    # so the result does not depend on which statement comes next.
    payload, _ = json.JSONDecoder().raw_decode(remainder.lstrip())
    return payload['overallValue']

def scrape(player_id: int, url):
    """Fetch a player's KTC value history from ``url`` and store it.

    Does nothing when no ``Player`` row with ``player_id`` exists. Otherwise
    every history entry is merged into the global ``session`` as a
    ``KtcValue`` (date parsed from entry ``'d'`` as ``%Y-%m-%d``, value from
    entry ``'v'`` converted to ``int``) and the batch is committed.

    Args:
        player_id: Primary key of the ``Player`` to update.
        url: Page to download the value history from.

    Raises:
        Exception: Anything raised while fetching, parsing or saving is
            propagated. If saving fails the session is rolled back first.
    """
    # Session.get is the 1.4+/2.0 replacement for the legacy Query.get.
    player = session.get(Player, player_id)
    if player is None:
        return

    player_soup = fetch_soup(url)
    ktc_value_history = find_value_data(player_soup)

    try:
        for ktc_value in ktc_value_history:
            session.merge(
                KtcValue(
                    player_id=player.id,
                    date=datetime.strptime(ktc_value['d'], '%Y-%m-%d'),
                    value=int(ktc_value['v']),
                )
            )
        session.commit()
    except Exception:
        # The session is shared by every call; without a rollback a failed
        # flush/commit leaves it unusable for all later players.
        session.rollback()
        raise

    print(f'{player.display_name} KTC data commited')

In [7]:
# All players that have a KTC id stored.
ktc_players = session.query(Player).filter(Player.ktc_id.is_not(None)).all()

# Players whose scrape failed; shown as the cell's result.
errors = []
for player in ktc_players:
    url = f'https://keeptradecut.com/dynasty-rankings/players/{player.ktc_id}'
    try:
        scrape(player.id, url)
    except Exception as exc:
        # `except Exception` instead of a bare `except` so that interrupting
        # the kernel (KeyboardInterrupt) still stops the run.
        # Reset the shared session so one failure cannot poison the next player.
        session.rollback()
        # Include the cause; the bare player name alone made failures undiagnosable.
        print(f"Error with {player.display_name}: {exc!r}")
        errors.append(player)
errors

  content = soup.find('script', text=pattern)


Darrius Heyward-Bey KTC data commited
Mike Iupati KTC data commited
Brandon Williams KTC data commited
Johnthan Banks KTC data commited
Jack Doyle KTC data commited
Dezmen Southward KTC data commited
E.J. Bibbs KTC data commited
Xavier Williams KTC data commited
Andrew Billings KTC data commited
Brian Price KTC data commited
Devante Mays KTC data commited
Kyle Kalis KTC data commited
Maurquice Shakir KTC data commited
Andrew Lauderdale KTC data commited
Pasoni Tasini KTC data commited
Greg Joseph KTC data commited
Jeff Badet KTC data commited
D'Juan Hines KTC data commited
De'Lance Turner KTC data commited
Will House KTC data commited
Mason Gentry KTC data commited
Ryan Santoso KTC data commited
Carl Bradford KTC data commited
Deneric Prince KTC data commited
Steve Longa KTC data commited
Matt Jones KTC data commited
Christian Kirksey KTC data commited
Nick Keizer KTC data commited
Ralph Webb KTC data commited
Nelson Agholor KTC data commited
Kevin Byard KTC data commited
Trey Edmunds 

[]