In [1]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load the pretrained model and tokenizer
model_name = 'gpt2'
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

# Generate a response
def generate_response(user_input):
    # Tokenize user input
    input_ids = tokenizer.encode(user_input, return_tensors='pt')

    # Generate response using the model
    output = model.generate(input_ids, max_length=100, num_return_sequences=1, no_repeat_ngram_size=2)

    # Decode and return the response
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    return response

# Test the chatbot
while True:
    user_input = input("User: ")
    if user_input.lower() == "exit":
        print("Chatbot: Goodbye!")
        break
    response = generate_response(user_input)
    print("Chatbot:", response)


ModuleNotFoundError: No module named 'torch'

In [15]:
# Scraping data with Selenium
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from datetime import datetime
from bs4 import BeautifulSoup
from selenium.webdriver.chrome.options import Options
import requests

In [None]:
NFL_TEAMS = [
    "azcardinals",
    "atlantafalcons",
    "baltimoreravens",
    "buffalobills",
    "panthers",
    "chicagobears",
    "bengals",
    "clevelandbrowns",
    "dallascowboys",
    "denverbroncos",
    "detroitlions",
    "packers",
    "houstontexans",
    "colts",
    "jaguars",
    "chiefs",
    "raiders",
    "chargers",
    "therams",
    "miamidolphins",
    "vikings",
    "patriots",
    "neworleanssaints",
    "giants",
    "newyorkjets",
    "philadelphiaeagles",
    "steelers",
    "49ers",
    "seahawks",
    "buccaneers",
    "tennesseetitans",
    "commanders"
]


In [2]:
def new_driver():
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    driver = webdriver.Chrome(options=chrome_options)
    return driver

In [3]:
def collect_nfl_team_stats(start_year):
    '''
    Collecting yearly NFL team stats, individual payer totals

    Args:
        driver (selenium)
        start_year (int)
    
    Returns:
        team_stats (dict)
    '''
    driver = new_driver()
    team_stats = {}
    for team in NFL_TEAMS:
        print(team)
        for year in range(start_year, datetime.now().year):
            try:
                driver.get(f'https://www.{team}.com/team/stats/{year}/REG')
            except WebDriverException:
                print("Error occurred. Creating a new driver instance...")
                driver.quit()
                driver = new_driver()
                driver.get(f'https://www.{team}.com/team/stats/{year}/REG')
            team_stats[team + ' ' + str(year)] = collect_team(driver)

In [4]:
def scrape_team(soup):
    # scraping stat names
    html = soup.find_all('ul', class_='nfl-o-team-h2h-stats__list')
    if len(html) > 0:
        html = html[0]
    else:
        return {}
    labels = []
    rel_tags = ['nfl-o-team-h2h-stats__label--full',  'nfl-o-team-h2h-stats__label--first-child nfl-o-team-h2h-stats__label--child',
    'nfl-o-team-h2h-stats__label--child', 'nfl-o-team-h2h-stats__label--last-child nfl-o-team-h2h-stats__label--child']
    # dict describing how many children stats a descriptor has
    descriptors = {'FIRST DOWNS': 3, 'OFFENSE': 2, 'RUSHING': 2, 'PASSING': 4, 'TDs': 4}
    for tag in html.find_all('span'):
        tag_classes = tag.get('class')
        if not tag_classes or any(cls in rel_tags for cls in tag_classes):
            text = tag.get_text(strip=True)
            if not is_number(text):
                labels.append(text)
    # organizing stat names to be keys
    labels[28] = 'TDs'
    new_labels = []
    descriptor_counter = 0
    descriptor = ''
    sub_list = []
    for label in labels:
        if label in ['Completions', 'Attempts', 'Interceptions', 'Average']:
            continue
        elif label in descriptors.keys() and descriptor_counter == 0:
            descriptor_counter += descriptors[label]
            descriptor = label
        elif descriptor_counter == 0:
            descriptor_list = []
            new_labels.extend(with_opponent(label))
        else:
            new_labels.append(descriptor + ' ' + label)
            sub_list.append(label)
            descriptor_counter -= 1
            if descriptor_counter == 0:
                for sub in sub_list:
                    new_labels.append(descriptor + ' ' + sub + ' Opponent')
                sub_list = []

    # getting the values of the stats   
    # Find the value and label elements within each element
    value_elements = html.find_all('div', class_='nfl-o-team-h2h-stats__value')
    # Extract and print the values and labels
    values = []
    for value_element in value_elements:
        if value_element.span:
            # Extract each individual value from the <span> tags
            span_values = [span.get_text(strip=True) for span in value_element.find_all('span')]
            values.extend(span_values)
        else:
            # If there are no <span> tags, extract the text directly
            value_text = value_element.get_text(strip=True)
            values.append(value_text)

    stats_dict = dict(zip(new_labels, values))
    return stats_dict

In [5]:
def scrape_player(soup):
    player_stats = soup.find_all('div', class_='nfl-o-teamstats')
    passing = player_stats[0]
    rushing = player_stats[1]
    receiving = player_stats[2]
    player_data = {}
    player_data['passing'] = table_scraper(passing)
    player_data['rushing'] = table_scraper(rushing)
    player_data['receiving'] = table_scraper(receiving)
    return player_data
    

In [6]:
def table_scraper(html):
    data = {}
    headers = None

    for row in html.find_all('tr'):
        cells = row.find_all(['th', 'td'])
        if cells:
            if headers is None:
                headers = [cell.get_text(strip=True) for cell in cells[1:]]
            else:
                key = cells[0].get_text(strip=True)
                values = [cell.get_text(strip=True) for cell in cells[1:]]
                data[key] = dict(zip(headers, values))
    return data

In [7]:
def collect_team(driver):
    '''
    parses html and produces json for the given year and team
    '''
    page_source = driver.page_source
    soup = BeautifulSoup(page_source, 'html.parser')
    team_stats = scrape_team(soup)
    player_stats = scrape_player(soup)
    team_stats.update(player_stats)
    return team_stats

    

In [None]:
nfl_team_stats = collect_nfl_team_stats(2017)

In [8]:
def is_number(string):
    try:
        float(string)
        return True
    except ValueError:
        return False


In [9]:
def with_opponent(stat):
    return [stat, stat + ' Opponent']

In [10]:
#fantasy football consensus rankings scraping
import requests

response = requests.get('https://www.fantasypros.com/nfl/rankings/consensus-cheatsheets.php')

In [16]:
text = response.text
soup = BeautifulSoup(text, 'html.parser')


In [17]:
html = soup.find('div', class_='container')

In [19]:
a = html.find('div', class_='main-content main-content--rankings-page')

In [20]:
b = a.find('div', class_='inner')

In [21]:
b

<div class="inner">
<div class="ranking-header-wrap--padding-one-banner">
<div class="ranking-header-wrap clearfix hide-print">
<div class="ranking-header">
<div class="rankings-page__heading-wrap">
<h1 class="rankings-page__heading">Fantasy Football Draft Rankings (2023)</h1>
<h2 class="rankings-page__sub-heading" v-if="false">Consensus Rankings</h2>
</div>
<div class="ranking-page__header-button-container hide-print">
<button aria-label="Open experts modal" class="btn header-btn header-btn--edit-experts">Pick Experts</button>
<button aria-label="Open upgrade modal" class="btn header-btn header-btn--upgrade-ecr">Upgrade</button>
</div>
</div> <!-- .ranking-header -->
</div>
<div class="feature-items__container hide-print">
<div class="feature-primary feature-primary--live-draft">
<div class="select-advanced__ecr-wrapper left-side-bar-hidden">
<div class="select-advanced select-advanced--rankings" v-if="false">
<span class="select-advanced__select-label select-advanced__select-label--t

In [22]:
c = b.find('section', class_='rankings-tabe rankings-table__container mobile-table sticky-table')

In [23]:
c

In [24]:
html[0].find_all('tr')

KeyError: 0

In [None]:
data = {}
headers = None

for row in html[0].find_all('tr'):
    cells = row.find_all(['th', 'td'])
    print(cells)
    if cells:
        if headers is None:
            headers = [cell.get_text(strip=True) for cell in cells[1:]]
        else:
            key = cells[0].get_text(strip=True)
            values = [cell.get_text(strip=True) for cell in cells[1:]]
            data[key] = dict(zip(headers, values))

In [42]:
def scrape_fantasy_pros(start_year=2002):
    final_data = {}
    for year in range(start_year, datetime.now().year):
        for pos in ['rb', 'wr', 'te', 'k', 'dst']:
            final_data[pos + str(year)] = (scrape_fp(pos, year))
    return final_data
            

In [45]:
def scrape_fp(pos, year):
    df = pd.read_html(f'https://www.fantasypros.com/nfl/stats/{pos}.php?year={year}scoring=HALF')[0]
    df.columns = [' '.join(col).strip() for col in df.columns.values]
    data = []
    for _, row in df.iterrows():
        player_data = {}
        for column in df.columns:
            player_data[column] = row[column]
        data.append(player_data)
    return data

In [46]:
scrape_fantasy_pros(2022)

TypeError: can only concatenate str (not "int") to str

In [35]:
rb_stats = pd.read_html('https://www.fantasypros.com/nfl/stats/rb.php?scoring=HALF')[0]
wr_stats = pd.read_html('https://www.fantasypros.com/nfl/stats/wr.php?scoring=HALF')[0]
te_stats = pd.read_html('https://www.fantasypros.com/nfl/stats/te.php?scoring=HALF')[0]
qb_stats = pd.read_html('https://www.fantasypros.com/nfl/stats/qb.php?scoring=HALF')[0]


In [41]:
rb_stats

Unnamed: 0_level_0,Unnamed: 0_level_0,Unnamed: 1_level_0,RUSHING,RUSHING,RUSHING,RUSHING,RUSHING,RUSHING,RECEIVING,RECEIVING,RECEIVING,RECEIVING,RECEIVING,MISC,MISC,MISC,MISC,MISC
Unnamed: 0_level_1,Rank,Player,ATT,YDS,Y/A,LG,20+,TD,REC,TGT,YDS,Y/R,TD,FL,G,FPTS,FPTS/G,ROST
0,1,Austin Ekeler (LAC),204,915,4.5,72,10,13,107,127,722,6.7,5,3,17,319.2,18.8,100.0%
1,2,Christian McCaffrey (SF),244,1139,4.7,49,14,8,85,108,741,8.7,5,0,17,313.9,18.5,100.0%
2,3,Josh Jacobs (LV),340,1653,4.9,86,14,12,53,64,400,7.5,0,1,17,301.8,17.8,99.9%
3,4,Derrick Henry (TEN),349,1538,4.4,56,20,13,33,41,398,12.1,0,3,16,286.3,17.9,99.9%
4,5,Nick Chubb (CLE),302,1525,5.0,41,18,12,27,37,239,8.9,1,1,17,267.9,15.8,99.9%
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
262,263,John Kelly Jr. (CLE),0,0,0.0,0,0,0,0,0,0,0.0,0,0,0,0.0,0.0,0.2%
263,264,Tim Flanders (NO),0,0,0.0,0,0,0,0,0,0,0.0,0,0,0,0.0,0.0,0.0%
264,265,Willie Carter (CHI),0,0,0.0,0,0,0,0,0,0,0.0,0,0,0,0.0,0.0,0.0%
265,266,Brennan Clay (DEN),0,0,0.0,0,0,0,0,0,0,0.0,0,0,0,0.0,0.0,0.0%


In [36]:
import json

data = []
for _, row in rb_stats.iterrows():
    player_data = {}
    for column in rb_stats.columns:
        player_data[column] = row[column]
    data.append(player_data)

TypeError: keys must be str, int, float, bool or None, not tuple

In [37]:
data

[{('Unnamed: 0_level_0', 'Rank'): 1,
  ('Unnamed: 1_level_0', 'Player'): 'Austin Ekeler (LAC)',
  ('RUSHING', 'ATT'): 204,
  ('RUSHING', 'YDS'): 915,
  ('RUSHING', 'Y/A'): 4.5,
  ('RUSHING', 'LG'): 72,
  ('RUSHING', '20+'): 10,
  ('RUSHING', 'TD'): 13,
  ('RECEIVING', 'REC'): 107,
  ('RECEIVING', 'TGT'): 127,
  ('RECEIVING', 'YDS'): 722,
  ('RECEIVING', 'Y/R'): 6.7,
  ('RECEIVING', 'TD'): 5,
  ('MISC', 'FL'): 3,
  ('MISC', 'G'): 17,
  ('MISC', 'FPTS'): 319.2,
  ('MISC', 'FPTS/G'): 18.8,
  ('MISC', 'ROST'): '100.0%'},
 {('Unnamed: 0_level_0', 'Rank'): 2,
  ('Unnamed: 1_level_0', 'Player'): 'Christian McCaffrey (SF)',
  ('RUSHING', 'ATT'): 244,
  ('RUSHING', 'YDS'): 1139,
  ('RUSHING', 'Y/A'): 4.7,
  ('RUSHING', 'LG'): 49,
  ('RUSHING', '20+'): 14,
  ('RUSHING', 'TD'): 8,
  ('RECEIVING', 'REC'): 85,
  ('RECEIVING', 'TGT'): 108,
  ('RECEIVING', 'YDS'): 741,
  ('RECEIVING', 'Y/R'): 8.7,
  ('RECEIVING', 'TD'): 5,
  ('MISC', 'FL'): 0,
  ('MISC', 'G'): 17,
  ('MISC', 'FPTS'): 313.9,
  ('MISC'