In [1]:
import pandas as pd

In [2]:
# Load the data
data = pd.read_csv('data/dataset.csv')

In [8]:
from langchain.document_loaders import DataFrameLoader

In [9]:
loader = DataFrameLoader(data, page_content_column="Player Bio")

In [10]:
data = loader.load()

In [2]:
from langchain.vectorstores import Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
import pinecone
from config import openai_api_key, pinecone_api_key

In [3]:
OPENAI_API_KEY = openai_api_key
PINECONE_API_KEY = pinecone_api_key

In [11]:
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

In [4]:
pinecone.init(
    api_key=PINECONE_API_KEY,
    environment="northamerica-northeast1-gcp"
)
index_name = "nfl-prospects-1"

In [12]:
docsearch = Pinecone.from_texts(embeddings, index_name=index_name)

TypeError: from_texts() missing 1 required positional argument: 'embedding'

In [31]:
query = "Did Bradley Chubb's dad play in the NFL?"
docs  = docsearch.similarity_search(query, include_metadata=True)

In [32]:
docs

[Document(page_content='Bradley Chubb', metadata={}),
 Document(page_content='Chase Young', metadata={}),
 Document(page_content='DeForest Buckner', metadata={}),
 Document(page_content='Jadeveon Clowney', metadata={})]

In [93]:
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain

In [94]:
llm = OpenAI(temperature=0, openai_api_key=OPENAI_API_KEY)
chain = load_qa_chain(llm, chain_type="stuff")

In [100]:
query = "Tell me about Justin Fields, would you recommend drafting him?"
docs  = docsearch.similarity_search(query, include_metadata=True)

In [101]:
chain.run(input_documents=docs, question=query)

' Justin Fields is a pure dropback, strong-armed thrower who has starter talent in a vertical, downfield passing attack. He burst onto the scene as a junior-college transfer in 2012 and has clear starting-caliber traits. He has won Heisman Trophy finalist, second-team Associated Press All-American and Big Ten Offensive Player of the Year honors and has finished the year ranked sixth in the country in completion percentage. He will require patient, confidence-building play-calling and a clean pocket to function at a high level in the NFL, and how he interviews with teams could go a long way toward determining his draft status. I would recommend drafting Justin Fields as he has the potential to be a quality starter in the NFL.'

In [102]:
docs

[Document(page_content='Pure dropback, strong-armed thrower who will require patient, confidence-building play-calling and a clean pocket to function at a high level in the NFL. Has starter talent in a vertical, downfield passing attack if he can learn to take command of a huddle and continue progressing as a decision-maker. How he interviews with teams could go a long way toward determining his draft status.', metadata={}),
 Document(page_content='Burst onto the scene as a junior-college transfer in 2012 when he led the nation in completion percentage (72.5), executing a quarterback-friendly system and putting himself on the NFL radar. Concerns about his height and arm strength will limit his appeal, but at worst should be a quality backup in a precision-matchup system.', metadata={}),
 Document(page_content='Will tease evaluators with his arm and athletic talent, but has yet to prove he can throw with the precision needed to sustain a starting job in the pros. Has clear starting-cali

In [17]:
from langchain.retrievers import PineconeHybridSearchRetriever


In [13]:
index = pinecone.Index(index_name)

In [14]:
from transformers import BertTokenizerFast

In [15]:
tokenizer = BertTokenizerFast.from_pretrained(
    'bert-base-uncased'
)


Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [18]:
retriever = PineconeHybridSearchRetriever(
    embeddings=embeddings, index=index, tokenizer=tokenizer)

In [11]:
import pandas as pd
from splinter import Browser
from bs4 import BeautifulSoup as bs
from webdriver_manager.chrome import ChromeDriverManager
import requests
import time


# Set up Splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)




In [12]:
url = "https://www.nfl.com/prospects/jake-witt/32005749-5436-4970-1de4-f50308ff2d91"
browser.visit(url)
time.sleep(5)
html = browser.html
soup = bs(html, 'html.parser')
player_bio_div = soup.find('div', class_='css-k9c8dc')
if player_bio_div:
    for idx, child in enumerate(player_bio_div.children):
        if idx == 1:  # Player bio (second child)
            player_bio = child.strip()


In [13]:
player_bio

'Witt’s athletic testing was off the charts and it helps that teams recognize he is still filling out his long frame. The tight end convert has only been at the tackle position for one full season, so teams understand he’s going to be behind from an instinct and technique standpoint. He’ll need to make progress as a Year 1 practice squad candidate for teams to continue the investment. He’s nowhere near ready to play pro football, but he will be a coveted free agent if he goes undrafted given his traits and athleticism.'