In [None]:
import spacy
from typing import Dict, Any

# Load the spaCy English pipeline once, at import time; extract_entities()
# below reuses this shared `nlp` object on every call.
# The 'en_core_web_sm' model must be installed separately
# (e.g. `python -m spacy download en_core_web_sm`).
nlp = spacy.load('en_core_web_sm')

def extract_entities(text: str) -> Dict[str, Any]:
    """Run the shared spaCy pipeline over *text* and fill a team-member record.

    The record always contains the same keys. Scalar fields start as ``None``
    and the flags inside ``Academic_Degree``, ``Startup_Experience`` and
    ``Role`` start as ``False`` (``Role["Other"]`` starts as ``None``). Only
    ``Name`` and ``Title`` are populated from recognised entities; every other
    field is returned with its placeholder value.

    When several entities carry the same label, the last one in document
    order wins because each match overwrites the previous one.

    NOTE(review): the stock ``en_core_web_sm`` label set does not appear to
    include ``TITLE`` -- confirm a custom component provides it, otherwise
    ``Title`` will always stay ``None``.

    Args:
        text: Raw text to analyse.

    Returns:
        The record dictionary described above.
    """
    # spaCy entity label -> top-level record field it populates.
    # Extend this table to map additional labels.
    label_to_field = {
        "PERSON": "Name",
        "TITLE": "Title",
    }

    # Scalar fields, all unknown until an entity fills them in.
    record: Dict[str, Any] = dict.fromkeys(
        (
            "Name",
            "Title",
            "Availability_Per_Week",
            "Involved_Since",
            "Equity_Percent",
            "Salary_Percent",
            "Years_of_Experience",
        )
    )

    # Grouped boolean flags, all off by default.
    record["Academic_Degree"] = dict.fromkeys(
        ("Undergraduate", "Graduate_Degree", "Masters", "PhD_or_More"),
        False,
    )
    record["Startup_Experience"] = dict.fromkeys(
        (
            "Has_Been_Part_of_a_Startup_Team",
            "Has_Been_the_Founder_of_a_Startup",
            "Has_Previous_C_Level_Position",
            "Has_Been_Part_of_a_Successful_Exit",
        ),
        False,
    )
    role_flags: Dict[str, Any] = dict.fromkeys(
        ("Marketing", "Sales", "Product", "Creative", "Technical", "Operation"),
        False,
    )
    role_flags["Other"] = None
    record["Role"] = role_flags

    for span in nlp(text).ents:
        field = label_to_field.get(span.label_)
        if field is not None:
            record[field] = span.text

    return record


In [None]:
@app.route('/process_data', methods=['POST'])
def process_data():
    """Handle an uploaded file and return the extracted team-member data.

    Steps:
      1. Require a ``file`` part in the request; flash a message and redirect
         back to the request URL when it is missing.
      2. Redirect back (without a flash message) when the upload is falsy or
         ``allowed_file`` rejects its filename.
      3. Save the upload under ``app.config['UPLOAD_FOLDER']`` using a
         sanitised filename.
      4. Extract text with ``process_pdf``, run ``extract_entities`` on it and
         build a ``TeamMember`` from the result.

    Returns:
        A JSON response built from ``TeamMember.dict()`` on success, otherwise
        a redirect to ``request.url``.

    NOTE(review): this route is registered for POST only, so a redirect to
    ``request.url`` that the client follows with GET will probably not reach
    this handler -- confirm the intended failure behaviour.
    """
    if 'file' not in request.files:
        flash('No file part')
        return redirect(request.url)

    upload = request.files['file']
    if not upload or not allowed_file(upload.filename):
        return redirect(request.url)

    # Persist the upload so the PDF processor can read it from disk.
    safe_name = secure_filename(upload.filename)
    destination = os.path.join(app.config['UPLOAD_FOLDER'], safe_name)
    upload.save(destination)

    # PDF -> raw text -> entity record -> TeamMember instance.
    raw_text = process_pdf(destination)
    entity_record = extract_entities(raw_text)
    team_member_data = TeamMember(**entity_record)

    # Debug output of the extracted data.
    print(team_member_data.json())

    return jsonify(team_member_data.dict())


In [None]:
def _merge_entities(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]:
    """Return a new dict combining *base* and *override* field by field.

    Rules, applied per key:
      * nested dicts on both sides are merged recursively;
      * two booleans are OR-ed, so a flag set on either side stays set;
      * otherwise *override* wins, unless its value is ``None``, in which
        case the value from *base* is kept.

    Neither input dictionary is modified.
    """
    merged = dict(base)
    for key, value in override.items():
        if key not in merged:
            merged[key] = value
            continue

        current = merged[key]
        if isinstance(current, dict) and isinstance(value, dict):
            merged[key] = _merge_entities(current, value)
        elif isinstance(current, bool) and isinstance(value, bool):
            merged[key] = current or value
        elif value is not None:
            # A None placeholder must never erase a real value.
            merged[key] = value
    return merged


def process_llm_and_pdf_responses(llm_response: str, pdf_text: str) -> Dict[str, Any]:
    """Extract entities from an LLM response and from PDF text, then merge them.

    Both inputs go through ``extract_entities``, which returns records with
    the same keys. A plain ``{**llm, **pdf}`` would therefore replace every
    LLM-derived value with the PDF one, even when the PDF value is only an
    unset placeholder (``None`` / ``False``). The records are instead merged
    field by field: PDF values take precedence when they are set, and LLM
    values fill the gaps.

    Args:
        llm_response: Text produced by the LLM.
        pdf_text: Text extracted from the uploaded PDF.

    Returns:
        The merged entity record.
    """
    # Extract entities from the LLM response.
    llm_entities = extract_entities(llm_response)

    # Extract entities from the PDF text.
    pdf_entities = extract_entities(pdf_text)

    # PDF-derived values win where present; LLM values fill in the rest.
    return _merge_entities(llm_entities, pdf_entities)

@app.route('/process_data_with_llm', methods=['POST'])
def process_data_with_llm():
    """Handle an uploaded file plus an LLM response and return merged data.

    Steps:
      1. Require both a ``file`` part and an ``llm_response`` form field;
         flash a message and redirect back to the request URL when either is
         missing.
      2. Redirect back (without a flash message) when the upload is falsy or
         ``allowed_file`` rejects its filename.
      3. Save the upload under ``app.config['UPLOAD_FOLDER']`` using a
         sanitised filename and extract its text with ``process_pdf``.
      4. Combine the LLM response and the PDF text through
         ``process_llm_and_pdf_responses`` and build a ``TeamMember`` from
         the result.

    Returns:
        A JSON response built from ``TeamMember.dict()`` on success, otherwise
        a redirect to ``request.url``.

    NOTE(review): this route is registered for POST only, so a redirect to
    ``request.url`` that the client follows with GET will probably not reach
    this handler -- confirm the intended failure behaviour.
    """
    has_file = 'file' in request.files
    if not has_file or 'llm_response' not in request.form:
        flash('No file part or LLM response')
        return redirect(request.url)

    upload = request.files['file']
    llm_response = request.form['llm_response']

    if not upload or not allowed_file(upload.filename):
        return redirect(request.url)

    # Persist the upload so the PDF processor can read it from disk.
    safe_name = secure_filename(upload.filename)
    destination = os.path.join(app.config['UPLOAD_FOLDER'], safe_name)
    upload.save(destination)

    # Extract the PDF text and combine it with the LLM response.
    pdf_text = process_pdf(destination)
    entity_record = process_llm_and_pdf_responses(llm_response, pdf_text)
    team_member_data = TeamMember(**entity_record)

    # Debug output of the combined data.
    print(team_member_data.json())

    return jsonify(team_member_data.dict())
