In [6]:
!pip install -U langchain-community



In [7]:
!pip install faiss-cpu



In [8]:
!pip install gradio



In [9]:
!pip install pymupdf



In [10]:
!pip install tools

Collecting tools
  Downloading tools-0.1.9.tar.gz (34 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting pytils (from tools)
  Downloading pytils-0.4.3.tar.gz (101 kB)
     ---------------------------------------- 0.0/101.4 kB ? eta -:--:--
     -------------------------------------- 101.4/101.4 kB 6.1 MB/s eta 0:00:00
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Collecting lxml (from tools)
  Downloading lxml-5.3.2-cp311-cp311-win_amd64.whl.metadata (3.7 kB)
Downloading lxml-5.3.2-cp311-cp311-win_amd64.whl (3.8 MB)
   ---------------------------------------- 0.0/3.8 MB ? eta -:--:--
   ------ --------------------------------- 0.6/3.8 MB 12.

In [11]:
!pip install langdetect



In [21]:
import pandas as pd
import fitz  # PyMuPDF
import gradio as gr
import openai
from openai import OpenAI
import json

from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI as LangOpenAI
from langdetect import detect
from langchain.schema import Document

# 1. Initialize the embedding model (shared by indexing and querying)
embedding_model = SentenceTransformerEmbeddings(model_name='paraphrase-multilingual-MiniLM-L12-v2')
# NOTE(review): upload_course_excel() persists the index to "my_course_index", but this
# cell always rebuilds from courses.xlsx, so previously uploaded courses are not restored
# after a kernel restart. Loading the saved index here would fix that — confirm intent.
# vector_store = FAISS.load_local("my_course_index", embedding_model)

# Expected columns: course_id, course_name, description, term, department
df = pd.read_excel('courses.xlsx')

# 2. Generate the text that gets embedded (course name + description).
#    Missing cells are replaced by "" first: NaN + str stays NaN in pandas, and a NaN
#    entry is not a valid text for the embedding model.
df['text'] = df['course_name'].fillna("").astype(str) + " " + df['description'].fillna("").astype(str)

# Rows with neither a name nor a description carry no searchable content.
indexable_courses = df[df['text'].str.strip() != ""]

# 3. Prepare per-course metadata. 'department' is included (when the sheet has it) so that
#    entries built here have the same metadata keys as those added by upload_course_excel().
metadata_columns = [
    col for col in ['term', 'department', 'course_name', 'description']
    if col in indexable_courses.columns
]
metadatas = indexable_courses[metadata_columns].to_dict(orient='records')

# 4. Build the FAISS vector library
vector_store = FAISS.from_texts(indexable_courses['text'].tolist(), embedding_model, metadatas=metadatas)

# 5. LLM clients are created per request by get_client().
#    Never paste API keys into this notebook, not even inside comments.


# Resume PDF → Text
def extract_text_from_pdf(file_obj):
    """Return the concatenated plain text of every page of a PDF.

    Args:
        file_obj: Either a filesystem path given as ``str`` or an object whose
            ``.name`` attribute holds the path (what the Gradio file input
            passes to this function in this notebook).

    Returns:
        str: The text of all pages joined in page order ("" for a PDF with
        no extractable text).
    """
    pdf_path = file_obj if isinstance(file_obj, str) else file_obj.name
    # The context manager closes the document (and its file handle) even if
    # text extraction raises; the original never closed it.
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text() for page in doc)

# upload excel
def upload_course_excel(file_obj):
    """Add the courses from an uploaded Excel sheet to the vector store.

    The sheet must contain the columns course_id, course_name, description,
    term and department. Each row is embedded as "<course_name> <description>"
    with term/department/course_name/description stored as metadata, and the
    updated index is saved to the local folder "my_course_index".

    Args:
        file_obj: Path string, or an object whose ``.name`` is the path of the
            uploaded file. ``None`` means nothing was uploaded.

    Returns:
        str: A human-readable status message (success, validation problem, or
        "Wrong:<error>" when an exception occurred).
    """
    if file_obj is None:
        return "Please upload an Excel file first."

    try:
        excel_path = file_obj if isinstance(file_obj, str) else file_obj.name
        new_df = pd.read_excel(excel_path)

        required_cols = {"course_id", "course_name", "description", "term", "department"}
        if not required_cols.issubset(new_df.columns):
            return "Please upload an Excel file with the following columns: course_id, course_name, description, term, department"

        # Fill missing cells BEFORE casting to str; otherwise NaN becomes the
        # literal text "nan" and the emptiness filter below can never trigger.
        new_df['text'] = (
            new_df['course_name'].fillna("").astype(str)
            + " "
            + new_df['description'].fillna("").astype(str)
        )

        # Drop rows that have neither a name nor a description.
        new_df = new_df[new_df['text'].str.strip() != ""]
        if new_df.empty:
            return "No valid courses found in the uploaded file (course_name and description are empty)."

        new_metadatas = new_df[['term', 'department', 'course_name', 'description']].to_dict(orient='records')

        new_documents = [
            Document(page_content=text, metadata=meta)
            for text, meta in zip(new_df['text'].tolist(), new_metadatas)
        ]

        vector_store.add_documents(new_documents)

        # Persist the enlarged index on disk.
        vector_store.save_local("my_course_index")

        return f"✅ Successfully uploaded and added {len(new_df)} new courses to the database!"

    except Exception as e:
        return f"Wrong:{str(e)}"


# Use ai to analyze resume content and generate summaries
# SECURITY: API keys must never live in source code — not in live code, comments,
# or unused string literals. The keys that were embedded here must be considered
# leaked and should be revoked/rotated with the providers.
def get_client(model_choice: str) -> "OpenAI":
    """Create an OpenAI-compatible client for the selected provider.

    The API key is read from an environment variable:
      * "Deepseek" -> ``DEEPSEEK_API_KEY`` (base URL https://api.deepseek.com)
      * "ChatGPT4" -> ``OPENAI_API_KEY``

    Args:
        model_choice: "Deepseek" or "ChatGPT4".

    Returns:
        OpenAI: A client configured for the chosen provider.

    Raises:
        ValueError: If ``model_choice`` is not a supported option.
        RuntimeError: If the provider's API-key environment variable is unset
            or empty.
    """
    import os

    # provider -> (environment variable holding the key, extra client kwargs)
    provider_settings = {
        "Deepseek": ("DEEPSEEK_API_KEY", {"base_url": "https://api.deepseek.com"}),
        "ChatGPT4": ("OPENAI_API_KEY", {}),
    }

    if model_choice not in provider_settings:
        raise ValueError(f"Unsupported model choice: {model_choice}")

    env_var, client_kwargs = provider_settings[model_choice]
    api_key = os.environ.get(env_var, "").strip()
    if not api_key:
        raise RuntimeError(
            f"Missing API key: set the {env_var} environment variable to use {model_choice}."
        )

    return OpenAI(api_key=api_key, **client_kwargs)

        
def summarize_resume_with_gpt(resume_text, client, model):
    """Ask a chat model for a short natural-language summary of a resume.

    Args:
        resume_text: Plain text of the resume; embedded verbatim in the prompt.
        client: Object exposing ``chat.completions.create(model=..., messages=...)``.
        model: Model identifier forwarded to the completion call.

    Returns:
        The ``message.content`` of the first choice in the response.
    """
    instructions = f"""
You are a career consultant. Please summarize the core content of the resume below, including:
- Educational background
- Skills and programming languages
- Project experience or course experience
- Key skills that may be missing
Please return the summary in concise and natural language, do not list JSON or bullet points.
The resume content is as follows:
{resume_text}
"""
    # Single-turn conversation: the whole request is one user message.
    chat_messages = [dict(role="user", content=instructions)]
    completion = client.chat.completions.create(model=model, messages=chat_messages)
    first_choice = completion.choices[0]
    return first_choice.message.content


def recommend_from_resume(resume_file, user_goal_text, term, num, model_choice):
    """Recommend courses for a student from an optional resume and a stated goal.

    Steps: validate the inputs, optionally summarize the resume with the LLM,
    retrieve the nearest courses for the requested term from the vector store,
    then ask the LLM to pick ``num`` of them and justify the choice.

    Args:
        resume_file: Uploaded resume PDF (as accepted by extract_text_from_pdf)
            or ``None`` when no resume was provided.
        user_goal_text: Free-text learning goal; required.
        term: Term used as the metadata filter of the vector search.
        num: Number of courses to recommend (positive integer).
        model_choice: Provider name understood by get_client().

    Returns:
        str: The recommendation text, or a user-facing message when the input
        is invalid, nothing relevant is found, or an error occurred
        ("Wrong:<error>").
    """
    try:
        if not user_goal_text or user_goal_text.strip() == "":
            return "What do you want to learn?"

        # The number input may arrive as None (cleared field) or as a float.
        try:
            num = int(num)
        except (TypeError, ValueError, OverflowError):
            num = 0
        if num < 1:
            return "Please choose a positive number of recommendations."

        client = get_client(model_choice)

        # Detect the language of the goal so the answer uses the same one.
        try:
            lang = detect(user_goal_text)
        except Exception:
            # Detection fails on very short / symbol-only input; fall back to English.
            lang = "en"

        lang_instruction = "请用中文回复：" if lang.startswith("zh") else "Please respond in English:"

        # get_client() has already rejected unsupported choices.
        model_names = {"Deepseek": "deepseek-chat", "ChatGPT4": "gpt-4-turbo"}
        m = model_names[model_choice]

        # resume + goal
        if resume_file is not None:
            resume_text = extract_text_from_pdf(resume_file)
            resume_summary = summarize_resume_with_gpt(resume_text, client, m)
            user_summary = f"My background: {resume_summary}\nMy goal:{user_goal_text}"
        else:
            user_summary = f"My goal is:{user_goal_text}"

        # Distance threshold above which even the best match is treated as irrelevant
        # (e.g. "I like football" should yield no course).
        SIMILARITY_THRESHOLD = 25
        # Retrieve more candidates than requested so the LLM has a real choice.
        k0 = max(4 * num, 10)

        # With a metadata filter, FAISS first fetches `fetch_k` nearest vectors and only
        # then applies the filter. The default (20) would hide in-term courses that are
        # not among the 20 globally nearest and caps the result below k0 for large `num`,
        # so the whole index is scanned instead.
        matches = vector_store.similarity_search_with_score(
            user_summary,
            k=k0,
            filter={"term": term},
            fetch_k=max(vector_store.index.ntotal, k0),
        )

        # Closest result too far away (or nothing for this term at all).
        if not matches or matches[0][1] > SIMILARITY_THRESHOLD:
            return "Sorry, our course library dones't contain relavent courses. You may upload more courses through our app." 

        course_docs = [doc for doc, _ in matches]

        # Only pass the actual courses and let the model write the reasons.
        course_text_block = "\n".join(
            f"{i}. {doc.page_content}" for i, doc in enumerate(course_docs, start=1)
        )
        prompt = f"""
        {lang_instruction}.
        
        Below is the background information and goals: {user_summary}
        We matched the following real courses for users (the course names are all real):
        {course_text_block}.
        Please select exact {num} courses from the above based on their background and goals, and explain why you recommend them. 
        Do not make up course names.Please be aware that: if there is no fit, proper courses to suggest based on the provided information, 
        please output nothing but: "Sorry, our course library dones't contain relavent courses. You can upload more courses through our app." 
        If their goal is not a question for course recommendation assistant, please output nothing but: "Sorry, your goal is not relevant to course selection."
"""

        response = client.chat.completions.create(
            model=m,
            messages=[{"role":"system", 
                       "content":"You are a university course recommendation assistant, aiming to give the most proper suggestion to university students to meet their goal. "},
                {"role": "user", "content": prompt}]
        )
        # The content can be None for an empty completion.
        reason = (response.choices[0].message.content or "").strip()

        return f"📝 Recommendations and Reasons :\n{reason}"

    except Exception as e:
        return f"Wrong:{str(e)}"



# 6. Building the Gradio interface
term_options = ["Fall", "Spring", "Summer"]
model_options = ['Deepseek', 'ChatGPT4']

with gr.Blocks() as demo:
    with gr.Tab("🎓 Course Recommendations"):
        gr.Markdown('<h2 style="text-align: center; font-size: 2em;">Upload Resume and Get Course Suggestions</h2>')
        resume_file = gr.File(label="Upload your resume PDF")
        # Pre-select a model and a term: an unselected dropdown passes None, which
        # ends in an "Unsupported model choice" error or an always-empty term filter.
        model_choice = gr.Dropdown(choices=model_options, value=model_options[0], label="Please select a model")
        goal = gr.Textbox(label="Your goals", placeholder="e.g., I want to study data science or quantitative finance (try to be specific)")
        term = gr.Dropdown(choices=term_options, value=term_options[0], label="Please select a semester")
        recommendation_number = gr.Number(
            value=3,
            minimum=1,
            label="Please choose the number of recommendations you want (integer only)",
            precision=0)
        submit_btn = gr.Button("🚀 Recommend Courses")

        # Hidden placeholder that shows a GIF while the recommendation is computed.
        loading_animation = gr.Image(
            visible=False,
            height=300,
            width=300,
            elem_classes="loading-animation",
            show_share_button = False,
            show_download_button = False,
            show_label = False
        )

        output = gr.Textbox(label="Recommended results")

        def recommend_with_loading(resume_file, goal_text, term, num, model_choice):
            """Generator wrapper around recommend_from_resume that toggles the loading GIF.

            Yields component updates twice: first to show a randomly chosen
            animation and clear the output, then to hide the animation and
            display the result (or an error message).
            """
            import random
            # NOTE(review): assumes line-dog1.gif … line-dog4.gif exist in the working
            # directory — confirm they ship with the notebook.
            r = random.randint(1, 4)

            yield {loading_animation: gr.Image(visible=True, value=f"line-dog{r}.gif"),
                   output: ""}
            try:
                result = recommend_from_resume(resume_file, goal_text, term, num, model_choice)
                yield {
                    loading_animation: gr.Image(visible=False),
                    output: result
                }
            except Exception as e:
                # Always hide the animation again, even on unexpected failures.
                yield {
                    loading_animation: gr.Image(visible=False),
                    output: f"Error: {str(e)}"
                }

        submit_btn.click(
            fn=recommend_with_loading,
            inputs=[resume_file, goal, term, recommendation_number, model_choice],
            outputs=[loading_animation, output]
        )

    with gr.Tab("📂 Upload new courses"):
        gr.Markdown("### Upload a course Excel file to expand the database")
        course_file = gr.File(label="Upload Excel with course_id, course_name, description, term, department", file_types=[".xlsx"])
        upload_btn = gr.Button("📥 Upload Course Table")
        upload_output = gr.Textbox(label="Upload Result")

        upload_btn.click(
            fn=upload_course_excel,
            inputs=course_file,
            outputs=upload_output
        )

# share=True additionally publishes the app under a temporary public gradio.live URL.
demo.launch(share=True)

* Running on local URL:  http://127.0.0.1:7868
* Running on public URL: https://38d2102b3595816bb2.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


