**OpenAI**

In [None]:
import os
from dotenv import load_dotenv

# Pull any settings defined in a local .env file into the process environment.
load_dotenv()

# Look up the OpenAI key from the environment; the value is None when unset.
Openai_api_key = os.environ.get("OPENAI_API_KEY")

In [None]:
from langchain.llms import OpenAI

# Create the OpenAI-backed LLM with a sampling temperature of 0.7.
# NOTE(review): no API key is passed here; presumably the wrapper reads
# OPENAI_API_KEY from the environment - confirm.
llm = OpenAI(temperature=0.7)

  llm = OpenAI(temperature=0.7)


**Hugging Face - Qwen3**

In [None]:
import os
from dotenv import load_dotenv
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig
from langchain.llms import HuggingFacePipeline
import torch
# Hub identifier of the checkpoint to download and run.
model_name = "Qwen/Qwen3-0.6B"

# Load variables from a local .env file first, so HF_TOKEN is available even
# when this cell is the first one executed in a fresh kernel.
load_dotenv()
HUGGINGFACE_TOKEN = os.getenv("HF_TOKEN")  # None when the variable is unset

# Configure 4-bit quantization (float16 compute, lm_head left unquantized)
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    llm_int8_threshold=6.0,
    llm_int8_skip_modules=["lm_head"],
)

# `token` replaces the deprecated `use_auth_token` keyword.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    use_fast=True,
    token=HUGGINGFACE_TOKEN,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quant_config,
    device_map="auto",
    torch_dtype=torch.float16,
    token=HUGGINGFACE_TOKEN,
)

# Create a pipeline for text generation
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=256,  # total length budget: prompt tokens + generated tokens
    # temperature / top_k / top_p only take effect when sampling, so request
    # sampling explicitly instead of relying on the checkpoint's defaults.
    do_sample=True,
    temperature=0.6,
    top_k=10,
    top_p=0.7,
    repetition_penalty=1.2,
)

# Wrap the pipeline in a LangChain-compatible LLM
llm = HuggingFacePipeline(pipeline=generator)


Run the cell below after either the OpenAI or the Hugging Face LLM instance has been loaded.

In [None]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Prompt with a single placeholder, {Job_role}. The template is built from
# adjacent string literals, so each sentence must end with a space; otherwise
# the sentences are glued together ("...{Job_role}.Include ...").
prompt_template_name = PromptTemplate(
    input_variables=["Job_role"],
    template=(
        "Provide a roadmap for a beginner based on the {Job_role}. "
        "Include the languages, frameworks, and packages they must learn. "
        "Make it summarized limit to 128 tokens"
    ),
)

# Bind the prompt to whichever `llm` was created in an earlier cell, then run
# the chain for one example role and show the generated text.
chain = LLMChain(llm=llm, prompt=prompt_template_name)
response = chain.run("Data Engineer")
print(response)

**Llama 2**

In [4]:
import torch

# Report whether CUDA is usable; when it is, name the device at index 0.
print(
    f"GPU Detected: {torch.cuda.get_device_name(0)}"
    if torch.cuda.is_available()
    else "No GPU detected, using CPU."
)

GPU Detected: NVIDIA GeForce RTX 3050 Laptop GPU


In [1]:
import os
from langchain_community.llms import CTransformers
from langchain.prompts import PromptTemplate

In [None]:
from ctransformers import AutoModelForCausalLM
from langchain.prompts import PromptTemplate

def getLLamaresponse(
    Job_role,
    model_path="E:\\AIML\\Roadmap\\Models\\llama-2-7b-chat.ggmlv3.q8_0.bin",
    gpu_layers=20,
):
    """Stream a beginner roadmap for ``Job_role`` from a local Llama 2 model.

    The model is loaded on every call, the prompt is formatted, and generated
    text chunks are printed as they arrive. The chunks are also collected so
    the complete response can be returned to the caller.

    Args:
        Job_role: Role name substituted into the prompt template.
        model_path: Path to the GGML model file passed to ``from_pretrained``.
        gpu_layers: Number of layers to offload to the GPU. Adjust based on
            VRAM; 24 is too high for 4GB with 8-bit quantization.

    Returns:
        The full generated text as a single string.
    """
    # Load the model with GPU offloading
    llm = AutoModelForCausalLM.from_pretrained(
        model_path,
        model_type='llama',
        gpu_layers=gpu_layers,
    )

    # Prompt Template
    template = """
        Provide a roadmap for a beginner based on the {Job_role}. 
        Include the languages, frameworks, and packages they must learn. Make it summarized limit to 128 tokens.
    """

    prompt = PromptTemplate(input_variables=["Job_role"], template=template)
    formatted_prompt = prompt.format(Job_role=Job_role)

    chunks = []
    for text_chunk in llm(
        formatted_prompt,
        max_new_tokens=200,  # Cap the response length
        temperature=0.01,  # Near-zero temperature for more deterministic output
        repetition_penalty=1.05,  # Mild penalty against repeated text
        stream=True  # Yield text chunks as they are generated
    ):
        # Echo each chunk immediately and keep it for the return value.
        print(text_chunk, end="", flush=True)
        chunks.append(text_chunk)

    return "".join(chunks)


In [3]:
# Generate a roadmap for the "Data Engineer" role; the text is streamed to the cell output.
getLLamaresponse("Data Engineer")


    Beginner Roadmap:
    
    1. Learn Python: Essential language for data engineering.
    2. Familiarize with pandas and NumPy.
    3. Learn SQL: Data manipulation and querying.
    4. Explore data visualization tools like Matplotlib and Seaborn.
    5. Learn about data storage solutions like HDFS and AWS S3.
    6. Familiarize with big data processing frameworks like Apache Spark and Apache Flink.
    7. Learn about data governance and quality control.
    8. Explore machine learning libraries like scikit-learn and TensorFlow.
    9. Learn about cloud computing platforms like AWS and GCP.
    10. Practice with real-world projects and datasets.
    
    Total tokens: 128

**QWEN**

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import streamlit as st

# Local directory that holds the model files.
MODEL_DIR = "E:\\AIML\\Qwen3-1.7B"

# 4-bit quantization with float16 compute; "lm_head" is listed as a skipped module.
# NOTE(review): llm_int8_threshold is named for the 8-bit path - confirm it is
# needed alongside load_in_4bit.
quant_config = BitsAndBytesConfig(
    llm_int8_skip_modules=["lm_head"],
    llm_int8_threshold=6.0,
    bnb_4bit_compute_dtype=torch.float16,
    load_in_4bit=True,
)

# Tokenizer and quantized model are both read from MODEL_DIR.
tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_DIR,
    device_map=0,
    torch_dtype=torch.float16,
    quantization_config=quant_config,
)

def get_llama_response(job_role):
    """Generate a learning roadmap for ``job_role`` with the loaded model.

    Uses the module-level ``tokenizer`` and ``model``. The call is synchronous:
    it builds a single-turn chat prompt (with ``enable_thinking=False``), runs
    ``model.generate`` and returns the decoded, stripped answer text.

    Args:
        job_role: Role name inserted into the instruction text.

    Returns:
        The decoded response string with special tokens removed.
    """
    # Instruction text; {Job_role} is filled in with the requested role.
    instruction = (
        "You are a career advisor. Generate a structured learning roadmap for a beginner aspiring to be a {Job_role}. "
        "The response should include the following sections:\n"
        "1. Programming Languages\n"
        "2. Frameworks and Libraries\n"
        "3. Tools and Platforms\n"
        "4. Suggested Projects\n\n"
        "Ensure clarity and conciseness. Avoid markdown formatting. "
        "Start the response with '### Career Roadmap:' and end with '##'.\n\n"
        "### Career Roadmap:\n"
    ).format(Job_role=job_role)

    # Render one user turn through the chat template, thinking mode disabled.
    chat_text = tokenizer.apply_chat_template(
        [{"role": "user", "content": instruction}],
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,
    )

    # Tokenize and move the tensors to the device the model lives on.
    encoded = tokenizer([chat_text], return_tensors="pt").to(model.device)

    sequences = model.generate(
        **encoded,
        max_new_tokens=512,
        temperature=0.05,
        repetition_penalty=1.1,
    )

    # Keep only the tokens produced after the prompt.
    prompt_length = len(encoded.input_ids[0])
    new_token_ids = sequences[0][prompt_length:].tolist()

    # Skip everything up to and including the last occurrence of token id
    # 151668 (the 'thinking' marker - presumably the end-of-thinking token;
    # verify against the tokenizer). When it is absent, keep the whole output.
    if 151668 in new_token_ids:
        answer_start = len(new_token_ids) - new_token_ids[::-1].index(151668)
    else:
        answer_start = 0

    answer_ids = new_token_ids[answer_start:]
    return tokenizer.decode(answer_ids, skip_special_tokens=True).strip()

# Generate and print the roadmap for the 'Data Engineer' role.
print(get_llama_response('Data Engineer'))

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

### Career Roadmap:

**Programming Languages**  
- **Python**: Core language for data processing, analysis, and scripting.  
- **Java/Scala**: For distributed systems and big data frameworks.  
- **C#**: For backend development and microservices.  
- **SQL**: For querying databases and managing data storage.  

**Frameworks and Libraries**  
- **Python**: Pandas, NumPy, Scikit-learn, SQLAlchemy, and PySpark.  
- **Java**: Apache Kafka, Hadoop, Spark, and JDBC.  
- **C#**: Entity Framework, LINQ, and SQL Server.  
- **Web Development**: Django, Flask, or Spring Boot.  

**Tools and Platforms**  
- **Jupyter Notebooks**: For interactive data exploration.  
- **Git & GitHub**: Version control and collaboration.  
- **Docker**: Containerization of applications.  
- **Kubernetes**: For orchestration of services.  
- **AWS/Azure/GCP**: Cloud platforms for deployment and scaling.  
- **Apache Airflow**: For workflow automation.  

**Suggested Projects**  
1. **Data Cleaning and Analysis** – U