In [1]:
!pip install -q -U transformers
!pip install -q -U accelerate
!pip install -q -U bitsandbytes

In [5]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM,pipeline

In [3]:
# Specify the model name path
model_name = '/kaggle/input/mistral/pytorch/7b-instruct-v0.1-hf/1'

# Load the tokenizer for the specified model
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the model for causal language modeling
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,  # Use bfloat16 data type
    device_map="auto",  # Automatically manage device placement
    trust_remote_code=True,  # Trust remote code during loading
)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Credit:
 - Adapted from https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1

In [6]:
pipeline = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        use_cache=True,
        device_map="auto",
        max_length=5000,
        do_sample=True,
        top_k=5,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
)

In [7]:
!pip install -q -U langchain

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [8]:
from langchain import HuggingFacePipeline
llm = HuggingFacePipeline(pipeline=pipeline)

In [9]:
# Import necessary modules from langchain
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser

# Define a ResponseSchema for the 'name' field
name_schema = ResponseSchema(
    name="title",
    description="Heading of the work or experience"
)

# Define a ResponseSchema for the 'output' field
output_schema = ResponseSchema(
    name="output",
    description="All points related to the heading"
)

# Create a list of ResponseSchema objects
response_schemas = [name_schema, output_schema]

# Output the list of response schemas
response_schemas


[ResponseSchema(name='title', description='Heading of the work or experience', type='string'),
 ResponseSchema(name='output', description='all points related to heading', type='string')]

In [10]:
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
output_parser

StructuredOutputParser(response_schemas=[ResponseSchema(name='title', description='Heading of the work or experience', type='string'), ResponseSchema(name='output', description='all points related to heading', type='string')])

In [11]:
format_instruction = output_parser.get_format_instructions()
print(format_instruction)

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
{
	"title": string  // Heading of the work or experience
	"output": string  // all points related to heading
}
```


In [12]:
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate

In [13]:
resume = "RESEARCH EXPERIENCE\n\nHF Radar for Estimation of Ocean Surface Currents Map | Radar (Aug 2020 - Present)\nMaster’s Thesis | Prof. Siddharth Duttagupta\n\n« Reviewed the Method of Obtaining Doppler Spectrum received from Bragg Scattering by surface waves and\nvarious algorithms for estimating its location and velocity of current beneath it\n\n¢ Working on Design of Co-located Orthogonal Loops Antenna for Bearing Determination of surface waves\n\n \n\nVertex Coloring using Oscillators | Neuromorphic (Aug 2019 - Nov 2019)\nSupervised Research Exposition | Prof. Udayan Ganguly\n\n+ Solved Vertex Colouring using Ring Oscillator ,modelled vertex as a oscillator and edge as Coupling Capacitor\n¢ Solved the same problem using Relaxation Oscillator and compared the two methods.\n\nWork EXPERIENCE\n\nAudio Speech Recognition | Meru Cabs, Mumbai (May 2019 - Jul 2019)\nGuide: Jagrat Khandelwal\n\nImplemented Detection of against policy behaviours from Call Recordings of Customers and Drivers by\nrecognising certain words and achieved Accuracy of 0.85 on validation dataset\n\nTrained mapping of 32 cepstral coefficients to phonemes (from HMM) using Fully Connected Nueral Networks\nand phonemes to text"

In [51]:
template = """
        Extract the following details from a description of a resume:
            
            
        name: Heading of the work or experience
        output: all the points related to it 
        
        resume description: {resume_description}
        
        {format_instruction}
        
"""

prompt = ChatPromptTemplate(
    messages=[
        HumanMessagePromptTemplate.from_template(template)  
    ],
    input_variables=["resume_description"],
    partial_variables={"format_instruction": format_instruction},
    output_parser=output_parser # here we add the output parser to the Prompt template
)
messages = prompt.format_messages(resume_description = resume, format_instruction = format_instruction)

In [52]:
prompt.messages

[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['format_instruction', 'resume_description'], template='\n        Extract the following details from a description of a resume:\n            \n            \n        name: Heading of the work or experience\n        output: all the points related to it \n        \n        resume description: {resume_description}\n        \n        {format_instruction}\n        \n'))]

In [53]:
messages

[HumanMessage(content='\n        Extract the following details from a description of a resume:\n            \n            \n        name: Heading of the work or experience\n        output: all the points related to it \n        \n        resume description: RESEARCH EXPERIENCE\n\nHF Radar for Estimation of Ocean Surface Currents Map | Radar (Aug 2020 - Present)\nMaster’s Thesis | Prof. Siddharth Duttagupta\n\n« Reviewed the Method of Obtaining Doppler Spectrum received from Bragg Scattering by surface waves and\nvarious algorithms for estimating its location and velocity of current beneath it\n\n¢ Working on Design of Co-located Orthogonal Loops Antenna for Bearing Determination of surface waves\n\n \n\nVertex Coloring using Oscillators | Neuromorphic (Aug 2019 - Nov 2019)\nSupervised Research Exposition | Prof. Udayan Ganguly\n\n+ Solved Vertex Colouring using Ring Oscillator ,modelled vertex as a oscillator and edge as Coupling Capacitor\n¢ Solved the same problem using Relaxation Os

In [55]:
response = llm(messages[0].content)

In [56]:
print(response)

        
        
        
        
        
        
        
        
        

```json
{
	"title": "RESEARCH EXPERIENCE",
	"output": [
		{
			"title": "HF Radar for Estimation of Ocean Surface Currents Map",
			"output": [
				"Radar (Aug 2020 - Present)",
				"Master’s Thesis",
				"Prof. Siddharth Duttagupta",
				"Design of Co-located Orthogonal Loops Antenna for Bearing Determination of surface waves",
				"Solved Vertex Colouring using Ring Oscillator,modelled vertex as a oscillator and edge as Coupling Capacitor",
				"Solved the same problem using Relaxation Oscillator and compared the two methods."
			]
		},
		{
			"title": "Vertex Coloring using Oscillators",
			"output": [
				"Neuromorphic (Aug 2019 - Nov 2019)",
				"Supervised Research Exposition",
				"Prof. Udayan Ganguly",
				"Solved Vertex Colouring using Ring Oscillator,modelled vertex as a oscillator and edge as Coupling Capacitor",
				"Solved the same problem using Relaxation Oscillator and compared the two met