# 1. NLP LLM

## Set up Kernel and Required Dependencies

In [1]:
%pip install --upgrade pip
%pip install --disable-pip-version-check \
    torch==1.13.1 \
    torchdata==0.5.1 --quiet

%pip install \
    transformers==4.27.2 \
    datasets==2.11.0  --quiet

Collecting pip
  Downloading pip-23.2.1-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m21.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 23.1.2
    Uninstalling pip-23.1.2:
      Successfully uninstalled pip-23.1.2
Successfully installed pip-23.2.1
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m887.5/887.5 MB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.6/4.6 MB[0m [31m68.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m849.3/849.3 kB[0m [31m44.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m557.1/557.1 MB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m317.1/317.1 MB[0m [31m2.5 MB/s[0m eta [36m0:00:0

In [2]:
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM
from transformers import AutoTokenizer
from transformers import GenerationConfig

## Dataset: used only to check the effectiveness of the model

In [3]:
# Hugging Face Hub identifier of the dialogue-summarization dataset.
huggingface_dataset_name = "knkarthick/dialogsum"

# Downloads the dataset on first use and reuses the local cache afterwards
# (see the "Subsequent calls will reuse this data" message in the output).
dataset = load_dataset(path=huggingface_dataset_name)

Downloading readme:   0%|          | 0.00/4.65k [00:00<?, ?B/s]

Downloading and preparing dataset csv/knkarthick--dialogsum to /root/.cache/huggingface/datasets/knkarthick___csv/knkarthick--dialogsum-cd36827d3490488d/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1...


Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/11.3M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.35M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/442k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Dataset csv downloaded and prepared to /root/.cache/huggingface/datasets/knkarthick___csv/knkarthick--dialogsum-cd36827d3490488d/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

## A sample from the dataset

In [12]:
# Positions in the test split that are displayed here and reused for generation later.
example_indices = [40]

# Horizontal rule used between printed sections (99 dashes).
dash_line = '-' * 99

# Print each selected dialogue next to its human-written reference summary.
for example_number, index in enumerate(example_indices, start=1):
    sample = dataset['test'][index]

    print(dash_line)
    print('Example ', example_number)
    print(dash_line)

    print('INPUT DIALOGUE:')
    print(sample['dialogue'])
    print(dash_line)

    print('BASELINE HUMAN SUMMARY:')
    print(sample['summary'])
    print(dash_line)

    print()

---------------------------------------------------------------------------------------------------
Example  1
---------------------------------------------------------------------------------------------------
INPUT DIALOGUE:
#Person1#: What time is it, Tom?
#Person2#: Just a minute. It's ten to nine by my watch.
#Person1#: Is it? I had no idea it was so late. I must be off now.
#Person2#: What's the hurry?
#Person1#: I must catch the nine-thirty train.
#Person2#: You've plenty of time yet. The railway station is very close. It won't take more than twenty minutes to get there.
---------------------------------------------------------------------------------------------------
BASELINE HUMAN SUMMARY:
#Person1# is in a hurry to catch a train. Tom tells #Person1# there is plenty of time.
---------------------------------------------------------------------------------------------------



## Model and tokenizer

In [13]:
# Checkpoint identifier shared by the tokenizer and the model.
model_name = 'google/flan-t5-base'

# Load the (fast) tokenizer and the seq2seq model from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

## Model predictions on the sample data

In [14]:
# Feed the raw dialogue (no instruction text around it) to the model and
# print the generation next to the human reference summary.
for example_number, index in enumerate(example_indices, start=1):
    record = dataset['test'][index]
    dialogue = record['dialogue']
    summary = record['summary']

    # Tokenize, generate at most 50 new tokens, then decode the first sequence.
    inputs = tokenizer(dialogue, return_tensors='pt')
    generated_ids = model.generate(inputs["input_ids"], max_new_tokens=50)
    output = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

    print(dash_line)
    print('Example ', example_number)
    print(dash_line)
    print(f'INPUT PROMPT:\n{dialogue}')
    print(dash_line)
    print(f'BASELINE HUMAN SUMMARY:\n{summary}')
    print(dash_line)
    print(f'MODEL GENERATION - WITHOUT PROMPT ENGINEERING:\n{output}\n')

---------------------------------------------------------------------------------------------------
Example  1
---------------------------------------------------------------------------------------------------
INPUT PROMPT:
#Person1#: What time is it, Tom?
#Person2#: Just a minute. It's ten to nine by my watch.
#Person1#: Is it? I had no idea it was so late. I must be off now.
#Person2#: What's the hurry?
#Person1#: I must catch the nine-thirty train.
#Person2#: You've plenty of time yet. The railway station is very close. It won't take more than twenty minutes to get there.
---------------------------------------------------------------------------------------------------
BASELINE HUMAN SUMMARY:
#Person1# is in a hurry to catch a train. Tom tells #Person1# there is plenty of time.
---------------------------------------------------------------------------------------------------
MODEL GENERATION - WITHOUT PROMPT ENGINEERING:
Person1: It's ten to nine.



## Zero-shot prompting on my data
### Data is taken from one of my summarization projects

In [10]:
# Call transcript used as the custom summarization input. It is written as
# adjacent string literals (one per speaker turn) that Python concatenates
# into a single string; the embedded "\n" characters are part of the data.
my_data = (
    "prashanth: welcome to celebal tech ivr automation demo. hi how are you? "
    "manuj: i'm fine. "
    "prashanth: thank you for calling city health insurance my name is prashanth. how can i help you today? "
    "manuj: i have an health policy with you. i wanted to know do i need to renew the policy?\n"
    "prashanth: of course i can do that. could you please confirm few details before i give you the necessary information? could you please tell me your registered mobile number? "
    "manuj: yeah sure it's 9998887776. i can confirm that you have active health insurance policy with us. can you confirm your name and date of birth for authentication? "
    "manuj: sure my name is manuj and my date of birth is 20 june 1999. "
    "prashanth: thanks for confirmation. i can see that your policy is due for the renewal on 25th august 2021, which is in 2 weeks from now. you can renew either for one year or 2 year. the one year options is 2002 years option is 3690. "
    "manuj: i want to opt for 2 year option. "
    "prashanth: would you like me to a payment link of renewal for this option? "
    "manuj: yes, sure. "
    "prashanth: sir i will share the link with you shortly.\n"
    "manuj: i have a doubt. will this policy cover my family as well?\n"
    "prashanth: yes, your current policy covers you, your spouse, and your parent.\n"
    "manuj: okay, can you send me the link? payment link. "
    "prashanth: of course you will receive a payment link on your registered mobile number and email address. okay, thank you."
)

In [15]:
# Hand-written reference summary of the `my_data` conversation.
# NOTE(review): the text gives "25th august 2021" as the date of birth, but in
# the dialogue that is the renewal due date (the stated date of birth is
# 20 june 1999) -- confirm with the author before relying on this summary.
# The variable name keeps its original spelling because a later cell reads it.
human_sumamry = (
    "manuj wanted to know about the renewal of his policy. "
    "his mobile number is 9998887776, date of birth is 25th august 2021, "
    "prashanth has send him the renewal payemt link"
)

In [17]:

# Use the custom conversation as the text to summarize.
dialogue = my_data

# Zero-shot prompt: an instruction, the conversation, and a "Summary:" cue,
# with no worked examples.
prompt = f"""
Summarize the following conversation.

{dialogue}

Summary:
"""

# Tokenize the constructed prompt (not the bare dialogue), generate at most
# 50 new tokens, and decode the first returned sequence.
inputs = tokenizer(prompt, return_tensors='pt')
generated_ids = model.generate(inputs["input_ids"], max_new_tokens=50)
output = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

# Show the hand-written summary followed by the model's output.
print(f'Summary written by me: {human_sumamry}')
print()
print()
print(f'MODEL GENERATION - ZERO SHOT:\n{output}\n')

Summary written by me: manuj wanted to know about the renewal of his policy. his mobile number is 9998887776, date of birth is 25th august 2021, prashanth has send him the renewal payemt link


MODEL GENERATION - ZERO SHOT:
Manuj has an active health insurance policy with celebal tech. He needs to renew it on 25th august 2021. He needs to pay for the renewal on 25th august 2021.



# 2. Computer vision

DeepSORT (Deep Simple Online and Realtime Tracking) is an algorithm designed for object tracking in computer vision applications. It combines deep learning techniques with traditional tracking algorithms to achieve high-quality object tracking in real-time or near real-time scenarios.

1. DeepSORT builds upon the SORT (Simple Online and Realtime Tracking) algorithm, enhancing its tracking capabilities.

2. It takes per-frame detections from a deep neural network object detector and uses them to identify and track objects in video streams.

3. The detector produces bounding boxes for the objects in each frame, and a separate appearance network typically computes a feature embedding for each detected box.

4. Feature embeddings capture unique characteristics of objects and enable matching objects across frames.

5. DeepSORT employs the Kalman filter for object motion prediction, allowing it to estimate an object's future position even when it's temporarily occluded.

6. To associate detections across frames, DeepSORT uses a combination of appearance similarity (feature embeddings) and spatial proximity.

7. It maintains a list of active tracks and uses a matching algorithm to assign detections to tracks.

8. DeepSORT also includes mechanisms to handle track management tasks such as birth, death, and update of tracks.

9. By fusing deep learning with traditional tracking techniques, DeepSORT achieves robust object tracking even in challenging scenarios with occlusions, temporary disappearances, and clutter.

10. DeepSORT is widely used in applications like surveillance, autonomous vehicles, and sports analytics, where real-time and accurate object tracking is crucial.