# Using Zero-Shot and Few-Shot Learning with ChatGroq Models

In [103]:
import csv
import os

import numpy as np
import pandas as pd
from langchain_groq.chat_models import ChatGroq

In [87]:
# Groq API credentials and model catalogue.
# The token is taken from the GROQ_API_KEY environment variable so a real key
# never has to be saved inside the notebook. The original placeholder string is
# kept as the fallback, so the cell still runs when the variable is unset.
Groq_Token = os.environ.get("GROQ_API_KEY", "API_Key_here")

# Short alias used in this notebook -> model identifier passed to ChatGroq.
groq_models = {
    "llama3-70b": "llama3-70b-8192",
    "mixtral": "mixtral-8x7b-32768",
    "gemma-7b": "gemma-7b-it",
    "llama3.1-70b": "llama-3.1-70b-versatile",
    "llama3-8b": "llama3-8b-8192",
    "llama3.1-8b": "llama-3.1-8b-instant",
    "gemma-9b": "gemma2-9b-it",
}

# Zero Shot Learning

In [88]:
# Zero-shot prompt: the model receives the feature names and the unlabeled
# test samples, with no worked examples.
features_row = 0  # index of the CSV row used as the feature-name list
test_rows = [10, 50, 70, 90, 120, 150] #STANDING, SITTING, LAYING, WALKING, WALKING_DOWNSTAIRS, WALKING_UPSTAIRS

# Parse the CSV once; the original re-opened and re-parsed it for every row.
# newline='' is the mode the csv module documents for files passed to csv.reader.
with open('TestDataset_561.csv', 'r', newline='') as csv_file:
    rows = list(csv.reader(csv_file))

# Fail immediately on a missing row. The original only printed a message and
# then crashed (NameError / IndexError) while the prompt was being formatted.
if features_row >= len(rows):
    raise IndexError("Error in features reading")
# The last two columns of every row are dropped -- presumably they are
# label/subject columns rather than features; verify against the CSV.
features_string = ', '.join(rows[features_row][:-2])

test_rows_string = []
for test_rown in test_rows:
    if test_rown >= len(rows):
        raise IndexError("Error in test row reading")
    test_rows_string.append(', '.join(rows[test_rown][:-2]))

# Numbered "1. ...", "2. ..." list built from however many samples were
# selected, so the prompt is no longer hard-wired to exactly six entries.
numbered_samples = '\n'.join(
    f"{number}. {sample}" for number, sample in enumerate(test_rows_string, start=1)
)

query = f"""
* You are a activity recognition model. 
* Your task is to analyze the given data in form of certain features and classify the activity as 'LAYING', 'SITTING', 'STANDING', 'WALKING', 'WALKING_DOWNSTAIRS' or 'WALKING_UPSTAIRS'. 
* The data is normalized between [-1, 1]
* Provide the activity label and, if necessary, a brief explanation of your reasoning.
* The complete list of features is as follows:
{features_string}


Analyze the following data and predict the activities:
{numbered_samples}
"""

#Only llama3.1-8b and llama3.1-70b accepted such high token queries
#Both have been used to show a generalized result
for position, model_name in enumerate(["llama3.1-8b", "llama3.1-70b"]):
    llm = ChatGroq(model=groq_models[model_name], api_key=Groq_Token, temperature=0)
    answer = llm.invoke(query)

    # Every header after the first starts with a blank line and a space,
    # which reproduces the output layout of the original cell.
    header_prefix = "" if position == 0 else "\n "
    print(f"{header_prefix}Answer from {model_name}: ")
    print(answer.content)

Answer from llama3.1-8b: 
Based on the given data, I will analyze each data point and predict the activity.

**Data Point 1:**
1.  2.7574570e-001, -1.0371994e-002, -9.9775890e-002, -9.9837313e-001, -9.8693291e-001, -9.9102190e-001, -9.9866291e-001, -9.8713965e-001, -9.9108432e-001, -9.4376125e-001, -5.6428961e-001, -8.1432563e-001,  8.4975268e-001,  6.8922983e-001,  8.4737582e-001, -9.9110378e-001, -9.9999154e-001, -9.9983492e-001, -9.9973485e-001, -9.9859886e-001, -9.8980896e-001, -9.8928972e-001, -7.6858336e-001, -4.3323793e-001, -3.9328675e-001,  3.9409003e-001, -1.7537723e-001,  1.3718364e-001,  1.8434982e-001,  8.3437153e-002, -2.1274865e-002,  3.8584767e-002, -6.5816959e-002,  4.2912522e-001, -2.7979818e-001,  1.5752764e-001, -5.2535204e-002,  1.2899289e-002, -7.8381560e-002,  2.5741515e-001,  9.2417336e-001, -3.1759660e-001,  1.2458390e-001, -9.9800172e-001, -9.8939331e-001, -9.9196835e-001, -9.9791812e-001, -9.8990488e-001, -9.9261060e-001,  8.5020163e-001, -3.3207944e-001,  1.

# Few Shot Learning

In [91]:
# Few-shot prompt: same task as the zero-shot cell, but labeled rows from
# Dataset_561.csv are included in the prompt as examples.
features_row = 0  # index of the CSV row used as the feature-name list

#This data will be given as examples for the LLM to learn and give better predictions
# NOTE(review): 2410 lies between the WALKING (1990, 2160) and WALKING_DOWNSTAIRS
# (2670, 2840) indices -- confirm that row really is a WALKING_UPSTAIRS sample.
train_rows = [[10, 190], [730, 890], [1380, 1540], [1990, 2160], [2670, 2840], [2410, 3590]]
train_labels = ["STANDING", "SITTING", "LAYING", "WALKING", "WALKING_DOWNSTAIRS", "WALKING_UPSTAIRS"]
test_rows = [10, 50, 70, 90, 120, 150] #STANDING, SITTING, LAYING, WALKING, WALKING_DOWNSTAIRS, WALKING_UPSTAIRS

# Parse each CSV exactly once; the original re-read the training file once per
# label and the test file once per test row.
# newline='' is the mode the csv module documents for files passed to csv.reader.
with open('TestDataset_561.csv', 'r', newline='') as csv_file:
    rows = list(csv.reader(csv_file))
with open('Dataset_561.csv', 'r', newline='') as csv_file:
    train_table = list(csv.reader(csv_file))

# Fail immediately on a missing feature row. The original printed a message and
# then continued with an undefined (or stale, from an earlier cell) features_string.
if features_row >= len(rows):
    raise IndexError("Error in features reading")
# The last two columns of every row are dropped -- presumably they are
# label/subject columns rather than features; verify against the CSV.
features_string = ', '.join(rows[features_row][:-2])

# One "<values>: <label>" line per example row. A missing training row is
# reported and skipped, exactly as before (the prompt simply has fewer examples).
train_examples = []
for label_rows, label in zip(train_rows, train_labels):
    for rown in label_rows:
        if rown < len(train_table):
            train_examples.append(', '.join(train_table[rown][:-2]) + ': ' + label)
        else:
            print("Error in train row reading")
# Each example is preceded by a newline, matching the original concatenation.
train_string = ''.join('\n' + example for example in train_examples)

# A missing test row would leave the prompt incomplete, so stop right away
# rather than printing a message and crashing later on the prompt formatting.
test_rows_string = []
for test_rown in test_rows:
    if test_rown >= len(rows):
        raise IndexError("Error in test row reading")
    test_rows_string.append(', '.join(rows[test_rown][:-2]))

# Numbered list built from however many samples were selected.
numbered_samples = '\n'.join(
    f"{number}. {sample}" for number, sample in enumerate(test_rows_string, start=1)
)

query = f"""
* You are a activity recognition model. 
* Your task is to analyze the given data in form of certain features and classify the activity as 'LAYING', 'SITTING', 'STANDING', 'WALKING', 'WALKING_DOWNSTAIRS' or 'WALKING_UPSTAIRS'. 
* The data is normalized between [-1, 1]
* Provide the activity label and, if necessary, a brief explanation of your reasoning.
* The complete list of features is as follows:
{features_string}

A few examples are as follows:
{train_string}

Analyze the following data and predict the activities:
{numbered_samples}
"""

for position, model_name in enumerate(["llama3.1-8b", "llama3.1-70b"]):
    llm = ChatGroq(model=groq_models[model_name], api_key=Groq_Token, temperature=0)
    answer = llm.invoke(query)

    # Every header after the first starts with a blank line and a space,
    # which reproduces the output layout of the original cell.
    header_prefix = "" if position == 0 else "\n "
    print(f"{header_prefix}Answer from {model_name}: ")
    print(answer.content)

Answer from llama3.1-8b: 
Based on the provided data, the activity labels are:

1.  2.7574570e-001, -1.0371994e-002, -9.9775890e-002, -9.9837313e-001, -9.8693291e-001, -9.9102190e-001, -9.9866291e-001, -9.8713965e-001, -9.9108432e-001, -9.4376125e-001, -5.6428961e-001, -8.1432563e-001,  8.4975268e-001,  6.8922983e-001,  8.4737582e-001, -9.9110378e-001, -9.9999154e-001, -9.9983492e-001, -9.9973485e-001, -9.9859886e-001, -9.8980896e-001, -9.8928972e-001, -7.6858336e-001, -4.3323793e-001, -3.9328675e-001,  3.9409003e-001, -1.7537723e-001,  1.3718364e-001,  1.8434982e-001,  8.3437153e-002, -2.1274865e-002,  3.8584767e-002, -6.5816959e-002,  4.2912522e-001, -2.7979818e-001,  1.5752764e-001, -5.2535204e-002,  1.2899289e-002, -7.8381560e-002,  2.5741515e-001,  9.2417336e-001, -3.1759660e-001,  1.2458390e-001, -9.9800172e-001, -9.8939331e-001, -9.9196835e-001, -9.9791812e-001, -9.8990488e-001, -9.9261060e-001,  8.5020163e-001, -3.3207944e-001,  1.1805617e-001,  9.4442809e-001, -2.8847057e-001,

In [92]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix

In [99]:
# Score one set of predictions against the true activities of the six test samples.
# The predicted labels are hard-coded here -- presumably transcribed from a
# model answer above; verify against the run being reported.
actual_labels = ['STANDING', 'SITTING', 'LAYING', 'WALKING', 'WALKING_DOWNSTAIRS', 'WALKING_UPSTAIRS']
predicted_labels = ['STANDING', 'STANDING', 'SITTING', 'SITTING', 'WALKING_UPSTAIRS', 'WALKING_UPSTAIRS']

y_test = pd.DataFrame(actual_labels)
y_pred = pd.DataFrame(predicted_labels)

# Precision and recall are averaged over classes weighted by support;
# zero_division=0 scores classes that were never predicted as 0.
weighted_options = {'average': 'weighted', 'zero_division': 0}

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, **weighted_options)
recall = recall_score(y_test, y_pred, **weighted_options)
# The confusion matrix is computed but not printed in this cell.
conf_matrix = confusion_matrix(y_test, y_pred)

for metric_name, metric_value in (
    ('Accuracy', accuracy),
    ('Precision', precision),
    ('Recall', recall),
):
    print(f'{metric_name}: {metric_value:.2f}')

Accuracy: 0.33
Precision: 0.17
Recall: 0.33


Testing how the model classifies activities that it has never seen before.<br>
We removed STANDING, SITTING and LAYING from the label set given to the model, then tested samples from all six activities to see the results.

In [101]:
# Zero-shot prompt restricted to the three walking labels. The test samples
# still cover all six activities, so three of them have no matching label.
features_row = 0  # index of the CSV row used as the feature-name list
test_rows = [10, 50, 70, 90, 120, 150] #STANDING, SITTING, LAYING, WALKING, WALKING_DOWNSTAIRS, WALKING_UPSTAIRS

# Parse the CSV once; the original re-opened and re-parsed it for every row.
# newline='' is the mode the csv module documents for files passed to csv.reader.
with open('TestDataset_561.csv', 'r', newline='') as csv_file:
    rows = list(csv.reader(csv_file))

# Fail immediately on a missing row. The original only printed a message and
# then continued with a stale features_string or crashed on the prompt formatting.
if features_row >= len(rows):
    raise IndexError("Error in features reading")
# The last two columns of every row are dropped -- presumably they are
# label/subject columns rather than features; verify against the CSV.
features_string = ', '.join(rows[features_row][:-2])

test_rows_string = []
for test_rown in test_rows:
    if test_rown >= len(rows):
        raise IndexError("Error in test row reading")
    test_rows_string.append(', '.join(rows[test_rown][:-2]))

# Numbered list built from however many samples were selected.
numbered_samples = '\n'.join(
    f"{number}. {sample}" for number, sample in enumerate(test_rows_string, start=1)
)

query = f"""
* You are a activity recognition model. 
* Your task is to analyze the given data in form of certain features and classify the activity as 'WALKING', 'WALKING_DOWNSTAIRS' or 'WALKING_UPSTAIRS'. 
* The data is normalized between [-1, 1]
* Provide the activity label and, if necessary, a brief explanation of your reasoning.
* The complete list of features is as follows:
{features_string}


Analyze the following data and predict the activities:
{numbered_samples}
"""

for position, model_name in enumerate(["llama3.1-8b", "llama3.1-70b"]):
    llm = ChatGroq(model=groq_models[model_name], api_key=Groq_Token, temperature=0)
    answer = llm.invoke(query)

    # Every header after the first starts with a blank line and a space,
    # which reproduces the output layout of the original cell.
    header_prefix = "" if position == 0 else "\n "
    print(f"{header_prefix}Answer from {model_name}: ")
    print(answer.content)

Answer from llama3.1-8b: 
Based on the given data, I will analyze each data point and predict the activity as 'WALKING', 'WALKING_DOWNSTAIRS' or 'WALKING_UPSTAIRS'.

**Data Point 1:**
1.  2.7574570e-001, -1.0371994e-002, -9.9775890e-002, -9.9837313e-001, -9.8693291e-001, -9.9102190e-001, -9.9866291e-001, -9.8713965e-001, -9.9108432e-001, -9.4376125e-001, -5.6428961e-001, -8.1432563e-001,  8.4975268e-001,  6.8922983e-001,  8.4737582e-001, -9.9110378e-001, -9.9999154e-001, -9.9983492e-001, -9.9973485e-001, -9.9859886e-001, -9.8980896e-001, -9.8928972e-001, -7.6858336e-001, -4.3323793e-001, -3.9328675e-001,  3.9409003e-001, -1.7537723e-001,  1.3718364e-001,  1.8434982e-001,  8.3437153e-002, -2.1274865e-002,  3.8584767e-002, -6.5816959e-002,  4.2912522e-001, -2.7979818e-001,  1.5752764e-001, -5.2535204e-002,  1.2899289e-002, -7.8381560e-002,  2.5741515e-001,  9.2417336e-001, -3.1759660e-001,  1.2458390e-001, -9.9800172e-001, -9.8939331e-001, -9.9196835e-001, -9.9791812e-001, -9.8990488e-00

In [102]:
# Few-shot prompt restricted to the three walking labels: only walking examples
# are supplied, while the test samples still cover all six activities.
features_row = 0  # index of the CSV row used as the feature-name list

#This data will be given as examples for the LLM to learn and give better predictions
# NOTE(review): 2410 lies between the WALKING (1990, 2160) and WALKING_DOWNSTAIRS
# (2670, 2840) indices -- confirm that row really is a WALKING_UPSTAIRS sample.
train_rows = [[1990, 2160], [2670, 2840], [2410, 3590]]
train_labels = ["WALKING", "WALKING_DOWNSTAIRS", "WALKING_UPSTAIRS"]
test_rows = [10, 50, 70, 90, 120, 150] #STANDING, SITTING, LAYING, WALKING, WALKING_DOWNSTAIRS, WALKING_UPSTAIRS

# Parse each CSV exactly once; the original re-read the training file once per
# label and the test file once per test row.
# newline='' is the mode the csv module documents for files passed to csv.reader.
with open('TestDataset_561.csv', 'r', newline='') as csv_file:
    rows = list(csv.reader(csv_file))
with open('Dataset_561.csv', 'r', newline='') as csv_file:
    train_table = list(csv.reader(csv_file))

# Fail immediately on a missing feature row. The original printed a message and
# then continued with an undefined (or stale, from an earlier cell) features_string.
if features_row >= len(rows):
    raise IndexError("Error in features reading")
# The last two columns of every row are dropped -- presumably they are
# label/subject columns rather than features; verify against the CSV.
features_string = ', '.join(rows[features_row][:-2])

# One "<values>: <label>" line per example row. A missing training row is
# reported and skipped, exactly as before (the prompt simply has fewer examples).
train_examples = []
for label_rows, label in zip(train_rows, train_labels):
    for rown in label_rows:
        if rown < len(train_table):
            train_examples.append(', '.join(train_table[rown][:-2]) + ': ' + label)
        else:
            print("Error in train row reading")
# Each example is preceded by a newline, matching the original concatenation.
train_string = ''.join('\n' + example for example in train_examples)

# A missing test row would leave the prompt incomplete, so stop right away
# rather than printing a message and crashing later on the prompt formatting.
test_rows_string = []
for test_rown in test_rows:
    if test_rown >= len(rows):
        raise IndexError("Error in test row reading")
    test_rows_string.append(', '.join(rows[test_rown][:-2]))

# Numbered list built from however many samples were selected.
numbered_samples = '\n'.join(
    f"{number}. {sample}" for number, sample in enumerate(test_rows_string, start=1)
)

query = f"""
* You are a activity recognition model. 
* Your task is to analyze the given data in form of certain features and classify the activity as 'WALKING', 'WALKING_DOWNSTAIRS' or 'WALKING_UPSTAIRS'. 
* The data is normalized between [-1, 1]
* Provide the activity label and, if necessary, a brief explanation of your reasoning.
* The complete list of features is as follows:
{features_string}

A few examples are as follows:
{train_string}

Analyze the following data and predict the activities:
{numbered_samples}
"""

for position, model_name in enumerate(["llama3.1-8b", "llama3.1-70b"]):
    llm = ChatGroq(model=groq_models[model_name], api_key=Groq_Token, temperature=0)
    answer = llm.invoke(query)

    # Every header after the first starts with a blank line and a space,
    # which reproduces the output layout of the original cell.
    header_prefix = "" if position == 0 else "\n "
    print(f"{header_prefix}Answer from {model_name}: ")
    print(answer.content)

Answer from llama3.1-8b: 
Based on the provided data, I will analyze the activities and predict the labels.

**Data 1:**
2.7574570e-001, -1.0371994e-002, -9.9775890e-002, -9.9837313e-001, -9.8693291e-001, -9.9102190e-001, -9.9866291e-001, -9.8713965e-001, -9.9108432e-001, -9.4376125e-001, -5.6428961e-001, -8.1432563e-001,  8.4975268e-001,  6.8922983e-001,  8.4737582e-001, -9.9110378e-001, -9.9999154e-001, -9.9983492e-001, -9.9973485e-001, -9.9859886e-001, -9.8980896e-001, -9.8928972e-001, -7.6858336e-001, -4.3323793e-001, -3.9328675e-001,  3.9409003e-001, -1.7537723e-001,  1.3718364e-001,  1.8434982e-001,  8.3437153e-002, -2.1274865e-002,  3.8584767e-002, -6.5816959e-002,  4.2912522e-001, -2.7979818e-001,  1.5752764e-001, -5.2535204e-002,  1.2899289e-002, -7.8381560e-002,  2.5741515e-001,  9.2417336e-001, -3.1759660e-001,  1.2458390e-001, -9.9800172e-001, -9.8939331e-001, -9.9196835e-001, -9.9791812e-001, -9.8990488e-001, -9.9261060e-001,  8.5020163e-001, -3.3207944e-001,  1.1805617e-0

Testing the model with random data

In [112]:
# Zero-shot prompt on synthetic input: four rows of 561 uniform random values
# in [-1, 1) take the place of real sensor features.
# A seeded generator makes the "random" samples (and therefore the prompt)
# reproducible on every run; the original drew unseeded values.
random_generator = np.random.default_rng(42)
random_data = 2 * random_generator.random((4, 561)) - 1
test_rows_string = [np.array2string(sample) for sample in random_data]

# Numbered list built from however many random samples were generated.
numbered_samples = '\n'.join(
    f"{number}. {sample}" for number, sample in enumerate(test_rows_string, start=1)
)

# features_string is not rebuilt in this cell; it is whatever an earlier cell
# left in the kernel, so those cells must have been run first.
query = f"""
* You are a activity recognition model. 
* Your task is to analyze the given data in form of certain features and classify the activity as 'LAYING', 'SITTING', 'STANDING', 'WALKING', 'WALKING_DOWNSTAIRS' or 'WALKING_UPSTAIRS'. 
* The data is normalized between [-1, 1]
* Provide the activity label and, if necessary, a brief explanation of your reasoning.
* The complete list of features is as follows:
{features_string}


Analyze the following data and predict the activities:
{numbered_samples}
"""

for position, model_name in enumerate(["llama3.1-8b", "llama3.1-70b"]):
    llm = ChatGroq(model=groq_models[model_name], api_key=Groq_Token, temperature=0)
    answer = llm.invoke(query)

    # Every header after the first starts with a blank line and a space,
    # which reproduces the output layout of the original cell.
    header_prefix = "" if position == 0 else "\n "
    print(f"{header_prefix}Answer from {model_name}: ")
    print(answer.content)

Answer from llama3.1-8b: 
Based on the given data, I will analyze and predict the activities.

**Data 1:**
The data is:
[ 0.59123672  0.35350348 -0.77343102 -0.50677985  0.74054349  0.07768547
  0.0089077  -0.29573961  0.40446002  0.61890575  0.17020325  0.35742974
 -0.36427093  0.96938591 -0.16340231  0.42793073 -0.95093103  0.92171122
  0.93168836  0.91287292 -0.60621623 -0.67451401 -0.45376676 -0.16655504
  0.58293793 -0.62396803 -0.50036729 -0.50149461  0.16017251  0.20727455
 -0.61359754  0.23671121 -0.48184388  0.43946153  0.98322993  0.6280201
  0.28271208  0.50726997  0.97818728 -0.40108369  0.19267865 -0.84991435
 -0.38863946  0.17465254  0.9551787  -0.84605211  0.4778368   0.65142686
  0.892016   -0.90817933 -0.85114402  0.25970905  0.02111102  0.15016561
 -0.36357625  0.70385034 -0.51774498  0.28889535 -0.16313127  0.94631692
 -0.2604338  -0.25377385  0.00355832 -0.47267617 -0.86381615  0.79452095
 -0.49483778 -0.08845178  0.38745728  0.69506822 -0.65083075  0.27295377
  0.8

In [113]:
# Few-shot prompt on synthetic input: real labeled rows are given as examples,
# but the samples to classify are uniform random values in [-1, 1).
features_row = 0  # index of the CSV row used as the feature-name list

#This data will be given as examples for the LLM to learn and give better predictions
# NOTE(review): 2410 lies between the WALKING (1990, 2160) and WALKING_DOWNSTAIRS
# (2670, 2840) indices -- confirm that row really is a WALKING_UPSTAIRS sample.
train_rows = [[10, 190], [730, 890], [1380, 1540], [1990, 2160], [2670, 2840], [2410, 3590]]
train_labels = ["STANDING", "SITTING", "LAYING", "WALKING", "WALKING_DOWNSTAIRS", "WALKING_UPSTAIRS"]

# Parse each CSV exactly once; the original re-read the training file once per label.
# newline='' is the mode the csv module documents for files passed to csv.reader.
with open('TestDataset_561.csv', 'r', newline='') as csv_file:
    rows = list(csv.reader(csv_file))
with open('Dataset_561.csv', 'r', newline='') as csv_file:
    train_table = list(csv.reader(csv_file))

# Fail immediately on a missing feature row. The original printed a message and
# then continued with an undefined (or stale, from an earlier cell) features_string.
if features_row >= len(rows):
    raise IndexError("Error in features reading")
# The last two columns of every row are dropped -- presumably they are
# label/subject columns rather than features; verify against the CSV.
features_string = ', '.join(rows[features_row][:-2])

# One "<values>: <label>" line per example row. A missing training row is
# reported and skipped, exactly as before (the prompt simply has fewer examples).
train_examples = []
for label_rows, label in zip(train_rows, train_labels):
    for rown in label_rows:
        if rown < len(train_table):
            train_examples.append(', '.join(train_table[rown][:-2]) + ': ' + label)
        else:
            print("Error in train row reading")
# Each example is preceded by a newline, matching the original concatenation.
train_string = ''.join('\n' + example for example in train_examples)

# A seeded generator makes the "random" samples (and therefore the prompt)
# reproducible on every run; the original drew unseeded values.
random_generator = np.random.default_rng(42)
random_data = 2 * random_generator.random((4, 561)) - 1
test_rows_string = [np.array2string(sample) for sample in random_data]

# Numbered list built from however many random samples were generated.
numbered_samples = '\n'.join(
    f"{number}. {sample}" for number, sample in enumerate(test_rows_string, start=1)
)

query = f"""
* You are a activity recognition model. 
* Your task is to analyze the given data in form of certain features and classify the activity as 'LAYING', 'SITTING', 'STANDING', 'WALKING', 'WALKING_DOWNSTAIRS' or 'WALKING_UPSTAIRS'. 
* The data is normalized between [-1, 1]
* Provide the activity label and, if necessary, a brief explanation of your reasoning.
* The complete list of features is as follows:
{features_string}

A few examples are as follows:
{train_string}

Analyze the following data and predict the activities:
{numbered_samples}
"""

for position, model_name in enumerate(["llama3.1-8b", "llama3.1-70b"]):
    llm = ChatGroq(model=groq_models[model_name], api_key=Groq_Token, temperature=0)
    answer = llm.invoke(query)

    # Every header after the first starts with a blank line and a space,
    # which reproduces the output layout of the original cell.
    header_prefix = "" if position == 0 else "\n "
    print(f"{header_prefix}Answer from {model_name}: ")
    print(answer.content)

Answer from llama3.1-8b: 
Based on the provided data, I will analyze the activities and predict the activity labels.

**Data Analysis**

The data consists of 4 samples, each with 57 features. The features are normalized between [-1, 1].

**Sample 1**

The first sample has the following features:

[ 4.67549950e-01  5.61899778e-01 -2.71289076e-02  5.46914863e-01
 -2.45753590e-01  2.29554869e-01  4.06716427e-01  8.23122245e-02
 -9.31387187e-01  5.43764145e-01  5.05980141e-01 -8.19012118e-01
  9.27773940e-01  4.27866485e-01 -6.82776227e-01 -3.94272035e-01
  5.36000781e-01 -7.43886822e-01  6.65786241e-01 -3.69816089e-01
  9.61358382e-01  2.06597684e-01  1.66911367e-01 -2.34754597e-01
  8.44517158e-01  9.04722565e-01  7.69189193e-01  7.95653240e-01
 -8.20903503e-01  7.67519603e-01 -6.36039072e-01  9.68452488e-01
  9.91518770e-02  9.46017471e-01 -4.16473075e-01 -4.38125188e-01
 -4.32167882e-01 -6.42348029e-01  7.08524493e-01  8.36620046e-01
 -8.50750221e-01 -9.05911910e-01  5.03728837e-01  1.