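## Format Dataset V3

Combine the v2 fine-tuning set (`v2_fine_tune.csv`) with the new v3 examples (`version_3.csv`), add an instruction prompt to the v3 rows, shuffle the rows, and save the result as `fine_tune_v3.csv`.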

In [1]:
import pandas as pd

In [2]:
# Load the previous (v2) fine-tuning dataset from the working directory.
df_old = pd.read_csv("./v2_fine_tune.csv")

In [3]:
df_old.shape

(10693, 5)

In [4]:
df_old.columns

Index(['category', 'instruction', 'context', 'question', 'response'], dtype='object')

In [5]:
df_old.sample(10)

Unnamed: 0,category,instruction,context,question,response
4787,translate,Translate the given text from English language...,,The children loved visiting their grandparents...,Ketaketi haru afno hajurbuwa hajurama ko ghar ...
166,open_qa,You are a helpful assistant. Answer the given ...,,Nepal ko rajdhani ko naam ke ho?,Nepal ko rajdhani Kathmandu ho.
807,translate,Translate the text below from English to Nepali:,,"Today I felt like eating momo, but it’s a bit ...","Aajai momo khana man lagyo, tara ghar ma banun..."
7971,RAG,Answer the following question by looking at th...,Nepal ko ek prasiddha sangit gatividhi 'Nepali...,Nepal ko ek prasiddha sangit gatividhi k ho?,"Nepali Lok Geet, Nepal ko ek prasiddha sangit ..."
4024,RAG,Answer the following question by looking at th...,"SmartHub Electronics, Biratnagar-5, Mahendra R...",Refund approve bhaye pachi kati din ma process...,Refund approve bhaye pachi 7 din bhitra proces...
3338,translate,Translate the text below from English to Nepali:,,Music gives voice to human emotions and can br...,Sangeet le manushya ko bhawana haru lai awaz d...
8340,translate,Translate the text below from Nepali to English:,,Football ek yasto sport ho jasle competition m...,Football is a sport that thrives on competitio...
3807,RAG,Answer the following question by looking at th...,"Dharan-14, Bhanu Chowk ma TechWorld Electronic...",Eco-friendly packaging ma k use garincha?,Recyclable materials.
318,translate,Translate the text below from English to Nepali:,,The education sector in Nepal is facing challe...,Nepal ko education sector maa chunautiharu chh...
5258,translate,Translate the text below from Nepali to English:,,Tapailai le malai recommend gareko kitab ekdam...,The book you recommended to me was absolutely ...


## New Dataset

In [9]:
# Load the new (v3) examples; unlike the v2 file, this CSV has no "instruction" column.
df_new = pd.read_csv("./version_3.csv")

In [10]:
df_new.shape

(5921, 4)

In [11]:
df_new.columns

Index(['category', 'context', 'question', 'response'], dtype='object')

In [24]:
df_new.sample(5)

Unnamed: 0,category,context,question,response
3101,alpaca,"No, we require a valid reason for all returns,...",Store ma product ko return without reason milcha?,"Hajur, reason bina product return garna mildai..."
4453,alpaca,,What is the concept of the carbon footprint? E...,Carbon footprint bhaneko measure ho jasma huma...
3590,alpaca,Yo laptop ko RAM upgrade garna sakincha?,Classify whether this query is related to ecom...,Ecommerce
2794,alpaca,Serena Williams has won 23 Grand Slam singles ...,Serena Williams le kati Grand Slam titles jite...,Serena Williams le afno career ma 23 Grand Sla...
5805,nep-eng,,momo ko recipe sikna maile YouTube video herne...,"To learn the momo recipe, I am thinking of wat..."


In [18]:
df_new["category"].unique()

array(['alpaca', 'open_qa', 'RAG', 'nep-eng'], dtype=object)

In [21]:
def generate_instruction(category):
    """Return the instruction prompt that goes with a dataset category.

    Parameters
    ----------
    category : the category label of a row (e.g. "nep-eng", "open_qa", "RAG").

    Returns
    -------
    str
        The fixed prompt for the three recognised categories, or an empty
        string for any other value (for example "alpaca").
    """
    # Ordered (label, prompt) pairs; labels are compared with == so the
    # matching rules are exactly those of a plain if/elif chain.
    prompt_table = (
        ("nep-eng", "Translate the given text in Nepali to English."),
        ("open_qa", "You are a helpful AI assistant. Answer the user question:"),
        ("RAG", "Answer the question using the context if it is helpful to answer the question."),
    )
    for label, prompt in prompt_table:
        if category == label:
            return prompt
    # No recognised label: the row gets no instruction text.
    return ""

In [25]:
# Derive each row's instruction prompt from its category label.
df_new["instruction"] = df_new["category"].map(generate_instruction)

In [29]:
df_new.sample(15)

Unnamed: 0,category,context,question,response,instruction
5337,RAG,Hamro store ma sabai payment methods ma transp...,Online payment ma additional charge lagcha?,Kahi payment gateways ma additional charge lag...,Answer the question using the context if it is...
550,alpaca,,Nepal ma famous food ko naam lekha ani explain...,"Momo Nepal ma ekdam famous food ho, jun steame...",
4282,alpaca,,Describe the concept of robotic surgery in Rom...,Robotic surgery bhaneko medical procedure ho j...,
534,alpaca,,Ek line ma describe gara: 'Nepal ko political ...,Nepal ko political history monarchy bata democ...,
247,alpaca,"I am fine, thank you.",Tapailai kasto xa? Tapai le yo text translate ...,"Ma thik chu, dhanyabad.",
5133,RAG,"Product: LG Single Door Refrigerator, Price: 2...",LG refrigerator ko barema aru jankari dinus.,LG Single Door Refrigerator ko barema jankari ...,Answer the question using the context if it is...
2536,alpaca,,Yo context bata answer deu: 'Nepal ko jamma po...,Nepal ma Kathmandu bahek 2 crore 80 lakh manch...,
2628,alpaca,,Yo context bata answer deu: 'Nepal ma 7 ota pr...,Province 4 bahek 6 ota province ma thorai jana...,
5899,nep-eng,,music playlist maa naya trending songs add gar...,I added new trending songs to my music playlis...,Translate the given text in Nepali to English.
17,alpaca,20 ma 15 thapnu ani 10 ghataunu.,Yo problem ko solution step-wise lekha gara.,Step 1: 20 ma 15 thapnu = 35\nStep 2: 35 ma 10...,


## Merge dataset

In [30]:
# Stack the v2 rows on top of the v3 rows with a fresh 0..n-1 index.
# df_new is selected by df_old's column list first: this raises a KeyError if
# the "instruction" column was never added to df_new, instead of letting
# concat's column alignment silently fill it with NaN for every v3 row.
df_combined_v2_v3 = pd.concat(
    [df_old, df_new[df_old.columns]],
    ignore_index=True,
)

In [31]:
df_combined_v2_v3.shape

(16614, 5)

In [34]:
# Fold the v3 "nep-eng" label into the "translate" category already used by
# the v2 rows, so translation examples share a single label.
df_combined_v2_v3["category"] = df_combined_v2_v3["category"].replace({"nep-eng": "translate"})

In [35]:
df_combined_v2_v3["category"].value_counts()

category
RAG          5225
alpaca       4498
translate    3837
open_qa      2831
summary       223
Name: count, dtype: int64

In [36]:
df_combined_v2_v3.shape

(16614, 5)

In [37]:
df_combined_v2_v3.sample(15)

Unnamed: 0,category,instruction,context,question,response
7081,RAG,Answer the following question by looking at th...,Nepal ko sabai bhanda thulo mantralaya Ministr...,Nepal ko sabai bhanda thulo mantralaya k ho?,"Ministry of Home Affairs, Nepal ko sabai bhand..."
9852,open_qa,You are a helpful assistant. Answer the given ...,,Ek prem katha ko suruwat lekha.,"Ek din, ek ujyalai bhari bhaeko sahar ma, Aara..."
5146,RAG,Answer the following question by looking at th...,"Hajur, hamro store ma free Wi-Fi available chh...",Store ma Wi-Fi available chha?,"Hajur, free Wi-Fi available chha. Staff le pas..."
15579,open_qa,You are a helpful AI assistant. Answer the use...,,World ko sabai bhanda popular TV show kun ho r...,World ko sabai bhanda popular TV show 'Game of...
8225,RAG,Answer the following question by looking at th...,"Hamro Pasal ma smartphone, laptop, camera, ra ...",Electronics item ko delivery charge kati ho?,"Kathmandu upatyaka vitra free delivery huncha,..."
603,open_qa,You are a helpful assistant. Answer the given ...,I enjoy reading books during my free time.,Nepali ma yo English line ko anuwaad garnuhos.,Ma mero fursad ko samaya ma kitab padhna man p...
15364,open_qa,You are a helpful AI assistant. Answer the use...,,Nepal ma kati wota district court chha?,Nepal ma 77 wota district court chha.
6334,open_qa,You are a helpful assistant. Answer the given ...,Community forest ko role batainus.,Nepal ma forest conservation ko lagi kasari co...,"Local community forest groups ma join garnus, ..."
5041,open_qa,You are a helpful assistant. Answer the given ...,,Nepali ma 'Financial Planning' ko arth bataunu...,'Financial Planning' ko arth Nepali ma 'Aarthi...
8245,open_qa,You are a helpful assistant. Answer the given ...,Maile patrika padhe.,Yo sentence ko passive voice ma anuwaad garnuhos.,Patrika padhiyo.


## Shuffle and save

In [38]:
# Shuffle every row (frac=1) so v2 and v3 examples are interleaved, then
# renumber the index from 0. The fixed random_state keeps the row order of the
# saved file identical across re-runs of the notebook.
df_combine_shuffled = (
    df_combined_v2_v3
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

In [39]:
df_combine_shuffled.shape[0]

16614

In [40]:
df_combine_shuffled.head(5)

Unnamed: 0,category,instruction,context,question,response
0,RAG,Answer the following question by looking at th...,"Hamro platform ma NIC Asia, Nabil Bank, Global...",mobile banking ko kun kun bank apps support hu...,"NIC Asia, Nabil, Global IME, ra anya major ban..."
1,alpaca,,,Nepali ko auta old belief ko example lekha.,Dashain ko tika lagda ajako auspicious time ma...
2,alpaca,,,Euta short story lekha about a magical stone.,Euta sano keti le euta magical stone payo. Usl...
3,alpaca,,,What is the concept of bio-inspired robotics? ...,Bio-inspired robotics bhaneko field ho jasma r...
4,RAG,Answer the following question by looking at th...,"Accepted Payment Methods: Credit/Debit Card, E...",Payment garna ko lagi payment dispute resolve ...,"Hajur, payment dispute resolve garna 01-234567..."


In [41]:
df_combine_shuffled.tail(5)

Unnamed: 0,category,instruction,context,question,response
16609,RAG,Answer the following question by looking at th...,Tech Store Nepal le Dashain ma 20% discount ch...,Dashain ma kati discount cha mobile phones ra ...,Dashain ma 20% discount cha mobile phones ra e...
16610,RAG,Answer the question using the context if it is...,Hamro store ma seasonal discounts chalirako ch...,Kun payment methods support garchha?,"Hamro store ma eSewa, Khalti, debit card, ra c..."
16611,alpaca,,,What is the Internet of Things (IoT)? Explain ...,Internet of Things (IoT) bhaneko ek network ho...
16612,open_qa,You are a helpful AI assistant. Answer the use...,,Nepal ko sabai bhanda popular singer ko hun?,Nepal ko sabai bhanda popular singer ma Naraya...
16613,translate,Translate the text below from English to Nepali:,,The benefits of a good night’s sleep include i...,Ramro nindra ko fayeda ma gyanatmak karya sudh...


In [42]:
# Spot-check the middle of the shuffled frame: every 500th row from
# position 10000 up to (but not including) 13000.
df_combine_shuffled.iloc[10000:13000:500]

Unnamed: 0,category,instruction,context,question,response
10000,translate,Translate the given text from English language...,,"We spent the afternoon at the zoo, watching th...","Hami zoo ma beluka bitayo, janawar haru herda ..."
10500,alpaca,,Timi kahile aaudai chau?,Yo sentence ma vayeko action classify gara (Re...,Question
11000,translate,Translate the text below from English to Nepali:,,Nepali movies are gaining popularity both loca...,Nepali movies le locally ra internationally do...
11500,open_qa,You are a helpful assistant. Answer the given ...,,Hi e-commerce,Namaste! Ma e-commerce assistant ho. Aba ke ch...
12000,RAG,Answer the following question by looking at th...,"Product List: Redmi Note 9 Pro, Price: Rs. 200...",iPhone ko price kati ho?,iPhone ko price Rs. 100000 ho.
12500,RAG,Answer the following question by looking at th...,"Damak Tech Store, Bus Park ma cha. Hamile smar...",Tapai ko store ma 25 hazar bhitra ko kun kun m...,"Damak Tech Store ma Samsung Galaxy M12, Realme..."


In [46]:
# Write the shuffled dataset to disk; index=False leaves the row index out of the CSV.
df_combine_shuffled.to_csv("./fine_tune_v3.csv", index=False)

## Test the dataset

In [47]:
# Read the saved file back so its columns and shape can be checked against the in-memory frame.
load_csv = pd.read_csv("./fine_tune_v3.csv")

In [48]:
load_csv.columns

Index(['category', 'instruction', 'context', 'question', 'response'], dtype='object')

In [49]:
load_csv.shape

(16614, 5)