First we install the gpt4all module. This is the only module we need for creating embeddings with GPT4All (the embedding model runs locally).

In [None]:
!pip install gpt4all

In [None]:
# Colab-only step: upload the dataset CSV from the local machine. The uploaded
# file is saved under its own name in the working directory (see the "Saving ..."
# message below), which is where the next cell reads it from.
from google.colab import files
uploaded = files.upload()

Saving islamqa_10thousand_filtered.csv to islamqa_10thousand_filtered.csv


In [None]:
import pandas as pd
import numpy as np

# Load the uploaded Q&A export. "utf-8-sig" drops a leading BOM if there is one.
# Rows are split on "\n" only, so a stray "\r" can survive in the parsed header;
# the next cell strips it from the column names.
df = pd.read_csv('islamqa_10thousand_filtered.csv', lineterminator="\n", encoding ="utf-8-sig")

In [None]:
# The URL column is not needed for the embeddings. errors="ignore" keeps this
# cell safe to re-run: df is overwritten here, so on a second run "URL" is
# already gone and a plain drop would raise KeyError.
df = df.drop(columns=["URL"], errors="ignore")
# Remove carriage returns left in the header names by reading with lineterminator="\n".
df.columns = [name.replace("\r", "") for name in df.columns]
df

Unnamed: 0,Title,Question,Answer
0,Interruption of Wudhu,"If during wudhu (ablution), one finds dirt stu...",Praise be to Allah. Continuity would not be di...
1,Wudu while wounded,What should a person do if one of the areas no...,Praise be to Allah.If a person is wounded in o...
2,Time of occurence of janaba unknown.,If a man sees the signs of sexual discharge im...,Praise be to Allah.If a man sees the signs of ...
3,Is Circumcision Necessary for Conversion?,A lawyer in Argentina is asking about the ruli...,Contents Related Male circumcision in Islam Is...
4,When Can You Pray After a Miscarriage?,If a woman has a miscarriage and has discharge...,Related Praise be to Allah.This case depends o...
...,...,...,...
1538,Ruling on reading Qur’aan over the grave of a ...,Some people in our village bring together some...,Praise be to Allah.This action is bid’ah (repr...
1539,His mother swore that she would never give him...,my mother swore to me she would not gove me an...,Praise be to Allah.There is nothing wrong with...
1540,Ruling on grave markers on which are written d...,Is it permissible to put a piece of metal or a...,Praise be to Allah.It is not permissible to wr...
1541,Male nurse being alone with female nurse in th...,I am a male nurse and my work is nursing men. ...,Praise be to Allah.It is not permissible for t...


As we already did with the ada embeddings, we will combine the Title, Question and Answer columns into one single text and store it in the "combined" column.

In [None]:
# Build one labelled text per row from the whitespace-trimmed title, question
# and answer; this is the string that gets embedded later.
df["combined"] = (
    "Title: " + df["Title"].str.strip()
    + "; Question: " + df["Question"].str.strip()
    + "; Answer: " + df["Answer"].str.strip()
)

In [None]:
# Length of each combined text. len() on a str counts characters, so despite
# its name the "n_tokens" column holds character counts, not tokenizer tokens.
df["n_tokens"] = df["combined"].map(len)
df

Unnamed: 0,Title,Question,Answer,combined,n_tokens
0,Interruption of Wudhu,"If during wudhu (ablution), one finds dirt stu...",Praise be to Allah. Continuity would not be di...,Title: Interruption of Wudhu; Question: If dur...,862
1,Wudu while wounded,What should a person do if one of the areas no...,Praise be to Allah.If a person is wounded in o...,Title: Wudu while wounded; Question: What shou...,521
2,Time of occurence of janaba unknown.,If a man sees the signs of sexual discharge im...,Praise be to Allah.If a man sees the signs of ...,Title: Time of occurence of janaba unknown.; Q...,1978
3,Is Circumcision Necessary for Conversion?,A lawyer in Argentina is asking about the ruli...,Contents Related Male circumcision in Islam Is...,Title: Is Circumcision Necessary for Conversio...,2088
4,When Can You Pray After a Miscarriage?,If a woman has a miscarriage and has discharge...,Related Praise be to Allah.This case depends o...,Title: When Can You Pray After a Miscarriage?;...,2384
...,...,...,...,...,...
1538,Ruling on reading Qur’aan over the grave of a ...,Some people in our village bring together some...,Praise be to Allah.This action is bid’ah (repr...,Title: Ruling on reading Qur’aan over the grav...,4085
1539,His mother swore that she would never give him...,my mother swore to me she would not gove me an...,Praise be to Allah.There is nothing wrong with...,Title: His mother swore that she would never g...,2286
1540,Ruling on grave markers on which are written d...,Is it permissible to put a piece of metal or a...,Praise be to Allah.It is not permissible to wr...,Title: Ruling on grave markers on which are wr...,808
1541,Male nurse being alone with female nurse in th...,I am a male nurse and my work is nursing men. ...,Praise be to Allah.It is not permissible for t...,Title: Male nurse being alone with female nurs...,1908


In [None]:
# Create the GPT4All embedding client. On this run, constructing it downloaded
# the embedding model into the local gpt4all cache (see the log below this cell).
from gpt4all import GPT4All, Embed4All
embedder = Embed4All()

100%|██████████| 45.5M/45.5M [00:00<00:00, 216MiB/s]


Model downloaded at:  /root/.cache/gpt4all/ggml-all-MiniLM-L6-v2-f16.bin


In [None]:
# Plain Python list of the combined texts, in row order, to feed the embedder.
combined_list = df['combined'].tolist()

Now we create the embeddings for each text. First, we check that it works for a single example.

In [None]:
# Smoke test: embed only the first combined text and print the raw vector
# (a plain list of floats) before embedding the whole dataset.
embeddings_test = embedder.embed(combined_list[0])
print(embeddings_test)

[-0.09791091084480286, 0.02941736951470375, 0.093024343252182, 0.0675569549202919, -0.029885396361351013, -0.058003153651952744, 0.04409881308674812, -0.07603773474693298, -0.05661290884017944, -0.11499246209859848, 0.0771193727850914, -0.027969615533947945, 0.002034258795902133, 0.061026789247989655, -0.11058900505304337, 0.007830442860722542, -0.044977523386478424, 0.012801693752408028, -0.07281006872653961, -0.014067874290049076, 0.04484245553612709, 0.04517330229282379, 0.06342434138059616, -0.05028580501675606, -0.03763023763895035, 0.055493250489234924, -0.02827422320842743, -0.04867742955684662, 0.04429234191775322, -0.006610937416553497, -0.004176123067736626, -0.0030711903236806393, -0.11158603429794312, -0.08361254632472992, 0.008636497892439365, 0.06193987652659416, -0.04145412519574165, 0.033742789179086685, 0.0427386574447155, 0.019892597571015358, 0.07564804702997208, -0.023340115323662758, -0.027308810502290726, -0.004060585517436266, 0.01787879876792431, 0.0624694488942

In [None]:
# Embed every combined text, one call per text, keeping the input order so the
# results line up with the rows of df.
embeddings_list = list(map(embedder.embed, combined_list))

In [None]:
# Attach one embedding per row. This relies on embeddings_list having been built
# from df['combined'] in row order, so position i belongs to row i.
df['embedding'] = embeddings_list

In [None]:
# Display the frame to confirm the new "embedding" column is populated.
df

Unnamed: 0,Title,Question,Answer,combined,n_tokens,embedding
0,Interruption of Wudhu,"If during wudhu (ablution), one finds dirt stu...",Praise be to Allah. Continuity would not be di...,Title: Interruption of Wudhu; Question: If dur...,862,"[-0.09791091084480286, 0.02941736951470375, 0...."
1,Wudu while wounded,What should a person do if one of the areas no...,Praise be to Allah.If a person is wounded in o...,Title: Wudu while wounded; Question: What shou...,521,"[-0.013454749248921871, 0.04537041112780571, 0..."
2,Time of occurence of janaba unknown.,If a man sees the signs of sexual discharge im...,Praise be to Allah.If a man sees the signs of ...,Title: Time of occurence of janaba unknown.; Q...,1978,"[-0.12529215216636658, 0.0703914687037468, 0.0..."
3,Is Circumcision Necessary for Conversion?,A lawyer in Argentina is asking about the ruli...,Contents Related Male circumcision in Islam Is...,Title: Is Circumcision Necessary for Conversio...,2088,"[-0.02148943580687046, 0.09738367050886154, -0..."
4,When Can You Pray After a Miscarriage?,If a woman has a miscarriage and has discharge...,Related Praise be to Allah.This case depends o...,Title: When Can You Pray After a Miscarriage?;...,2384,"[-0.051056697964668274, -0.03960786014795303, ..."
...,...,...,...,...,...,...
1538,Ruling on reading Qur’aan over the grave of a ...,Some people in our village bring together some...,Praise be to Allah.This action is bid’ah (repr...,Title: Ruling on reading Qur’aan over the grav...,4085,"[-0.04256254434585571, 0.13452360033988953, -0..."
1539,His mother swore that she would never give him...,my mother swore to me she would not gove me an...,Praise be to Allah.There is nothing wrong with...,Title: His mother swore that she would never g...,2286,"[-0.026030894368886948, 0.14860595762729645, 0..."
1540,Ruling on grave markers on which are written d...,Is it permissible to put a piece of metal or a...,Praise be to Allah.It is not permissible to wr...,Title: Ruling on grave markers on which are wr...,808,"[-0.05203848332166672, 0.14309021830558777, -0..."
1541,Male nurse being alone with female nurse in th...,I am a male nurse and my work is nursing men. ...,Praise be to Allah.It is not permissible for t...,Title: Male nurse being alone with female nurs...,1908,"[-0.11899266391992569, 0.03627452999353409, -0..."


In [None]:
# Sanity check: number of embeddings produced (should match the number of rows in df).
print(len(embeddings_list))

1543


In [None]:
# Persist the frame, including the embeddings, as a BOM-prefixed UTF-8 CSV.
# The "embedding" cells are Python lists, so CSV stores them as their text
# representation and they must be parsed back into lists after reading.
# NOTE(review): the row index is written too (no index=False), which adds another
# unnamed column next to the existing "Unnamed: 0" when the file is read back.
# Confirm what downstream readers expect before changing this.
df.to_csv("islamqa_10thousand_filtered_with_gpt4all_embeddings.csv", encoding="utf-8-sig")