In [1]:
from hubconf import *

In [2]:
pipeline = prediction_pipeline()

Loading model PromptSentinel-Unbalanced-Paraphrase-v1...
Model PromptSentinel-Unbalanced-Paraphrase-v1 loaded successfully.
Creating prediction pipeline...
Pipeline created successfully. Use PredictionPipeline.predict(text) to classify prompts.


In [3]:
print(pipeline.predict("Well"))
print(pipeline.get_embedding_from_text("Well"))

conversation
[-0.18283820152282715, 0.046302393078804016, 0.0003776084631681442, -0.07285583019256592, 0.03111615963280201, -0.10735119879245758, -0.15005919337272644, -0.22141163051128387, 0.07309241592884064, -0.13785530626773834, 0.027739213779568672, -0.05461377650499344, -0.09083621203899384, -0.13999108970165253, -0.03637378662824631, 0.16851861774921417, -0.09791877865791321, -0.02986232191324234, 0.026982303708791733, -0.016844414174556732, 0.10207368433475494, 0.16582974791526794, 0.0800141841173172, -0.23055590689182281, 0.012827914208173752, 0.053679801523685455, -0.1546505093574524, 0.10559729486703873, -0.13099002838134766, -0.15658363699913025, -0.009016681462526321, -0.08525823056697845, -0.12824222445487976, -0.09396794438362122, 0.11881639063358307, -0.09335163235664368, 0.10893076658248901, -0.21793287992477417, 0.16122108697891235, 0.21723079681396484, -0.09642629325389862, 0.21126478910446167, 0.14839527010917664, 0.18376754224300385, 0.0682494193315506, -0.13218268

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA

malignant = pd.read_csv("Malignant/malignant.csv")
# print(malignant.head())

sentences = malignant['text']
categories = malignant['category'].tolist()
print(sentences)

0                                  hi, how are you doing?
1                     hello! i am wonderful and yourself?
2                    i am doing well. what are you up to?
3       just got in from my garden. i was seeing if my...
4       i am having dinner at my apartment with my 3 s...
                              ...                        
1576    I require your role as a historian, where you ...
1577    I would like for you to adopt the role of a So...
1578    I would appreciate it if you could adopt the p...
1579    I would appreciate it if you could assume the ...
1580    Can you assume the role of a psychologist? I w...
Name: text, Length: 1581, dtype: object


In [5]:
from tqdm import tqdm

embeddings = []
for i, sentence in tqdm(enumerate(sentences), total=len(sentences)):
    embeddings.append(pipeline.get_embedding_from_text(sentence))

100%|██████████| 1581/1581 [00:07<00:00, 200.29it/s]


In [6]:
pca = PCA(n_components=2)
pca_3d = PCA(n_components=3)
X_pca = pca.fit_transform(embeddings)
X_pca_3d = pca_3d.fit_transform(embeddings)

In [7]:
df_pca = pd.DataFrame(X_pca, columns=['PC1', 'PC2'])
df_pca['category'] = categories
df_pca_3d = pd.DataFrame(X_pca_3d, columns=['PC1', 'PC2', 'PC3'])
df_pca_3d['category'] = categories

In [8]:
df_pca.head()

Unnamed: 0,PC1,PC2,category
0,2.3e-05,4e-06,conversation
1,0.115691,-0.062333,conversation
2,0.08377,-0.116607,conversation
3,0.21331,0.026834,conversation
4,0.248564,-0.031893,conversation


In [9]:
df_pca_3d.head()

Unnamed: 0,PC1,PC2,PC3,category
0,0.108739,0.270404,0.081575,conversation
1,-0.034532,0.124024,0.059542,conversation
2,-0.079121,0.099252,0.03025,conversation
3,0.047439,0.178446,-0.072598,conversation
4,0.045336,0.22881,-0.081386,conversation


In [10]:
import plotly.express as px

fig = px.scatter(df_pca, x='PC1', y='PC2', color='category', opacity=0.5, title='PCA Scatter Plot', 
                 labels={'PC1': 'Principal Component 1', 'PC2': 'Principal Component 2'})
fig.update_layout(font=dict(family='Times New Roman', size=14))
fig.show()

In [11]:
import plotly.express as px

# Assuming df_pca contains the PCA-transformed values in columns 'PC1' and 'PC2'
fig = px.scatter_3d(df_pca_3d, x='PC1', y='PC2', z='PC3', color='category', opacity=0.75, title='PCA Scatter Plot', 
                 labels={'PC1': 'Principal Component 1', 'PC2': 'Principal Component 2', 'PC3': 'Principal Component 3'})
fig.show()

In [12]:
from sklearn.manifold import TSNE

tsne = TSNE(n_components=2, random_state=42)
X_tsne = tsne.fit_transform(embeddings)
tsne_3d = TSNE(n_components=3, random_state=42)
X_tsne_3d = tsne_3d.fit_transform(embeddings)


The default initialization in TSNE will change from 'random' to 'pca' in 1.2.


The default learning rate in TSNE will change from 200.0 to 'auto' in 1.2.


The default initialization in TSNE will change from 'random' to 'pca' in 1.2.


The default learning rate in TSNE will change from 200.0 to 'auto' in 1.2.



In [13]:
print(X_tsne[:5])
print(X_tsne_3d[:5])

[[-17.866991   -0.2615223]
 [ 12.423502   -5.1674657]
 [ 17.922174   -6.9216204]
 [-21.119404  -13.477919 ]
 [-23.179533  -25.618273 ]]
[[ -7.708838  -14.514545   -1.230198 ]
 [  2.8325148  -7.943271  -10.547236 ]
 [  9.90814    -4.6414304 -11.711256 ]
 [-13.760336    5.9650073  -1.4848197]
 [-18.598555    9.750114   -7.5063252]]


In [14]:
df_tsne = pd.DataFrame(X_tsne, columns=['TSNE1', 'TSNE2'])
df_tsne['category'] = categories
df_tsne_3d = pd.DataFrame(X_tsne_3d, columns=['TSNE1', 'TSNE2', 'TSNE3'])
df_tsne_3d['category'] = categories

In [15]:
df_tsne.head()

Unnamed: 0,TSNE1,TSNE2,category
0,-17.866991,-0.261522,conversation
1,12.423502,-5.167466,conversation
2,17.922174,-6.92162,conversation
3,-21.119404,-13.477919,conversation
4,-23.179533,-25.618273,conversation


In [16]:
df_tsne_3d.head()

Unnamed: 0,TSNE1,TSNE2,TSNE3,category
0,-7.708838,-14.514545,-1.230198,conversation
1,2.832515,-7.943271,-10.547236,conversation
2,9.90814,-4.64143,-11.711256,conversation
3,-13.760336,5.965007,-1.48482,conversation
4,-18.598555,9.750114,-7.506325,conversation


In [17]:
fig = px.scatter(df_tsne, x='TSNE1', y='TSNE2', color='category', opacity=0.5, title='PromptSentinel-Unbalanced-Paraphrase-v1', 
                 labels={'TSNE1': 't-SNE1', 'TSNE2': 't-SNE2'},
                 width=800, height=600)
fig.update_layout(font=dict(family='Times New Roman', size=26))
fig.show()

In [18]:
fig = px.scatter_3d(df_tsne_3d, x='TSNE1', y='TSNE2', z='TSNE3', color='category', opacity=0.8, title='t-SNE Scatter Plot', 
                 labels={'TSNE1': 't-SNE1', 'TSNE2': 't-SNE2', 'TSNE3': 't-SNE3'},
                 width=800, height=600)
fig.update_layout(font=dict(family='Times New Roman', size=18))
fig.show()