In [3]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer

In [11]:
legal_text = pd.read_excel('data/clean_statutes.xlsx')
legal_text.head()

Unnamed: 0,Section,Subsection,Section_Title,Section_Text
0,151,1.0,SHORT TITLE,"This chapter may be cited as the Limited Sales, Excise, and Use Tax Act."
1,151,2.0,"APPLICABILITY OF DEFINITIONS, ETC","The definitions and other provisions of this chapter relating to the collection, administration, and enforcement of the taxes imposed by this chapter, including the requirements for sales tax permits, apply to the parties to a sale of a taxable item that is exempted from the taxes imposed by this chapter but that is subject to the taxes imposed by a city under Chapter 321 of this code."
2,151,28.0,AMUSEMENT SERVICES,"(a) Amusement services means the provision of amusement, entertainment, or recreation, but does not include the provision of educational or health services if prescribed by a licensed practitioner of the healing arts for the primary purpose of education or health maintenance or improvement.(b) Amusement services includes membership in a private club or organization that provides entertainment, recreational, sports, dining, or social facilities to its members.(c) Amusement services does not include services provided through coin-operated machines that are operated by the consumer."
3,151,3.0,BUSINESS,"Business means an activity of or caused by a person for the purpose of a direct or indirect gain, benefit, or advantage."
4,151,31.0,COMPUTER PROGRAM,"Computer program means a series of instructions that are coded for acceptance or use by a computer system and that are designed to permit the computer system to process data and provide results and information. The series of instructions may be contained in or on magnetic tapes, punched cards, printed instructions, or other tangible or electronic media."


In [14]:
model = 'distilbert-base-nli-stsb-mean-tokens'
text_col = 'Section_Text'
group_col = 'Section_Title'

embedder = SentenceTransformer(model)
legal_text['sentence'] = legal_text[text_col].str.split(".",expand=False).values
legal_text = legal_text.explode('sentence')
legal_text = legal_text.loc[legal_text['sentence']!='']
legal_text['sentence_embedding'] = embedder.encode(legal_text.sentence).tolist()
legal_text.head()

Unnamed: 0,Section,Subsection,Section_Title,Section_Text,sentence,sentence_embedding
0,151,1.0,SHORT TITLE,"This chapter may be cited as the Limited Sales, Excise, and Use Tax Act.","This chapter may be cited as the Limited Sales, Excise, and Use Tax Act","[0.16894295811653137, -0.00851594191044569, -0.296162873506546, 0.2573283612728119, -0.4143689274787903, -0.2717636227607727, 0.7010982632637024, -0.3780181407928467, -0.6480791568756104, 0.7720542550086975, -0.5060065984725952, 0.16790945827960968, -0.13197669386863708, -0.6182047128677368, -0.33469220995903015, 0.0659010037779808, -0.28614526987075806, 0.7763422727584839, -0.17924439907073975, -0.9127068519592285, 0.5247442722320557, -0.2832961976528168, 0.09317886829376221, 0.22836032509803772, 0.31795448064804077, -0.08414573222398758, 0.8347461223602295, 0.14836518466472626, -0.22460508346557617, -1.002913475036621, -0.33680590987205505, 0.3687874376773834, 0.3402027487754822, -0.07263676822185516, 0.4310338497161865, -0.7119324803352356, -0.06199664995074272, -0.8657553791999817, -0.3929443359375, -0.4287366271018982, 0.21567440032958984, -0.5469999313354492, 0.0720679759979248, 0.21653981506824493, -0.20912611484527588, 0.5977379679679871, -0.4017750322818756, 0.25816810131073, -1.0036046504974365, -0.5301874876022339, -0.2023560255765915, -0.2898483872413635, 0.4242514967918396, -0.9273300170898438, -0.400728315114975, -0.1888270378112793, -0.4356876313686371, -0.15422487258911133, 0.40744104981422424, -0.3754659593105316, -0.6607218980789185, 0.17205217480659485, -0.47895827889442444, -0.6150723099708557, -0.07213689386844635, -1.0801880359649658, 0.5136939883232117, 0.7722920179367065, -0.6013466715812683, -1.5178732872009277, -0.06552262604236603, -0.7815015912055969, -1.2493637800216675, 0.3824715316295624, 0.24552591145038605, -0.08070050179958344, 0.21149209141731262, 0.1707715094089508, -0.35965290665626526, -0.32550352811813354, -0.18411791324615479, 0.032342810183763504, -0.46833887696266174, 0.2520596981048584, 0.06257808208465576, -0.7050092220306396, -0.17139853537082672, 0.12051791697740555, 0.17716078460216522, -0.058587558567523956, 0.558040201663971, -0.5767393708229065, 0.3851204514503479, -0.5875386595726013, 0.2765406370162964, -0.5490323901176453, 0.30334892868995667, -0.15269577503204346, -0.3009059429168701, 0.34295323491096497, ...]"
1,151,2.0,"APPLICABILITY OF DEFINITIONS, ETC","The definitions and other provisions of this chapter relating to the collection, administration, and enforcement of the taxes imposed by this chapter, including the requirements for sales tax permits, apply to the parties to a sale of a taxable item that is exempted from the taxes imposed by this chapter but that is subject to the taxes imposed by a city under Chapter 321 of this code.","The definitions and other provisions of this chapter relating to the collection, administration, and enforcement of the taxes imposed by this chapter, including the requirements for sales tax permits, apply to the parties to a sale of a taxable item that is exempted from the taxes imposed by this chapter but that is subject to the taxes imposed by a city under Chapter 321 of this code","[1.055303692817688, -0.07879878580570221, 0.011990068480372429, 0.31049397587776184, -0.06889557838439941, -0.20171038806438446, 0.8443438410758972, -0.7071282863616943, -0.6012312173843384, 0.20297227799892426, -0.812744140625, 0.7422714233398438, -0.9264224171638489, -0.7922874689102173, -0.5745334625244141, -0.6535205245018005, -0.07272425293922424, 0.3950243592262268, -0.29133692383766174, -0.4885833263397217, -0.19814734160900116, -0.23807582259178162, 0.04715336486697197, 0.7785142064094543, 0.5257843136787415, -0.0587129220366478, 0.7513095140457153, 0.23749463260173798, 0.24604998528957367, -0.561417818069458, 0.4457608163356781, 0.27809903025627136, -0.23309840261936188, -0.5301850438117981, -0.053125474601984024, 0.14595060050487518, 0.46928125619888306, -0.6369664072990417, -0.9699912667274475, -0.32321861386299133, 0.48202818632125854, -0.07071300595998764, -0.2502410411834717, 0.7691090703010559, -0.3456295132637024, 0.22109992802143097, -0.19149518013000488, -0.5629249215126038, -0.9750672578811646, -0.28479164838790894, 0.002838765736669302, -0.5592496991157532, -0.07873548567295074, -0.1260708123445511, 0.4386172592639923, -0.05251935496926308, -0.8934889435768127, -0.1454208940267563, -0.0964876338839531, -0.4051279127597809, 0.11846861243247986, 0.39803099632263184, -0.5696048736572266, -0.46279430389404297, 0.43019551038742065, -0.6750214695930481, 0.25835809111595154, 0.0020327377133071423, -0.5570618510246277, -1.823028326034546, -0.02007298357784748, -0.5792675614356995, -1.712094783782959, 0.15256795287132263, -0.11115947365760803, 0.4438418447971344, -0.4867037832736969, -0.12591038644313812, -0.853267252445221, -0.043862126767635345, -0.3333151936531067, 0.32059383392333984, 0.02890957146883011, 0.2401597797870636, 0.32055842876434326, -0.4457187354564667, 0.07603046298027039, -0.1887575089931488, 0.12466991692781448, 0.14273516833782196, 0.8028139472007751, -0.32123348116874695, 0.6263676285743713, -0.6857101917266846, 0.5282830595970154, -0.11910924315452576, 0.20840533077716827, -0.1848381757736206, -0.4830543100833893, 0.14182238280773163, ...]"
2,151,28.0,AMUSEMENT SERVICES,"(a) Amusement services means the provision of amusement, entertainment, or recreation, but does not include the provision of educational or health services if prescribed by a licensed practitioner of the healing arts for the primary purpose of education or health maintenance or improvement.(b) Amusement services includes membership in a private club or organization that provides entertainment, recreational, sports, dining, or social facilities to its members.(c) Amusement services does not include services provided through coin-operated machines that are operated by the consumer.","(a) Amusement services means the provision of amusement, entertainment, or recreation, but does not include the provision of educational or health services if prescribed by a licensed practitioner of the healing arts for the primary purpose of education or health maintenance or improvement","[0.07914664596319199, 0.8852005004882812, 0.6613497138023376, 0.37163281440734863, -0.7094599008560181, -0.5898401141166687, 0.99375319480896, -0.2613564729690552, 0.1308562308549881, 0.44671157002449036, -0.6039671301841736, 0.43161094188690186, -0.7834445834159851, 0.7340551614761353, -0.32930752635002136, 0.26227501034736633, 0.34983813762664795, -0.36168912053108215, -0.7081705331802368, 0.04212944209575653, -0.5274221301078796, 0.6795638203620911, 0.11163768917322159, -0.4246758222579956, -0.8033706545829773, 0.08915809541940689, 0.09943550825119019, 0.5236286520957947, 0.055566802620887756, -0.7880780100822449, 0.18092310428619385, 0.7136402726173401, -0.20655345916748047, -0.49412015080451965, 0.05057704821228981, 0.05901239812374115, 0.2570330500602722, 0.09783735871315002, 0.4170505404472351, -0.8014495372772217, 0.16374513506889343, -0.37243732810020447, -0.07932136207818985, 0.2937990427017212, 0.0541142001748085, -0.2620258927345276, -0.5075677037239075, -0.189580500125885, -0.9371623992919922, 0.5245419144630432, -0.10055766254663467, -0.646271824836731, 0.6792309284210205, 0.8852449059486389, 0.6651191115379333, -0.13425664603710175, -0.8839454650878906, -0.5133633017539978, 0.474351704120636, 0.2187999188899994, 0.19985197484493256, 0.29069045186042786, 0.19477252662181854, 0.0534648559987545, -0.2592560648918152, -1.0695033073425293, -0.3437243700027466, -0.07018798589706421, 0.9108617901802063, -1.4479308128356934, 0.33619916439056396, 0.3062160909175873, -0.8603971004486084, 0.33265262842178345, 0.5310907959938049, -0.3020956516265869, 0.1603013575077057, -0.7642091512680054, -0.5622232556343079, -0.02954643778502941, 0.37955981492996216, 0.16768333315849304, 0.2510755658149719, 0.28299131989479065, -0.9769402146339417, 0.5452051758766174, -0.019501028582453728, 0.012686461210250854, -0.4299072027206421, -0.40084242820739746, 0.01715032197535038, 0.018011312931776047, 1.085639238357544, -0.16208241879940033, 0.600383996963501, -0.5674188137054443, 0.7528564929962158, -0.8870606422424316, -0.42315399646759033, 0.08904606103897095, ...]"
2,151,28.0,AMUSEMENT SERVICES,"(a) Amusement services means the provision of amusement, entertainment, or recreation, but does not include the provision of educational or health services if prescribed by a licensed practitioner of the healing arts for the primary purpose of education or health maintenance or improvement.(b) Amusement services includes membership in a private club or organization that provides entertainment, recreational, sports, dining, or social facilities to its members.(c) Amusement services does not include services provided through coin-operated machines that are operated by the consumer.","(b) Amusement services includes membership in a private club or organization that provides entertainment, recreational, sports, dining, or social facilities to its members","[0.6956534385681152, 0.550119936466217, 0.7375267744064331, 0.3352024257183075, -0.047779690474271774, -0.06484528630971909, 1.5313715934753418, -0.6855745911598206, -1.0877177715301514, 0.29689112305641174, -1.0509589910507202, 0.2512943148612976, -0.4242200255393982, 0.2956429719924927, -0.07658171653747559, -0.03735031187534332, 0.281088262796402, -0.04457869380712509, -0.42643043398857117, -0.27970555424690247, 0.3927646577358246, 0.4376095235347748, -0.07523810118436813, 0.9846092462539673, -0.9745696187019348, -0.5925203561782837, -0.061288610100746155, -0.08034271746873856, 0.31801798939704895, -0.7419871091842651, 0.272266685962677, -0.17027559876441956, -0.024481071159243584, 0.0726868137717247, -0.6441087126731873, -0.7354622483253479, 0.1721573770046234, -0.46729081869125366, 0.3618312478065491, -0.7326836585998535, 0.41594281792640686, 0.01682474836707115, 1.3426011800765991, 1.6517953872680664, -0.07185199856758118, -0.05508510768413544, 0.39949485659599304, 0.6139546036720276, -0.15841837227344513, 0.8770650029182434, 0.20917537808418274, -0.22844693064689636, -0.40183478593826294, -0.2689913213253021, 0.6988494396209717, -0.1765606850385666, -0.7082768082618713, -0.4242897927761078, 1.288340449333191, -0.2855532467365265, 0.8083611130714417, 0.7043229341506958, 0.5333597660064697, -0.04922936484217644, -0.13823756575584412, -0.15838725864887238, -0.20007158815860748, -0.3540583848953247, 0.603478193283081, -1.0551451444625854, -0.2912060022354126, -0.47278791666030884, -1.1739345788955688, 0.05206485465168953, -0.4807775616645813, -0.43188807368278503, -0.040242791175842285, -0.10076361894607544, -0.38401979207992554, 0.24741528928279877, -0.25486594438552856, 0.6458341479301453, 0.22961102426052094, 1.33399498462677, -0.46337631344795227, 0.7066237330436707, -0.3229151666164398, 0.2599301040172577, -0.27231451869010925, -0.7394894957542419, 0.6419855952262878, 0.29877224564552307, 0.729128360748291, 0.7894961833953857, -0.5535014867782593, -0.8796188831329346, 0.2678026556968689, -0.4378856420516968, -0.6298126578330994, 0.13830241560935974, ...]"
2,151,28.0,AMUSEMENT SERVICES,"(a) Amusement services means the provision of amusement, entertainment, or recreation, but does not include the provision of educational or health services if prescribed by a licensed practitioner of the healing arts for the primary purpose of education or health maintenance or improvement.(b) Amusement services includes membership in a private club or organization that provides entertainment, recreational, sports, dining, or social facilities to its members.(c) Amusement services does not include services provided through coin-operated machines that are operated by the consumer.",(c) Amusement services does not include services provided through coin-operated machines that are operated by the consumer,"[0.20828187465667725, 0.6319252252578735, -0.481172114610672, 0.6725893616676331, -0.3312583267688751, 0.3249225914478302, 0.5592515468597412, -0.2650900185108185, -0.10595586150884628, 1.314060091972351, 0.21763791143894196, -0.047196559607982635, -0.6815239191055298, 0.6920117735862732, 0.32326093316078186, -0.10314144939184189, 0.19538573920726776, -0.036983639001846313, -0.4031933546066284, -0.1681705117225647, -0.7078924775123596, 0.6090202927589417, -0.3114537298679352, 0.19690130650997162, -1.0906563997268677, 0.6889418959617615, 0.1630348563194275, -0.41861140727996826, -0.1389663964509964, -0.73796147108078, -0.11830835044384003, 1.2154964208602905, -0.003948035649955273, -0.3533198833465576, -0.2723570764064789, -0.2894478142261505, 0.3685382306575775, -0.09269674122333527, -0.23364581167697906, -0.6471644043922424, 0.541287899017334, -0.6562249660491943, -0.07626216113567352, -0.06290534883737564, 0.13578014075756073, -0.050549693405628204, 0.016725104302167892, -0.37671226263046265, -0.7274956107139587, 0.38362860679626465, -0.6477903127670288, -0.06889337301254272, 0.3508070707321167, 0.8950992226600647, 0.6093227863311768, 0.015290780924260616, -0.5832256078720093, -0.41056281328201294, 0.2838049530982971, 0.4589312970638275, 1.267553448677063, 0.5372200608253479, -0.9526788592338562, -0.45027899742126465, 0.05117616429924965, -0.4886861741542816, 0.01412851270288229, 0.2889191210269928, 0.9039333462715149, -1.2449356317520142, -0.3714618980884552, -0.11122007668018341, -0.4593814015388489, 0.08403816819190979, 0.6482228636741638, -0.258783757686615, 0.04929015040397644, 0.36392706632614136, -0.6755316257476807, -0.611065685749054, 0.013744257390499115, 0.41311752796173096, 0.3713036775588989, 0.7506353259086609, -0.16320407390594482, -0.3567432463169098, 0.23093201220035553, -0.8058661818504333, -0.4288029372692108, -0.045177165418863297, -0.09485139697790146, 0.28975439071655273, 1.4552932977676392, 0.26745182275772095, -0.3044391870498657, -1.1964982748031616, 0.2954278588294983, -0.7528925538063049, -0.09992829710245132, -0.07393264025449753, ...]"
2,151,28.0,AMUSEMENT SERVICES,"(a) Amusement services means the provision of amusement, entertainment, or recreation, but does not include the provision of educational or health services if prescribed by a licensed practitioner of the healing arts for the primary purpose of education or health maintenance or improvement.(b) Amusement services includes membership in a private club or organization that provides entertainment, recreational, sports, dining, or social facilities to its members.(c) Amusement services does not include services provided through coin-operated machines that are operated by the consumer.","(a) Amusement services means the provision of amusement, entertainment, or recreation, but does not include the provision of educational or health services if prescribed by a licensed practitioner of the healing arts for the primary purpose of education or health maintenance or improvement","[0.07914664596319199, 0.8852005004882812, 0.6613497138023376, 0.37163281440734863, -0.7094599008560181, -0.5898401141166687, 0.99375319480896, -0.2613564729690552, 0.1308562308549881, 0.44671157002449036, -0.6039671301841736, 0.43161094188690186, -0.7834445834159851, 0.7340551614761353, -0.32930752635002136, 0.26227501034736633, 0.34983813762664795, -0.36168912053108215, -0.7081705331802368, 0.04212944209575653, -0.5274221301078796, 0.6795638203620911, 0.11163768917322159, -0.4246758222579956, -0.8033706545829773, 0.08915809541940689, 0.09943550825119019, 0.5236286520957947, 0.055566802620887756, -0.7880780100822449, 0.18092310428619385, 0.7136402726173401, -0.20655345916748047, -0.49412015080451965, 0.05057704821228981, 0.05901239812374115, 0.2570330500602722, 0.09783735871315002, 0.4170505404472351, -0.8014495372772217, 0.16374513506889343, -0.37243732810020447, -0.07932136207818985, 0.2937990427017212, 0.0541142001748085, -0.2620258927345276, -0.5075677037239075, -0.189580500125885, -0.9371623992919922, 0.5245419144630432, -0.10055766254663467, -0.646271824836731, 0.6792309284210205, 0.8852449059486389, 0.6651191115379333, -0.13425664603710175, -0.8839454650878906, -0.5133633017539978, 0.474351704120636, 0.2187999188899994, 0.19985197484493256, 0.29069045186042786, 0.19477252662181854, 0.0534648559987545, -0.2592560648918152, -1.0695033073425293, -0.3437243700027466, -0.07018798589706421, 0.9108617901802063, -1.4479308128356934, 0.33619916439056396, 0.3062160909175873, -0.8603971004486084, 0.33265262842178345, 0.5310907959938049, -0.3020956516265869, 0.1603013575077057, -0.7642091512680054, -0.5622232556343079, -0.02954643778502941, 0.37955981492996216, 0.16768333315849304, 0.2510755658149719, 0.28299131989479065, -0.9769402146339417, 0.5452051758766174, -0.019501028582453728, 0.012686461210250854, -0.4299072027206421, -0.40084242820739746, 0.01715032197535038, 0.018011312931776047, 1.085639238357544, -0.16208241879940033, 0.600383996963501, -0.5674188137054443, 0.7528564929962158, -0.8870606422424316, -0.42315399646759033, 0.08904606103897095, ...]"
2,151,28.0,AMUSEMENT SERVICES,"(a) Amusement services means the provision of amusement, entertainment, or recreation, but does not include the provision of educational or health services if prescribed by a licensed practitioner of the healing arts for the primary purpose of education or health maintenance or improvement.(b) Amusement services includes membership in a private club or organization that provides entertainment, recreational, sports, dining, or social facilities to its members.(c) Amusement services does not include services provided through coin-operated machines that are operated by the consumer.","(b) Amusement services includes membership in a private club or organization that provides entertainment, recreational, sports, dining, or social facilities to its members","[0.6956534385681152, 0.550119936466217, 0.7375267744064331, 0.3352024257183075, -0.047779690474271774, -0.06484528630971909, 1.5313715934753418, -0.6855745911598206, -1.0877177715301514, 0.29689112305641174, -1.0509589910507202, 0.2512943148612976, -0.4242200255393982, 0.2956429719924927, -0.07658171653747559, -0.03735031187534332, 0.281088262796402, -0.04457869380712509, -0.42643043398857117, -0.27970555424690247, 0.3927646577358246, 0.4376095235347748, -0.07523810118436813, 0.9846092462539673, -0.9745696187019348, -0.5925203561782837, -0.061288610100746155, -0.08034271746873856, 0.31801798939704895, -0.7419871091842651, 0.272266685962677, -0.17027559876441956, -0.024481071159243584, 0.0726868137717247, -0.6441087126731873, -0.7354622483253479, 0.1721573770046234, -0.46729081869125366, 0.3618312478065491, -0.7326836585998535, 0.41594281792640686, 0.01682474836707115, 1.3426011800765991, 1.6517953872680664, -0.07185199856758118, -0.05508510768413544, 0.39949485659599304, 0.6139546036720276, -0.15841837227344513, 0.8770650029182434, 0.20917537808418274, -0.22844693064689636, -0.40183478593826294, -0.2689913213253021, 0.6988494396209717, -0.1765606850385666, -0.7082768082618713, -0.4242897927761078, 1.288340449333191, -0.2855532467365265, 0.8083611130714417, 0.7043229341506958, 0.5333597660064697, -0.04922936484217644, -0.13823756575584412, -0.15838725864887238, -0.20007158815860748, -0.3540583848953247, 0.603478193283081, -1.0551451444625854, -0.2912060022354126, -0.47278791666030884, -1.1739345788955688, 0.05206485465168953, -0.4807775616645813, -0.43188807368278503, -0.040242791175842285, -0.10076361894607544, -0.38401979207992554, 0.24741528928279877, -0.25486594438552856, 0.6458341479301453, 0.22961102426052094, 1.33399498462677, -0.46337631344795227, 0.7066237330436707, -0.3229151666164398, 0.2599301040172577, -0.27231451869010925, -0.7394894957542419, 0.6419855952262878, 0.29877224564552307, 0.729128360748291, 0.7894961833953857, -0.5535014867782593, -0.8796188831329346, 0.2678026556968689, -0.4378856420516968, -0.6298126578330994, 0.13830241560935974, ...]"
2,151,28.0,AMUSEMENT SERVICES,"(a) Amusement services means the provision of amusement, entertainment, or recreation, but does not include the provision of educational or health services if prescribed by a licensed practitioner of the healing arts for the primary purpose of education or health maintenance or improvement.(b) Amusement services includes membership in a private club or organization that provides entertainment, recreational, sports, dining, or social facilities to its members.(c) Amusement services does not include services provided through coin-operated machines that are operated by the consumer.",(c) Amusement services does not include services provided through coin-operated machines that are operated by the consumer,"[0.20828187465667725, 0.6319252252578735, -0.481172114610672, 0.6725893616676331, -0.3312583267688751, 0.3249225914478302, 0.5592515468597412, -0.2650900185108185, -0.10595586150884628, 1.314060091972351, 0.21763791143894196, -0.047196559607982635, -0.6815239191055298, 0.6920117735862732, 0.32326093316078186, -0.10314144939184189, 0.19538573920726776, -0.036983639001846313, -0.4031933546066284, -0.1681705117225647, -0.7078924775123596, 0.6090202927589417, -0.3114537298679352, 0.19690130650997162, -1.0906563997268677, 0.6889418959617615, 0.1630348563194275, -0.41861140727996826, -0.1389663964509964, -0.73796147108078, -0.11830835044384003, 1.2154964208602905, -0.003948035649955273, -0.3533198833465576, -0.2723570764064789, -0.2894478142261505, 0.3685382306575775, -0.09269674122333527, -0.23364581167697906, -0.6471644043922424, 0.541287899017334, -0.6562249660491943, -0.07626216113567352, -0.06290534883737564, 0.13578014075756073, -0.050549693405628204, 0.016725104302167892, -0.37671226263046265, -0.7274956107139587, 0.38362860679626465, -0.6477903127670288, -0.06889337301254272, 0.3508070707321167, 0.8950992226600647, 0.6093227863311768, 0.015290780924260616, -0.5832256078720093, -0.41056281328201294, 0.2838049530982971, 0.4589312970638275, 1.267553448677063, 0.5372200608253479, -0.9526788592338562, -0.45027899742126465, 0.05117616429924965, -0.4886861741542816, 0.01412851270288229, 0.2889191210269928, 0.9039333462715149, -1.2449356317520142, -0.3714618980884552, -0.11122007668018341, -0.4593814015388489, 0.08403816819190979, 0.6482228636741638, -0.258783757686615, 0.04929015040397644, 0.36392706632614136, -0.6755316257476807, -0.611065685749054, 0.013744257390499115, 0.41311752796173096, 0.3713036775588989, 0.7506353259086609, -0.16320407390594482, -0.3567432463169098, 0.23093201220035553, -0.8058661818504333, -0.4288029372692108, -0.045177165418863297, -0.09485139697790146, 0.28975439071655273, 1.4552932977676392, 0.26745182275772095, -0.3044391870498657, -1.1964982748031616, 0.2954278588294983, -0.7528925538063049, -0.09992829710245132, -0.07393264025449753, ...]"
2,151,28.0,AMUSEMENT SERVICES,"(a) Amusement services means the provision of amusement, entertainment, or recreation, but does not include the provision of educational or health services if prescribed by a licensed practitioner of the healing arts for the primary purpose of education or health maintenance or improvement.(b) Amusement services includes membership in a private club or organization that provides entertainment, recreational, sports, dining, or social facilities to its members.(c) Amusement services does not include services provided through coin-operated machines that are operated by the consumer.","(a) Amusement services means the provision of amusement, entertainment, or recreation, but does not include the provision of educational or health services if prescribed by a licensed practitioner of the healing arts for the primary purpose of education or health maintenance or improvement","[0.07914664596319199, 0.8852005004882812, 0.6613497138023376, 0.37163281440734863, -0.7094599008560181, -0.5898401141166687, 0.99375319480896, -0.2613564729690552, 0.1308562308549881, 0.44671157002449036, -0.6039671301841736, 0.43161094188690186, -0.7834445834159851, 0.7340551614761353, -0.32930752635002136, 0.26227501034736633, 0.34983813762664795, -0.36168912053108215, -0.7081705331802368, 0.04212944209575653, -0.5274221301078796, 0.6795638203620911, 0.11163768917322159, -0.4246758222579956, -0.8033706545829773, 0.08915809541940689, 0.09943550825119019, 0.5236286520957947, 0.055566802620887756, -0.7880780100822449, 0.18092310428619385, 0.7136402726173401, -0.20655345916748047, -0.49412015080451965, 0.05057704821228981, 0.05901239812374115, 0.2570330500602722, 0.09783735871315002, 0.4170505404472351, -0.8014495372772217, 0.16374513506889343, -0.37243732810020447, -0.07932136207818985, 0.2937990427017212, 0.0541142001748085, -0.2620258927345276, -0.5075677037239075, -0.189580500125885, -0.9371623992919922, 0.5245419144630432, -0.10055766254663467, -0.646271824836731, 0.6792309284210205, 0.8852449059486389, 0.6651191115379333, -0.13425664603710175, -0.8839454650878906, -0.5133633017539978, 0.474351704120636, 0.2187999188899994, 0.19985197484493256, 0.29069045186042786, 0.19477252662181854, 0.0534648559987545, -0.2592560648918152, -1.0695033073425293, -0.3437243700027466, -0.07018798589706421, 0.9108617901802063, -1.4479308128356934, 0.33619916439056396, 0.3062160909175873, -0.8603971004486084, 0.33265262842178345, 0.5310907959938049, -0.3020956516265869, 0.1603013575077057, -0.7642091512680054, -0.5622232556343079, -0.02954643778502941, 0.37955981492996216, 0.16768333315849304, 0.2510755658149719, 0.28299131989479065, -0.9769402146339417, 0.5452051758766174, -0.019501028582453728, 0.012686461210250854, -0.4299072027206421, -0.40084242820739746, 0.01715032197535038, 0.018011312931776047, 1.085639238357544, -0.16208241879940033, 0.600383996963501, -0.5674188137054443, 0.7528564929962158, -0.8870606422424316, -0.42315399646759033, 0.08904606103897095, ...]"
2,151,28.0,AMUSEMENT SERVICES,"(a) Amusement services means the provision of amusement, entertainment, or recreation, but does not include the provision of educational or health services if prescribed by a licensed practitioner of the healing arts for the primary purpose of education or health maintenance or improvement.(b) Amusement services includes membership in a private club or organization that provides entertainment, recreational, sports, dining, or social facilities to its members.(c) Amusement services does not include services provided through coin-operated machines that are operated by the consumer.","(b) Amusement services includes membership in a private club or organization that provides entertainment, recreational, sports, dining, or social facilities to its members","[0.6956534385681152, 0.550119936466217, 0.7375267744064331, 0.3352024257183075, -0.047779690474271774, -0.06484528630971909, 1.5313715934753418, -0.6855745911598206, -1.0877177715301514, 0.29689112305641174, -1.0509589910507202, 0.2512943148612976, -0.4242200255393982, 0.2956429719924927, -0.07658171653747559, -0.03735031187534332, 0.281088262796402, -0.04457869380712509, -0.42643043398857117, -0.27970555424690247, 0.3927646577358246, 0.4376095235347748, -0.07523810118436813, 0.9846092462539673, -0.9745696187019348, -0.5925203561782837, -0.061288610100746155, -0.08034271746873856, 0.31801798939704895, -0.7419871091842651, 0.272266685962677, -0.17027559876441956, -0.024481071159243584, 0.0726868137717247, -0.6441087126731873, -0.7354622483253479, 0.1721573770046234, -0.46729081869125366, 0.3618312478065491, -0.7326836585998535, 0.41594281792640686, 0.01682474836707115, 1.3426011800765991, 1.6517953872680664, -0.07185199856758118, -0.05508510768413544, 0.39949485659599304, 0.6139546036720276, -0.15841837227344513, 0.8770650029182434, 0.20917537808418274, -0.22844693064689636, -0.40183478593826294, -0.2689913213253021, 0.6988494396209717, -0.1765606850385666, -0.7082768082618713, -0.4242897927761078, 1.288340449333191, -0.2855532467365265, 0.8083611130714417, 0.7043229341506958, 0.5333597660064697, -0.04922936484217644, -0.13823756575584412, -0.15838725864887238, -0.20007158815860748, -0.3540583848953247, 0.603478193283081, -1.0551451444625854, -0.2912060022354126, -0.47278791666030884, -1.1739345788955688, 0.05206485465168953, -0.4807775616645813, -0.43188807368278503, -0.040242791175842285, -0.10076361894607544, -0.38401979207992554, 0.24741528928279877, -0.25486594438552856, 0.6458341479301453, 0.22961102426052094, 1.33399498462677, -0.46337631344795227, 0.7066237330436707, -0.3229151666164398, 0.2599301040172577, -0.27231451869010925, -0.7394894957542419, 0.6419855952262878, 0.29877224564552307, 0.729128360748291, 0.7894961833953857, -0.5535014867782593, -0.8796188831329346, 0.2678026556968689, -0.4378856420516968, -0.6298126578330994, 0.13830241560935974, ...]"


In [37]:
query = 'natural gas purchase'

pd.set_option('max_colwidth', None)
legal_corpus = legal_text.copy()
query_embedding = embedder.encode(query)
legal_corpus['query_similarity'] = np.round(cosine_similarity(query_embedding, list(legal_corpus.sentence_embedding))[0],3)
legal_corpus=legal_corpus.sort_values('query_similarity', ascending=False).drop_duplicates([group_col])
# legal_corpus[['regulation','service_type','sentence','query_similarity']].head()
legal_corpus[['Section_Title','sentence','query_similarity']].head()

Unnamed: 0,Section_Title,sentence,query_similarity
117,GAS AND ELECTRICITY,"For purposes of this subsection, the use of gas or electricity in an exempt manner by an independent contractor engaged by the purchaser of the gas or electricity to perform one or more of the exempt activities identified in Subsections (a)(2)-(9) is considered use by the purchaser of the gas or electricity",0.582
102,ITEMS TAXED BY OTHER LAW,(b) Natural gas is exempted under Subsection (a)(2) only to the extent that the gas is taxed as a motor fuel under Chapter 162,0.537
18,SALE FOR RESALE,"(e) A sale for resale does not include the sale of tangible personal property to a purchaser who acquires the property for the purpose of using, consuming, or expending it in, or incorporating it into, an oil or gas well in the performance of an oil well service taxable under Chapter 191",0.485
140,ENERGY-EFFICIENT PRODUCTS,"(a) In this section, energy-efficient product means a product that has been designated as an Energy Star qualified product under the Energy Star program jointly operated by the United States Environmental Protection Agency and the United States Department of Energy",0.463
142,COMPONENTS OF TANGIBLE PERSONAL PROPERTY USED IN CONNECTION WITH SEQUESTRATION OF CARBON DIOXIDE,"003, Health and Safety Code, or a clean energy project, as defined by Section 120",0.446
