Run this notebook after the vector database has been loaded (using the script in `generate_embeddings.ipynb`).

In [1]:
from datetime import datetime
import pandas as pd
import requests
from tqdm import tqdm
from sentence_transformers import SentenceTransformer
from InstructorEmbedding import INSTRUCTOR

import weaviate
import json
import os
from dotenv import load_dotenv


# Load variables from a .env file into the process environment before reading them.
load_dotenv()

# Current local time formatted as YYYYMMDDHHMMSS.
timestamp = datetime.now().strftime("%Y%m%d%H%M%S")

# Settings read from the environment; each one is None when its variable is unset.
_ENV_NAMES = ('WEAVIATE_API_KEY', 'WEAVIATE_URL', 'OPENAI_API_KEY', 'bill_tracker_api_key')
weaviate_api_key, weaviate_url, openai_key, billtrack50_api_key = map(os.getenv, _ENV_NAMES)

# Weaviate client: API-key authentication plus the OpenAI key forwarded as a request header.
weaviate_auth = weaviate.AuthApiKey(api_key=weaviate_api_key)
openai_headers = {"X-OpenAI-Api-Key": openai_key}
client = weaviate.Client(
    url=weaviate_url,
    auth_client_secret=weaviate_auth,
    additional_headers=openai_headers,
)

  from .autonotebook import tqdm as notebook_tqdm
            Please consider upgrading to the latest version. See https://weaviate.io/developers/weaviate/client-libraries/python for details.


In [30]:
# Sentence-embedding model used to turn the query text into a vector.
# NOTE(review): presumably the same model that produced the stored vectors in
# generate_embeddings.ipynb -- confirm, otherwise nearVector results are not comparable.
model = SentenceTransformer('sentence-transformers/msmarco-MiniLM-L12-cos-v5')

general_query = """
Find bills relating to gender identity, LGBTQ+ groups, trans, nonbinary, gender-nonconforming, genderqueer, genderfluid. Also include bills that could have a bigger impact on these groups than others.
This could include but not limited to topics on : Sex reassignment, gender reassignment, Biological sex, Natural sex hormones, Sex organs etc.
"""
anti_trans_query = """
Find bills that explicitly or implicitly harm or violate the rights of transgender people. 
"""
# Keyword-style alternative query. Held in a string so the cell parses; as bare
# text between statements it is a SyntaxError.
keyword_query = (
    "Original Birth; Certificate Birth; Certificate Amended Biological Sex, gender identity; "
    "LGBTQ; trans; nonbinary; non-binary; gender-nonconforming; genderqueer; genderfluid; "
    "Sex reassignment; gender reassignment; Natural sex; hormones Sex; Sex organs; queer"
)

# Available queries keyed by a short label. The label (not the query text) is
# what goes into the output file name.
queries = {
    "general": general_query,
    "anti_trans": anti_trans_query,
    "keywords": keyword_query,
}
query_name = "anti_trans"
query_text = queries[query_name]
# instruction_prompt = "Represent the legislation bill for retrieval:"

# negative_text = "A list of, archive"
# query_text = "Find recent hate crime incidents targeting gender or gender identity of the victims."

# Embed the query; .tolist() gives a plain Python list that formats as a
# bracketed list of numbers inside the GraphQL text below.
query_vector = model.encode(query_text).tolist()
# query_vector = model.encode([[instruction_prompt,query_text]]).tolist()
# query_vector = [item for sublist in query_vector for item in sublist]
state = "MO"
result_limit = 30  # maximum number of objects requested from Weaviate

# Raw GraphQL: Legislation_v2 objects whose stateCode equals `state`, ordered by
# vector similarity to the query. Doubled braces are literal braces in the f-string.
get_legislation_group = f"""
{{
  Get {{
    Legislation_v2(where: {{
      path: ["stateCode"],
      operator: Equal,
      valueString: "{state}"
    }},
    
      nearVector: {{
        vector: {query_vector}
      }},
      group: {{
        type: merge,
        force: 0
      }},
      limit: {result_limit}
    ) {{
        billID,
        stateCode,
        stateBillID,
        shortBillName,
        statesummary,
        billText
    }}
  }}
}}
"""

query_result = client.query.raw(get_legislation_group)
# A GraphQL-level failure comes back as an "errors" entry rather than an
# exception; surface it here instead of failing later with an opaque KeyError.
if query_result.get("errors"):
    raise RuntimeError(f"Weaviate query failed: {query_result['errors']}")
# print(query_result)
# save to csv
df = pd.DataFrame(query_result['data']['Get']['Legislation_v2'])
# Use the short query label in the file name: the full query text contains
# newlines, spaces and punctuation, which make an unusable (and on some
# platforms invalid) path.
df.to_csv(f'../data_storage/{state}_weaviate_result_w_full_text_{query_name}.csv', index=False)

In [20]:
# Load the cleaned full-text legislation CSV from disk.
raw = pd.read_csv('../data_storage/legislation/complete_cleaned_full_text.csv')
# End on the shape so the cell displays (rows, columns); an assignment alone
# renders no output.
raw.shape

(9023, 14)

In [28]:
# Sanity check: (rows, columns) of the frame built from the Weaviate query response.
df.shape

(30, 6)

In [21]:
# Show the rows of the raw export whose StateCode is "MO".
mo_mask = raw['StateCode'] == "MO"
raw.loc[mo_mask]

Unnamed: 0,BillID,StateCode,StateBillID,ShortBillName,Created,SponsorParty,billtype,status,CommitteeCategories,statesummary,BillText,BillText_vector,statesummary_vector,ShortBillName_vector
3449,1658172,MO,HB2183,Modifies and establishes provisions relating t...,2024-01-03 10:30:09.690000000,R,Bill,In Committee,"""Government Affairs, Health and Social Services""",Modifies and establishes provisions relating t...,"""Missouri MO HB 2183 MO HB2183 MOHB2183 MO HB ...","[0.004087352193892002, 0.058131974190473557, 0...","[-0.02304215170443058, 0.07312975078821182, -0...","[-0.02304215170443058, 0.07312975078821182, -0..."
3455,1709868,MO,HB2757,Waives certain fees for offenders who are not ...,2024-02-20 14:30:51.493000000,D,Bill,Introduced,,Waives certain fees for offenders who are not ...,"""Missouri MO HB 2757 MO HB2757 MOHB2757 MO HB ...","[0.0021185572259128094, 0.03159540519118309, 0...","[-0.0010598552180454135, 0.049115657806396484,...","[-0.0010598552180454135, 0.049115657806396484,..."
3460,1694195,MO,HB2619,"""Prohibits state departments from spending mon...",2024-02-01 14:36:20.447000000,R,Bill,In Committee,"""Government Affairs, Justice""","""Prohibits state departments from spending mon...","""Missouri MO HB 2619 MO HB2619 MOHB2619 MO HB ...","[0.05760334059596062, 0.0007604744168929756, 0...","[0.08393386006355286, -0.026641573756933212, -...","[0.06795506924390793, -0.008220933377742767, -..."
3469,1709759,MO,HB2727,"""Establishes the \""Uniform Unlawful Restrictio...",2024-02-20 10:46:07.387000000,R,Bill,In Committee,Justice,"""Establishes the \""Uniform Unlawful Restrictio...","""Missouri MO HB 2727 MO HB2727 MOHB2727 MO HB ...","[0.013948975130915642, 0.05373416468501091, -0...","[0.027912044897675514, 0.05187356844544411, -0...","[0.027912044897675514, 0.05187356844544411, -0..."
3513,1652669,MO,HB1626,Modifies provisions relating to school bus end...,2023-12-07 03:15:22.890000000,R,Bill,Crossed Over,"""Government Affairs, Transportation and Infras...",Modifies provisions relating to school bus end...,"""Missouri MO HB 1626 MO HB1626 MOHB1626 MO HB ...","[0.02766963094472885, 0.05258200690150261, 0.0...","[0.0419449657201767, -0.0011467994190752506, -...","[0.0419449657201767, -0.0011467994190752506, -..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8145,1716184,MO,HB2893,Modifies provisions governing Missouri empower...,2024-02-29 10:50:29.153000000,D,Bill,Introduced,,Modifies provisions governing Missouri empower...,"""Missouri MO HB 2893 MO HB2893 MOHB2893 MO HB ...","[0.01237466186285019, 0.011058736592531204, -0...","[0.010556774213910103, 0.05966833233833313, -0...","[0.010556774213910103, 0.05966833233833313, -0..."
8146,1716207,MO,SB1506,Modifies provisions relating to the Career Lad...,2024-02-29 10:50:43.300000000,R,Bill,In Committee,Government Affairs,Modifies provisions relating to the Career Lad...,"""Missouri MO SB 1506 MO SB1506 MOSB1506 MO SB ...","[-0.04488617181777954, 0.029216719791293144, -...","[-0.06954620033502579, 0.08065293729305267, 0....","[-0.06954620033502579, 0.08065293729305267, 0...."
8147,1716084,MO,HB2896,Modifies provisions relating to the offense of...,2024-02-29 10:45:28.630000000,R,Bill,In Committee,Government Affairs,Modifies provisions relating to the offense of...,"""Missouri MO HB 2896 MO HB2896 MOHB2896 MO HB ...","[-0.02140355110168457, 0.004559319466352463, 0...","[-0.002303385641425848, 0.04252709075808525, 0...","[-0.002303385641425848, 0.04252709075808525, 0..."
8148,1716098,MO,SB1510,Requires driver's and nondriver's licenses iss...,2024-02-29 10:45:37.257000000,R,Bill,In Committee,Transportation and Infrastructure,Requires driver's and nondriver's licenses iss...,"""Missouri MO SB 1510 MO SB1510 MOSB1510 MO SB ...","[0.038691919296979904, 0.04934678599238396, -0...","[0.03872412443161011, 0.04821164533495903, 0.0...","[0.03872412443161011, 0.04821164533495903, 0.0..."


In [45]:
# NOTE(review): the recorded output for this cell is (500, 6), which does not
# match the 30-row limit in the query cell -- presumably left over from a run
# with a larger limit; restart and run all to refresh it.
df.shape

(500, 6)