In [2]:
import pandas as pd
%load_ext autoreload
%autoreload 2

In [3]:
from dotenv import load_dotenv
import os

load_dotenv()

open_ai_key = os.getenv("OPEN_AI_KEY")
google_search_key = os.getenv("GOOGLE_PROGRAMMABLE_SEARCH_KEY")

In [5]:
dfs = []
for i in range(1, 11):
    df = pd.read_csv(f'data/asap_{i:02d}/df.csv')
    dfs.append(df)
df = pd.concat(dfs, ignore_index=True)

In [None]:
import nltk
from nltk import ngrams, pos_tag
from nltk.tokenize import word_tokenize
from collections import Counter

def generate_ngram_results(set_ids, df, count_threshold = 3):
    # Initialize dictionary to store results
    ngram_results = {i: [] for i in set_ids}

    # Loop through each EssaySet
    for set_id in set_ids:
        # Filter DataFrame for current EssaySet and score1 == 2
        filtered_df = df[(df['EssaySet'] == set_id) & (df['Score1'] == 2)]
        
        # Print progress for each EssaySet
        print(f"Processing EssaySet {set_id} with {len(filtered_df)} essays.")
        
        # Initialize a Counter to count n-grams across all essays in the set
        ngram_counter = Counter()
        
        # Process each EssayText in the filtered_df DataFrame
        for essay in filtered_df['EssayText']:
            tokens = word_tokenize(essay)
            pos_tags = pos_tag(tokens)
            
            # Generate bi-grams, tri-grams, and tetra-grams
            bi_grams = list(ngrams(pos_tags, 2))
            tri_grams = list(ngrams(pos_tags, 3))
            tetra_grams = list(ngrams(pos_tags, 4))
            
            # Concatenate all n-grams into a single list and update the counter
            all_ngrams = bi_grams + tri_grams + tetra_grams
            ngram_counter.update(all_ngrams)
        
        # Filter n-grams that appeared at least 3 times
        frequent_ngrams = [ngram for ngram, count in ngram_counter.items() if count >= count_threshold]
        
        # Store the frequent n-grams in the results dictionary
        ngram_results[set_id] = frequent_ngrams
        
        # Print after processing each set
        print(f"Completed processing EssaySet {set_id}, with {len(frequent_ngrams)} n-grams.")
    
    return ngram_results


setup

In [278]:
import gensim
model = gensim.models.KeyedVectors.load_word2vec_format('word2vec_model/GoogleNews-vectors-negative300.bin', binary=True)  

In [None]:
from gensim.models.doc2vec import Doc2Vec
doc2vec_model = Doc2Vec.load("doc2vec_model/doc2vec_wiki_d300_n5_w8_mc50_t12_e10_dbow.model")

In [279]:
import nltk
from nltk import ngrams, pos_tag
from nltk.tokenize import word_tokenize
from collections import Counter

def generate_ngram_results(set_ids, df, count_threshold = 3):
    # Initialize dictionary to store results
    ngram_results = {i: [] for i in set_ids}

    # Loop through each EssaySet
    for set_id in set_ids:
        # Filter DataFrame for current EssaySet and score1 == 2
        filtered_df = df[(df['EssaySet'] == set_id) & (df['Score1'] == 2)]
        
        # Print progress for each EssaySet
        print(f"Processing EssaySet {set_id} with {len(filtered_df)} essays.")
        
        # Initialize a Counter to count n-grams across all essays in the set
        ngram_counter = Counter()
        
        # Process each EssayText in the filtered_df DataFrame
        for essay in filtered_df['EssayText']:
            tokens = word_tokenize(essay)
            pos_tags = pos_tag(tokens)
            
            # Generate bi-grams, tri-grams, and tetra-grams
            bi_grams = list(ngrams(pos_tags, 2))
            tri_grams = list(ngrams(pos_tags, 3))
            tetra_grams = list(ngrams(pos_tags, 4))
            
            # Concatenate all n-grams into a single list and update the counter
            all_ngrams = bi_grams + tri_grams + tetra_grams
            ngram_counter.update(all_ngrams)
        
        # Filter n-grams that appeared at least 3 times
        frequent_ngrams = [ngram for ngram, count in ngram_counter.items() if count >= count_threshold]
        
        # Store the frequent n-grams in the results dictionary
        ngram_results[set_id] = frequent_ngrams
        
        # Print after processing each set
        print(f"Completed processing EssaySet {set_id}, with {len(frequent_ngrams)} n-grams.")
    
    return ngram_results


In [353]:
ngram_results = generate_ngram_results([1,2,3,4,5,6,7,8,9,10], df, 30)

Processing EssaySet 1 with 886 essays.
Completed processing EssaySet 1, with 459 n-grams.
Processing EssaySet 2 with 751 essays.
Completed processing EssaySet 2, with 426 n-grams.
Processing EssaySet 3 with 699 essays.
Completed processing EssaySet 3, with 403 n-grams.
Processing EssaySet 4 with 230 essays.
Completed processing EssaySet 4, with 72 n-grams.
Processing EssaySet 5 with 73 essays.
Completed processing EssaySet 5, with 13 n-grams.
Processing EssaySet 6 with 126 essays.
Completed processing EssaySet 6, with 28 n-grams.
Processing EssaySet 7 with 723 essays.
Completed processing EssaySet 7, with 327 n-grams.
Processing EssaySet 8 with 1313 essays.
Completed processing EssaySet 8, with 845 n-grams.
Processing EssaySet 9 with 1031 essays.
Completed processing EssaySet 9, with 563 n-grams.
Processing EssaySet 10 with 970 essays.
Completed processing EssaySet 10, with 583 n-grams.


In [281]:
import os

def load_prompts(set_ids):
    prompts = {}
    for set_id in set_ids:
        file_name = f"prompts/asap_{set_id:02d}.txt"
        if os.path.exists(file_name):
            with open(file_name, 'r') as file:
                prompts[set_id] = file.read().strip()
        else:
            print(f"Prompt file {file_name} not found.")
    return prompts

set_ids = list(range(1, 11))  # Example set ids, you can modify this as needed
prompts = load_prompts(set_ids)


In [282]:
import os

def load_only_prompts(set_ids):
    prompts = {}
    for set_id in set_ids:
        file_name = f"only_prompts/asap_{set_id:02d}.txt"
        if os.path.exists(file_name):
            with open(file_name, 'r') as file:
                prompts[set_id] = file.read().strip()
        else:
            print(f"Prompt file {file_name} not found.")
    return prompts

only_prompts = load_only_prompts(list(range(1, 11)))


In [283]:
import requests
import tiktoken
from bs4 import BeautifulSoup
from openai import OpenAI
client = OpenAI(api_key=open_ai_key)
from sklearn.feature_extraction.text import TfidfVectorizer
import nltk
nltk.download('punkt')
import json
import concurrent.futures

def extract_articles(keyword):
      url = 'https://customsearch.googleapis.com/customsearch/v1'
      params = {
          'key': google_search_key,
          'cx': '901399343dffc41a5',
          'q': keyword,
          'start': 1,
          'num': 10,
          'lr': "lang_en",
          "filter": 1
      }      
      articles = []
      all_links = []
      try:
        for i in range(2):
          response = requests.get(url, params={
              **params,
              "start": i * 10 + 1
          })
          data = response.json()
          links = [item['link'] for item in data['items'] if 'link' in item]
          all_links.extend(links)

        print("All links: ", all_links)
        
        # Function to scrape a single link
        def scrape_link(link):
            try:
                page = requests.get(link, timeout=10)
                soup = BeautifulSoup(page.content, 'html.parser')
                return soup.get_text()
            except Exception as e:
                print(f"Failed to scrape {link}: {e}")
                return None

        # Use ThreadPoolExecutor to scrape links concurrently
        with concurrent.futures.ThreadPoolExecutor() as executor:
            results = list(executor.map(scrape_link, all_links))

        # Filter out None results and extend articles list
        articles.extend([result for result in results if result is not None])

      except Exception as e:
          print(f"An error occurred: {e}")
          # all.extend(data["items"])
          
      return articles

def extract_domain_specific_keywords(prompt, answers):
    # first 50k tokens
    tokenizer = tiktoken.get_encoding("cl100k_base")
    tokens = tokenizer.encode(answers)
    first_50000_tokens = tokens[:50000]
    extracted_text = tokenizer.decode(first_50000_tokens)

    response = client.chat.completions.create(
        model="gpt-4o",
        response_format={ "type": "json_object" },
        messages=[
            {"role": "system", "content": "You are a helpful assistant designed to output JSON."},
            {"role": "user", "content": f"Here is a question: {prompt}. Identify 100 domain-specific keywords that are most relevant to the question from the following set of student answers, and return them in a JSON object with an array field named 'keywords':\n\n{extracted_text}"}
        ]
    )
    
    keywords = json.loads(response.choices[0].message.content)

    return keywords

def extract_tfidf_from_articles(keyword, articles):
    def preprocess(text):
        return text.lower()

    # Preprocess articles
    preprocessed_articles = [preprocess(article) for article in articles]

    # Initialize the vectorizer
    vectorizer = TfidfVectorizer()

    # Fit and transform the articles
    tfidf_matrix = vectorizer.fit_transform(preprocessed_articles)

    # Get the feature names (i.e., the words)
    feature_names = vectorizer.get_feature_names_out()

    # Split the keyword into individual words and lower each
    keywords = keyword.lower().split()

    # Initialize list to store tf-idf values for each word in the keyword
    tfidf_values_list = []

    # Iterate over each word in the keyword
    for word in keywords:
        try:
            # Find the index of the word
            word_index = feature_names.tolist().index(word)
            # Get the tf-idf values for the word across all documents
            tfidf_values = tfidf_matrix[:, word_index].toarray().flatten()
            # Append the average tf-idf value for the word
            tfidf_values_list.append(tfidf_values.mean())
        except ValueError:
            # If the word is not found, append 0.0
            tfidf_values_list.append(0.0)

    # Calculate the average tf-idf value for the entire keyword phrase
    if tfidf_values_list:
        average_tfidf = sum(tfidf_values_list) / len(tfidf_values_list)
    else:
        average_tfidf = 0.0

    return average_tfidf


def extract_weighted_keywords(setnumbers, df, prompts):
  weighted_keywords = {}
  all_articles = {}
  for setno in setnumbers:
    answers = " ".join(df[df['EssaySet'] == setno]['EssayText'].tolist())
    keywords = extract_domain_specific_keywords(prompts[setno], answers)["keywords"]
    # keywords = ['animals', 'Asia', 'Australia', 'Bamboo', 'Bears', 'Biologists', 'Biodiversity', 'Boa constrictor', 'Burmese python', 'Bushnell', 'Cape Coral', 'Carnivores', 'Change', 'China', 'Climates', 'Cobra', 'Cold blooded', 'Creatures', 'Customer', 'Ecosystem', 'Ecophobia', 'Effective threat', 'Endangered species', 'Environment', 'Everglades', 'Exclusive food', 'Exploded', 'Food source', 'Florida', 'Food stability', 'Food variety', 'Generalists', 'Government', 'Habitat', 'Hankering', 'Herbivores', 'Hood spread', 'Households', 'Humans', 'Hurricane Andrew', 'Invasive species', 'Key Largo wood rat', 'Kimberly', 'Koala', 'Lizards', 'Macroherbivores', 'Macinnes', 'Madagascar', 'Mammals', 'Mcinnes', 'Microchip', 'Monitor lizard', 'Natural predators', 'Niche environments', 'Nile monitor lizard', 'Non-native', 'Organisms', 'Panda', 'Pandas', 'Permit', 'Pets', 'Plants', 'Policy adviser', 'Potomac', 'Predators', 'Prey', 'Python', 'Raccoon', 'Radiated tortoises', 'Reed', 'Reptiles', 'Resource-specific', 'RobRoy Macinnes', 'Scales', 'Scorpion', 'Sensitive habitats', 'Skip Snow', 'Specialists', 'Stability', 'Survival', 'Tourists', 'Traditional diet', 'Urban areas', 'U.S. Fish and Wildlife Service', 'U.S. Geological Survey', 'Unique habitat', 'Variety of foods', 'Warm blooded', 'Weeds', 'Wildlife biologist', 'Zoologists']
    
    set_scores = {}

    total_keywords = len(keywords)
    for i, keyword in enumerate(keywords, 1):
       articles = extract_articles(keyword)
       tfidf = extract_tfidf_from_articles(keyword, articles)
       
       set_scores[keyword] = tfidf
       print(f"Progress: {i}/{total_keywords} - keyword: {keyword}, tfidf: {tfidf}")
       
       if setno not in all_articles:
           all_articles[setno] = {}
       all_articles[setno][keyword] = articles
    
    weighted_keywords[setno] = set_scores
  
  return weighted_keywords, all_articles

weighted_keywords, all_articles = extract_weighted_keywords([1,2,3,4,5,6,7,8,9,10], df, prompts)

[nltk_data] Downloading package punkt to /Users/josephtey/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


All links:  ['https://experiment.com/', 'https://experimentbeauty.com/', 'https://en.wikipedia.org/wiki/Experiment', 'https://labs.google/', 'https://www.merriam-webster.com/dictionary/experiment', 'https://atlas.cern/', 'https://experiments.withgoogle.com/collection/chrome', 'https://www.imdb.com/title/tt0250258/', 'https://aspe.hhs.gov/reports/overview-final-report-seattle-denver-income-maintenance-experiment', 'https://www.prisonexp.org/conclusion', 'https://www.wnycstudios.org/podcasts/experiment', 'https://www.cdc.gov/tuskegee/timeline.htm', 'https://cals.cornell.edu/agricultural-experiment-station', 'https://hbr.org/2010/04/column-why-businesses-dont-experiment', 'https://www.prisonexp.org/', 'https://support.google.com/google-ads/answer/6261395?hl=en', 'https://theexperimentpublishing.com/', 'https://www.uaf.edu/afes/places/fairbanks/', 'https://journals.ametsoc.org/view/journals/bams/93/4/bams-d-11-00094.1.xml', 'https://www.experiment.org/scholarships/']
Progress: 1/98 - keywo

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 2/98 - keyword: procedure, tfidf: 0.054361147770560184
All links:  ['https://www.mass.gov/', 'https://en.wikipedia.org/wiki/Mass', 'https://massmoca.org/', 'https://massdesigngroup.org/', 'https://www.masssave.com/', 'https://www.massaudubon.org/', 'https://massculturalcouncil.org/', 'https://masshumanities.org/', 'https://www.imdb.com/title/tt11389748/', 'https://www.nhlbi.nih.gov/health/educational/lose_wt/BMI/bmicalc.htm', 'https://www.massgeneralbrigham.org/en', 'https://mtc.dor.state.ma.us/mtc/', 'https://www.massmutual.com/', 'https://www.merriam-webster.com/dictionary/mass', 'https://masstimes.org/', 'https://www.massgeneral.org/', 'https://www.cdc.gov/healthyweight/assessing/bmi/adult_bmi/english_bmi_calculator/bmi_calculator.html', 'https://dtaconnect.eohhs.mass.gov/', 'https://www.ea.com/games/mass-effect/mass-effect-legendary-edition', 'https://www.gunviolencearchive.org/reports/mass-shooting']
Failed to scrape https://www.massmutual.com/: ('Connection aborted.', R

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Failed to scrape https://studentaid.gov/complete-aid-process/how-calculated: HTTPSConnectionPool(host='studentaid.gov', port=443): Read timed out. (read timeout=10)
Failed to scrape https://studentaid.gov/apply-for-aid/fafsa/filling-out/dependency: HTTPSConnectionPool(host='studentaid.gov', port=443): Read timed out. (read timeout=10)
Progress: 12/98 - keyword: determine, tfidf: 0.06506686206733144
All links:  ['https://en.wikipedia.org/wiki/Acid', 'https://www.epa.gov/acidrain/acid-rain-program', 'https://www.cdc.gov/folic-acid/index.html', 'https://www.epa.gov/acidrain/what-acid-rain', 'https://www.talktofrank.com/drug/lsd', 'https://www.emailonacid.com/', 'https://wdfw.wa.gov/fishing/basics/domoic-acid/levels', 'https://www.britannica.com/science/acid', 'https://uspreventiveservicestaskforce.org/uspstf/recommendation/folic-acid-for-the-prevention-of-neural-tube-defects-preventive-medication', 'https://www.whitehouse.gov/wp-content/uploads/2024/04/Nucleic-Acid_Synthesis_Screening_Fra

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 13/98 - keyword: acid, tfidf: 0.1508654500941708
All links:  ['https://www.rainapp.com/', 'https://www.raintribute.com/', 'https://www.youtube.com/watch?v=jX6kn9_U8qk', 'https://www.rain.aero/', 'https://www.instagram.com/turrelljames/?hl=en', 'https://en.wikipedia.org/wiki/Rain', 'https://shop.rainafrica.com/', 'https://lrain.bandcamp.com/album/fatigue', 'https://rainrfid.org/', 'https://carolinarain.org/', 'https://rain.agency/', 'https://www.rainoncology.com/', 'https://www.rainx.com/', 'https://rain4sahara.org/', 'https://www.raincards.xyz/', 'https://twitter.com/faze_rain?lang=en', 'https://www.rainbird.com/', 'https://sadnessmusic.bandcamp.com/album/rain', 'https://www.linkedin.com/in/rainleander', 'https://www.netflix.com/title/80154610']
Progress: 14/98 - keyword: rain, tfidf: 0.10323430367501911
All links:  ['https://www.ntsb.gov/investigations/process/Pages/default.aspx', 'https://www.icc-cpi.int/news/statement-icc-prosecutor-karim-aa-khan-qc-situation-ukraine-recei

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 15/98 - keyword: investigation, tfidf: 0.03797280861048054
All links:  ['https://www.remedygames.com/games/control', 'https://www.merriam-webster.com/dictionary/control', 'https://www.cdc.gov/index.html', 'https://en.wikipedia.org/wiki/Control_(video_game)', 'https://store.steampowered.com/app/870780/Control_Ultimate_Edition/', 'https://store.epicgames.com/en-US/p/control', 'https://www.cbp.gov/travel/us-citizens/mobile-passport-control', 'https://www.dictionary.com/browse/control', 'https://vi-control.net/community/', 'https://www.controlglobal.com/', 'https://www.bis.doc.gov/index.php/regulations/commerce-control-list-ccl', 'https://www.imdb.com/title/tt0421082/', 'https://www.armscontrol.org/node/13866', 'https://www.sciencedirect.com/journal/control-engineering-practice', 'https://sanctionssearch.ofac.treas.gov/', 'https://www.controlrisks.com/', 'https://tobaccocontrol.bmj.com/', 'https://www.waterboards.ca.gov/', 'https://masscannabiscontrol.com/home/', 'https://control

  soup = BeautifulSoup(page.content, 'html.parser')


Failed to scrape https://www.mgi.gov/: HTTPSConnectionPool(host='www.mgi.gov', port=443): Max retries exceeded with url: / (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1122)')))
Failed to scrape https://pubs.rsc.org/en/journals/journal/TA: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
Progress: 19/98 - keyword: materials, tfidf: 0.0908862312021797
All links:  ['https://apps.apple.com/us/app/measure/id1383426740', 'https://play.google.com/store/apps/details?id=com.grymala.photoruler&hl=en_US', 'https://support.apple.com/en-us/102468', 'https://ageagle.com/', 'https://www.merriam-webster.com/dictionary/measure', 'https://www.metro.net/about/measure-r/', 'https://osh.sccgov.org/2016-measure-affordable-housing-bond', 'https://www.vta.org/projects/funding/2000-measure-a', 'https://osh.sccgov.org/2016-measure-housing-bond-progress', 'https://www.chula

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 20/98 - keyword: measure, tfidf: 0.12550486649151787
All links:  ['https://www.amount.com/', 'https://www.merriam-webster.com/dictionary/amount', 'https://studentaid.gov/understand-aid/types/loans/subsidized-unsubsidized', 'https://www.va.gov/disability/compensation-rates/veteran-rates/', 'https://studentaid.gov/understand-aid/types/grants/pell', 'https://americorps.gov/members-volunteers/segal-americorps-education-award/find-out-more', 'https://otda.ny.gov/programs/snap/', 'https://edd.ca.gov/en/unemployment/UI-Calculator/', 'https://www.irs.gov/individuals/tax-withholding-estimator', 'https://www.ers.usda.gov/publications/pub-details/?pubid=43836', 'https://dictionary.cambridge.org/dictionary/english/amount', 'https://www.hhs.texas.gov/services/food/snap-food-benefits', 'https://dpi.wi.gov/parental-education-options/choice-programs/payment-amounts-frequently-asked-questions', 'https://community.concur.com/t5/Concur-Expense/Refund-expenses-negative-amount/m-p/39403', 'https:

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Failed to scrape https://studentaid.gov/understand-aid/types/loans/subsidized-unsubsidized: HTTPSConnectionPool(host='studentaid.gov', port=443): Read timed out. (read timeout=10)
Progress: 30/98 - keyword: difference, tfidf: 0.05912620111961739
All links:  ['https://www.avery.com/', 'https://www.labelrestaurant.com/', 'https://developer.mozilla.org/en-US/docs/Web/HTML/Element/label', 'https://www.w3schools.com/tags/tag_label.asp', 'https://www.merriam-webster.com/dictionary/label', 'https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/', 'https://nielseniq.com/global/en/landing-page/label-insight/', 'https://www.nolabelbrew.com/', 'https://www.mascotlabelgroup.com/', 'https://dsld.od.nih.gov/', 'https://how2recycle.info/', 'https://dictionary.cambridge.org/dictionary/english/label', 'https://www.v-label.com/', 'https://en.wikipedia.org/wiki/Record_label', 'https://developer.apple.com/documentation/swiftui/label', 'https://www.theslowlabel.com/', 'https://www.fda.go

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 32/98 - keyword: conclusion, tfidf: 0.0658258525026103
All links:  ['https://www.toolband.com/', 'https://en.wikipedia.org/wiki/Tool_(band)', 'https://toolofna.com/', 'https://www.facebook.com/ToolMusic/', 'https://www.merriam-webster.com/dictionary/tool', 'https://studentaid.gov/pslf/', 'https://www.milwaukeetool.com/', 'https://screeningtool.geoplatform.gov/', 'https://www.microsoft.com/en-us/software-download/windows10', 'https://www.northerntool.com/', 'https://open.spotify.com/artist/2yEwvVSSSUkcLeSTNyHKh8', 'https://developers.google.com/search/docs/appearance/structured-data', 'https://www.instagram.com/toolmusic/?hl=en', 'https://www.youtube.com/c/FireballTool', 'https://www.irs.gov/individuals/tax-withholding-estimator', 'https://www.medicare.gov/plan-compare/', 'https://www.irs.gov/payments/online-payment-agreement-application', 'https://www.cmegroup.com/markets/interest-rates/cme-fedwatch-tool.html', 'https://vt.ncsbe.gov/RegLkup/', 'https://liheapch.acf.hhs.gov/el

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 39/98 - keyword: effect, tfidf: 0.12631591945449983
All links:  ['https://www.mass.gov/', 'https://en.wikipedia.org/wiki/Mass', 'https://massmoca.org/', 'https://massdesigngroup.org/', 'https://www.masssave.com/', 'https://www.massaudubon.org/', 'https://massculturalcouncil.org/', 'https://masshumanities.org/', 'https://www.imdb.com/title/tt11389748/', 'https://www.nhlbi.nih.gov/health/educational/lose_wt/BMI/bmicalc.htm', 'https://www.massgeneralbrigham.org/en', 'https://mtc.dor.state.ma.us/mtc/', 'https://www.massmutual.com/', 'https://www.merriam-webster.com/dictionary/mass', 'https://masstimes.org/', 'https://www.massgeneral.org/', 'https://www.cdc.gov/healthyweight/assessing/bmi/adult_bmi/english_bmi_calculator/bmi_calculator.html', 'https://dtaconnect.eohhs.mass.gov/', 'https://www.ea.com/games/mass-effect/mass-effect-legendary-edition', 'https://www.gunviolencearchive.org/reports/mass-shooting']
Failed to scrape https://www.massmutual.com/: ('Connection aborted.', Remo

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 44/98 - keyword: procedure, tfidf: 0.054362008204804135
All links:  ['https://www.desmos.com/scientific', 'https://www.scientificamerican.com/', 'https://www.nature.com/srep/', 'https://www.scientific.net/', 'https://www.thermofisher.com/us/en/home.html', 'https://www.bostonscientific.com/en-US/home.html', 'https://en.wikipedia.org/wiki/Scientific_method', 'https://scar.org/', 'https://scientificlinux.org/', 'https://professional.heart.org/en/meetings/scientific-sessions', 'https://www.dietaryguidelines.gov/2020-advisory-committee-report', 'https://www.verder-scientific.com/', 'https://onlinelibrary.wiley.com/journal/5192', 'https://www.fishersci.com/us/en/home.html', 'https://www.scientificgames.com/', 'https://www.freedomscientific.com/', 'https://dictionary.cambridge.org/dictionary/english/scientific', 'https://www.linkedin.com/company/boston-scientific', 'https://journals.biologists.com/jcs/article/121/11/1771/30038/The-importance-of-stupidity-in-scientific-research', 'ht

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 49/98 - keyword: validity, tfidf: 0.08168303486909832
All links:  ['https://www.accuracy.com/', 'https://en.wikipedia.org/wiki/Accuracy_and_precision', 'https://www.merriam-webster.com/dictionary/accuracy', 'https://developers.google.com/machine-learning/crash-course/classification/accuracy', 'https://www.dictionary.com/browse/accuracy', 'https://dictionary.cambridge.org/dictionary/english/accuracy', 'https://www.gps.gov/systems/gps/performance/accuracy/', 'https://forum.dronedeploy.com/t/relative-accuracy-without-gcps/8123', 'https://nanoporetech.com/platform/accuracy', 'https://forums.fast.ai/t/my-dogs-vs-cats-models-always-have-0-5-accuracy-whats-wrong/1665', 'https://www.paymentaccuracy.gov/', 'https://www.equator-network.org/reporting-guidelines/stard/', 'https://www.washingtonpost.com/climate-environment/interactive/2024/how-accurate-is-the-weather-forecast/', 'https://community.esri.com/t5/arcgis-field-maps-questions/field-maps-issue-with-location-accuracy-when/td-p/10

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 55/98 - keyword: reaction_time, tfidf: 0.0
All links:  ['https://discourse.slicer.org/t/surface-area-method-for-portions-of-segment-or-from-landmarks/22645', 'https://www.reddit.com/r/starcitizen/comments/ebv83e/star_citizen_surface_area/', 'https://discourse.shapr3d.com/t/surface-area-values-on-ipad/30664', 'https://forum.image.sc/t/surface-area-measurements-in-bonej/5420', 'https://mordred-descriptor.github.io/documentation/master/subpackages/mordred.surface_area.html', 'https://forum.vectorworks.net/index.php?/topic/48573-surface-area-of-a-3d-solid/', 'https://community.graphisoft.com/t5/Modeling/Zone-do-not-calculate-wall-surface-area/td-p/383792', 'https://forum.vectorworks.net/index.php?/topic/64968-calculating-surface-area-labelling/', 'https://forum.onshape.com/discussion/21346/is-there-a-way-to-calculate-the-entire-surface-area-across-a-plane', 'https://forums.autodesk.com/t5/autocad-plant-3d-forum/surface-area/td-p/4932440', 'https://discourse.mcneel.com/t/surface-a

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Failed to scrape https://www.3m.com/3M/en_US/health-information-systems-us/drive-value-based-care/patient-classification-methodologies/: HTTPSConnectionPool(host='www.3m.com', port=443): Read timed out. (read timeout=10)
Progress: 68/98 - keyword: methodologies, tfidf: 0.045535086182996086
All links:  ['https://www.merriam-webster.com/dictionary/comparison', 'https://www.va.gov/education/gi-bill-comparison-tool/', 'https://www.reddit.com/r/VPN/comments/m736zt/vpn_comparison_table/', 'https://community.qualtrics.com/employee-engagement-18/why-doesn-t-historical-data-appear-in-comparison-in-a-comparison-widget-1714', 'https://community.alteryx.com/t5/Alteryx-Designer-Desktop-Discussions/Case-sensitive-string-comparison/td-p/62849', 'https://dictionary.cambridge.org/dictionary/english/comparison', 'https://www.opm.gov/healthcare-insurance/healthcare/plan-information/compare-plans/', 'https://www.apple.com/watch/compare/', 'https://www.jetbrains.com/products/compare/', 'https://www.medicar

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 69/98 - keyword: comparison, tfidf: 0.0766402261159826
All links:  ['https://www.merriam-webster.com/dictionary/duplicate', 'https://en.wikipedia.org/wiki/Duplicate_(1998_film)', 'https://community.monday.com/t/duplicate-board-not-carrying-all-automations-to-the-new-duplicated-board/24951', 'https://community.esri.com/t5/arcgis-online-questions/csv-error-quot-duplicate-field-quot-message/td-p/603039', 'https://www.dropboxforum.com/t5/Delete-edit-and-organize/Is-there-any-easy-way-to-find-duplicate-files/td-p/253636', 'https://forum.squarespace.com/topic/252003-how-to-duplicate-course-overview-page/', 'https://forum.asana.com/t/change-duplicate-task-include-default-settings/106330', 'https://www.dropboxforum.com/t5/Delete-edit-and-organize/Removal-of-duplicate-photos-in-my-dropbox-photo-library/td-p/84989', 'https://trailhead.salesforce.com/trailblazer-community/feed/0D54V00007YwezBSAR', 'https://forum.asana.com/t/duplicate-team-multiple-inter-dependent-projects/63985', 'https

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 74/98 - keyword: phase, tfidf: 0.11367136096949457
All links:  ['https://www.stagepga.com/', 'https://www.merriam-webster.com/dictionary/stage', 'https://www.stagerestauranthawaii.com/', 'https://www.stagefund.com/', 'https://www.summitcountyco.gov/services/transit_summit_stage/index.php', 'https://www.instagram.com/sarahstage/?hl=en', 'https://www.dictionary.com/browse/stage', 'https://community.hubspot.com/t5/Dashboards-Reporting/Create-quot-time-in-deal-stage-quot-report/m-p/401189', 'https://www.thestageonbroadway.com/', 'https://www.moma.org/calendar/exhibitions/5240', 'https://www.youtube.com/c/SeedtoStage', 'https://stagedeli.com/', 'https://performance.stageright.com/products/portable-stages/', 'https://www.mdanderson.org/cancerwise/stage-iv-lung-cancer-survivor--targeted-therapy-and-surgery-left-me-cancer-free.h00-159695967.html', 'https://www.stageleft.com/', 'https://us.creative.com/p/speakers/creative-stage', 'https://www.firststage.org/', 'https://www.saws.org/co

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 83/98 - keyword: acidity, tfidf: 0.050502373443076495
All links:  ['https://en.wikipedia.org/wiki/PH', 'https://www.usgs.gov/media/images/ph-scale-0', 'https://www.britannica.com/science/pH', 'https://www.usgs.gov/special-topics/water-science-school/science/ph-and-water', 'https://www.epa.gov/goldkingmine/what-ph', 'https://www.healthline.com/health/whats-so-important-about-skin-ph', 'https://www.epa.gov/caddis/ph', 'https://www.hud.gov/program_offices/public_indian_housing/programs/ph', 'https://arxiv.org/list/hep-ph/new', 'https://www.phnashville.com/', 'https://debeaumont.org/phwins/what-is-phwins/', 'https://www.docmartins.com/', 'https://www.apple.com/ph-edu/store', 'https://www.qld.gov.au/environment/land/management/soil/soil-properties/ph-levels', 'https://www.astho.org/topic/leadership-and-workforce-development/ph-hero/ph-workforce-resources/', 'https://debeaumont.org/phwins/2021-findings/', 'https://extension.psu.edu/understanding-soil-ph', 'https://finance.yahoo.com

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 91/98 - keyword: data_recording, tfidf: 0.0
All links:  ['https://www.reddit.com/r/Genshin_Impact/comments/17zdepo/graph_adversarial_technology_experiment_log_day_4/', 'http://ssep.ncesse.org/current-flight-opportunities/ssep-mission-4-to-the-international-space-station-iss/ssep-mission-4-to-iss-flight-phase-operations/ssep-mission-4-to-iss-experiment-log/', 'https://www.reddit.com/r/Genshin_Impact/comments/17xbrdp/graph_adversarial_technology_experiment_log_day_1/', 'https://scp-wiki.wikidot.com/experiment-log-040', 'https://www.elabjournal.com/doc/ExperimentLog.html', 'https://steemit.com/gardening/@englishtchrivy/follow-my-experiment-log-on-planting-chestnuts-and-walnuts-from-seeds', 'https://hypixel.net/threads/scp-8986-experiment-log.5678923/', 'https://www.raptitude.com/experiment-log-no-30-full-time-stoicism/', 'https://www.deviantart.com/ryuko-rose/art/Experiment-Log-33-867466201', 'https://www.raptitude.com/experiment-log-no-6-recording-how-i-use-my-time/', 'https://

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 94/98 - keyword: step_details, tfidf: 0.004216869700751758
All links:  ['https://www.merriam-webster.com/thesaurus/specifics', 'https://dictionary.cambridge.org/dictionary/english/specifics', 'https://www.merriam-webster.com/dictionary/specific', 'https://mousespecifics.com/', 'http://www.ipni.net/specifics-en', 'https://www.thesaurus.com/browse/specifics', 'https://www.iatfglobaloversight.org/oem-requirements/customer-specific-requirements/', 'https://www.ebay.com/sellercenter/listings/item-specifics', 'https://education.mn.gov/MDE/dse/test/spec/', 'https://www.modot.org/missouri-standard-specifications-highway-construction', 'https://www.fdot.gov/programmanagement/implemented/specbooks/default.shtm', 'https://wsdot.wa.gov/engineering-standards/all-manuals-and-standards/manuals/standard-specifications-road-bridge-and-municipal-construction', 'https://www.dpi.nc.gov/documents/accountability/testing/eog/eog-mathematics-grades-3-8-test-specifications', 'https://ogs.ny.gov/green

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 97/98 - keyword: recordings, tfidf: 0.056415928092076406
All links:  ['https://roads.dot.ca.gov/', 'https://droughtmonitor.unl.edu/', 'https://www.cdc.gov/coronavirus/2019-ncov/need-extra-precautions/people-with-medical-conditions.html', 'https://www.nps.gov/yell/planyourvisit/conditions.htm', 'https://neighbourhood-enlargement.ec.europa.eu/enlargement-policy/conditions-membership_en', 'https://www.va.gov/resources/the-pact-act-and-your-va-benefits/', 'https://www.hrsa.gov/provider-relief/compliance/terms-conditions', 'https://www.tsa.gov/travel/tsa-cares', 'https://policies.google.com/terms?hl=en-US', 'https://www.mayoclinic.org/diseases-conditions', 'https://www.drought.gov/current-conditions', 'http://511.alaska.gov/', 'http://traveler.modot.org/', 'https://www.airnow.gov/', 'https://www.timberlinelodge.com/conditions', 'https://www.ercot.com/gridmktinfo/dashboards', 'https://www.weather.gov/', 'https://quickmap.dot.ca.gov/', 'https://tripcheck.com/', 'https://www.nsf.gov/

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 12/93 - keyword: validity, tfidf: 0.08168434883728169
All links:  ['https://results.org/', 'https://results.usatf.org/', 'https://results.elections.europa.eu/', 'https://results.lavote.gov/', 'https://www.imdb.com/title/tt3824412/', 'https://results.leonetiming.com/?mid=7114', 'https://www.fiaformulae.com/en/results', 'https://pennrelaysonline.com/Results/schedule.aspx?t=d&d=s', 'https://godrakebulldogs.com/sports/2015/11/24/DR_1124152133.aspx', 'https://www.treasurydirect.gov/auctions/announcements-data-results/', 'https://vizhub.healthdata.org/gbd-results/', 'https://flashresults.ncaa.com/Outdoor/2024/index.htm', 'https://www.sos.state.mn.us/elections-voting/election-results', 'https://candidates.cambridgeenglish.org/', 'https://flashresults.ncaa.com/Outdoor/2024/FirstRounds/West/index.htm', 'https://live.pttiming.com/?mid=7281', 'https://seer.cancer.gov/', 'https://www.merriam-webster.com/dictionary/result', 'https://www.flashresults.com/', 'https://www.nyrr.org/tcsnycmara

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 14/93 - keyword: measure, tfidf: 0.12550486649151787
All links:  ['https://samplefocus.com/', 'https://www.youtube.com/channel/UC2QrsAs1AKG4w2v0950SC2A', 'https://www.whosampled.com/', 'https://www.merriam-webster.com/dictionary/sample', 'https://pandas.pydata.org/docs/dev/reference/api/pandas.DataFrame.sample.html', 'https://www.nasa.gov/missions/osiris-rex/surprising-phosphate-finding-in-nasas-osiris-rex-asteroid-sample/', 'https://www.niaid.nih.gov/grants-contracts/sample-applications', 'https://twitter.com/iansample?lang=en', 'https://www.mongodb.com/docs/manual/reference/operator/aggregation/sample/', 'https://www.dmv.ca.gov/portal/driver-education-and-safety/educational-materials/sample-driver-license-dl-knowledge-tests/', 'https://mvp.sos.ga.gov/s/', 'https://usa.ipums.org/usa-action/variables/SAMPLE', 'https://www.sec.gov/rules-regulations/staff-guidance/disclosure-guidance/sample-letter-companies-regarding-climate', 'https://en.wikipedia.org/wiki/Sampling_(statistics

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 16/93 - keyword: procedure, tfidf: 0.054361147770560184
All links:  ['https://www.merriam-webster.com/dictionary/improve', 'https://www.nichd.nih.gov/research/supported/IMPROVE', 'https://vista.cira.colostate.edu/Improve/', 'https://datascience.cancer.gov/collaborations/nci-department-energy-collaborations/improve', 'https://www.modot.org/improvei70/columbiakingdomcity', 'https://detroitmi.gov/ImproveDetroit', 'https://www.modot.org/improvei70/warrentonwentzville', 'https://www.dictionary.com/browse/improve', 'https://www.gottman.com/blog/want-to-improve-your-relationship-start-paying-more-attention-to-bids/', 'https://support.microsoft.com/en-us/office/improve-accessibility-with-the-accessibility-checker-a16f6de0-2f39-4a2b-8bd8-5ad801426c7f', 'https://improve81.vdot.virginia.gov/', 'https://fw.ky.gov/Wildlife/Pages/Improve-Your-Land-for-Wildlife.aspx', 'https://www.uscis.gov/newsroom/news-releases/uscis-increases-automatic-extension-of-certain-employment-authorization-docume

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 18/93 - keyword: a, tfidf: 0.0
All links:  ['https://data.gov/', 'https://data.worldbank.org/', 'https://data.org/', 'https://en.wikipedia.org/wiki/Data', 'https://www.merriam-webster.com/dictionary/data', 'https://covid.cdc.gov/', 'https://ourworldindata.org/', 'https://data.census.gov/', 'https://datacatalog.worldbank.org/', 'https://commission.europa.eu/law/law-topic/data-protection_en', 'https://lookerstudio.google.com/', 'https://www.imf.org/en/Data', 'https://www.ndbc.noaa.gov/', 'https://commission.europa.eu/law/law-topic/data-protection/data-protection-eu_en', 'https://developers.google.com/search/docs/appearance/structured-data', 'https://data.cityofchicago.org/', 'https://mpdc.dc.gov/ko/page/district-crime-data-glance', 'https://ddialliance.org/', 'https://www.census.gov/data', 'https://datacarpentry.org/']
Progress: 19/93 - keyword: data, tfidf: 0.19522144865228608
All links:  ['https://www.merriam-webster.com/dictionary/type', 'https://docs.python.org/3/library/ty

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Failed to scrape https://pittsburghpa.gov/dpw/plastic-bag-ban: HTTPSConnectionPool(host='pittsburghpa.gov', port=443): Max retries exceeded with url: /dpw/plastic-bag-ban (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1122)')))
Failed to scrape https://law.lis.virginia.gov/vacode/title58.1/chapter17/section58.1-1745/: HTTPSConnectionPool(host='law.lis.virginia.gov', port=443): Max retries exceeded with url: /vacode/title58.1/chapter17/section58.1-1745/ (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1122)')))
Failed to scrape https://www.b-plastic.com/en/homepage/: HTTPSConnectionPool(host='www.b-plastic.com', port=443): Read timed out.
Progress: 26/93 - keyword: plastic B, tfidf: 0.06220210779255783
All links:  ['https://www.hayabusa.org/forum/threads/chain-length-guide-f

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 32/93 - keyword: improved, tfidf: 0.03805890677055101
All links:  ['https://experiment.com/', 'https://experimentbeauty.com/', 'https://en.wikipedia.org/wiki/Experiment', 'https://labs.google/', 'https://www.merriam-webster.com/dictionary/experiment', 'https://atlas.cern/', 'https://experiments.withgoogle.com/collection/chrome', 'https://www.imdb.com/title/tt0250258/', 'https://aspe.hhs.gov/reports/overview-final-report-seattle-denver-income-maintenance-experiment', 'https://www.prisonexp.org/conclusion', 'https://www.wnycstudios.org/podcasts/experiment', 'https://www.cdc.gov/tuskegee/timeline.htm', 'https://cals.cornell.edu/agricultural-experiment-station', 'https://hbr.org/2010/04/column-why-businesses-dont-experiment', 'https://www.prisonexp.org/', 'https://support.google.com/google-ads/answer/6261395?hl=en', 'https://theexperimentpublishing.com/', 'https://www.uaf.edu/afes/places/fairbanks/', 'https://journals.ametsoc.org/view/journals/bams/93/4/bams-d-11-00094.1.xml', 'h

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 34/93 - keyword: height, tfidf: 0.11983579526970867
All links:  ['https://www.plasticfreejuly.org/', 'https://en.wikipedia.org/wiki/Plastic', 'https://plasticbank.com/', 'https://www.plasticpollutioncoalition.org/', 'https://www.merriam-webster.com/dictionary/plastic', 'https://plasticoceans.org/', 'https://ourworldindata.org/plastic-pollution', 'https://plasticoceans.org/the-facts/', 'https://www.britannica.com/science/plastic', 'https://www.sciencehistory.org/education/classroom-activities/role-playing-games/case-of-plastics/history-and-future-of-plastics/', 'https://endplasticwaste.org/', 'https://matmos.bandcamp.com/album/plastic-anniversary', 'https://journals.lww.com/plasreconsurg/pages/default.aspx', 'https://www.plasticscm.com/', 'https://plastic.design/', 'https://www.instagram.com/plasticjesus/?hl=en', 'https://www.plasticsurgery.org/news/plastic-surgery-statistics?sub=2015+Plastic+Surgery+Statistics', 'https://www.charleston-sc.gov/1911/Environmentally-Acceptable-P

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 36/93 - keyword: repeat, tfidf: 0.07198328396706062
All links:  ['https://apps.apple.com/us/app/measure/id1383426740', 'https://huyenchip.com/2024/04/17/personal-growth.html', 'https://www.fcc.gov/general/measuring-broadband-america', 'https://www.pmeasuring.com/', 'https://www.oecd-ilibrary.org/trade/measuring-distortions-in-international-markets_8fe4491d-en', 'https://www.merriam-webster.com/dictionary/measure', 'https://smartgrowthamerica.org/resources/measuring-sprawl-2014/', 'https://www.emerald.com/insight/publication/issn/1368-3047', 'https://play.google.com/store/apps/details?id=com.jrsoftworx.ruler&hl=en_US', 'https://www.imf.org/en/Publications/WP/Issues/2020/07/17/Measuring-Social-Unrest-Using-Media-Reports-49573', 'https://thenewpress.com/books/measuring-what-counts', 'https://www.itu.int/en/ITU-D/Statistics/Pages/publications/mis2017.aspx', 'https://www.statsamerica.org/distress/', 'https://www.measuringworth.com/', 'https://gpm.nasa.gov/missions/trmm', 'https://

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Failed to scrape https://gpm.nasa.gov/missions/trmm: ('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer'))
Progress: 37/93 - keyword: measuring, tfidf: 0.05241524428977248
All links:  ['https://www.amount.com/', 'https://www.merriam-webster.com/dictionary/amount', 'https://studentaid.gov/understand-aid/types/loans/subsidized-unsubsidized', 'https://www.va.gov/disability/compensation-rates/veteran-rates/', 'https://studentaid.gov/understand-aid/types/grants/pell', 'https://americorps.gov/members-volunteers/segal-americorps-education-award/find-out-more', 'https://otda.ny.gov/programs/snap/', 'https://edd.ca.gov/en/unemployment/UI-Calculator/', 'https://www.irs.gov/individuals/tax-withholding-estimator', 'https://www.ers.usda.gov/publications/pub-details/?pubid=43836', 'https://dictionary.cambridge.org/dictionary/english/amount', 'https://www.hhs.texas.gov/services/food/snap-food-benefits', 'https://dpi.wi.gov/parental-education-options/choice-programs/payment-amo

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 63/93 - keyword: improved, tfidf: 0.038058906770545076
All links:  ['https://www.merriam-webster.com/dictionary/conduct', 'https://grants.nih.gov/grants/guide/notice-files/NOT-OD-10-019.html', 'https://www.supremecourt.gov/about/code-of-conduct-for-justices.aspx', 'https://www.uscourts.gov/judges-judgeships/code-conduct-united-states-judges', 'https://www.americanbar.org/groups/professional_responsibility/publications/model_rules_of_professional_conduct/model_rules_of_professional_conduct_table_of_contents/', 'https://www.ifrc.org/our-promise/do-good/code-conduct-movement-ngos', 'https://www.apa.org/ethics/code', 'https://www.insurance.nd.gov/tools-legal/market-conduct-examinations', 'https://help.x.com/en/rules-and-policies/hateful-conduct-policy', 'https://statutes.capitol.texas.gov/Docs/PE/htm/PE.42.htm', 'https://www.uc.edu/campus-life/conduct/student-code-of-conduct.html', 'https://www.dictionary.com/browse/conduct', 'https://www.courts.wa.gov/court_rules/?fa=court_rules

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 64/93 - keyword: conduct, tfidf: 0.087154690897399
All links:  ['https://www.more.com/', 'https://www.merriam-webster.com/dictionary/more', 'https://www.instagram.com/moretheband/?hl=en', 'https://www.more-empowerment.org/', 'https://www.youtube.com/c/LessJunkMoreJourney', 'https://www.totalwine.com/', 'https://studentaid.gov/understand-aid/types/loans/subsidized-unsubsidized', 'https://www.moretheband.com/', 'https://www.hersheypark.com/explore-the-park/rides/', 'https://www.mcdonalds.com/us/en-us.html', 'https://www.cdc.gov/index.html', 'https://www.wellsfargo.com/', 'https://support.microsoft.com/en-us', 'https://www.anthem.com/', 'https://www.dominos.com/en/', 'https://www.hilton.com/en/hilton-honors/', 'https://one.google.com/about/', 'https://www.consilium.europa.eu/media/ny3j24sm/much-more-than-a-market-report-by-enrico-letta.pdf', 'https://www.amtrak.com/home', 'https://policies.google.com/technologies/cookies?hl=en-US']
Failed to scrape https://www.more.com/: ('Conne

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 68/93 - keyword: validity, tfidf: 0.08168814631199176
All links:  ['https://www.amazon.com/weights/s?k=weights', 'https://wandb.ai/site', 'https://www.walmart.com/browse/sports-outdoors/weights/4125_4134_1026285_1078284', 'https://www.dickssportinggoods.com/c/weight-lifting-equipment', 'https://www.tkostrength.com/pages/free-weights', 'https://blogs.loc.gov/law/2015/09/weights-and-measurements/', 'https://www.lakecountyca.gov/229/Weights-Measures-Programs', 'https://www.linkedin.com/in/chrisvanpelt', 'https://mda.maryland.gov/weights_measures/Pages/weights_measures.aspx', 'https://www.titleist.com.sg/teamtitleist/team-titleist/f/scotty-cameron-putters/36553/trouble-removing-weights', 'https://www.statalist.org/forums/forum/general-stata-discussion/general/1564268-using-weights-in-regression', 'https://texasagriculture.gov/Regulatory-Programs/Weights-and-Measures', 'https://www.somervillema.gov/departments/isd/weights-and-measures', 'https://texasagriculture.gov/Regulatory-Pro

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Failed to scrape https://studentaid.gov/manage-loans/repayment/plans: HTTPSConnectionPool(host='studentaid.gov', port=443): Read timed out. (read timeout=10)
Progress: 75/93 - keyword: different, tfidf: 0.10154337622175627
All links:  ['https://www.merriam-webster.com/dictionary/initial', 'https://www.rentokil-initial.com/', 'https://www.dictionary.com/browse/initial', 'https://www.uc.pa.gov/unemployment-benefits/file/pages/file%20an%20initial%20claim.aspx', 'https://github.com/tailwindlabs/tailwindcss/discussions/12490', 'https://www.utm.edu/academics/majors-and-programs/msed-teaching-initial-licensure-elementary', 'https://dictionary.cambridge.org/dictionary/english/initial', 'https://www.thesaurus.com/browse/initial', 'https://trailhead.salesforce.com/trailblazer-community/feed/0D54S00000A8OdASAV', 'https://developer.mozilla.org/en-US/docs/Web/CSS/initial', 'https://education.delaware.gov/legacy/home/educators/licensure-and-certification/definitions/initial-license/', 'https://en.wi

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 76/93 - keyword: initial, tfidf: 0.08755623618791858
All links:  ['https://www.merriam-webster.com/dictionary/detailed', 'https://detailed.com/', 'https://www.detailedimage.com/', 'https://echo.epa.gov/detailed-facility-report', 'https://dictionary.cambridge.org/dictionary/english/detailed', 'https://www.youtube.com/c/DetailedDream', 'https://www.samhsa.gov/data/report/2022-nsduh-detailed-tables', 'https://chromewebstore.google.com/detail/detailed-seo-extension/pfjdepjjfjjahkjfpkcgfmfhmnakjfba', 'https://www.reddit.com/r/MapPorn/comments/b50j1r/detailed_map_of_the_paris_catacombs_in_english/', 'https://foil.app.tn.gov/foil/details.jsp', 'https://community.southwest.com/t5/Travel-Policies/Detailed-Receipts/td-p/60966', 'https://www.census.gov/data/tables/2013/demo/2009-2013-lang-tables.html', 'https://grants.nih.gov/grants/how-to-apply-application-guide/format-and-write/develop-your-budget.htm', 'https://www.census.gov/library/visualizations/interactive/detailed-race-ethniciti

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 77/93 - keyword: detailed, tfidf: 0.04367533038237773
All links:  ['https://www.firstinspires.org/', 'https://www.first.org/', 'https://www.firstinspires.org/robotics/frc', 'https://www.fnb-online.com/', 'https://www.merriam-webster.com/dictionary/first', 'https://localfirstbank.com/', 'https://constitution.congress.gov/constitution/amendment-1/', 'https://www.firsthorizon.com/', 'https://firstwatch.com/', 'https://firsttee.org/', 'https://www.firstlegoleague.org/', 'https://www.bankatfirst.com/personal.html', 'https://www.firstsolar.com/', 'https://www.firstinterstatebank.com/', 'https://www.first-online.bank/', 'https://www.fhb.com/en/personal', 'https://firstbook.org/', 'https://www.firstrepublic.com/', 'https://students-residents.aamc.org/financial-aid', 'https://www.oldnational.com/']
Progress: 78/93 - keyword: first, tfidf: 0.15575558886946142
All links:  ['https://clinicaltrials.gov/', 'https://trialsjournal.biomedcentral.com/', 'https://www.who.int/clinical-trials-reg

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Failed to scrape https://cimea-diplome.it/: HTTPSConnectionPool(host='cimea-diplome.it', port=443): Max retries exceeded with url: / (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1122)')))
Progress: 80/93 - keyword: comparability, tfidf: 0.07672724749024108
All links:  ['https://www.merriam-webster.com/dictionary/additional', 'https://www.additionfi.com/', 'https://www.merriam-webster.com/thesaurus/additional', 'https://www.dictionary.com/browse/additional', 'https://policies.google.com/terms/service-specific?hl=en-US', 'https://support.google.com/business/answer/9292476?hl=en', 'https://www.irs.gov/businesses/small-businesses-self-employed/questions-and-answers-for-the-additional-medicare-tax', 'https://www.cdc.gov/coronavirus/2019-ncov/vaccines/recommendations/immuno.html', 'https://www.uscis.gov/forms/filing-fees/additional-information-on-filing-a-fee-waiver', 'https://dor.m

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 81/93 - keyword: additional, tfidf: 0.07500894203626754
All links:  ['https://www.imdb.com/title/tt0936501/', 'https://en.wikipedia.org/wiki/Taken_(film)', 'https://community.fly.io/t/name-already-taken/5559', 'https://www.dropboxforum.com/t5/Settings-and-Preferences/Email-address-already-taken/td-p/587093', 'https://community.fly.io/t/name-has-already-been-taken-when-i-have-no-apps/16077', 'https://www.dropboxforum.com/t5/Settings-and-Preferences/Changing-Email-says-email-already-taken/td-p/739099', 'https://www.newyorker.com/magazine/2013/08/12/taken', 'https://dunn.medium.com/the-risk-not-taken-40cf0a8919cb', 'https://embowman.com/books/taken/', 'https://www.nytimes.com/2010/10/07/opinion/07Wideman.html', 'https://www.amazon.com/Taken-Single-Disc-Extended-Liam-Neeson/dp/B001TODCII', 'https://discuss.python.org/t/pypi-name-taken-advice/3446', 'https://forum.gitlab.com/t/path-has-already-been-taken/34766', 'https://www.tfewines.com/our-brands/taken/', 'https://www.merriam-we

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 88/93 - keyword: investigation, tfidf: 0.03797280861048054
All links:  ['https://order.design/', 'https://www.order.co/', 'https://www.doi.gov/sites/doi.gov/files/elips/documents/so-3395-signed.pdf', 'https://www.whitehouse.gov/briefing-room/presidential-actions/2023/10/30/executive-order-on-the-safe-secure-and-trustworthy-development-and-use-of-artificial-intelligence/', 'https://www.doi.gov/sites/doi.gov/files/elips/documents/so-3401-comprehensive-analysis-and-temporary-halt-on-all-activitives-in-the-arctic-national-wildlife-refuge-relating-to-the-coastal-plain-oil-and-gas-leasing-program.pdf', 'https://www.merriam-webster.com/dictionary/order', 'https://www2.illinois.gov/Documents/ExecOrders/2020/ExecutiveOrder-2020-10.pdf', 'https://gov.georgia.gov/executive-action/executive-orders/2020-executive-orders', 'https://www.flgov.com/wp-content/uploads/orders/2020/EO_20-91.pdf', 'https://files.nc.gov/governor/documents/files/EO121-Stay-at-Home-Order-3.pdf', 'https://www.cdph.ca

  k = self.parse_starttag(i)
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Failed to scrape https://www2.illinois.gov/Documents/ExecOrders/2020/ExecutiveOrder-2020-10.pdf: The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.

Original exception(s) from parser:
 expected name token at '<![�\x00��\x1d6���HC�|���,'


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 89/93 - keyword: order, tfidf: 0.07650564544926584
All links:  ['https://www.yourheights.com/', 'https://heights.org/', 'https://www.sterlingheights.gov/', 'https://www.theheightstoledo.com/', 'https://www.heightsarts.org/', 'https://theheightsithaca.com/', 'https://www.shakerheightsoh.gov/', 'https://houstonheights.org/', 'https://www.visithoustontexas.com/about-houston/neighborhoods/houston-heights/', 'https://www.heightsandhills.org/', 'https://clevelandheights.gov/', 'https://www.heightsplatform.com/', 'https://www.universityheights.com/', 'http://www.heightstheater.com/', 'https://www.imdb.com/title/tt0382073/', 'https://theheightstheater.com/', 'https://www.eurekaheights.com/', 'https://www.youtube.com/@newheightshow', 'https://www.heightsmercantile.com/', 'https://www.alamoheightstx.gov/']
Progress: 90/93 - keyword: heights, tfidf: 0.097371762508214
All links:  ['https://www.merriam-webster.com/dictionary/accurately', 'https://dictionary.cambridge.org/dictionary/englis

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 91/93 - keyword: accurately, tfidf: 0.04740236835531356
All links:  ['https://www.amount.com/', 'https://www.merriam-webster.com/dictionary/amount', 'https://studentaid.gov/understand-aid/types/loans/subsidized-unsubsidized', 'https://www.va.gov/disability/compensation-rates/veteran-rates/', 'https://studentaid.gov/understand-aid/types/grants/pell', 'https://americorps.gov/members-volunteers/segal-americorps-education-award/find-out-more', 'https://otda.ny.gov/programs/snap/', 'https://edd.ca.gov/en/unemployment/UI-Calculator/', 'https://www.irs.gov/individuals/tax-withholding-estimator', 'https://www.ers.usda.gov/publications/pub-details/?pubid=43836', 'https://dictionary.cambridge.org/dictionary/english/amount', 'https://www.hhs.texas.gov/services/food/snap-food-benefits', 'https://dpi.wi.gov/parental-education-options/choice-programs/payment-amounts-frequently-asked-questions', 'https://community.concur.com/t5/Concur-Expense/Refund-expenses-negative-amount/m-p/39403', 'htt

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 7/95 - keyword: specialist, tfidf: 0.07720580872908468
All links:  ['https://www.generalist.com/', 'https://www.linkedin.com/pulse/you-might-generalist-milly-tamati', 'https://en.wikipedia.org/wiki/Generalist', 'https://www.forbes.com/sites/danpontefract/2017/02/15/dont-be-afraid-to-call-yourself-a-neo-generalist/', 'https://www.nlm.nih.gov/NIHbmic/generalist_repositories.html', 'https://twitter.com/millytamati?lang=en', 'https://datascience.nih.gov/data-ecosystem/generalist-repository-ecosystem-initiative', 'https://brownschool.wustl.edu/academics/master-of-public-health/generalist-concentration/', 'https://sharing.nih.gov/data-management-and-sharing-policy/sharing-scientific-data/generalist-repositories', 'https://www.merriam-webster.com/dictionary/generalist', 'https://ochagavia.nl/blog/the-undercover-generalist/', 'https://www.reddit.com/r/brotato/comments/14z3c4g/how_to_make_generalist_work_in_d5/', 'https://thegeneralist.store/', 'https://www.reddit.com/r/civ/comments/x

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Failed to scrape https://ocfs.ny.gov/programs/fostercare/education.php: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 11/95 - keyword: stability, tfidf: 0.07427321969033603
All links:  ['https://www.change.org/', 'https://www.ipcc.ch/', 'https://unfccc.int/', 'https://www.changehealthcare.com/', 'https://www.merriam-webster.com/dictionary/change', 'https://www.un.org/en/climatechange/what-is-climate-change', 'https://colorofchange.org/', 'https://www.uscis.gov/addresschange', 'https://science.nasa.gov/climate-change/', 'https://www.dmv.pa.gov/Driver-Services/Driver-Licensing/pages/change-your-name-or-address.aspx', 'https://apps.ilsos.gov/addrchange/', 'https://www.dmv.virginia.gov/online-services/address-change', 'https://egov.uscis.gov/coa/displayCOAForm.do', 'https://www.southwest.com/air/change/', 'https://developer.mozilla.org/en-US/docs/Web/API/HTMLElement/change_event', 'https://climateknowledgeportal.worldbank.org/home', 'https://www.va.gov/education/apply-for-education-benefits/application/1995/', 'https://travel.state.gov/content/travel/en/passports/have-passport/change-correct.htm

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Failed to scrape https://www.united.com/: HTTPSConnectionPool(host='www.united.com', port=443): Read timed out. (read timeout=10)
Progress: 22/95 - keyword: continental United States, tfidf: 0.05821795567717525
All links:  ['https://www.unep.org/node', 'https://www.merriam-webster.com/dictionary/environment', 'https://en.wikipedia.org/wiki/Natural_environment', 'https://www.eea.europa.eu/en', 'https://www.sciencedirect.com/journal/environment-international', 'https://environment.ec.europa.eu/index_en', 'https://www.apple.com/environment/', 'https://environmentamerica.org/', 'https://mde.maryland.gov/', 'https://www.nationalgeographic.com/environment', 'https://www.epw.senate.gov/', 'https://www.gov.uk/government/organisations/environment-agency', 'https://www.epa.gov/home', 'https://unep.org/interactive/beat-plastic-pollution/', 'https://www.lionsclubs.org/en/start-our-global-causes/environment', 'https://www.sfenvironment.org/', 'https://woods.stanford.edu/', 'https://www.bloomberg.or

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 25/95 - keyword: threat, tfidf: 0.08259975680621848
All links:  ['https://en.wikipedia.org/wiki/Native_species', 'https://www.cityofmissionviejo.org/departments/animal-services/native-vs-non-native-species', 'https://dnrec.delaware.gov/delaware-native-species-commission/', 'https://www.agriculture.pa.gov/Plants_Land_Water/PlantIndustry/GISC/Pages/PA-Native-Species-Day.aspx', 'https://www.usda.gov/peoples-garden/gardening-advice/why-native-species-matter', 'https://phys.org/tags/native+species/', 'https://e360.yale.edu/features/native-species-or-invasive-the-distinction-blurs-as-the-world-warms', 'https://marinedebris.noaa.gov/why-marine-debris-problem/non-native-species-transport', 'https://longwoodgardens.org/events-performances/events/pennsylvania-native-species-day', 'https://www.audubon.org/native-plants', 'https://documents.ats.aq/ATCM39/ww/atcm39_ww009_e.pdf', 'https://study.com/learn/lesson/native-indigenous-species.html', 'https://wvdnr.gov/plants-animals/native-speci

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 26/95 - keyword: native species, tfidf: 0.10467446380006049
All links:  ['https://climate.com/', 'http://www.climate.gov/', 'https://www.climatecentral.org/', 'https://www.climateweeknyc.org/', 'https://www.theclimategroup.org/', 'https://www.noaa.gov/climate', 'https://www.merriam-webster.com/dictionary/climate', 'https://www.cpc.ncep.noaa.gov/', 'https://www.climaterealityproject.org/', 'https://science.nasa.gov/climate-change/', 'https://www.theclimatepledge.com/', 'https://www.ipcc.ch/', 'https://www.climateactionreserve.org/', 'https://www.climateaction100.org/', 'https://www.un.org/en/climatechange/what-is-climate-change', 'https://citizensclimatelobby.org/', 'https://screeningtool.geoplatform.gov/', 'https://toolkit.climate.gov/content/us-climate-resilience-toolkit', 'https://www.cif.org/', 'https://unfccc.int/process-and-meetings/the-paris-agreement']
Progress: 27/95 - keyword: climate, tfidf: 0.18103531110773094
All links:  ['https://www.merriam-webster.com/dictionar

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 30/95 - keyword: Key Largo wood rat, tfidf: 0.03666251506470727
All links:  ['https://www.usgs.gov/programs/national-geospatial-program/national-map', 'https://earthquake.usgs.gov/earthquakes/map/', 'https://store.usgs.gov/node/358', 'https://ngmdb.usgs.gov/topoview/viewer/', 'https://earthexplorer.usgs.gov/', 'https://maps.usgs.gov/map/', 'https://apps.nationalmap.gov/downloader/', 'https://maps.waterdata.usgs.gov/', 'https://waterwatch.usgs.gov/?id=ww_flood', 'https://dashboard.waterdata.usgs.gov/', 'https://pubs.usgs.gov/sim/3292/', 'https://streamstats.usgs.gov/ss/', 'https://pubs.usgs.gov/publication/pp1395', 'https://waterdata.usgs.gov/nwis/rt', 'https://alaska.usgs.gov/science/geology/state_map/interactive_map/AKgeologic_map.html', 'https://webapps.usgs.gov/txgeology/', 'https://nawqatrends.wim.usgs.gov/Decadal/', 'https://livingatlas.arcgis.com/topomapexplorer/', 'https://eerscmap.usgs.gov/uswtdb/viewer/', 'https://astrogeology.usgs.gov/search']
Progress: 31/95 - keyw

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 32/95 - keyword: potential python habitat, tfidf: 0.03770492827802455
All links:  ['https://www.myflorida.com/', 'https://en.wikipedia.org/wiki/Florida', 'https://www.ufl.edu/', 'https://www.visitflorida.com/', 'https://www.floridahealth.gov/', 'https://www.fiu.edu/', 'https://www.fsu.edu/', 'https://www.famu.edu/', 'https://www.fau.edu/', 'https://www.fdot.gov/', 'https://www.flhsmv.gov/', 'https://www.usf.edu/', 'https://www.floridajobs.org/', 'https://floridadep.gov/', 'https://www.ucf.edu/', 'https://www.legoland.com/florida/', 'https://www.floridablue.com/', 'https://www.fit.edu/', 'https://floridarevenue.com/Pages/default.aspx', 'https://dos.fl.gov/']
Failed to scrape https://www.flhsmv.gov/: HTTPSConnectionPool(host='www.flhsmv.gov', port=443): Max retries exceeded with url: / (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1122)')))
Failed to scrape https://www.

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Failed to scrape https://www.fs.usda.gov/managing-land/urban-forests/ucf/nucfac: HTTPSConnectionPool(host='www.fs.usda.gov', port=443): Read timed out. (read timeout=10)
Progress: 36/95 - keyword: urbanized nation, tfidf: 0.022910869557340664
All links:  ['https://www.forbes.com/sites/richardkestenbaum/2024/04/10/the-pet-industry-is-doing-great-and-getting-worse/', 'https://www.americanpetproducts.org/research-insights/industry-trends-and-stats', 'https://www.bloomberg.com/company/press/global-pet-industry-to-grow-to-500-billion-by-2030-bloomberg-intelligence-finds/', 'https://www.pida.org/', 'https://en.wikipedia.org/wiki/Pet_industry', 'https://globalpetindustry.com/', 'https://commonthreadco.com/blogs/coachs-corner/pet-industry-trends-growth-ecommerce-marketing', 'https://petadvocacy.org/', 'https://www.linkedin.com/in/phillip-cooper-2363721a', 'https://explodingtopics.com/blog/pet-industry-trends', 'https://www.statista.com/statistics/253976/pet-food-industry-expenditure-in-the-us/

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 40/95 - keyword: ecophobia, tfidf: 0.07517628125045962
All links:  ['https://www.imdb.com/title/tt13274016/', 'https://en.wikipedia.org/wiki/Reptile_(film)', 'https://www.netflix.com/title/81463014', 'https://en.wikipedia.org/wiki/Reptile', 'https://twitter.com/reptile9lol?lang=en', 'https://www.reptilegardens.com/', 'https://www.youtube.com/watch?v=KS1cNkZ9o1U', 'https://arxiv.org/abs/1803.02999', 'http://www.reptile-database.org/', 'https://reptilesun.com/', 'https://gulfarium.com/reptile-safari/', 'https://github.com/f0rb1dd3n/Reptile', 'https://reptile-database.reptarium.cz/', 'https://www.fernbankmuseum.org/events/calendar-of-events/2024/07/20-reptile-day/', 'https://travelingworldofreptiles.com/meet-the-staff/', 'https://www.reptileforums.co.uk/', 'https://www.facebook.com/emreptilefamilyllc/', 'https://www.thereptilezoo.org/', 'https://www.seattlereptileguy.com/party-packages', 'https://showmereptileshow.com/']
Progress: 41/95 - keyword: reptile, tfidf: 0.0888071995540

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 57/95 - keyword: US Fish and Wildlife Service, tfidf: 0.10010231577808262
All links:  ['https://nationalzoo.si.edu/animals/fiji-banded-iguana', 'https://en.wikipedia.org/wiki/Fiji_crested_iguana', 'https://photographylife.com/photographing-critically-endangered-fijian-iguanas', 'https://science.sandiegozoo.org/species/fiji-iguana', 'https://www.iguanafoundation.org/what-we-support/fiji-iguanas/central-fiji-banded-iguana/', 'https://www.sciencedaily.com/releases/2010/01/100111155112.htm', 'https://www.iguanafoundation.org/what-we-support/fiji-iguanas/lau-banded-iguana/', 'https://sdzwildlifeexplorers.org/animals/fiji-iguana', 'https://www.zoochat.com/community/media/fiji-island-banded-iguana.443553/', 'https://tulsazoo.org/animal/fiji-island-banded-iguana/', 'https://www.reptilecentre.com/pages/info-fiji-iguana-care-sheet', 'https://www.iucn-isg.org/latest-news/fijian-crested-iguanas-reintroduction/', 'https://www.wesaidgotravel.com/tell-me-more-about-fiji/', 'https://www.isla

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 68/95 - keyword: outer space, tfidf: 0.09289847023182922
All links:  ['https://fla-keys.com/', 'https://en.wikipedia.org/wiki/Florida_Keys', 'https://floridakeysbrewingco.com/', 'https://floridakeys.noaa.gov/', 'https://www.visitflorida.com/places-to-go/southeast/florida-keys/', 'https://floridakeys.com/', 'https://fkec.com/', 'https://fla-keys.co.uk/', 'https://sanctuaries.noaa.gov/vr/florida-keys/', 'https://fkspca.org/', 'https://www.keysdiscovery.com/', 'https://www.hawkscay.com/', 'http://keysaudubon.org/', 'https://www.fkcs.org/', 'https://cfk.edu/', 'https://www.monroecounty-fl.gov/', 'https://keysarts.com/', 'https://www.littlepalmisland.com/', 'https://cffk.org/', 'https://www.britannica.com/place/Florida-Keys']
Progress: 69/95 - keyword: Florida Keys, tfidf: 0.12002029600284471
All links:  ['https://en.wikipedia.org/wiki/Transmitter', 'https://simsettlements.com/site/index.php?threads/castle-build-limit-duplicate-radio-transmitter-solved.22415/', 'https://community.

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 75/95 - keyword: organisms, tfidf: 0.062104301671003434
All links:  ['https://en.wikipedia.org/wiki/Predation', 'https://agsci.colostate.edu/agbio/ipm/natural-enemies-of-pests/', 'https://www.fas.scot/environment/biodiversity/natural-predators/', 'https://www.reddit.com/r/explainlikeimfive/comments/lxy9mw/eli5_what_are_the_natural_predators_of_humans_if/', 'https://www.nps.gov/samo/learn/management/cats.htm', 'https://xerces.org/blog/planting-for-helpful-predators-how-natural-enemies-can-control-garden-pests-for-you', 'https://www.wbrparish.org/394/Natural-Mosquito-Predators', 'http://entomologytoday.org/2016/03/25/native-predators-may-be-having-a-larger-impact-than-expected-on-invasive-stink-bug/', 'https://www.canr.msu.edu/grapes/integrated_pest_management/natural-enemies-predators-parasites', 'https://extension.umd.edu/resource/tachinid-flies-and-other-natural-enemies-japanese-beetles', 'https://reelroyreviews.com/2024/01/12/no-natural-predators-well-almost-none-saltburn/'

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 80/95 - keyword: special environments, tfidf: 0.04760738659498315
All links:  ['https://experiments.withgoogle.com/collection/chrome', 'https://labs.google/', 'https://experiments.withgoogle.com/', 'https://home.cern/science/experiments', 'https://experiments.withgoogle.com/experiments', 'https://sciencebob.com/category/experiments/', 'https://musiclab.chromeexperiments.com/', 'https://support.google.com/google-ads/answer/6261395?hl=en', 'http://ssep.ncesse.org/', 'https://argo-rollouts.readthedocs.io/en/stable/features/experiment/', 'https://aws.amazon.com/sagemaker/experiments/', 'https://en.wikipedia.org/wiki/Experiment', 'https://www.youtube.com/channel/UC4gjodJXjGFVw-0R7bDmjrw', 'https://aspe.hhs.gov/reports/overview-final-report-seattle-denver-income-maintenance-experiment', 'https://www.youtube.com/@Odd_Experiments', 'https://www.fcc.gov/general/rural-broadband-experiments', 'https://launchdarkly.com/features/experimentation/', 'https://experimentsinopera.com/', 'https

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 83/95 - keyword: human being, tfidf: 0.052998331077446795
All links:  ['https://en.wikipedia.org/wiki/White-tailed_deer', 'https://nhpbs.org/natureworks/whitetaileddeer.htm', 'https://www.nationalgeographic.com/animals/mammals/facts/white-tailed-deer', 'https://www.pgc.pa.gov/Wildlife/WildlifeSpecies/White-tailedDeer/pages/default.aspx', 'https://dec.ny.gov/nature/animals-fish-plants/white-tailed-deer', 'https://dwr.virginia.gov/wildlife/information/white-tailed-deer/', 'https://mdc.mo.gov/discover-nature/field-guide/white-tailed-deer', 'https://dwr.virginia.gov/wildlife/deer/', 'https://extension.psu.edu/white-tailed-deer', 'https://dnrec.delaware.gov/fish-wildlife/hunting/white-tailed-deer/', 'https://www.in.gov/dnr/fish-and-wildlife/wildlife-resources/animals/white-tailed-deer-biology/', 'https://www.esf.edu/aec/adks/mammals/wtd.php', 'https://www.nature.com/articles/s41586-021-04353-x', 'https://www.ckwri.tamuk.edu/news-events/hunters-and-conservation-and-management-white

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 85/95 - keyword: raccoon, tfidf: 0.09734598195426107
All links:  ['https://en.wikipedia.org/wiki/Cockroach', 'https://www.cockroachlabs.com/', 'https://twitter.com/lsarsour/status/12319983351?lang=en', 'https://www.cockroachlabs.com/blog/why-join-cockroach-labs/', 'https://www.syngentapmp.com/product/advion-trio-cockroach-gel-bait-insecticide', 'https://entomology.ca.uky.edu/ef614', 'https://www.linkedin.com/pulse/cockroach-mentality-achmad-zaky', 'https://github.com/cockroachdb/cockroach', 'https://www.terminix.com/cockroaches/', 'https://www.bbc.com/future/article/20140918-the-reality-about-roaches', 'https://www.peta.org/living/humane-home/humane-cockroach-control/', 'https://www.orkin.com/pests/cockroaches', 'https://www.food4rhino.com/en/app/cockroach', 'https://www.reddit.com/r/Earwolf/comments/16xmpem/comedy_bang_bang_832_cockroach_hell_paul_f/', 'https://www.floridastateparks.org/parks-and-trails/cockroach-bay-preserve-state-park', 'https://irishcockroach.com/', 'http

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 91/95 - keyword: ecophobia, tfidf: 0.07517628125045962
All links:  ['https://www.science.org/', 'https://en.wikipedia.org/wiki/Science', 'https://www.science.org/journal/science', 'https://www.mos.org/', 'https://science.nasa.gov/', 'https://www.sciencenews.org/', 'https://www.sciencedaily.com/', 'https://www.nsf.gov/', 'https://sciencebasedtargets.org/', 'https://www.msichicago.org/', 'https://www.aaas.org/', 'https://www.sciencedirect.com/', 'https://new.smm.org/', 'https://www.nextgenscience.org/content/home-page', 'https://www.mst.edu/', 'https://www.nytimes.com/section/science', 'https://www.nysed.gov/standards-instruction/science', 'https://www.sciencechannel.com/', 'https://www.hmns.org/', 'https://www.saic.com/']
Progress: 92/95 - keyword: science, tfidf: 0.08811244927284319
All links:  ['https://www.indeed.com/career-advice/finding-a-job/what-is-policy-adviser', 'https://governor.wa.gov/office-governor/policy-advisors', 'https://www.franklin.edu/career-guide/politica

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 94/95 - keyword: Pet Industry Joint Advisory Council, tfidf: 0.027350425750225015
All links:  ['https://www.ucdavis.edu/climate/news/local-extinction-southern-california-mountain-lions-possible-within-50-years', 'https://esajournals.onlinelibrary.wiley.com/doi/abs/10.1002/fee.2552', 'https://onlinelibrary.wiley.com/doi/full/10.1111/zsc.12212', 'https://sustainability.stanford.edu/news/whats-likely-cause-human-extinction-and-how-can-we-avoid-it', 'https://besjournals.onlinelibrary.wiley.com/doi/full/10.1111/2041-210X.14126', 'https://amt.copernicus.org/articles/13/893/2020/', 'https://www.zsl.org/news-and-events/news/conservation-zoos-have-powerful-potential-reverse-extinction', 'https://peregrinefund.org/news-release/potential-extinction-raptor-species', 'https://www.reddit.com/r/asoiaf/comments/1dcm7zn/spoilers_extended_is_the_potential_extinction_of/', 'https://pubs.acs.org/doi/10.1021/acs.est.8b06173', 'https://acamh.onlinelibrary.wiley.com/doi/full/10.1111/jcpp.13814', 'h

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 7/95 - keyword: threat, tfidf: 0.08259975680621848
All links:  ['https://education.nationalgeographic.org/resource/ecosystem/', 'https://en.wikipedia.org/wiki/Ecosystem', 'https://www.britannica.com/science/ecosystem', 'https://australian.museum/learn/species-identification/ask-an-expert/what-is-an-ecosystem/', 'https://ecosystem-energy.com/', 'https://www.sciencedirect.com/journal/ecosystem-services', 'https://dataspace.copernicus.eu/', 'https://memory.ucsf.edu/research-trials/professional/care-ecosystem', 'https://www.ecosystemmarketplace.com/', 'https://www.fisheries.noaa.gov/alaska/ecosystems/ecosystem-status-reports-gulf-alaska-bering-sea-and-aleutian-islands', 'https://www.equinix.com/data-centers/ecosystem-discovery', 'http://www.climatehubs.usda.gov/ecosystem-services', 'https://www.wri.org/research/corporate-ecosystem-services-review', 'https://www.millenniumassessment.org/', 'https://dviyer.medium.com/my-role-in-a-social-change-ecosystem-a-mid-year-check-in-1d852589

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 15/95 - keyword: biologists, tfidf: 0.04420331297300088
All links:  ['https://www.habitat.org/', 'https://www.habitat.com/', 'https://www.habitat.org/restores', 'https://habitatmetrodenver.org/', 'https://en.wikipedia.org/wiki/Habitat', 'https://www.ashevillehabitat.org/home/', 'https://www.1hotels.com/south-beach/taste/habitat', 'https://www.habitatcincinnati.org/', 'https://www.habitatphiladelphia.org/', 'https://www.habitatla.org/', 'https://community.chef.io/tools/chef-habitat', 'https://habitatportlandregion.org/', 'https://habitatskateboards.com/', 'https://www.cvillehabitat.org/', 'https://www.clevelandhabitat.org/', 'https://www.tchabitat.org/', 'https://habitatforsyth.org/', 'https://www.habitat4humanity.org/', 'https://habitatgift.com/', 'https://habitatdane.org/']
Progress: 16/95 - keyword: habitat, tfidf: 0.145317420530979
All links:  ['https://www.nhl.com/predators/', 'https://www.imdb.com/title/tt1424381/', 'https://en.wikipedia.org/wiki/Predators_(film)', 'http

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 30/95 - keyword: trade, tfidf: 0.12599417796677626
All links:  ['https://www.census.gov/popclock/', 'https://www.worldometers.info/world-population/', 'https://data.worldbank.org/indicator/SP.POP.TOTL', 'https://population.un.org/wpp/', 'https://popcouncil.org/', 'https://www.prb.org/', 'https://ourworldindata.org/population-growth', 'https://data.oecd.org/pop/population.htm', 'https://www.un.org/development/desa/pd/', 'https://www.unfpa.org/', 'https://ofm.wa.gov/washington-data-research/population-demographics', 'https://en.wikipedia.org/wiki/Population', 'https://www.populationmedia.org/', 'https://ofm.wa.gov/sites/default/files/public/dataresearch/pop/april1/ofm_april1_population_final.pdf', 'https://populationconnection.org/', 'https://www.osbm.nc.gov/facts-figures/population-demographics/state-demographer/countystate-population-projections', 'https://data.census.gov/', 'https://bhw.hrsa.gov/workforce-shortage-areas/shortage-designation', 'https://opb.georgia.gov/census-

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 31/95 - keyword: population, tfidf: 0.12940030741690145
All links:  ['https://en.wikipedia.org/wiki/Natural_barrier', 'https://www.sciencedirect.com/topics/computer-science/natural-barrier', 'https://www.miamiwaterkeeper.org/natural_barriers_provide_effective_storm_protection', 'https://academic.oup.com/bioscience/article/57/2/102/228160', 'https://wildlife.onlinelibrary.wiley.com/doi/abs/10.2193/2007-450', 'https://www.fs.usda.gov/t-d/phys_sec/deter/index.htm', 'https://thehill.com/opinion/energy-environment/351260-natural-barriers-are-a-key-defense-against-storms-like-harvey-and/', 'https://www.preventionweb.net/news/these-natural-barriers-can-weaken-tsunami', 'https://www.reddit.com/r/PlanetZoo/comments/gayhin/tips_on_natural_barriers/', 'https://www.nber.org/papers/w28171', 'https://oceanservice.noaa.gov/facts/coral_protect.html', 'https://wwf.panda.org/wwf_news/?17672/Coral-reefs-and-mangroves-act-as-natural-barriers-against-tsunamis', 'https://oceanservice.noaa.gov/podc

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 40/95 - keyword: Key Largo wood rat, tfidf: 0.03660483913156897
All links:  ['https://www.regulations.gov/', 'https://www.fmcsa.dot.gov/regulations', 'https://wdfw.wa.gov/fishing/regulations', 'https://www.fmcsa.dot.gov/regulations/hours-service/summary-hours-service-regulations', 'https://wildlife.ca.gov/Regulations', 'https://www.ecfr.gov/', 'https://www.merriam-webster.com/dictionary/regulation', 'https://dec.ny.gov/things-to-do/freshwater-fishing/regulations', 'https://www.tsa.gov/travel/security-screening/whatcanibring/all', 'https://www.osha.gov/laws-regs', 'https://fwp.mt.gov/hunt/regulations', 'https://myfwc.com/fishing/saltwater/recreational/', 'https://www.who.int/health-topics/international-health-regulations', 'https://dsd.maryland.gov/Pages/COMARSearch.aspx', 'https://www.fppc.ca.gov/the-law/fppc-regulations/regulations-index.html', 'https://fwp.mt.gov/fish/regulations', 'https://www.faa.gov/regulations_policies/faa_regulations', 'https://www.transportation.gov/r

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 43/95 - keyword: microchip, tfidf: 0.07591789316306029
All links:  ['https://www.tsa.gov/travel/security-screening/identification', 'https://www.irs.gov/businesses/small-businesses-self-employed/apply-for-an-employer-identification-number-ein-online', 'https://travel.state.gov/content/travel/en/passports/how-apply/identification.html', 'https://www.irs.gov/businesses/small-businesses-self-employed/employer-id-numbers', 'https://www.ilsos.gov/publications/pdf_publications/dsd_x173.pdf', 'https://www.uscis.gov/i-9-central/form-i-9-acceptable-documents', 'https://www.votetexas.gov/mobile/id-faqs.htm', 'https://www.merriam-webster.com/dictionary/identification', 'https://cdp.dhs.gov/femasid', 'https://www.hhs.gov/hipaa/for-professionals/privacy/special-topics/de-identification/index.html', 'https://www.sos.state.co.us/pubs/elections/vote/acceptableFormsOfID.html', 'https://www.dps.texas.gov/section/driver-license/how-apply-texas-identification-card', 'http://visitmonmouth.com/Pag

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 46/95 - keyword: adapting, tfidf: 0.029116742750501124
All links:  ['https://climate.com/', 'http://www.climate.gov/', 'https://www.climatecentral.org/', 'https://www.climateweeknyc.org/', 'https://www.theclimategroup.org/', 'https://www.noaa.gov/climate', 'https://www.merriam-webster.com/dictionary/climate', 'https://www.cpc.ncep.noaa.gov/', 'https://www.climaterealityproject.org/', 'https://science.nasa.gov/climate-change/', 'https://www.theclimatepledge.com/', 'https://www.ipcc.ch/', 'https://www.climateactionreserve.org/', 'https://www.climateaction100.org/', 'https://www.un.org/en/climatechange/what-is-climate-change', 'https://citizensclimatelobby.org/', 'https://screeningtool.geoplatform.gov/', 'https://toolkit.climate.gov/content/us-climate-resilience-toolkit', 'https://www.cif.org/', 'https://unfccc.int/process-and-meetings/the-paris-agreement']
Progress: 47/95 - keyword: climate, tfidf: 0.18103328899489737
All links:  ['https://humanimpact.org/', 'https://www.thehum

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Failed to scrape https://mynasadata.larc.nasa.gov/interactive-models/human-impact-and-creation-urban-heat-islands: ('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer'))
Progress: 48/95 - keyword: human impact, tfidf: 0.09295839513295201
All links:  ['http://www.climatehubs.usda.gov/disturbances-and-stressors', 'https://www.nextgenscience.org/resources/middle-school-disruptions-ecosystems', 'https://www.tuck.dartmouth.edu/news/articles/ecosystem-disruption-when-industry-boundaries-collapse', 'https://www.sciencedirect.com/science/article/pii/S004873332300197X', 'https://www.britannica.com/science/ecological-disturbance', 'https://19january2017snapshot.epa.gov/climate-impacts/climate-impacts-ecosystems', 'https://news.ucsc.edu/2011/07/apex-consumers.html', 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3484782/', 'https://www.epa.gov/climateimpacts/climate-change-impacts-ecosystems', 'https://www.nature.com/articles/ncomms14163', 'https://www.linkedin.com/pulse/dua

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 50/95 - keyword: key species, tfidf: 0.07890825752584313
All links:  ['https://survivalinternational.org/', 'https://www.iiss.org/en/publications/survival/', 'https://www.redcross.org/get-help/how-to-prepare-for-emergencies/survival-kit-supplies.html', 'https://cran.r-project.org/package=survival', 'https://www.culturalsurvival.org/', 'https://mustangsurvival.com/', 'https://www.cancerresearchuk.org/health-professional/cancer-statistics/survival', 'https://www.merriam-webster.com/dictionary/survival', 'https://www.nybooks.com/online/2016/11/10/trump-election-autocracy-rules-for-survival/', 'https://cpw.state.co.us/learn/Pages/Survival.aspx', 'https://www.iucn.org/our-union/commissions/iucn-species-survival-commission-2021-2025', 'https://youngsurvival.org/', 'https://www.aza.org/species-survival-plan-programs', 'https://www.ready.gov/kit', 'https://www.survivalsystemsinc.com/', 'https://www.tandfonline.com/toc/tsur20/current', 'https://www.youtube.com/channel/UCA7RTjJU42Y3toO

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 55/95 - keyword: human introduction, tfidf: 0.05127243213475596
All links:  ['https://rep.bioscientifica.com/', 'https://en.wikipedia.org/wiki/Reproduction', 'http://ssr.org/', 'https://wsava.org/global-guidelines/reproduction-guidelines/', 'https://www.mfa.org/collections/mfa-images/licensing/reproduction-request-form', 'https://www.copyright.gov/circs/circ21.pdf', 'https://www.asrm.org/', 'https://www.plutobooks.com/9780745399881/social-reproduction-theory', 'https://www.archives.gov/research/order/fees', 'https://academic.oup.com/humrep', 'https://www.vetmed.ucdavis.edu/departments/phr', 'https://wwwnc.cdc.gov/eid/article/25/1/17-1901_article', 'https://laws-lois.justice.gc.ca/eng/acts/a-13.4/', 'https://www.britannica.com/science/reproduction-biology', 'https://raf.bioscientifica.com/', 'https://eservices.archives.gov/', 'https://www.vet.upenn.edu/veterinary-hospitals/NBC-hospital/services/reproduction', 'https://my.clevelandclinic.org/health/articles/9118-female-reproduc

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Failed to scrape https://wwwnc.cdc.gov/eid/article/25/1/17-1901_article: HTTPSConnectionPool(host='wwwnc.cdc.gov', port=443): Read timed out. (read timeout=10)
Progress: 56/95 - keyword: reproduction, tfidf: 0.06055021148045124
All links:  ['https://www.conservation.org/', 'https://mdc.mo.gov/', 'https://www.conservationfund.org/', 'https://www.iucn.org/', 'https://fsa.usda.gov/programs-and-services/conservation-programs/conservation-reserve-program/index', 'https://www.nrcs.usda.gov/programs-initiatives/csp-conservation-stewardship-program', 'https://fsa.usda.gov/programs-and-services/conservation-programs/index', 'https://www.wcs.org/', 'https://thesca.org/', 'https://dec.ny.gov/', 'https://www.iucnredlist.org/', 'https://www.usda.gov/topics/conservation', 'https://www.merriam-webster.com/dictionary/conservation', 'https://www.dcnr.pa.gov/', 'https://conbio.onlinelibrary.wiley.com/journal/15231739', 'https://conservationco.org/', 'https://www.culturalheritage.org/', 'https://conserva

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 60/95 - keyword: environmental balance, tfidf: 0.02118445667138133
All links:  ['https://www.urbanwildlifeinfo.org/', 'http://urbanwildlifegroup.org/urban-wildlife-information', 'https://www.fws.gov/program/urban-wildlife-conservation', 'https://en.wikipedia.org/wiki/Urban_wildlife', 'https://www.lpzoo.org/science-project/urban-wildlife-information-network/', 'https://www.urbanwildlifeandbeeremoval.com/', 'https://urbanwildliferehab.org/', 'https://georgiawildlife.com/urbanwildlifeprogram', 'https://www.lpzoo.org/conservation-science/science-centers/urban-wildlife-institute/', 'https://urbanwildliferesearchproject.org/', 'https://tpwd.texas.gov/wildlife/wildlife-diversity/urban-wildlife-program/', 'https://bouldercolorado.gov/services/urban-wildlife-policy-and-programs', 'https://www.kearns.utah.gov/community/page/urban-wildlife', 'https://www.providenceri.gov/parks/urban-wildlife-refuge/', 'https://dnr.wisconsin.gov/aid/UWDAC.html', 'https://www.cityofdavis.org/residents/urb

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 62/95 - keyword: controlled environment, tfidf: 0.06456340464454328
All links:  ['https://www.forbes.com/advisor/pet-insurance/pet-ownership-statistics/', 'https://www.avma.org/resources-tools/pet-owners/responsible-pet-ownership', 'https://undark.org/2024/06/24/fuzzy-science-pets-improve-health/', 'https://www.avma.org/resources-tools/reports-statistics/us-pet-ownership-statistics', 'https://www.reddit.com/r/philosophy/comments/12lzuua/the_case_against_pet_ownership/', 'https://www.pewresearch.org/short-reads/2023/07/07/about-half-us-of-pet-owners-say-their-pets-are-as-much-a-part-of-their-family-as-a-human-member/', 'https://www.reddit.com/r/petfree/comments/17c314w/latest_statistics_on_pet_ownership_in_the_us_its/', 'https://www.vox.com/future-perfect/2023/4/11/23673393/pets-dogs-cats-animal-welfare-boredom', 'https://www.funpawcare.com/2012/12/05/pet-ownership-right-or-privilege/', 'https://www.statista.com/statistics/308235/estimated-pet-ownership-in-the-united-kingdom-u

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 64/95 - keyword: illegal trade, tfidf: 0.07708919336482012
All links:  ['https://www.worldwildlife.org/threats/illegal-wildlife-trade', 'https://www.traffic.org/about-us/illegal-wildlife-trade/', 'https://www.ice.gov/features/wildlife', 'https://www.traffic.org/about-us/legal-wildlife-trade/', 'https://www.cbp.gov/trade/programs-administration/natural-resources-protection/wildlife-trafficking', 'https://www.nrdc.org/stories/wildlife-trade-101', 'https://wwf.panda.org/discover/our_focus/wildlife_practice/problems/illegal_trade/', 'https://en.wikipedia.org/wiki/Wildlife_trade', 'https://cites.org/eng/disc/what.php', 'https://www.zsl.org/what-we-do/conservation/protecting-species/illegal-wildlife-trade-crisis', 'https://www.dcceew.gov.au/environment/wildlife-trade', 'https://www.hsi.org/issues/wildlife-trade/', 'https://environment.ec.europa.eu/topics/nature-and-biodiversity/wildlife-trade_en', 'https://www.thegef.org/what-we-do/topics/illegal-wildlife-trade', 'https://foreignpo

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 65/95 - keyword: wild animal trade, tfidf: 0.06051623095220995
All links:  ['https://www.nature.com/articles/s41598-022-23287-6', 'https://steamcommunity.com/app/648350/discussions/1/1728701877470514069/?l=thai', 'https://link.springer.com/article/10.1007/s12237-008-9038-7', 'https://www.nature.com/articles/s41598-022-25981-x', 'https://pubmed.ncbi.nlm.nih.gov/36336704/', 'https://www.habitatmag.com/Publication-Content/Building-Operations/2020/2020-April/Clinton-Hill-Co-op-Conquers-Chronic-Heat-Imbalance', 'https://www.theguardian.com/environment/2009/jan/29/endangered-habitats-washington-state', 'https://www.epa.gov/report-environment/diversity-and-biological-balance', 'https://newsletter.x-mol.com/paperRedirect/1692214334036135936', 'https://seedsconnections.org/habitat-balance-0', 'https://www.theworldcounts.com/stories/impact-of-ecosystem-destruction', 'https://tc.copernicus.org/articles/15/233/2021/', 'https://19january2017snapshot.epa.gov/climate-impacts/climate-impacts

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 67/95 - keyword: invasive reptiles, tfidf: 0.06164199969916119
All links:  ['https://necsi.edu/predator-prey-relationships', 'https://www.snexplores.org/article/scientists-say-predator-and-prey-definition-pronunciation', 'https://biologydictionary.net/predator-prey-relationship/', 'https://www.encyclopedia.com/environment/energy-government-and-defense-magazines/predator-prey-relationships', 'https://www.reddit.com/r/pokemon/comments/9mrq2m/confirmed_pokemon_predator_prey_relationships/', 'https://www.adfg.alaska.gov/index.cfm?adfg=intensivemanagement.predatorprey', 'https://www.reddit.com/r/theHunter/comments/bajxca/are_there_any_predatorprey_relationships_in_the/', 'https://esajournals.onlinelibrary.wiley.com/doi/10.1890/08-2061.1', 'https://tpwd.texas.gov/publications/nonpwdpubs/young_naturalist/animals/predator_prey_relationship/index.phtml', 'https://www.nps.gov/articles/000/predator-prey-relationships-on-isle-royale.htm', 'https://besjournals.onlinelibrary.wiley.com/doi/

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 72/95 - keyword: wildlife protection, tfidf: 0.08301301190671255
All links:  ['https://www.fws.gov/', 'https://en.wikipedia.org/wiki/United_States_Fish_and_Wildlife_Service', 'https://www.fws.gov/careers', 'https://digitalmedia.fws.gov/', 'https://www.fws.gov/about', 'https://www.facebook.com/USFWS/', 'https://ipac.ecosphere.fws.gov/', 'https://www.census.gov/content/dam/Census/library/publications/2018/demo/fhw16-nat.pdf', 'https://ecos.fws.gov/ecp0/pub/listedAnimals.jsp', 'https://www.flickr.com/photos/usfwssoutheast/', 'https://myfwc.com/', 'https://twitter.com/usfws?lang=en', 'https://www.supremecourt.gov/opinions/18pdf/17-71_omjp.pdf', 'https://www.scotusblog.com/case-files/cases/weyerhaeuser-company-v-united-states-fish-wildlife-service/', 'https://wildlife.ca.gov/', 'https://www.usa.gov/agencies/fish-and-wildlife-service', 'https://fws.gov/partner', 'https://www.doi.gov/document-library/departmental-manual/managing-nepa-process-us-fish-and-wildlife-service', 'https://a

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 73/95 - keyword: US Fish and Wildlife Service, tfidf: 0.10010231577808262
All links:  ['https://environmental-conscience.com/', 'https://www.tandfonline.com/doi/full/10.1080/09593330.2019.1574907', 'https://www.mnhn.fr/en/lamarck-or-the-environmental-conscience', 'https://www.quora.com/What-is-environment-conscience', 'https://www.routledge.com/The-European-Environmental-Conscience-in-EU-Politics-A-Developing-Ideology/Hoerber-Weber/p/book/9781032159928', 'https://www.theparisreview.org/blog/2017/10/20/agnes-vardas-ecological-conscience/', 'https://www.bbvaopenmind.com/en/science/leading-figures/felix-rodriguez-de-la-fuente-environmental-conscience/', 'https://www.sciencedirect.com/science/article/pii/S0272494406000557', 'https://www.environmentalgovernance.org/post/the-environmental-conscience', 'https://www.highnorthnews.com/en/argues-arctic-cannot-mitigate-eus-black-environmental-conscience', 'https://goldenglobes.com/articles/trigger-environmental-conscience-rachel-carsons

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 79/95 - keyword: technological impact, tfidf: 0.02764267364101266
All links:  ['https://study.com/academy/lesson/reptile-adaptations-lesson-for-kids.html', 'https://onlinelibrary.wiley.com/doi/10.1111/eva.12114', 'https://www.desertmuseum.org/books/nhsd_adaptations_amph.php', 'https://www.britannica.com/animal/reptile/Clinging-and-climbing', 'https://animals.mom.com/adaptations-of-reptiles-5321170.html', 'https://www.thebiodude.com/blogs/helpful-husbandry-faqs/10-weird-and-whacky-reptile-adaptations', 'https://www.southfloridawildlifecenter.org/survival-strategies-exploring-reptile-adaptations/', 'https://zooatlanta.org/caiman-lizards-beautiful-and-highly-adapted-predators/', 'https://www.reddit.com/r/worldbuilding/comments/11b9z7k/i_need_some_help_with_a_creature_design_would_it/', 'https://www.nature.com/articles/s41598-017-09091-7', 'https://www.scientificamerican.com/article/warming-threatens-reptiles-more-than-birds-and-mammals/', 'https://www.casdschools.org/site/handle

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 80/95 - keyword: reptile adaptation, tfidf: 0.014376047312218412
All links:  ['https://www.msc.org/en-us/what-you-can-do/eat-sustainable-seafood/sustainable-species', 'https://www.fisheries.noaa.gov/species-directory/sustainable-seafood', 'https://www.seafoodwatch.org/', 'https://www.ipbes.net/sustainable-use-assessment', 'http://www.aplusaquaculture.nz/sustainable-species', 'https://www.ipbes.net/media_release/Sustainable_Use_Assessment_Published', 'https://www.unep.org/resources/report/assessment-report-sustainable-use-wild-species', 'https://www.jzar.org/jzar/article/view/313', 'https://www.sciencedirect.com/science/article/pii/S0278691521004646', 'https://www.traffic.org/publications/reports/naturefinance-legal-and-sustainable-wild-species-trade/', 'https://www.instagram.com/brodiemoss/p/CWflkdhJ9ad/', 'https://www.epa.gov/newsreleases/epa-announces-plan-protect-endangered-species-and-support-sustainable-agriculture', 'https://www.thespruceeats.com/sustainable-seafood-cho

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 82/95 - keyword: carnivore reptiles, tfidf: 0.0381088053197881
All links:  ['https://emeritus.org/blog/career-generalist-vs-specialist/', 'https://www.techtarget.com/whatis/feature/Generalist-vs-specialist-Whats-the-difference', 'https://www.linkedin.com/pulse/generalists-vs-specialists-t-shaped-who-owns-future-kochar-vjxuf', 'https://www.forbes.com/sites/forbeshumanresourcescouncil/2021/03/12/generalists-versus-specialists-the-winner-doesnt-take-it-all/', 'https://www.indeed.com/career-advice/finding-a-job/generalist-vs-specialist', 'https://dev.to/kethmars/generalists-vs-specialists-which-one-should-you-strive-for-as-a-developer-pg3', 'https://medium.com/@basploeg/range-the-case-for-generalists-in-a-specialized-world-b0ed44bc0470', 'https://www.honeycomb.io/blog/figuring-out-your-path', 'https://www.karelvredenburg.com/home/2023/8/31/generalists-vs-specialists', 'https://hbr.org/2018/07/when-generalists-are-better-than-specialists-and-vice-versa', 'https://www.alexhyett.com

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Failed to scrape https://www.washingtonpost.com/opinions/2024/06/10/scotus-mifepristone-abortion-studies-retracted/: HTTPSConnectionPool(host='www.washingtonpost.com', port=443): Read timed out. (read timeout=10)
Progress: 90/95 - keyword: scientific controversy, tfidf: 0.03059958898670781
All links:  ['https://www.sciencedirect.com/science/article/pii/S0006322316323873', 'https://www.animalbalance.org/', 'https://www.facebook.com/AnimalBalance/', 'https://balance.it/', 'https://toegrips.com/my-dog-is-wobbly-and-off-balance/', 'https://elibrary.asabe.org/abstract.asp?aid=53085', 'https://essentialanimals.com/essential-oils-for-a-dogs-hormonal-imbalance/', 'https://himiku.net/products/animalbalance', 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4838534/', 'https://www.nature.com/articles/s41598-019-38600-z', 'https://wildlifesos.org/chronological-news/conservation-imbalance-charismatic-megafauna-v-s-the-rest/', 'https://www.animalpak.com/products/animal-alpha-f-workout-supplement-for-w

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 93/95 - keyword: species judgment, tfidf: 0.017942988504784728
All links:  ['https://www.sciencedirect.com/topics/psychology/environmental-adaptation', 'https://powerlisting.fandom.com/wiki/Environmental_Adaptation', 'https://www.dshs.wa.gov/sites/default/files/DDA/dda/documents/policy/policy4.18.pdf', 'https://www.adaptationenvironmental.com/', 'https://dodd.ohio.gov/waivers-and-services/services/environmental-accessibility-adaptations', 'https://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1007672', 'https://nph.onlinelibrary.wiley.com/doi/10.1111/nph.13076', 'https://arxiv.org/abs/2003.13254', 'https://www.dhs.state.mn.us/id_002196/', 'https://journals.sagepub.com/doi/10.1177/2378023120905439', 'https://direct.mit.edu/evco/article/29/4/441/98587/Environmental-Adaptation-of-Robot-Morphology-and', 'https://www.eea.europa.eu/en/topics/in-depth/climate-change-impacts-risks-and-adaptation', 'https://onlinelibrary.wiley.com/doi/abs/10.1111/ele.12376', 'https://

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 94/95 - keyword: environmental adaptation, tfidf: 0.03177437820097022
All links:  ['https://en.wikipedia.org/wiki/Introduced_species', 'https://www.fisheries.noaa.gov/insight/invasive-and-exotic-marine-species', 'https://www.nationalgeographic.org/encyclopedia/invasive-species/', 'https://www.adfg.alaska.gov/index.cfm?adfg=invasive.pathways', 'https://www.encyclopedia.com/environment/encyclopedias-almanacs-transcripts-and-maps/species-introduction', 'https://www.sciencedirect.com/topics/pharmacology-toxicology-and-pharmaceutical-science/exotic-species', 'https://dem.ri.gov/environmental-protection-bureau/water-resources/research-monitoring/aquatic-invasive-species-3', 'https://besjournals.onlinelibrary.wiley.com/doi/full/10.1111/1365-2664.12819', 'https://academic.oup.com/bioscience/article/49/8/619/254611', 'https://www.nwf.org/Educational-Resources/Wildlife-Guide/Threats-to-Wildlife/Invasive-Species', 'https://www.invasivespeciesinfo.gov/subject/pathways', 'https://meridian

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Failed to scrape https://www.nasdaq.com/market-activity/stocks/mrna: HTTPSConnectionPool(host='www.nasdaq.com', port=443): Read timed out. (read timeout=10)
Progress: 1/80 - keyword: mRNA, tfidf: 0.1151018444707789
All links:  ['https://nucleussec.com/', 'https://nucleusresearch.com/', 'https://www.genome.gov/genetics-glossary/Nucleus', 'https://www.iaea.org/resources/nucleus-information-resources', 'https://www.britannica.com/science/nucleus-biology', 'https://www.gallerynucleus.com/', 'https://www.nucleus.church/', 'https://en.wikipedia.org/wiki/Cell_nucleus', 'https://www.merriam-webster.com/dictionary/nucleus', 'https://roon.app/en/nucleus', 'https://nucleusrad.com/', 'https://www.energy.gov/science/doe-explainsnuclei', 'https://blueprintgenetics.com/nucleus-login/', 'https://www.pitzer.edu/the-nucleus/', 'https://gallerynucleus.com/artwork', 'https://store.roonlabs.com/products/nucleus-one/', 'https://www.ncbi.nlm.nih.gov/books/NBK549831/', 'https://www.nucleuscafe.com/', 'https:/

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Failed to scrape https://www.sigmaaldrich.com/US/en/technical-documents/technical-article/chemistry-and-synthesis/organic-reaction-toolbox/aldol-condensation-reaction: HTTPSConnectionPool(host='www.sigmaaldrich.com', port=443): Read timed out. (read timeout=10)
Progress: 21/80 - keyword: condensation reaction, tfidf: 0.07680886675053969
All links:  ['https://en.wikipedia.org/wiki/Stop_codon', 'https://www.sciencedirect.com/topics/biochemistry-genetics-and-molecular-biology/stop-codon', 'https://www.nature.com/articles/s41467-024-46703-z', 'https://www.sciencedirect.com/topics/immunology-and-microbiology/stop-codon', 'https://www.nature.com/articles/nature14896', 'https://www.news-medical.net/life-sciences/START-and-STOP-Codons.aspx', 'https://www.nature.com/articles/s41421-018-0019-0', 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7440625/', 'https://support.nlm.nih.gov/knowledgebase/article/KA-05234/en-us', 'https://pubs.acs.org/doi/10.1021/acschembio.3c00051', 'https://genesdev.cshlp

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 24/80 - keyword: rRNA, tfidf: 0.06256245284849107
All links:  ['https://study.com/academy/lesson/complementary-base-pairing-definition-lesson-quiz.html', 'https://www.genomicseducation.hee.nhs.uk/glossary/base-pairs/', 'https://www.nature.com/articles/srep04036', 'https://en.wikipedia.org/wiki/Complementarity_(molecular_biology)', 'https://www.dictionary.com/browse/complementary-base', 'https://arep.med.harvard.edu/labgc/adnan/projects/Utilities/revcomp.html', 'https://www.sciencedirect.com/science/article/pii/S0024379506002990', 'https://arxiv.org/abs/1301.6838', 'https://pubs.acs.org/doi/10.1021/acs.orglett.9b03801', 'https://onlinelibrary.wiley.com/doi/abs/10.1002/asia.201901728', 'https://www.genome.gov/genetics-glossary/Base-Pair', 'https://pubmed.ncbi.nlm.nih.gov/20876692/', 'https://biology.stackexchange.com/questions/82138/how-does-bonding-between-non-complementary-bases-occur', 'https://www.ncbi.nlm.nih.gov/books/NBK26821/', 'https://stackoverflow.com/questions/28934

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 25/80 - keyword: complementary bases, tfidf: 0.03336071181530543
All links:  ['https://www.youtube.com/watch?v=oefAI2x2CQM', 'https://www.sciencedirect.com/topics/agricultural-and-biological-sciences/protein-synthesis', 'https://en.wikipedia.org/wiki/Protein_biosynthesis', 'https://www.ncbi.nlm.nih.gov/books/NBK545161/', 'https://www.nature.com/scitable/topicpage/ribosomes-transcription-and-translation-14120660/', 'https://www.khanacademy.org/science/high-school-biology/hs-molecular-genetics/hs-rna-and-protein-synthesis/a/hs-rna-and-protein-synthesis-review', 'https://www.nature.com/articles/nature07228', 'https://pubs.rsc.org/en/content/articlelanding/2018/cs/c8cs00573g', 'https://www.nature.com/articles/s41467-021-21637-y', 'https://www.thermofisher.com/order/catalog/product/C10428', 'https://www.nature.com/articles/35021052', 'https://pubs.rsc.org/en/content/articlelanding/2020/cc/d0cc02582h', 'https://pubmed.ncbi.nlm.nih.gov/21683485/', 'https://www.int-res.com/articles/m

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 26/80 - keyword: protein synthesis, tfidf: 0.05431147567647998
All links:  ['https://en.wikipedia.org/wiki/DNA', 'https://medlineplus.gov/genetics/understanding/basics/dna/', 'https://www.ancestry.com/dna/', 'https://www.genome.gov/genetics-glossary/Deoxyribonucleic-Acid', 'https://www.britannica.com/science/DNA', 'https://www.idtdna.com/', 'https://dnacreates.com/', 'https://www.23andme.com/', 'https://adnas.com/', 'https://www.fbi.gov/how-we-can-help-you/dna-fingerprint-act-of-2005-expungement-policy/codis-and-ndis-fact-sheet', 'https://www.cancer.gov/publications/dictionaries/cancer-terms/def/dna', 'http://www.designandarchitecture.net/', 'https://www.cancer.gov/publications/dictionaries/genetics-dictionary/def/dna', 'https://www.ginkgobioworks.com/', 'https://www.dnalounge.com/', 'https://www.dnascript.com/', 'https://www.familytreedna.com/', 'https://www.whitehouse.gov/wp-content/uploads/2024/06/NSTC_National-Aquatic-eDNA-Strategy.pdf', 'https://www.who.int/teams/health-

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 27/80 - keyword: DNA, tfidf: 0.10705557136017403
All links:  ['https://en.wikipedia.org/wiki/cell_membrane', 'https://www.genome.gov/genetics-glossary/Cell-Membrane', 'https://www.britannica.com/science/cell-membrane', 'https://www.uniprot.org/locations/SL-0039', 'https://www.nature.com/scitable/topicpage/cell-membranes-14052567/', 'https://www.ncbi.nlm.nih.gov/books/NBK9928/', 'https://www.nature.com/articles/s41586-024-07527-5', 'https://www.ncbi.nlm.nih.gov/books/NBK9898/', 'https://www.nature.com/articles/s41467-021-26052-x', 'https://ashpublications.org/blood/article/112/10/3939/24600/Red-cell-membrane-past-present-and-future', 'https://pubs.acs.org/doi/10.1021/nl500618u', 'https://phys.org/tags/cell+membrane/', 'https://www.ebi.ac.uk/QuickGO/term/GO:0005886', 'https://www.merriam-webster.com/dictionary/cell%20membrane', 'https://www.sciencedirect.com/topics/materials-science/cell-membrane', 'https://openstax.org/books/concepts-biology/pages/3-4-the-cell-membrane', 'http

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 33/80 - keyword: amino acid sequence, tfidf: 0.04576860911841609
All links:  ['https://en.wikipedia.org/wiki/Start_codon', 'https://www.sciencedirect.com/topics/biochemistry-genetics-and-molecular-biology/start-codon', 'https://www.news-medical.net/life-sciences/START-and-STOP-Codons.aspx', 'https://www.ncbi.nlm.nih.gov/orffinder', 'https://www.nature.com/articles/s41586-023-06500-y', 'https://uk.linkedin.com/in/jason-mellad-54890622', 'https://startcodon.co/', 'https://www.nature.com/articles/s41467-021-26923-3', 'https://www.merriam-webster.com/medical/start%20codon', 'https://www.nature.com/articles/s42003-022-03534-2', 'https://www.milner.cam.ac.uk/startcodon/', 'https://elifesciences.org/articles/55637', 'https://www.researchgate.net/post/Cloning-a-C-terminal-GFP-fusion-is-it-okay-that-I-still-have-the-start-codon-for-both-my-gene-and-the-GFP', 'https://elifesciences.org/articles/39273', 'https://genesdev.cshlp.org/content/31/17/1717.full.html', 'https://depts.washington

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 40/80 - keyword: initiation, tfidf: 0.06534119497873062
All links:  ['https://en.wikipedia.org/wiki/Elongation', 'https://www.merriam-webster.com/dictionary/elongation', 'https://www.xometry.com/resources/3d-printing/elongation/', 'https://matmatch.com/learn/property/elongation', 'https://www.strouse.com/blog/what-is-elongation', 'https://iovs.arvojournals.org/article.aspx?articleid=2782714', 'https://www.genome.jp/pathway/map00062', 'https://www.zeiss.com/metrology/en/explore/topics/elongation-measurement.html', 'https://www.nature.com/articles/s41586-018-0479-2', 'https://astro.unl.edu/naap/ssm/modeling2.html', 'https://nepis.epa.gov/Exe/ZyPURL.cgi?Dockey=P100RF5I.TXT', 'https://pubmed.ncbi.nlm.nih.gov/28038841/', 'https://journals.biologists.com/jcs/article/131/10/jcs214346/56880/SunRiSE-measuring-translation-elongation-at-single', 'https://www.thesaurus.com/browse/elongation', 'https://pubmed.ncbi.nlm.nih.gov/23746255/', 'https://www.ncbi.nlm.nih.gov/gene/1915', 'https://

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 42/80 - keyword: termination, tfidf: 0.0660436785740178
All links:  ['https://my.clevelandclinic.org/health/articles/21532-enzymes', 'https://en.wikipedia.org/wiki/Enzyme', 'https://www.medicalnewstoday.com/articles/319704', 'https://www.hopkinsmedicine.org/health/wellness-and-prevention/digestive-enzymes-and-digestive-enzyme-supplements', 'https://www.britannica.com/science/enzyme', 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4692135/', 'https://www.cff.org/managing-cf/enzymes', 'https://www.genome.gov/genetics-glossary/Enzyme', 'https://www.healthline.com/health/why-are-enzymes-important', 'https://www.nature.com/articles/d41586-024-01461-2', 'https://www.fda.gov/drugs/drug-interactions-labeling/healthcare-professionals-fdas-examples-drugs-interact-cyp-enzymes-and-transporter-systems', 'https://www.megazyme.com/view-all-products/enzymes', 'https://www.sciencedirect.com/topics/biochemistry-genetics-and-molecular-biology/lysozyme', 'https://elementalenzymes.com/', 'https://

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 55/80 - keyword: initiation, tfidf: 0.06534119497873062
All links:  ['https://www.dol.gov/general/topic/termination', 'https://www.merriam-webster.com/dictionary/termination', 'https://www.nlm.nih.gov/nativevoices/timeline/488.html', 'https://www.sos.state.co.us/pubs/UCC/FAQs/termination.html', 'https://www.copyright.gov/rulemaking/mma-termination/', 'https://studyinthestates.dhs.gov/sevis-help-hub/student-records/completions-and-terminations/termination-reasons', 'https://www.fec.gov/help-candidates-and-committees/terminating-a-committee/', 'https://www.dhs.gov/sites/default/files/publications/21_0601_termination_of_mpp_program.pdf', 'https://www.ftc.gov/legal-library/browse/early-termination-notices', 'https://www.archives.gov/research/native-americans/bia/termination', 'https://ethics.wi.gov/Resources/CF-13%20Termination%20Request.pdf', 'https://www.pbgc.gov/prac/prem/termination-premiums', 'https://www.whitehouse.gov/briefing-room/presidential-actions/2021/01/20/proclamat

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 56/80 - keyword: termination, tfidf: 0.0660436785740178
All links:  ['https://foldingathome.org/', 'https://www.reddit.com/r/Folding/', 'https://www.youtube.com/c/FoldingIdeas', 'https://apply.jhu.edu/hopkins-insider/just-keep-folding/', 'https://foldingforum.org/', 'https://www.scottspizzatours.com/blog/when-did-we-start-folding-pizza/', 'https://www.neenahpaper.com/brands/neenah-folding-board-papers', 'https://github.com/helix-editor/helix/issues/1840', 'https://fr.mathworks.com/matlabcentral/answers/1954309-issues-with-code-folding-not-following-expected-rules', 'https://www.velotricbike.com/products/velotric-fold-1', 'https://www.merriam-webster.com/dictionary/folding', 'https://www.kelty.com/folding-cooler/', 'https://flyer.radioflyer.com/products/flyer-folding-cargo', 'https://www.kryptonitelock.com/en/products/product-information/current-key/005636.html', 'https://community.snowflake.com/s/question/0D53r00009qAnnQCAS/does-snowflake-support-query-folding-when-used-as-a-

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 58/80 - keyword: anticodons, tfidf: 0.02339659295330568
All links:  ['https://www.genome.gov/genetics-glossary/Transcription', 'https://www.khanacademy.org/science/ap-biology/gene-expression-and-regulation/transcription-and-rna-processing/a/overview-of-transcription', 'https://www.archives.gov/founding-docs/declaration-transcript', 'https://transcription.si.edu/', 'https://en.wikipedia.org/wiki/Transcription_factor', 'https://www.dropboxforum.com/t5/Delete-edit-and-organize/Transcription-for-audio-files-in-different-languages-not-working/td-p/742451/page/2', 'https://www.rev.com/', 'https://en.wikipedia.org/wiki/Transcription_(biology)', 'https://otter.ai/', 'https://www.gmrtranscription.com/', 'https://support.microsoft.com/en-us/office/view-live-transcription-in-microsoft-teams-meetings-dc1a8f23-2e20-4684-885e-2152e06a4a8b', 'https://support.zoom.com/hc/en/article?id=zm_kb&sysparm_article=KB0064927', 'https://dailytranscription.com/', 'https://trint.com/', 'https://www.tran

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 60/80 - keyword: genetic information, tfidf: 0.1127715131644649
All links:  ['https://en.wikipedia.org/wiki/Ribosome', 'https://www.genome.gov/genetics-glossary/Ribosome', 'https://www.britannica.com/science/ribosome', 'https://byjus.com/biology/ribosomes/', 'https://www.nature.com/articles/nature22998', 'https://bscb.org/learning-resources/softcell-e-learning/ribosome/', 'https://www.nature.com/articles/s41467-021-21637-y', 'https://www.rockefeller.edu/news/31051-study-reveals-how-ribosomes-are-assembled-in-human-cells/', 'https://www.nature.com/articles/nmicrobiol2016231', 'https://www.embopress.org/doi/10.15252/embj.2018100276', 'https://www.nature.com/articles/s41586-022-05623-y', 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC10187299/', 'https://nhmu.utah.edu/articles/2023/09/ribosomes-venki-ramakrishnan', 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC10635049/', 'https://micro.magnet.fsu.edu/cells/ribosomes/ribosomes.html', 'https://www.biorxiv.org/content/10.1101/2023.

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 66/80 - keyword: amino acid sequence, tfidf: 0.045769406959582
All links:  ['https://en.wikipedia.org/wiki/Ribosome', 'https://www.genome.gov/genetics-glossary/Ribosome', 'https://byjus.com/biology/ribosomes/', 'https://www.nature.com/articles/s41586-024-07508-8', 'https://www.ebi.ac.uk/QuickGO/term/GO:0005840', 'https://www.britannica.com/science/ribosome', 'https://www.genome.jp/kegg-bin/show_pathway?hsa03010', 'https://www.ebi.ac.uk/QuickGO/term/GO:0003735', 'https://www.genome.jp/pathway/ko03010+K01980', 'https://www.pnas.org/doi/full/10.1073/pnas.181342398', 'https://pdb101.rcsb.org/motm/121', 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8041214/', 'https://www.illumina.com/content/dam/illumina-marketing/documents/products/appnotes/appnote_riboseq.pdf', 'https://www.science.org/doi/10.1126/science.adf3208', 'https://www.jbc.org/article/S0021-9258(24)00024-3/fulltext', 'https://link.aps.org/doi/10.1103/PhysRevE.108.044404', 'https://genome.cshlp.org/content/early/2024/05

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 67/80 - keyword: ribosome, tfidf: 0.06125334557383675
All links:  ['https://www.healthline.com/nutrition/high-protein-foods', 'https://www.betterhealth.vic.gov.au/health/healthyliving/protein', 'https://en.wikipedia.org/wiki/Protein', 'https://nutritionsource.hsph.harvard.edu/what-should-you-eat/protein/', 'https://www.myplate.gov/eat-healthy/protein-foods', 'https://www.ncbi.nlm.nih.gov/protein/', 'https://www.rcsb.org/', 'https://medlineplus.gov/ency/article/002467.htm', 'https://www.webmd.com/fitness-exercise/good-protein-sources', 'https://www.proteinatlas.org/', 'https://www.health.harvard.edu/blog/how-much-protein-do-you-need-every-day-201506188096', 'https://www.theproteinbar.com/', 'https://www.bbcgoodfood.com/howto/guide/best-sources-protein', 'https://www.medicalnewstoday.com/articles/321522', 'https://store.trimhealthymama.com/product/optimized-whey-protein-powders/', 'https://www.mayoclinichealthsystem.org/hometown-health/speaking-of-health/are-you-getting-too-muc

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 76/80 - keyword: amino acid chain, tfidf: 0.0570598448253693
All links:  ['https://trnaband.bandcamp.com/album/istok', 'https://www.genome.gov/genetics-glossary/Transfer-RNA', 'https://en.wikipedia.org/wiki/transfer_RNA', 'https://www.nature.com/scitable/definition/trna-transfer-rna-256/', 'https://plants.usda.gov/home/plantProfile?symbol=trna', 'https://www.jbc.org/article/S0021-9258(23)02274-3/fulltext', 'https://gtrnadb.ucsc.edu/', 'https://www.cell.com/cell/abstract/S0092-8674(24)00397-0', 'https://www.ncbi.nlm.nih.gov/books/NBK6048/', 'https://www.facebook.com/trnaband/', 'https://www.ncbi.nlm.nih.gov/books/NBK6028/', 'https://www.promega.com/products/protein-detection/protein-labeling/transcend-trna/', 'https://www.ncbi.nlm.nih.gov/books/NBK6553/', 'https://rfam.org/family/RF00005', 'https://www.ncbi.nlm.nih.gov/gene/4553', 'https://www.scripps.edu/schimmel/', 'https://www.ncbi.nlm.nih.gov/gene/51651', 'https://pubmed.ncbi.nlm.nih.gov/31965079/', 'https://www.ncbi.nlm.n

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 78/80 - keyword: ribosome, tfidf: 0.06135866933926774
All links:  ['https://en.wikipedia.org/wiki/Stop_codon', 'https://www.sciencedirect.com/topics/biochemistry-genetics-and-molecular-biology/stop-codon', 'https://www.nature.com/articles/s41467-024-46703-z', 'https://www.sciencedirect.com/topics/immunology-and-microbiology/stop-codon', 'https://www.nature.com/articles/nature14896', 'https://www.news-medical.net/life-sciences/START-and-STOP-Codons.aspx', 'https://www.nature.com/articles/s41421-018-0019-0', 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7440625/', 'https://support.nlm.nih.gov/knowledgebase/article/KA-05234/en-us', 'https://pubs.acs.org/doi/10.1021/acschembio.3c00051', 'https://genesdev.cshlp.org/content/23/9/1106', 'https://www.jbc.org/article/S0021-9258(22)00574-9/fulltext', 'https://rnajournal.cshlp.org/content/26/9/1247.short', 'https://www.jbc.org/article/S0021-9258(22)00574-9/pdf', 'https://elifesciences.org/articles/52611', 'https://journals.asm.org/doi/1

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 1/101 - keyword: plasma membrane, tfidf: 0.084594984253374
All links:  ['https://en.wikipedia.org/wiki/Golgi_apparatus', 'https://www.britannica.com/science/Golgi-apparatus', 'https://www.genome.gov/genetics-glossary/golgi-body', 'https://www.ncbi.nlm.nih.gov/books/NBK9838/', 'https://www.hopkinsmedicine.org/news/newsroom/news-releases/2018/03/a-new-signaling-pathway-involving-the-golgi-apparatus-identified-in-cells-with-huntingtons-disease', 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8923104/', 'https://www.nature.com/scitable/topicpage/how-do-proteins-move-through-the-golgi-14397318/', 'https://www.sciencedirect.com/topics/biochemistry-genetics-and-molecular-biology/golgi-apparatus', 'https://inventbiotech.com/products/minute%E2%84%A2-golgi-apparatus-enrichment-kit-20-preps', 'https://www.bio-techne.com/cell-biology/cell-markers/golgi-apparatus-markers', 'https://journals.plos.org/plosbiology/article?id=10.1371/journal.pbio.3001051', 'https://www.cancer.gov/publications/

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 4/101 - keyword: diffusion, tfidf: 0.0917176459989854
All links:  ['https://www.osmosis.org/', 'https://osmosisbeauty.com/', 'https://www.osmosis.org/login', 'https://osmosis.zone/', 'https://www.britannica.com/science/osmosis', 'https://en.wikipedia.org/wiki/Osmosis', 'https://www.merriam-webster.com/dictionary/osmosis', 'https://wiki.openstreetmap.org/wiki/Osmosis', 'https://www.youtube.com/@osmosis', 'https://app.osmosis.zone/?to=OSMO&from=ATOM', 'https://www.osmosis.com/home/', 'https://www.kqed.org/quest/4441/the-science-behind-brining', 'https://www.netflix.com/title/80189898', 'https://github.com/openstreetmap/osmosis', 'https://app.osmosis.zone/pools', 'https://www.imdb.com/title/tt7043380/', 'https://www.coingecko.com/en/coins/osmosis', 'https://toytonics.bandcamp.com/album/osmosis', 'https://puretecwater.com/resources/the-basics-of-reverse-osmosis/', 'https://forum.breadtopia.com/t/starter-problem-used-reverse-osmosis-water/35546']
Progress: 5/101 - keyword: osmosis

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Failed to scrape https://www.nasdaq.com/market-activity/stocks/mrna: HTTPSConnectionPool(host='www.nasdaq.com', port=443): Read timed out. (read timeout=10)
Progress: 28/101 - keyword: mRNA, tfidf: 0.11516144696915902
All links:  ['https://trnaband.bandcamp.com/album/istok', 'https://www.genome.gov/genetics-glossary/Transfer-RNA', 'https://en.wikipedia.org/wiki/transfer_RNA', 'https://www.nature.com/scitable/definition/trna-transfer-rna-256/', 'https://plants.usda.gov/home/plantProfile?symbol=trna', 'https://www.jbc.org/article/S0021-9258(23)02274-3/fulltext', 'https://gtrnadb.ucsc.edu/', 'https://www.cell.com/cell/abstract/S0092-8674(24)00397-0', 'https://www.ncbi.nlm.nih.gov/books/NBK6048/', 'https://www.facebook.com/trnaband/', 'https://www.ncbi.nlm.nih.gov/books/NBK6028/', 'https://www.promega.com/products/protein-detection/protein-labeling/transcend-trna/', 'https://www.ncbi.nlm.nih.gov/books/NBK6553/', 'https://rfam.org/family/RF00005', 'https://www.ncbi.nlm.nih.gov/gene/4553', '

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 37/101 - keyword: ER, tfidf: 0.08105346717060466
All links:  ['https://en.wikipedia.org/wiki/Vacuole', 'https://www.genome.gov/genetics-glossary/Vacuole', 'https://forum.image.sc/t/vacuole-measurement/42606', 'https://www.britannica.com/science/vacuole', 'https://www.merriam-webster.com/dictionary/vacuole', 'https://www.nature.com/articles/s41467-023-39175-0', 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC158640/pdf/1010001.pdf', 'https://www.nature.com/articles/cdd201170', 'https://nph.onlinelibrary.wiley.com/doi/full/10.1111/nph.17070', 'https://www.ebi.ac.uk/QuickGO/GTerm?id=GO:0005773', 'https://byjus.com/biology/vacuoles/', 'https://www.ebi.ac.uk/QuickGO/term/GO:0007033', 'https://www.pnas.org/doi/full/10.1073/pnas.1701030114', 'https://rupress.org/jcb/article/216/10/3219/38987/Vps13-Mcp1-interact-at-vacuole-mitochondria', 'https://www.pnas.org/doi/full/10.1073/pnas.1117797109', 'https://journals.plos.org/plospathogens/article?id=10.1371/journal.ppat.1011996', 'https://p

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 41/101 - keyword: permeable, tfidf: 0.10861092074365947
All links:  ['https://www.ncbi.nlm.nih.gov/books/NBK26896/', 'https://www.sciencedirect.com/topics/agricultural-and-biological-sciences/carrier-protein', 'https://www.ncbi.nlm.nih.gov/books/NBK26815/', 'https://en.wikipedia.org/wiki/Membrane_transport_protein', 'https://pubs.acs.org/doi/10.1021/ja00226a005', 'https://febs.onlinelibrary.wiley.com/doi/10.1111/febs.15531', 'https://www.nature.com/articles/s41598-018-23528-7', 'https://www.jbc.org/article/S0021-9258(21)00099-5/fulltext', 'https://pubs.rsc.org/en/content/articlelanding/2018/np/c8np00044a', 'https://pubmed.ncbi.nlm.nih.gov/16760376/', 'https://study.com/academy/lesson/carrier-proteins-types-functions-quiz.html', 'https://pubmed.ncbi.nlm.nih.gov/7788802/', 'https://www.pnas.org/doi/full/10.1073/pnas.0705122104', 'https://pubmed.ncbi.nlm.nih.gov/12593757/', 'https://www.biologyonline.com/dictionary/carrier-protein', 'https://pubmed.ncbi.nlm.nih.gov/11072112/', '

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 47/101 - keyword: DNA, tfidf: 0.10703180534935164
All links:  ['https://en.wikipedia.org/wiki/RNA', 'https://www.genome.gov/genetics-glossary/RNA-Ribonucleic-Acid', 'https://rnajournal.cshlp.org/', 'https://www.britannica.com/science/RNA', 'https://www.fda.gov/news-events/press-announcements/fda-permits-marketing-first-point-care-hepatitis-c-rna-test', 'https://commonfund.nih.gov/Exrna', 'https://www.ncbi.nlm.nih.gov/books/NBK558999/', 'https://www.rnasociety.org/', 'https://www.pbs.org/wgbh/nova/labs/lab/rna/', 'https://www.albany.edu/rna', 'https://www.cancer.gov/publications/dictionaries/cancer-terms/def/rna', 'https://www.umassmed.edu/rti/', 'https://www.mdpi.com/journal/ncrna', 'http://irp.nih.gov/our-research/scientific-focus-areas/rna-biology', 'https://www.albany.edu/rna/news/2021-unafold-webserver-hosted-rna-institute-has-been-discontinued', 'https://www.nature.com/articles/d41586-024-01461-2', 'https://theconversation.com/explainer-what-is-rna-15169', 'https://www.u

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 53/101 - keyword: protein synthesis, tfidf: 0.055289956978637804
All links:  ['https://www.youtube.com/watch?v=oefAI2x2CQM', 'https://www.sciencedirect.com/topics/agricultural-and-biological-sciences/protein-synthesis', 'https://en.wikipedia.org/wiki/Protein_biosynthesis', 'https://www.ncbi.nlm.nih.gov/books/NBK545161/', 'https://www.nature.com/scitable/topicpage/ribosomes-transcription-and-translation-14120660/', 'https://www.khanacademy.org/science/high-school-biology/hs-molecular-genetics/hs-rna-and-protein-synthesis/a/hs-rna-and-protein-synthesis-review', 'https://www.nature.com/articles/nature07228', 'https://pubs.rsc.org/en/content/articlelanding/2018/cs/c8cs00573g', 'https://www.nature.com/articles/s41467-021-21637-y', 'https://www.thermofisher.com/order/catalog/product/C10428', 'https://www.nature.com/articles/35021052', 'https://pubs.rsc.org/en/content/articlelanding/2020/cc/d0cc02582h', 'https://pubmed.ncbi.nlm.nih.gov/21683485/', 'https://www.int-res.com/articles/m

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 54/101 - keyword: protein synthesis, tfidf: 0.055289956978637804
All links:  ['https://www.eeoc.gov/genetic-information-discrimination', 'https://www.hhs.gov/hipaa/for-professionals/special-topics/genetic-information/index.html', 'https://www.eeoc.gov/statutes/genetic-information-nondiscrimination-act-2008', 'https://www.congress.gov/bill/110th-congress/house-bill/493', 'https://www.eeoc.gov/youth/genetic-information', 'https://leg.mt.gov/bills/2023/billpdf/SB0351.pdf', 'https://www.ashg.org/advocacy/gina/', 'https://www.genome.gov/about-genomics/policy-issues/Genetic-Discrimination', 'https://www.ilga.gov/legislation/ilcs/ilcs3.asp?ActID=1567&ChapterID=35', 'https://www.dol.gov/agencies/ebsa/laws-and-regulations/laws/gina', 'https://www.federalregister.gov/documents/2016/05/17/2016-11557/genetic-information-nondiscrimination-act', 'https://le.utah.gov/~2021/bills/static/SB0227.html', 'https://www.dol.gov/agencies/oasam/centers-offices/civil-rights-center/statutes/genetic-inf

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 55/101 - keyword: genetic information, tfidf: 0.1127715131644649
All links:  ['https://www.mit.edu/~kardar/teaching/projects/chemotaxis(AndreaSchmidt)/gradients.htm', 'https://study.com/learn/lesson/concentration-gradient-examples.html', 'https://www.nature.com/articles/s41565-024-01654-w', 'https://www.sciencedirect.com/topics/engineering/concentration-gradient', 'https://www.nature.com/articles/s41557-024-01549-2', 'https://pubs.rsc.org/en/content/articlelanding/2017/ta/c6ta10297b', 'https://www.dictionary.com/browse/concentration-gradient', 'https://pubs.rsc.org/en/content/articlelanding/2017/ra/c7ra04494a', 'https://pubs.acs.org/doi/10.1021/acs.nanolett.6b02473', 'https://en.wikipedia.org/wiki/Fick%27s_laws_of_diffusion', 'https://agupubs.onlinelibrary.wiley.com/doi/abs/10.1029/2022GL101407', 'https://www.anl.gov/cnm/article/rational-design-of-robust-nirich-cathode-materials-via-concentration-gradient-strategy', 'https://acp.copernicus.org/articles/14/5495/2014/', 'https:

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 61/101 - keyword: passive diffusion, tfidf: 0.04611403133117867
All links:  ['https://byjus.com/biology/facilitated-diffusion/', 'https://en.wikipedia.org/wiki/Facilitated_diffusion', 'https://link.aps.org/doi/10.1103/PhysRevLett.109.168103', 'https://www.khanacademy.org/science/ap-biology/cell-structure-and-function/facilitated-diffusion/a/diffusion-and-passive-transport', 'https://arxiv.org/abs/2209.00500', 'https://www.sciencedirect.com/topics/chemistry/facilitated-diffusion', 'https://bio.libretexts.org/Bookshelves/Introductory_and_General_Biology/Introductory_Biology_(CK-12)/02%3A_Cell_Biology/2.14%3A_Facilitated_Diffusion', 'https://pubs.rsc.org/en/content/articlelanding/2016/cp/c6cp00307a', 'https://www.science.org/doi/10.1126/science.1221648', 'https://pubs.acs.org/doi/10.1021/ja8054035', 'https://flexbooks.ck12.org/cbook/ck-12-biology-flexbook-2.0/', 'https://pubmed.ncbi.nlm.nih.gov/22463256/', 'https://www.biologyonline.com/dictionary/facilitated-diffusion', 'https:

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 66/101 - keyword: exoplasmic reticulum, tfidf: 0.009293907652264774
All links:  ['https://en.wikipedia.org/wiki/Endoplasmic_reticulum', 'https://www.britannica.com/science/endoplasmic-reticulum', 'https://www.nature.com/articles/s41586-024-07527-5', 'https://www.genome.gov/genetics-glossary/Endoplasmic-Reticulum-rough', 'https://www.ncbi.nlm.nih.gov/books/NBK9889/', 'https://www.sciencedirect.com/topics/immunology-and-microbiology/endoplasmic-reticulum', 'https://www.ncbi.nlm.nih.gov/books/NBK26841/', 'https://www.nih.gov/news-events/nih-research-matters/dietary-fats-influence-endoplasmic-reticulum-membrane', 'https://www.genome.jp/pathway/hsa04141', 'https://www.ebi.ac.uk/QuickGO/term/GO:0005783', 'https://www.cancer.gov/publications/dictionaries/cancer-terms/def/endoplasmic-reticulum', 'https://pubmed.ncbi.nlm.nih.gov/28062922/', 'https://byjus.com/biology/endoplasmic-reticulum/', 'https://www.uniprot.org/locations/SL-0095', 'https://pubs.rsc.org/en/Content/ArticleLanding/2

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 75/101 - keyword: diffusion, tfidf: 0.09162704524398121
All links:  ['https://www.osmosis.org/', 'https://osmosisbeauty.com/', 'https://www.osmosis.org/login', 'https://osmosis.zone/', 'https://www.britannica.com/science/osmosis', 'https://en.wikipedia.org/wiki/Osmosis', 'https://www.merriam-webster.com/dictionary/osmosis', 'https://wiki.openstreetmap.org/wiki/Osmosis', 'https://www.youtube.com/@osmosis', 'https://app.osmosis.zone/?to=OSMO&from=ATOM', 'https://www.osmosis.com/home/', 'https://www.kqed.org/quest/4441/the-science-behind-brining', 'https://www.netflix.com/title/80189898', 'https://github.com/openstreetmap/osmosis', 'https://app.osmosis.zone/pools', 'https://www.imdb.com/title/tt7043380/', 'https://www.coingecko.com/en/coins/osmosis', 'https://toytonics.bandcamp.com/album/osmosis', 'https://puretecwater.com/resources/the-basics-of-reverse-osmosis/', 'https://forum.breadtopia.com/t/starter-problem-used-reverse-osmosis-water/35546']
Progress: 76/101 - keyword: osmo

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 79/101 - keyword: diffusion, tfidf: 0.09171472974829313
All links:  ['https://en.wikipedia.org/wiki/Membrane', 'https://www.britannica.com/science/membrane-biology', 'https://en.wikipedia.org/wiki/cell_membrane', 'https://www.merriam-webster.com/dictionary/membrane', 'https://www.nature.com/articles/s41586-024-07527-5', 'https://www.moleculardevices.com/products/assay-kits/ion-channel/flipr-membrane-potential', 'https://www.nintendo.com/us/store/products/membrane-switch/', 'https://www.mayoclinic.org/tests-procedures/ecmo/about/pac-20484615', 'https://www.ebi.ac.uk/QuickGO/term/GO:0016020', 'https://www.genome.gov/genetics-glossary/Cell-Membrane', 'https://health.ucdavis.edu/physiology/', 'https://www.laticrete.com/en/products/9235-waterproofing-membrane', 'https://www.mdpi.com/journal/membranes', 'https://www.ttuhsc.edu/centers-institutes/membrane-protein-research/default.aspx', 'https://www.watertechnologies.com/products/spiral-wound-membranes', 'https://pubmed.ncbi.nlm.nih

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 83/101 - keyword: transport vesicles, tfidf: 0.04237646800447779
All links:  ['https://www.medicalnewstoday.com/articles/vesicle', 'https://www.ncbi.nlm.nih.gov/books/NBK9886/', 'https://www.nature.com/articles/34438', 'https://www.ncbi.nlm.nih.gov/books/NBK26941/', 'https://rupress.org/jcb/article/220/10/e202106115/212611/GOLPH3-and-GOLPH3L-are-broad-spectrum-COPI', 'https://pubmed.ncbi.nlm.nih.gov/9738960/', 'https://www.sciencedirect.com/topics/medicine-and-dentistry/transport-vesicle', 'https://www.cell.com/cell/pdf/0092-8674(93)90545-2.pdf', 'https://pubmed.ncbi.nlm.nih.gov/8161212/', 'https://www.pnas.org/doi/full/10.1073/pnas.97.8.3783', 'https://bio.libretexts.org/Bookshelves/Cell_and_Molecular_Biology/Book%3A_Cells_-_Molecules_and_Mechanisms_(Wong)/11%3A_Protein_Modification_and_Trafficking/11.06%3A_Vesicular_Transport', 'https://en.wikipedia.org/wiki/Vesicle_(biology_and_chemistry)', 'https://www.biorxiv.org/content/10.1101/555813v1.full.pdf', 'https://www.science.o

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 84/101 - keyword: transport vesicles, tfidf: 0.04237646800447779
All links:  ['https://www.youtube.com/watch?v=Ptmlvtei8hw', 'https://bio.libretexts.org/Bookshelves/Human_Biology/Human_Biology_(Wakim_and_Grewal)/05%3A_Cells/5.07%3A_Cell_Transport', 'https://www.nature.com/articles/s41586-024-07583-x', 'https://link.aps.org/doi/10.1103/PhysRevFluids.3.103603', 'https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1009728', 'https://www.khanacademy.org/test-prep/mcat/cells/transport-across-a-cell-membrane/a/passive-transport-and-active-transport-across-a-cell-membrane-article', 'https://journals.lww.com/cjasn/fulltext/2015/01000/collecting_duct_principal_cell_transport_processes.19.aspx', 'https://pubs.acs.org/doi/10.1021/jm050893b', 'https://www.khanacademy.org/test-prep/mcat/cells', 'https://www.sciencedaily.com/releases/2022/10/221024102852.htm', 'https://www.ncbi.nlm.nih.gov/books/NBK9847/', 'https://www.cell.com/trends/cell-biology/fulltext/S0962-8924(23)

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 85/101 - keyword: cell transport, tfidf: 0.03798358940746203
All links:  ['https://en.wikipedia.org/wiki/Membrane_protein', 'https://www.ncbi.nlm.nih.gov/books/NBK26878/', 'https://cube-biotech.com/knowledge/membrane-protein-stabilization/membrane-proteins/', 'https://www.sciencedirect.com/topics/medicine-and-dentistry/membrane-protein', 'https://www.nature.com/articles/nprot.2014.173', 'https://elifesciences.org/articles/34317', 'https://www.nature.com/articles/s41586-022-05330-8', 'https://pubs.acs.org/doi/10.1021/ja310901f', 'https://www.nature.com/articles/s41586-022-05336-2', 'https://pubs.acs.org/doi/10.1021/ja101481b', 'https://www.nature.com/articles/nmeth.3801', 'https://pubs.acs.org/doi/10.1021/ja101481b', 'https://www.pnas.org/doi/full/10.1073/pnas.0813167106', 'https://pubs.acs.org/doi/10.1021/ja409815g', 'https://bio.libretexts.org/Bookshelves/Introductory_and_General_Biology/Introductory_Biology_(CK-12)/02%3A_Cell_Biology/2.06%3A_Membrane_Proteins', 'https://pub

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Failed to scrape https://pubs.rsc.org/en/content/articlelanding/2008/em/b714725b: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 88/101 - keyword: semipermeable membrane, tfidf: 0.053085161631818076
All links:  ['https://en.wikipedia.org/wiki/Isotonic', 'https://www.merriam-webster.com/dictionary/isotonic', 'https://pubmed.ncbi.nlm.nih.gov/29458321/', 'https://www.khanacademy.org/science/ap-biology/cell-structure-and-function/mechanisms-of-transport-tonicity-and-osmoregulation/a/osmosis', 'https://www.biopac.com/product/electrode-gel-isotonic-114-g/', 'https://arxiv.org/abs/1909.03725', 'https://scikit-learn.org/stable/modules/isotonic.html', 'https://arxiv.org/abs/2301.02692', 'https://jamanetwork.com/journals/jamaotolaryngology/fullarticle/221846', 'https://www.sciencedirect.com/topics/medicine-and-dentistry/isotonic-solution', 'https://www.cdc.gov/dengue/training/cme/ccm/page57594.html', 'https://www.adinstruments.com/products/isotonic-transducers', 'https://www.uptodate.com/contents/causes-of-hyponatremia-without-hypotonicity-including-pseudohyponatremia', 'https://www.sciencedirect.com/topics/medi

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 91/101 - keyword: hypotonic, tfidf: 0.04196087274007397
All links:  ['https://en.wikipedia.org/wiki/Solute_pumping', 'https://pubs.acs.org/doi/10.1021/acs.langmuir.8b00193', 'https://pubs.aip.org/aip/jcp/article-abstract/151/19/194507/198158', 'https://pubs.acs.org/doi/10.1021/acs.chemrev.1c00459', 'https://www.ncbi.nlm.nih.gov/books/NBK26896/', 'https://aiche.onlinelibrary.wiley.com/doi/abs/10.1002/aic.690170607', 'https://jackwestin.com/resources/mcat-content/plasma-membrane/solute-transport-across-membranes', 'https://www.nature.com/articles/s41467-022-31703-8', 'https://www.chegg.com/homework-help/questions-and-answers/3-figure-33-simplified-diagram-plasma-membrane-structure-represents-channel-proteins-const-q112344851', 'https://www.sciencedirect.com/science/article/pii/B9780444812537500080/pdf?md5=012bb02b6a55f1a25012c517d497d0c7&pid=1-s2.0-B9780444812537500080-main.pdf', 'https://pubmed.ncbi.nlm.nih.gov/21103599/', 'https://www.khanacademy.org/science/ap-biology/cell-s

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Failed to scrape https://pubs.rsc.org/en/content/articlelanding/2023/me/d3me00073g: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
Failed to scrape https://pubs.rsc.org/en/content/articlelanding/2017/lc/c6lc01486k: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
Progress: 92/101 - keyword: solute pump, tfidf: 0.021278260349320295
All links:  ['https://www.modernatx.com/en-US', 'https://www.genome.gov/genetics-glossary/messenger-rna', 'https://finance.yahoo.com/quote/MRNA/', 'https://en.wikipedia.org/wiki/Messenger_RNA', 'https://www.cdc.gov/coronavirus/2019-ncov/downloads/vaccines/COVID-19-mRNA-infographic_G_508.pdf', 'https://www.nature.com/articles/nrd.2017.243', 'https://medlineplus.gov/genetics/understanding/therapy/mrnavaccines/', 'https://www.cdc.gov/coronavirus/2019-ncov/vaccines/different-vaccines/overview-COVID-19-vaccines.html', 'https://www.who.int/initiatives/the-mrna-vaccine-technolo

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Failed to scrape https://www.nasdaq.com/market-activity/stocks/mrna: HTTPSConnectionPool(host='www.nasdaq.com', port=443): Read timed out. (read timeout=10)
Progress: 93/101 - keyword: mRNA, tfidf: 0.11513574755917509
All links:  ['https://www.modernatx.com/en-US', 'https://www.genome.gov/genetics-glossary/messenger-rna', 'https://finance.yahoo.com/quote/MRNA/', 'https://en.wikipedia.org/wiki/Messenger_RNA', 'https://www.cdc.gov/coronavirus/2019-ncov/downloads/vaccines/COVID-19-mRNA-infographic_G_508.pdf', 'https://www.nature.com/articles/nrd.2017.243', 'https://medlineplus.gov/genetics/understanding/therapy/mrnavaccines/', 'https://www.cdc.gov/coronavirus/2019-ncov/vaccines/different-vaccines/overview-COVID-19-vaccines.html', 'https://www.who.int/initiatives/the-mrna-vaccine-technology-transfer-hub', 'https://www.cancer.gov/news-events/cancer-currents-blog/2024/glioblastoma-mrna-vaccine-layered-nanoparticle', 'https://www.hhs.gov/about/news/2024/07/02/hhs-provides-176-million-develop-

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Failed to scrape https://www.nasdaq.com/market-activity/stocks/mrna: HTTPSConnectionPool(host='www.nasdaq.com', port=443): Read timed out. (read timeout=10)
Progress: 94/101 - keyword: mRNA, tfidf: 0.11520410679416741
All links:  ['https://www.modernatx.com/en-US', 'https://www.genome.gov/genetics-glossary/messenger-rna', 'https://finance.yahoo.com/quote/MRNA/', 'https://en.wikipedia.org/wiki/Messenger_RNA', 'https://www.cdc.gov/coronavirus/2019-ncov/downloads/vaccines/COVID-19-mRNA-infographic_G_508.pdf', 'https://www.nature.com/articles/nrd.2017.243', 'https://medlineplus.gov/genetics/understanding/therapy/mrnavaccines/', 'https://www.cdc.gov/coronavirus/2019-ncov/vaccines/different-vaccines/overview-COVID-19-vaccines.html', 'https://www.who.int/initiatives/the-mrna-vaccine-technology-transfer-hub', 'https://www.cancer.gov/news-events/cancer-currents-blog/2024/glioblastoma-mrna-vaccine-layered-nanoparticle', 'https://www.hhs.gov/about/news/2024/07/02/hhs-provides-176-million-develop-

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Failed to scrape https://www.nasdaq.com/market-activity/stocks/mrna: HTTPSConnectionPool(host='www.nasdaq.com', port=443): Read timed out. (read timeout=10)
Progress: 95/101 - keyword: mRNA, tfidf: 0.11524340246609632
All links:  ['https://www.modernatx.com/en-US', 'https://www.genome.gov/genetics-glossary/messenger-rna', 'https://finance.yahoo.com/quote/MRNA/', 'https://en.wikipedia.org/wiki/Messenger_RNA', 'https://www.cdc.gov/coronavirus/2019-ncov/downloads/vaccines/COVID-19-mRNA-infographic_G_508.pdf', 'https://www.nature.com/articles/nrd.2017.243', 'https://medlineplus.gov/genetics/understanding/therapy/mrnavaccines/', 'https://www.cdc.gov/coronavirus/2019-ncov/vaccines/different-vaccines/overview-COVID-19-vaccines.html', 'https://www.who.int/initiatives/the-mrna-vaccine-technology-transfer-hub', 'https://www.cancer.gov/news-events/cancer-currents-blog/2024/glioblastoma-mrna-vaccine-layered-nanoparticle', 'https://www.hhs.gov/about/news/2024/07/02/hhs-provides-176-million-develop-

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Failed to scrape https://www.nasdaq.com/market-activity/stocks/mrna: HTTPSConnectionPool(host='www.nasdaq.com', port=443): Read timed out. (read timeout=10)
Progress: 96/101 - keyword: mRNA, tfidf: 0.11518389721024225
All links:  ['https://www.energy.gov/', 'https://www.energystar.gov/', 'https://www.entergy.com/', 'https://www.sciencedirect.com/journal/energy', 'https://www.eia.gov/', 'https://en.wikipedia.org/wiki/Energy', 'https://www.iea.org/', 'https://www.xcelenergy.com/', 'https://www.nvenergy.com/', 'https://www.merriam-webster.com/dictionary/energy', 'https://www.nrel.gov/', 'https://www.energytransfer.com/', 'https://www.ef.org/', 'https://www.hitachienergy.com/', 'https://outagemap.duke-energy.com/', 'https://www.energyvault.com/', 'https://www.energy.ca.gov/', 'https://www.britannica.com/science/energy', 'https://austinenergy.com/', 'https://www.cisa.gov/topics/critical-infrastructure-security-and-resilience/critical-infrastructure-sectors/energy-sector']
Progress: 97/101 -

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 101/101 - keyword: cell transport, tfidf: 0.03798358940746203
All links:  ['https://www.instagram.com/roses_are_rosie/?hl=en', 'https://en.wikipedia.org/wiki/Rose', 'https://www.rose.edu/', 'https://www.maggierosemusic.com/', 'https://www.realbakingwithrose.com/', 'https://www.rosebowlstadium.com/', 'https://www.carolinerosemusic.com/', 'https://www.rose-hulman.edu/', 'https://www.therosevenice.la/', 'https://therose.org/', 'https://skirose.com/', 'https://charlierose.com/', 'https://www.facebook.com/lucyrosemusic/', 'https://www.lelarose.com/', 'https://www.brandeis.edu/rose/', 'https://www.roseveleth.com/', 'https://feinstein.northwell.edu/institutes-researchers/institute-molecular-medicine/robert-s-boas-center-for-genomics-and-human-genetics/rose-research-outsmarts-endometriosis', 'https://twitter.com/lilagracerose', 'https://www.strose.edu/', 'https://twitter.com/ltoddrose?lang=en']
Failed to scrape https://www.rose-hulman.edu/: HTTPSConnectionPool(host='www.rose-hulman.e

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 6/110 - keyword: neck, tfidf: 0.11759239656949408
All links:  ['https://blinking.id/', 'https://en.wikipedia.org/wiki/Blinking', 'https://community.netgear.com/t5/WiFi-Range-Extenders-Nighthawk/AC2200-Wifi-Mesh-Extender-blinking-green-power-light/td-p/2309076', 'https://github.com/VSCodeVim/Vim/issues/352', 'https://community.particle.io/t/particle-photon-blinking-green/47566', 'https://community.ruckuswireless.com/t5/Access-Points-Indoor-and-Outdoor/AP-CTL-light-keeps-blinking-really-fast/m-p/53384', 'https://forums.ni.com/t5/LabVIEW/Blinking-indicator-different-when-compiled-vs-development/td-p/4139554', 'https://community.usa.canon.com/t5/Point-Shoot-Digital-Cameras/PowerShot-SX70-HS-Light-Blinking-Rapidly/td-p/443977', 'https://community.arlo.com/t5/Arlo-Q/Arlo-Q-will-no-longer-connect-to-wifi-blinking-amber/m-p/1785250', 'https://forum.prusa3d.com/forum/hardware-firmware-and-software-help/any-way-to-turn-off-blinking-bed-light/', 'https://community.arlo.com/t5/Arlo-Pro/B

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 14/110 - keyword: competition, tfidf: 0.11743924978575701
All links:  ['https://www.linkedin.com/posts/simonsinek_when-youre-struggling-heres-a-way-you-activity-7192188408771842049-z6bI', 'https://store.steampowered.com/app/1035560/Struggling/', 'https://www.washingtonpost.com/lifestyle/on-parenting/an-irresponsible-seventh-graders-mom-asks-should-we-let-her-fail/2016/01/12/9cd38cc4-b4ac-11e5-a76a-0b5145e8679a_story.html', 'https://www.instagram.com/coachbennett/p/CixqPedpbn-/?hl=en', 'https://www.bleedcubbieblue.com/2024/5/1/24146449/cubs-defense-struggling-dansby-swanson', 'https://www.chalkbeat.org/philadelphia/2023/6/12/23753332/reading-literacy-phonics-whole-language-philadelphia-science-of-reading/', 'https://ies.ed.gov/ncee/WWC/PracticeGuide/26', 'https://www.struggling-game.com/', 'https://www.newsweek.com/i-saw-homeless-woman-struggling-puppies-help-1917591', 'https://oversight.house.gov/hearing/security-at-stake-an-examination-of-dods-struggling-background-check-sys

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 18/110 - keyword: surrendered, tfidf: 0.045691638588367584
All links:  ['https://www.webmd.com/a-to-z-guides/signs-exhaustion', 'https://www.cdc.gov/extreme-heat/signs-symptoms/index.html', 'https://www.webmd.com/balance/how-tired-is-too-tired', 'https://www.mayoclinic.org/diseases-conditions/heat-exhaustion/symptoms-causes/syc-20373250', 'https://www.weather.gov/safety/heat-illness', 'https://www.noaa.gov/stories/heat-exhaustion-or-heat-stroke-know-signs-of-heat-illness', 'https://www.merriam-webster.com/dictionary/exhaustion', 'https://www.nature.com/articles/s41586-019-1325-x', 'https://www.redcross.org/get-help/how-to-prepare-for-emergencies/types-of-emergencies/extreme-heat-safety.html', 'https://www.nature.com/articles/nri3862', 'https://jamanetwork.com/journals/jamanetworkopen/fullarticle/2796562', 'https://www.justice.gov/jm/civil-resource-manual-34-exhaustion-administrative-remedies', 'https://www.betterhealth.vic.gov.au/health/conditionsandtreatments/fatigue', 'http

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 29/110 - keyword: Aunt Kolab, tfidf: 0.00961037619443837
All links:  ['https://www.supperrestaurant.com/', 'https://www.supperatemma.com/', 'https://www.merriam-webster.com/dictionary/supper', 'https://zh.wikipedia.org/zh-cn/en:supper', 'https://www.dictionary.com/browse/supper', 'https://www.lunchorsupper.com/', 'https://thehotelemma.com/culinary/supper/', 'https://johnaugustswanson.com/catalog/last-supper/', 'https://dictionary.cambridge.org/us/dictionary/english/supper', 'https://fourteenlines.blog/tag/supper-by-garrison-keillor/', 'https://untitledsupperclub.com/', 'https://alexandradudley.substack.com/', 'https://genevalakefrontrealty.com/blog/the-village-supper-club-fish-fry-review/', 'https://twitter.com/mariobrothblog?lang=en', 'https://www.ilovesupperclub.com/nashville-menu/', 'https://venturewisconsin.com/products/wisconsin-supper-club-dining-destinations-2024-2025', 'https://knottypinesupperclub.com/', 'https://www.thebrinybabe.com/raw-bars/mosquito-supper-club', '

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Failed to scrape https://www.washingtonpost.com/travel/2024/03/10/batik-air-pilots-fall-asleep-indonesia/: HTTPSConnectionPool(host='www.washingtonpost.com', port=443): Read timed out. (read timeout=10)
Progress: 48/110 - keyword: fell asleep, tfidf: 0.039172418327426746
All links:  ['https://workingsolutions.com/', 'https://www.wsj.com/lifestyle/relationships/retired-friends-work-life-08b1fde3', 'https://www.merriam-webster.com/dictionary/working', 'https://workingfamilies.org/', 'https://www.workingnomads.com/jobs', 'https://wd4c.org/', 'https://www.ipcc.ch/working-group/wg3/', 'https://www.ca.gov/working/', 'https://www.ipcc.ch/working-group/wg1/', 'https://slate.com/podcasts/working', 'https://dictionary.cambridge.org/dictionary/english/working', 'https://www.ohchr.org/en/special-procedures/wg-business', 'https://www.workingadvantage.com/', 'https://workingfamiliescredit.wa.gov/', 'https://news.stanford.edu/stories/2020/06/snapshot-new-working-home-economy', 'https://www.careers.wa

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Failed to scrape https://mn.gov/deed/paidleave/employers/faq/: HTTPSConnectionPool(host='mn.gov', port=443): Max retries exceeded with url: /deed/paidleave/employers/faq/ (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1122)')))
Progress: 61/110 - keyword: paid, tfidf: 0.1272333493606074
All links:  ['https://en.threetimes.kr/', 'https://www.imdb.com/title/tt0459666/', 'https://www.linkedin.com/pulse/why-i-hate-phrase-tell-em-three-times-nancy-duarte', 'https://www.reddit.com/r/KingkillerChronicle/comments/au9nip/thoughts_about_phrase_listen_three_times_reread/', 'https://en.wikipedia.org/wiki/Three_Times', 'https://www.dbrl.org/adults/spring-peepers-freeze-three-times-before-spring', 'https://www.prb.org/resources/black-women-over-three-times-more-likely-to-die-in-pregnancy-postpartum-than-white-women-new-research-finds/', 'https://community.logos.com/forums/t/35793.aspx', 'http

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 73/110 - keyword: high school graduation, tfidf: 0.09287171119027442
All links:  ['https://en.wikipedia.org/wiki/Diploma', 'https://www.ibo.org/programmes/diploma-programme/', 'https://www.umhb.edu/resources/registrar/diploma-request', 'https://www.homeschooldiploma.com/personalized-high-school-diploma-for-homeschools/', 'https://www.academicexcellence.com/diploma/', 'https://registrar.wisc.edu/cediploma/', 'https://registrar.uchicago.edu/records/diplomas/ce-diplomas/', 'https://registrar.washington.edu/students/cediploma/', 'https://registrar.buffalo.edu/diploma/', 'https://www.education.pa.gov/Postsecondary-Adult/GED%20and%20Commonwealth%20Secondary%20School%20Diploma/Pages/default.aspx', 'https://myusf.usfca.edu/graduation/duplicate-diploma', 'https://registrar.sdsu.edu/graduation/diploma', 'https://registrar.virginia.edu/diploma/diploma-information', 'https://registrar.utah.edu/handbook/diplomas.php', 'https://registrar.umd.edu/graduation-diplomas/diplomas', 'https://comm

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 86/110 - keyword: fish’s life, tfidf: 0.03216796575093983
All links:  ['https://www.merriam-webster.com/dictionary/opening', 'https://www.dropboxforum.com/t5/Create-upload-and-share/Dropbox-app-doesn-t-open-directly-when-opening-links-from/td-p/532678', 'https://olympics.com/en/paris-2024/the-games/ceremonies/opening-ceremony', 'https://www.bls.gov/jlt/home.htm', 'https://forum.asana.com/t/asana-links-opening-in-browser-not-desktop-app-mac/231024', 'https://www.bls.gov/news.release/jolts.nr0.htm', 'https://www.mdrc.org/work/projects/opening-doors', 'https://www.bso.org/events/opening-night-at-tanglewood-2024', 'https://community.alteryx.com/t5/Alteryx-Designer-Desktop-Discussions/Stop-the-Welcome-to-Alteryx-Window-from-opening-each-time-I-open/td-p/1046597', 'https://www.dictionary.com/browse/opening', 'https://www.rct.uk/visit/buckingham-palace', 'https://en.chateauversailles.fr/plan-your-visit/practical-information', 'https://www.mathworks.com/matlabcentral/answers/144273-n

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 88/110 - keyword: mouth, tfidf: 0.13451218378560167
All links:  ['https://www.waters.com/nextgen/us/en.html', 'https://en.wikipedia.org/wiki/Water', 'https://www.youtube.com/watch?v=XoiOOiuH8iI', 'https://www.facebook.com/rogerwaters/', 'https://www.fijiwater.com/', 'https://www.epa.gov/laws-regulations/summary-clean-water-act', 'https://www.charitywater.org/', 'https://www.instagram.com/rogerwaters/?hl=en', 'https://www.wef.org/', 'https://www.unwater.org/', 'https://www.waterstexas.com/', 'http://waters.house.gov/', 'https://www.britannica.com/science/water', 'https://twitter.com/rogerwaters?lang=en', 'https://www.mdpi.com/journal/water', 'https://www.usgs.gov/special-topics/water-science-school', 'https://www.charlottenc.gov/water/Home', 'https://www.un.org/en/observances/water-day', 'https://www.mayoclinic.org/healthy-lifestyle/nutrition-and-healthy-eating/in-depth/water/art-20044256', 'https://water.noaa.gov/']
Failed to scrape https://www.waters.com/nextgen/us/en.html: 

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 96/110 - keyword: struggled, tfidf: 0.020365871750175096
All links:  ['https://www.bedbathandbeyond.com/', 'https://www.ikea.com/us/en/cat/beds-bm003/', 'https://www.ashleyfurniture.com/c/furniture/bedroom/beds/', 'https://www.instagram.com/bed.band/?hl=en', 'https://genome.ucsc.edu/FAQ/FAQformat.html', 'https://www.thuma.co/products/the-bed', 'https://www.epa.gov/bedbugs', 'https://www.self.com/story/bed-rotting-self-care', 'https://floydhome.com/products/the-bed-frame', 'https://www.wayfair.com/furniture/sb0/beds-c46122.html', 'https://zenbivy.com/products/zenbivy-bed', 'https://www.bedstu.com/', 'https://en.wikipedia.org/wiki/Bed', 'https://www.target.com/c/beds-bedroom-furniture/-/N-4ym22', 'https://www.newyorker.com/magazine/1933/07/08/my-life-and-hard-times-i-the-night-the-bed-fell', 'https://www.cpsc.gov/Recalls/2024/Medline-Industries-Recalls-1-5-Million-Adult-Portable-Bed-Rails-Due-to-Serious-Entrapment-and-Asphyxia-Hazards-Two-Deaths-Reported', 'https://bigbarker.co

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 100/110 - keyword: Aunt Kolab, tfidf: 0.009608618673856744
All links:  ['https://www.goodmorningamerica.com/', 'https://www.instagram.com/goodfknmorning/?hl=en', 'https://in.pinterest.com/bknagar/morning/', 'https://goodmorningisaband.bandcamp.com/album/barnyard', 'https://tenor.com/search/good-morning-gifs', 'https://www.youtube.com/c/GoodMorningMagic', 'https://twitter.com/gmfb', 'https://abc.com/show/f17f433e-bd1f-4686-9994-9d3aea9f8c2d', 'https://www.reddit.com/r/indieheads/comments/1bkfwt1/fresh_album_good_morning_good_morning_seven/', 'https://goodmorning.band/', 'https://www.campdesoto.com/blog/2017/3/a-good-morning-for-an-adventure', 'https://www.theparisreview.org/blog/2017/07/19/where-farts-come-in/', 'https://www.facebook.com/abc7gmw/', 'https://www.linkedin.com/pulse/good-morning-america-how-you-john-mcmullen', 'https://www.goodmorningtacos.co/', 'https://education.lego.com/en-us/lessons/spikeessential-quirky-creations/spikeessential-good-morning-machine/', 'https

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Failed to scrape https://www.puzzledevents.com/: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
Progress: 102/110 - keyword: puzzled, tfidf: 0.062005191254859086
All links:  ['https://www.realized1031.com/', 'https://www.dictionary.com/browse/realized', 'https://www.merriam-webster.com/thesaurus/realized', 'https://www.realizedworth.com/', 'https://www.merriam-webster.com/dictionary/realize', 'https://www.usaid.gov/cii/innovation-realized', 'https://forum.mbse-capella.org/t/issue-with-realized-exchange-when-transitioned-from-system-to-logical-architecture/3646', 'https://www.ey.com/en_us/transformation-realized', 'https://www.investopedia.com/terms/r/realizedprofit.asp', 'https://www.amnesty.org/en/documents/afr25/4569/2021/en/', 'https://www.thesaurus.com/browse/realized', 'https://twitter.com/idillionaire/status/1549342067138199552', 'https://investor.vanguard.com/investor-resources-education/taxes/realized-capital-gains', 'https://inequa

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 1/89 - keyword: 400 meters, tfidf: 0.02344924786496355
All links:  ['https://achievements.org/', 'https://www.trueachievements.com/', 'https://communityhub.strava.com/t5/strava-features-chat/achievements-in-the-activity-not-showing-but-showed-in-the-all/m-p/25462', 'https://www.lw.com/en/achievements', 'https://steamcommunity.com/app/1106840/discussions/0/6756004301614261743/', 'https://www.reddit.com/r/GroundedGame/comments/1c7pgkp/steam_achievements_broken/', 'https://steamcommunity.com/app/1324130/discussions/0/4029094770944900375/', 'https://www.xboxachievements.com/', 'https://www.reddit.com/r/Seaofthieves/comments/18ydp93/xbox_achievements_not_carrying_over_to_steam/', 'https://twitch.uservoice.com/forums/924712-creator-dashboard/suggestions/40143991-completed-achievements-not-marked-complete-for-aff', 'https://www.merriam-webster.com/dictionary/achievement', 'https://github.com/orgs/community/discussions/37577', 'https://answers.microsoft.com/en-us/xbox/forum/all/fortn

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Failed to scrape https://dps.mn.gov/divisions/dvs/forms-documents/Documents/Special-Plates-Brochure.pdf: The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.

Original exception(s) from parser:
 expected name token at '<![���T9N��L�\x1d�\x12�U٩|'
Failed to scrape https://www.fincen.gov/sites/default/files/shared/BOI-Informational-Brochure-April-2024.pdf: HTTPSConnectionPool(host='www.fincen.gov', port=443): Read timed out. (read timeout=10)
Progress: 9/89 - keyword: brochure, tfidf: 0.06174303137175282
All links:  ['https://litverse.substack.com/p/kids-today-cant-read-even-college', 'https://www.reddit.com/r/books/comments/pvqnk9/why_cant_i_comprehend_any_book_i_read/', 'https://community.alteryx.com/t5/Alteryx-Designer-Desktop-Discussions/Can-t-read-file-error-while-running-a-workflow-schedule/td-p/223583', 'https://hughmcguire.medium.com/why-can-t-we-read-anymore-503c38c131fe', 'https://github.com/nodejs/postject/issues/82', 

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 16/89 - keyword: competition, tfidf: 0.11743924978575701
All links:  ['https://en.wikipedia.org/wiki/Compliance_(film)', 'https://www.merriam-webster.com/dictionary/compliance', 'https://www.imdb.com/title/tt1971352/', 'https://oig.hhs.gov/compliance/', 'https://www.fincen.gov/boi/small-entity-compliance-guide', 'https://aws.amazon.com/compliance/', 'https://cloud.google.com/compliance', 'https://csa.fmcsa.dot.gov/', 'https://www.compliance.ai/', 'https://www.pcisecuritystandards.org/', 'https://www.irs.gov/businesses/corporations/foreign-account-tax-compliance-act-fatca', 'https://www.epa.gov/compliance', 'https://vsb.org/Site/Site/lawyers/compliance.aspx', 'https://dartmouthsports.com/sports/2018/5/30/584953.aspx', 'https://www.usaid.gov/safeguarding-and-compliance', 'https://ofac.treasury.gov/media/932436/download?inline', 'https://sundevilcompliance.asu.edu/', 'https://www.techtarget.com/searchdatamanagement/definition/compliance', 'https://www.travel.dod.mil/Policy-Regul

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Failed to scrape https://www.fincen.gov/boi/small-entity-compliance-guide: HTTPSConnectionPool(host='www.fincen.gov', port=443): Read timed out. (read timeout=10)
Progress: 17/89 - keyword: compliance, tfidf: 0.12164900727026116
All links:  ['https://www.confused.com/', 'https://www.merriam-webster.com/dictionary/confused', 'https://twitter.com/joshuahorowitz?lang=en', 'https://podcasts.apple.com/us/podcast/happy-sad-confused/id827905050', 'https://www.youtube.com/watch?v=cn5a57rH2XI', 'https://www.newyorker.com/culture/cultural-comment/the-dazed-and-confused-generation', 'https://www.youtube.com/watch?v=-CENNFcvRDo', 'https://www.theguardian.com/film/2019/jun/11/how-we-made-dazed-and-confused-richard-linklater-wiley-wiggins', 'https://dictionary.cambridge.org/dictionary/english/confused', 'https://www.dropboxforum.com/t5/Delete-edit-and-organize/Very-confused-with-regards-to-Smart-Sync/td-p/575128', 'https://www.instagram.com/theconfusedarab/?hl=en', 'https://www.dropboxforum.com/t5/C

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 20/89 - keyword: determination, tfidf: 0.05866962031976516
All links:  ['https://viewpointmag.com/2013/09/15/learning-to-struggle-my-story-between-workerism-and-feminism/', 'https://news.stanford.edu/stories/2019/09/embrace-struggle-education-professor-challenges-common-beliefs-teaching-learning', 'https://www.nytimes.com/2022/04/05/science/education-learning-challenge.html', 'https://journals.sagepub.com/doi/10.3102/00028312034001039', 'https://www.peterlang.com/series/eas', 'https://pubmed.ncbi.nlm.nih.gov/38131712/', 'https://www.zinnedproject.org/news/teach-black-freedom-struggle-classes/', 'https://christianscholars.com/the-struggle-for-soul-in-christian-higher-education-burtchaell-was-right-and-i-was-wrong-part-ii/', 'https://educationopensdoors.org/struggle-and-acceptance-my-experience-with-inclusion/', 'https://www.tandfonline.com/doi/full/10.1080/20004508.2022.2127263', 'https://www.wsj.com/articles/struggle-session-at-stanford-law-school-federalist-society-kyle-dunc

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 21/89 - keyword: educational struggle, tfidf: 0.018299205598396675
All links:  ['https://en.wikipedia.org/wiki/Embarrassment', 'https://www.psychologytoday.com/us/basics/embarrassment', 'https://www.merriam-webster.com/dictionary/embarrassment', 'https://dictionary.cambridge.org/dictionary/english/embarrassment', 'https://www.dictionary.com/browse/embarrassment', 'https://psycnet.apa.org/record/1998-04949-006', 'https://www.heinemann.com/products/e08877.aspx', 'https://psycnet.apa.org/record/2011-20730-001', 'https://www.texastribune.org/2022/05/19/john-cornyn-ken-paxton-texas-attorney-general-runoff/', 'https://www.ncronline.org/blogs/soul-seeing/embarrassment-doorway-grace', 'https://www.sciencedirect.com/science/article/pii/S0261379412001527', 'https://www.amazon.com/Weighted-Comfort-Embarrassment-Just-Play/dp/B0CBVRW4ND', 'https://journals.sagepub.com/doi/full/10.1177/1555412017721086', 'https://hbr.org/2023/01/how-to-move-past-an-embarrassing-moment-at-work', 'https://ww

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 32/89 - keyword: hall monitor, tfidf: 0.05569023051550113
All links:  ['https://www.imdb.com/title/tt0480687/', 'https://en.wikipedia.org/wiki/Hall_Pass', 'https://www.hallpassslc.com/', 'https://hallpassid.com/', 'https://pass.securly.com/login', 'https://mwlpdx.com/hall-pass/', 'https://www.minnesotaorchestra.org/hallpass/', 'https://domainoutdoor.com/products/hall-pass', 'https://www.in.gov/sos/elections/voter-information/voters-rights/voting-systems-in-indiana/hoosier-hall-pass/', 'https://www.rottentomatoes.com/m/hall_pass', 'https://www.bludot.com/hall-pass-wall-shelf.html', 'https://bpo.org/symphony-hall-pass/', 'https://www.knoxschools.org/Page/21477', 'https://legacyfoodhall.com/hall-pass-gift-card/', 'https://www.hallmanagementgroup.com/dining-rewards-2023/', 'https://www.hallpass80s.com/', 'https://www.bio-itworldexpo.com/exhibit-hall-keynote-pass', 'https://www.hallpasses.com/product/hall-pass-holder/', 'https://www.hallpassband.net/', 'https://www.reddit.com/r/bo

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 51/89 - keyword: no learning centers, tfidf: 0.0467442917699488
All links:  ['https://www.tabletmag.com/sections/community/articles/end-schoolyear-like-no-other-campus-essays', 'https://alastore.ala.org/lfsl2', 'https://www.quora.com/What-does-Stanford-offer-that-no-or-few-other-schools-have', 'https://medium.com/pi-top/meet-the-school-with-no-classes-no-classrooms-and-no-curriculum-7cc7be517cef', 'https://www.reddit.com/r/LGBTeens/comments/f6r8ov/relationships_why_are_there_no_other_gay_guys_at/', 'https://www.amazon.com/Leading-School-Librarians-There-Option/dp/0838915108', 'https://www.berea.edu/', 'https://www.nea.org/nea-today/all-news-articles/back-school-no-other', 'https://www.ucls.uchicago.edu/program', 'https://www.seattletimes.com/seattle-news/education/2024-graduates-look-back-on-a-high-school-experience-like-no-other/', 'https://www.zinnedproject.org/materials/a-school-year-like-no-other/', 'https://www.theguardian.com/us-news/commentisfree/article/2024/may/04/un

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 62/89 - keyword: realization, tfidf: 0.04943467346098506
All links:  ['https://www.merriam-webster.com/dictionary/recognition', 'https://www.nursingworld.org/organizational-programs/magnet/', 'https://www.sciencedirect.com/journal/pattern-recognition', 'https://www.greatplacetowork.com/resources/blog/creating-a-culture-of-recognition', 'https://cdphe.colorado.gov/co-green-business-recognition', 'https://cloud.google.com/speech-to-text', 'https://www.justice.gov/eoir/recognition-accreditation-roster-reports', 'https://www.nist.gov/programs-projects/face-recognition-vendor-test-frvt', 'https://www.cdacouncil.org/en/', 'https://www.onetonline.org/explore/workvalues/Recognition/', 'https://governor.ohio.gov/contact/recognition-request', 'https://www.pa.gov/en/agencies/dos/resources/professional-licensing-resources/act-31.html', 'https://www.ncqa.org/programs/health-care-providers-practices/patient-centered-specialty-practice-recognition-pcsp/', 'https://www.thesaurus.com/browse/r

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Failed to scrape https://www.sacredheart.edu/majors--programs/sixth-year-degree-in-literacy-102097/: HTTPSConnectionPool(host='www.sacredheart.edu', port=443): Max retries exceeded with url: /majors--programs/sixth-year-degree-in-literacy-102097/ (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1122)')))


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 65/89 - keyword: remedial reading, tfidf: 0.08111645550252358
All links:  ['https://onelook.com/?loc=rza&w=sad%20gaze', 'https://icds.ee/en/the-peculiarities-of-the-armenian-sad-gaze/', 'https://pubmed.ncbi.nlm.nih.gov/31886681/', 'https://www.quora.com/Do-people-with-BPD-have-intense-mysterious-or-sad-eyes-gaze', 'https://www.npr.org/sections/krulwich/2011/11/14/142240325/hey-why-are-you-crying', 'https://www.quora.com/Why-would-your-crush-look-at-you-with-sad-eyes', 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6283596/', 'https://www.reddit.com/r/RSbookclub/comments/1cmru3g/i_crave_female_gaze_writing_thats_not_a/', 'https://www.frontiersin.org/journals/human-neuroscience/articles/10.3389/fnhum.2013.00872/full', 'https://www.reddit.com/r/CasualConversation/comments/svt5cp/i_have_sad_eyes_people_always_ask_why_i_look_sad/', 'https://www.frontiersin.org/journals/neuroscience/articles/10.3389/fnins.2022.852484/full', 'https://link.springer.com/article/10.1007/s10608-014-9669-

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 74/89 - keyword: taught, tfidf: 0.0685115490682187
All links:  ['https://lhstoday.org/8863/uncategorized/mr-leonard/', 'https://www.a2schools.org/Page/3340', 'https://www.thehappybachelor.org/blog/teacher-appreciation-week-an-ode-to-mr-leonard-london', 'https://eagleexaminer.com/3107/student-life/faculty-focus-mr-shawn-leonard/', 'https://www.youtube.com/@ProfessorLeonard', 'https://www.dermodys.com/memorials/Leonard-Patrick/3540660/view-condolences.php', 'https://www.providenceschools.org/Page/4270', 'https://www.lcps.org/domain/5866', 'https://aohd.com/news/teacher-spotlight-mr-leonard/', 'https://www.yonkerspublicschools.org/domain/11288', 'https://www.detroitk12.org/Page/10538', 'http://www.leonardcohenforum.com/viewtopic.php?t=3090', 'https://www.mcpsva.org/domain/2282', 'https://twitter.com/leonard_sci?lang=en', 'https://www.dignitymemorial.com/obituaries/annapolis-md/leonard-moses-10166257', 'https://www.halleonard.com/biography/423/thomas-may', 'https://www.neisd.net/

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 4/96 - keyword: microdebris, tfidf: 0.02831903058172939
All links:  ['https://www.history.com/topics/cold-war/1950s', 'https://en.wikipedia.org/wiki/1950s', 'https://www.archives.gov/research/census/1950', 'https://1950census.archives.gov/', 'https://www.tva.com/about-tva/our-history/1950s', 'https://www.ssa.gov/oact/babynames/decades/names1950s.html', 'https://mn.gov/mnddc/parallels/five/5a/1.html', 'https://www.nps.gov/parkhistory/online_books/brochures/year-1950.htm', 'https://www.guggenheim.org/exhibition/zero-countdown-to-tomorrow-1950s60s-2', 'https://sitn.hms.harvard.edu/flash/2017/history-artificial-intelligence/', 'https://www.fieldmuseum.org/about/press/fish-have-been-swallowing-microplastics-1950s', 'https://www.aspeninstitute.org/blog-posts/native-people-continue-to-resist-1950s-policies/', 'https://www.sandia.gov/about/history/1950s/', 'https://jcprd.com/933/1950s-All-Electric-House', 'https://www.bmtflightphotos.af.mil/1950s/', 'https://www.ncaa.org/sports/2021/

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Failed to scrape https://www.bmtflightphotos.af.mil/1950s/: HTTPSConnectionPool(host='www.bmtflightphotos.af.mil', port=443): Read timed out. (read timeout=10)
Progress: 5/96 - keyword: 1950s, tfidf: 0.03278515546209692
All links:  ['https://www.imdb.com/title/tt3876910/', 'https://en.wikipedia.org/wiki/Dangerous_(2021_film)', 'https://smartgrowthamerica.org/dangerous-by-design/', 'https://www.elitedangerous.com/', 'https://danostequila.com/', 'https://www.reddit.com/r/EliteDangerous/', 'https://www.instagram.com/thedangeroussummer/?hl=en', 'https://forums.frontier.co.uk/forums/dangerous-discussion/', 'https://www.amazon.com/Dangerous-Milo-Yiannopoulos/dp/069289344X', 'https://www.merriam-webster.com/dictionary/dangerous', 'https://www.nytimes.com/2022/05/08/opinion/tiktok-twitter-china-bytedance.html', 'https://www.agriculture.pa.gov/Animals/DogLaw/Dangerous%20Dogs/Pages/default.aspx', 'https://www.theguardian.com/commentisfree/2016/dec/01/stephen-hawking-dangerous-time-planet-inequal

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 27/96 - keyword: communications satellite, tfidf: 0.07290156558632009
All links:  ['https://www.fmcsa.dot.gov/regulations/hours-service/elds/eld-malfunctions-and-data-diagnostic-events-faqs', 'https://www.merriam-webster.com/dictionary/malfunction', 'https://www.fmcsa.dot.gov/hours-service/elds/eld-malfunctions-and-data-diagnostic-events', 'https://eld.fmcsa.dot.gov/FAQ/Topics?name=ELD_Malfunctions_and_Data_Diagnostic_Events', 'https://www.dos.pa.gov/VotingElections/OtherServicesEvents/Documents/2023-Directive-Voting-System-Malfunctions-1.0.pdf.pdf', 'https://www.adventurecycling.org/blog/basic-bike-malfunctions-and-how-to-deal-with-them/', 'https://steamcommunity.com/app/1324130/discussions/0/5153834725011385016/', 'https://quartermaster.army.mil/adfsd/adfsd_mrb_one_stop.html', 'https://steamcommunity.com/app/323190/discussions/0/1869497226795775093/', 'https://community.st.com/t5/stm32-mcus-products/microcontroller-malfunctions/td-p/410617', 'https://www.reddit.com/r/ReadyO

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 30/96 - keyword: uncontrolled orbit, tfidf: 0.04009266464657739
All links:  ['https://imprec.bandcamp.com/album/colliding-bubbles', 'https://community.cloudflare.com/t/subscription-stream-basic-colliding-with-active-subscription-stream-basic-code-1226/432330', 'https://forum.gamemaker.io/index.php?threads/solved-how-do-you-get-the-id-of-an-instance-your-colliding-with.21219/', 'https://forum.godotengine.org/t/how-to-check-if-a-player-is-not-colliding/6320', 'https://forum.gamemaker.io/index.php?threads/checking-if-object-is-no-longer-colliding-with-collision-event.273/', 'https://www.reddit.com/r/NoMansSkyTheGame/comments/izccd2/ran_into_these_crazy_planets_colliding_into/', 'https://concentricrecords.bandcamp.com/album/colliding-wind', 'https://discourse.slicer.org/t/colliding-fronts-under-segment-where-the-vessel-bends-a-lot/16628', 'https://code.dccouncil.gov/us/dc/council/code/sections/50-2201.05c', 'https://discourse.mcneel.com/t/grouping-colliding-objects/79223', 'https

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 32/96 - keyword: American satellite, tfidf: 0.042331877650624364
All links:  ['https://spacenews.com/u-s-claims-recently-launched-russian-satellite-is-an-asat/', 'https://www.aljazeera.com/news/2024/6/28/iss-astronauts-take-cover-as-defunct-russian-satellite-shatters', 'https://www.spacecom.mil/Newsroom/News/Article-Display/Article/3819238/press-release-break-up-of-russian-owned-space-object/', 'https://spacenews.com/russian-satellite-breaks-up-creating-debris-in-low-earth-orbit/', 'https://www.spacecom.mil/Newsroom/News/Article-Display/Article/2285098/russia-conducts-space-based-anti-satellite-weapons-test/', 'https://www.npr.org/2024/05/30/nx-s1-4975741/what-to-know-russia-satellite-space-weapon-cosmos-2576', 'https://www.reuters.com/technology/space/russian-satellite-blasts-debris-space-forces-iss-astronauts-shelter-2024-06-27/', 'https://www.bbc.com/news/articles/cq55ww5j7e2o', 'https://www.reuters.com/world/europe/un-body-condemns-russian-satellite-interference-europe-20

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 57/96 - keyword: spacesuits, tfidf: 0.048545077118958266
All links:  ['https://impact.com/', 'https://www.merriam-webster.com/dictionary/impact', 'https://www.impactca.org/', 'https://impacttest.com/', 'https://www.impactinc.org/', 'https://www.impactcap.org/', 'https://impacthub.net/', 'https://impact.illinois.gov/', 'https://www.impact-net.org/', 'https://thegiin.org/publication/post/about-impact-investing/', 'https://netimpact.org/', 'https://www.instagram.com/impact/?hl=en', 'https://www.impact-initiatives.org/', 'https://www.impactplus.com/', 'https://iris.thegiin.org/', 'https://impact.net/', 'https://www.irs.gov/coronavirus/economic-impact-payments', 'https://www.ifpri.org/project/ifpri-impact-model/', 'https://attack.mitre.org/tactics/TA0040/', 'https://www.bcorporation.net/en-us/programs-and-tools/b-impact-assessment']
Failed to scrape https://impact.illinois.gov/: HTTPSConnectionPool(host='impact.illinois.gov', port=443): Max retries exceeded with url: / (Caused by 

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 60/96 - keyword: space exploration, tfidf: 0.09752750517712983
All links:  ['https://www.nasa.gov/headquarters/library/find/bibliographies/space-debris/', 'https://www.nhm.ac.uk/discover/what-is-space-junk-and-why-is-it-a-problem.html', 'https://www.pbs.org/newshour/science/analysis-why-trash-in-space-is-a-major-problem-with-no-clear-fix', 'https://www.esa.int/Space_Safety/Space_Debris/About_space_debris', 'https://en.wikipedia.org/wiki/Space_debris', 'https://science.nasa.gov/solar-system/10-things-whats-that-space-rock/', 'https://aerospace.org/article/brief-history-space-debris', 'https://www.nesdis.noaa.gov/news/does-space-junk-fall-the-sky', 'https://aerospace.org/article/space-debris-101', 'https://www.spacecom.mil/Newsroom/News/Article-Display/Article/2842957/russian-direct-ascent-anti-satellite-missile-test-creates-significant-long-last/', 'https://www.fcc.gov/document/fcc-takes-first-space-debris-enforcement-action', 'https://www.nationalgeographic.com/science/articl

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Failed to scrape https://www.insurance.ca.gov/0200-industry/0120-notices/upload/NOTICE-Change-in-Background-March-2023.pdf: The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.

Original exception(s) from parser:
 expected name token at '<![ѕ�9!\x1c�Us�k�n�q��-'


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 69/96 - keyword: background information, tfidf: 0.08526810522255576
All links:  ['https://www.youtube.com/watch?v=iS1g8G_njx8', 'https://dictionary.cambridge.org/dictionary/english/problem', 'https://www.youtube.com/watch?v=DVkkYlQNmbc', 'https://www.merriam-webster.com/dictionary/problem', 'https://twitter.com/mrproblemx?lang=en', 'https://community.hubspot.com/t5/Social-Tools/Problem-uploading-Instagram-Reels/m-p/742326', 'https://www.thesaurus.com/browse/problem', 'https://en.wikipedia.org/wiki/Problem_solving', 'https://github.com/zalando/problem', 'https://www.nytimes.com/2024/06/22/well/the-vexing-problem-of-the-medium-friend.html', 'http://problemsolverscaucus.house.gov/', 'https://tonsky.me/blog/centering/', 'https://www.pacourts.us/judicial-administration/court-programs/problem-solving-courts', 'https://www.nytimes.com/2023/04/02/opinion/democrats-liberalism.html', 'https://www.newyorker.com/culture/the-weekend-essay/the-problem-of-nature-writing', 'https://meta.stac

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 71/96 - keyword: concluding paragraph, tfidf: 0.06628850923770016
All links:  ['https://www.merriam-webster.com/dictionary/solution', 'https://www.britannica.com/science/solution-chemistry', 'https://en.wikipedia.org/wiki/Solution_(chemistry)', 'https://www.thesaurus.com/browse/solution', 'https://en.wikipedia.org/wiki/Solution', 'https://dictionary.cambridge.org/dictionary/english/solution', 'https://www.dictionary.com/browse/solution', 'https://scaledagileframework.com/solution/', 'https://solutions.covestro.com/en', 'https://www.compass.state.pa.us/cwis/public/home', 'https://www.energy.gov/scep/slsc/state-and-local-solution-center', 'https://risk.lexisnexis.com/', 'https://www.solutiontree.com/', 'https://dis.usaid.gov/', 'https://learn.microsoft.com/en-us/power-apps/developer/data-platform/introduction-solutions', 'https://www.energy.gov/eere/i2x/i2x-solution-e-xchanges', 'https://insulinaffordability.lilly.com/lilly-diabetes-solution-center', 'https://learn.microsoft.co

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 77/96 - keyword: informing, tfidf: 0.050902137968242465
All links:  ['https://portal.ct.gov/about/connecticut-historical-facts', 'https://www.fmprc.gov.cn/eng/wjdt_665385/2649_665393/202203/t20220302_10647120.html', 'https://www.nps.gov/gosp/learn/historyculture/golden-spike-national-historical-park-facts.htm', 'https://injuryfacts.nsc.org/motor-vehicle/historical-fatality-trends/deaths-and-rates/', 'https://www.gilderlehrman.org/history-resources/teacher-resources/historical-context-facts-about-slave-trade-and-slavery', 'https://nmaahc.si.edu/explore/stories/historical-legacy-juneteenth', 'https://www.annapolis.gov/588/History-of-Annapolis', 'https://parade.com/1099930/marynliles/history-facts/', 'https://www.rockhurst.edu/about/historical-facts', 'https://www.graham.az.gov/356/Memories-Historical-Facts', 'https://www.sjrwmd.com/waterways/st-johns-river/facts/', 'https://sedgwickcounty.colorado.gov/historical-facts', 'https://www.cityofclarksburgwv.com/266/Historical-Facts-L

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 79/96 - keyword: specific examples, tfidf: 0.05404674298720191
All links:  ['https://steamcommunity.com/app/1546710/discussions/0/3829788562462171468/', 'https://www.reddit.com/r/RatchetAndClank/comments/o4f5te/stuck_on_starstu_debris_field_golden_bolt_cant_go/', 'https://steamcommunity.com/app/648800/discussions/0/3044979493851155933/', 'https://www.reddit.com/r/IdlePlanetMiner/comments/rpjm10/debris_field/', 'https://forums.frontier.co.uk/threads/where-and-how-to-find-a-debris-field.370780/', 'https://www.smokelong.com/losing-and-finding-in-the-debris-field-a-review-of-in-the-debris-field/', 'https://www.cloudynights.com/topic/201703-arp-78-a-collision-with-a-debris-field/', 'https://forums.frontier.co.uk/threads/debris-fields-is-there-a-point-to-them.116907/', 'https://apps.ecology.wa.gov/cleanupsearch/site/16669', 'https://twitter.com/DJSnM/status/1671921121493647361', 'https://worldbuilding.stackexchange.com/questions/128138/how-to-explain-the-presence-of-a-dense-debris-

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Failed to scrape https://forums.oracle.com/ords/apexds/post/smart-view-error-when-adding-supporting-details-in-planning-1125: HTTPSConnectionPool(host='forums.oracle.com', port=443): Read timed out. (read timeout=10)
Progress: 87/96 - keyword: supporting details, tfidf: 0.06092393086413102
All links:  ['https://www.merriam-webster.com/dictionary/description', 'https://support.google.com/merchants/answer/6324468?hl=en', 'https://schema.org/description', 'https://www.dictionary.com/browse/description', 'https://en.wikipedia.org/wiki/Description', 'https://www.fcc.gov/audio-description', 'https://www.dublincore.org/specifications/dublin-core/dcmi-terms/elements11/description/', 'https://developers.google.com/search/docs/appearance/snippet', 'https://catds.uflib.ufl.edu/', 'https://www.piercecountywa.gov/969/Parcel-Property-Information', 'https://dictionary.cambridge.org/dictionary/english/description', 'https://www.nws.noaa.gov/mdl/gfslamp/docs/LAMP_description.shtml', 'https://www.dla.mi

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 89/96 - keyword: procedure, tfidf: 0.05450451302692536
All links:  ['https://www.reddit.com/r/askscience/comments/uhlfvw/what_would_be_observed_by_two_objects_moving_at/', 'https://worldbuilding.stackexchange.com/questions/216479/could-an-object-traveling-at-a-significant-fraction-of-light-speed-create-a-blac', 'https://www.reddit.com/r/askscience/comments/l4r4c/why_would_an_object_traveling_at_the_speed_of/', 'https://www.chegg.com/homework-help/questions-and-answers/620g-object-traveling-21m-s-collides-head-320g-object-traveling-opposite-direction-38m-s-c-q5217406', 'https://www.wyzant.com/resources/answers/825418/maximum-mass-of-an-object', 'https://www.chegg.com/homework-help/questions-and-answers/200-kg-mass-object-traveling-east-200-m-s-collides-300-kg-mass-object-traveling-west-100-m-q3157066', 'https://forum.arduino.cc/t/detect-a-small-object-traveling-in-high-speed-using-a-photomicrosensor/895376', 'https://www.chegg.com/homework-help/questions-and-answers/23-kg-obje

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 96/96 - keyword: space agencies, tfidf: 0.10036974906122739
All links:  ['https://www.brewdog.com/usa/doghouse-columbus-hotel', 'http://www.doghouseorlando.com/', 'https://www.doghouserecords.com/', 'https://nuhuskies.com/sports/2012/8/15/GEN_0815120313.aspx', 'https://www.firestonegrill.com/locations/dog-house-grill-fresno/', 'https://www.facebook.com/Lifeinthedoghousewithmichele/', 'https://www.imdb.com/title/tt1023500/', 'https://www.doghousedennis.com/', 'http://thedoghousediaries.com/', 'https://doghousememphis.com/', 'https://www.marincounty.org/depts/cu/history/flw-doghouse', 'https://www.amazon.com/dog-dog-houses/b?ie=UTF8&node=2975402011', 'https://en.wikipedia.org/wiki/Doghouse', 'https://www.talkbass.com/threads/history-of-the-doghouse-name.543293/', 'https://www.merriam-webster.com/dictionary/doghouse', 'https://www.northernspeech.com/apraxia-cas/kaufman-apraxia-whats-in-your-doghouse/', 'https://doghouserescueacademy.org/', 'https://twitter.com/happy_doghouse?lan

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 19/90 - keyword: affect, tfidf: 0.061083314889787965
All links:  ['https://www.niaaa.nih.gov/alcohols-effects-health/alcohols-effects-body', 'https://www.merriam-webster.com/dictionary/affect', 'https://www.cff.org/intro-cf/about-cystic-fibrosis', 'https://www.nichd.nih.gov/about/org/od/directors_corner/prev_updates/digital-media-child-development-feb2023', 'https://epaa.asu.edu/index.php/epaa/article/view/3699', 'https://www.who.int/news-room/fact-sheets/detail/climate-change-and-health', 'https://www.touro.edu/departments/writing-center/tutorials/affect-or-effect/', 'https://www.nih.gov/news-events/nih-research-matters/understanding-how-exercise-affects-body', 'https://it.usembassy.gov/how-climate-change-affects-the-food-crisis/', 'https://www.nih.gov/news-events/news-releases/endurance-exercise-affects-all-tissues-body-even-those-not-normally-associated-movement', 'https://www.cdc.gov/mentalhealth/learn/index.htm', 'https://www.epa.gov/mobile-source-pollution/learn-about-h

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 20/90 - keyword: affects, tfidf: 0.015364669933989808
All links:  ['https://en.wikipedia.org/wiki/Summer', 'https://www.instagram.com/summerwalker/?hl=en', 'https://www.meetsummer.com/', 'https://www.fns.usda.gov/summer/sunbucks', 'https://www.youtube.com/channel/UCiM8OI8ySYAoealLy-YRCbg', 'https://www.fns.usda.gov/summer/sunmeals', 'http://www.summerrayne.net/', 'https://otda.ny.gov/programs/summer-ebt/Receiving-Summer-EBT.asp', 'https://www.newyorker.com/books/page-turner/summer-reading-2024', 'https://www.merriam-webster.com/dictionary/summer', 'https://www.schools.nyc.gov/enrollment/summer/summer-rising', 'https://www.mass.gov/massachusetts-summer-ebt-program', 'https://summerfridays.com/', 'https://nyc.gov/summerstreets', 'https://www.nba.com/summer-league/2024', 'https://www.regmovies.com/promotions/summer-movie-express', 'https://www.capcom-games.com/showcase/next/en-us/', 'https://www.oregon.gov/odhs/food/pages/sebt.aspx', 'https://www.bloomingtonmn.gov/pr/summer-fete

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 35/90 - keyword: reflects, tfidf: 0.04204296480443529
All links:  ['https://www.kickstarter.com/projects/ryangrepper/coolest-cooler-21st-century-cooler-thats-actually', 'https://www.amazon.com/stores/Coolest/Homepage/page/E2CAB10A-3FCC-47DB-893B-C5370074CE8D', 'https://freakonomics.com/podcast/how-to-make-the-coolest-show-on-broadway/', 'https://www.climbing.com/places/worlds-13-coolest-climbing-gyms/', 'https://www.reddit.com/r/NoMansSkyTheGame/comments/175lyg5/whats_the_coolest_thing_youve_done_or_come_across/', 'https://coolestprojects.org/', 'https://www.reddit.com/r/leagueoflegends/comments/6doiqt/who_has_the_coolest_ultimate/', 'https://community.hubspot.com/t5/128172-RevOps-Discussions/What-s-the-coolest-thing-you-ve-built-on-with-HubSpot/td-p/411830', 'https://www.youtube.com/@CoolestThing', 'https://www.instagram.com/coolest.vibe/?hl=en', 'https://www.thesaurus.com/browse/coolest', 'https://www.instagram.com/a.ttis/p/C1cjrNcLyhT/', 'https://thehardtimes.net/music/hel

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 39/90 - keyword: testen, tfidf: 0.05086959911942531
All links:  ['https://www.amazon.com/air-cooler/s?k=air+cooler', 'https://www.icybreeze.com/', 'https://www.reddit.com/r/AmazonVine/comments/1ay5aeu/air_coolers_vs_air_conditioners_know_this_before/', 'https://www.cpaptalk.com/viewtopic/t164622/Need-cooler-air.html', 'https://www.homedepot.com/b/Heating-Venting-Cooling-Evaporative-Coolers/N-5yc1vZc4lr', 'https://grassrootsmotorsports.com/forum/grm/air-vs-liquid-oil-cooler/57008/page1/', 'https://vansairforce.net/threads/trapped-air-in-oil-cooler.224543/', 'https://mechanics.stackexchange.com/questions/50483/does-cooler-air-improve-turbo-performance', 'https://rpmsxs.com/products/rpm-cooler-air-vent-kit-universal-utv-cab-cooling-kit', 'https://schroff.nvent.com/products/enc29714-016', 'https://www.klamathcounty.org/1286/Cleaner-cooler-air-spaces', 'https://schroff.nvent.com/products/enc29714-017', 'https://www.intel.com/content/www/us/en/gaming/resources/cpu-cooler-liquid-coo

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 41/90 - keyword: absorb more energy, tfidf: 0.041834822800820876
All links:  ['https://sunlightmtn.com/', 'https://sunlightfoundation.com/', 'https://www.the-sunlight-group.com/', 'https://sunlightfinancial.com/', 'https://zh.wikipedia.org/zh-cn/en:sunlight', 'https://sunlight.io/', 'http://sunlightcafevegetarian.com/', 'https://www.odenseye.se/products/solmane-ii-sunlight-love-blusher', 'https://www.facebook.com/outrunthesunlight/', 'https://www.healthline.com/health/depression/benefits-sunlight', 'https://sunlightsolar.com/', 'https://www.sunlightsports.com/', 'https://www.sunlightfcu.com/', 'https://www.britannica.com/science/sunlight-solar-radiation', 'https://www.cancer.gov/about-cancer/causes-prevention/risk/sunlight', 'https://www.webmd.com/a-to-z-guides/ss/slideshow-sunlight-health-effects', 'https://community.arlo.com/t5/Arlo-Pro-3/Does-solar-panel-need-direct-sunlight-to-charge/m-p/1814786', 'https://theyoungrens.com/photographers/shooting-workflow/how-to-create-bea

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 59/90 - keyword: absorb heat, tfidf: 0.0711148683043349
All links:  ['https://www.sciencelearn.org.nz/resources/48-reflection-of-light', 'https://www.olympus-lifescience.com/en/microscope-resource/primer/lightandcolor/reflectionintro/', 'https://www.reddit.com/r/askscience/comments/qtqut/why_do_animals_eyes_reflect_light_when_humans_dont/', 'https://blenderartists.org/t/how-do-you-make-an-emission-surface-also-reflect-light/1263292', 'https://www.reddit.com/r/PokemonVGC/comments/13ryz57/reflect_light_screen_duration_increase/', 'https://blender.stackexchange.com/questions/153131/blender-2-8-how-do-i-make-a-texture-reflect-light', 'https://forum.powerscore.com/viewtopic.php?t=1804', 'https://mercymultiplied.com/reflect-the-light-of-christ/', 'https://forum.keyshot.com/index.php?topic=28418.0', 'https://worldbuilding.stackexchange.com/questions/209175/would-a-moon-with-plantlife-still-give-off-reflect-light', 'https://byjus.com/physics/reflection-of-light/', 'https://austinkleo

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Progress: 77/90 - keyword: dark color absorb, tfidf: 0.03263006543331085
All links:  ['https://www.reddit.com/r/explainlikeimfive/comments/d9h4uv/elif_why_do_dark_colors_attract_more_heat/', 'https://www.terminix.com/mosquitoes/what-attracts-mosquitoes/are-mosquitoes-attracted-to-color/', 'https://www.reddit.com/r/INTP/comments/l2x1tx/are_intps_naturally_attracted_to_dark_colors_and/', 'https://www.jcehrlich.com/help-and-advice/blog/mosquitoes/mosquitoes-and-the-color-red', 'https://mosquitojoe.com/blog/what-colors-attract-mosquitoes/', 'https://mosquitonix.com/blogs/news/are-mosquitos-attracted-to-black', 'https://healthcare.utah.edu/huntsmancancerinstitute/news/2023/08/what-clothing-best-sun-protection', 'https://www.quora.com/Do-darker-colored-cats-attract-more-fleas-than-lighter-colored-cats', 'https://www.cnn.com/2016/04/25/health/bedbugs-dark-color-sheets/index.html', 'https://www.quora.com/Is-it-true-that-blonde-guys-are-attracted-to-dark-haired-girls-and-vice-versa', 'https://w

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Failed to scrape http://www.allpaintingandrestoration.com/are-your-exterior-house-painting-colors-attracting-pests-in-fountain-hills-az-dark-paint-colors-attract-mosquitoes-yellow-orange-bugs-etc/: HTTPConnectionPool(host='www.allpaintingandrestoration.com', port=80): Read timed out. (read timeout=10)
Progress: 78/90 - keyword: dark color attract, tfidf: 0.02418397994856201
All links:  ['https://www.reddit.com/r/k9sports/comments/uzwhdm/how_to_keep_dog_cool_when_leaving_him_in_the_car/', 'https://www.quora.com/What-are-good-ways-to-keep-my-dog-cool-at-home-in-summer-while-Im-away-at-work', 'https://www.reddit.com/r/puppy101/comments/1538nvl/how_do_you_keep_your_dog_cool/', 'https://outdoors.stackexchange.com/questions/9493/how-do-you-keep-your-dog-cool-while-hiking-on-hot-sunny-days', 'https://www.quora.com/The-air-conditioning-broke-and-my-dog-is-hot-How-do-I-cool-him-down-until-its-fixed', 'https://theocbarkyard.wordpress.com/2015/04/12/how-to-keep-your-dog-cool-and-comfortable-in-th

In [284]:
import json

# Define the file paths
weighted_keywords_file = "weighted_keywords.json"
all_articles_file = "all_articles.json"

# Dump weighted_keywords into a json file
with open(weighted_keywords_file, "w") as f:
    json.dump(weighted_keywords, f)

# Dump all_articles into a json file
with open(all_articles_file, "w") as f:
    json.dump(all_articles, f)


In [285]:
from utils.feature_extractor import FeatureExtractor

feature_extractor = FeatureExtractor(prompts, only_prompts, model, doc2vec_model, ngram_results, weighted_keywords)

train model

In [471]:
from numpy import hstack

# run this for everythting ... so i can just do it once, and then save it as a csv to re-load ... 
def add_features_to_df(df, feature_extractor, add_word2vec=False, add_doc2vec=False, add_pos=False, add_prompt_overlap=False, add_weighted_keywords=False, add_lexical_overlap=False, add_stylistic_features=False, add_logical_operators=False, add_temporal_features=False):
    if add_word2vec:
        df['word2vec_features'] = None
    if add_doc2vec:
        df['doc2vec_features'] = None
    if add_pos:
        df['pos_features'] = None
    if add_prompt_overlap:
        df['prompt_overlap_features'] = None
    if add_weighted_keywords:
        df['weighted_keywords_features'] = None
    if add_lexical_overlap:
        df['lexical_overlap_features'] = None
    if add_stylistic_features:
        df['stylistic_features'] = None
    if add_logical_operators:
        df['logical_operators_features'] = None
    if add_temporal_features:
        df['temporal_features'] = None

    total_items = len(df)
    print(f"Processing {total_items} items...")

    # Iterate over each row in the training DataFrame
    num = 0
    for index, row in df.iterrows():
        # Extract features using the feature_extractor object
        if add_word2vec:
            word2vec_features = feature_extractor.word2vec(row['EssayText'])
            df.at[index, 'word2vec_features'] = word2vec_features
        if add_doc2vec:
            doc2vec_features = feature_extractor.doc2vec(row['EssayText'])
            df.at[index, 'doc2vec_features'] = doc2vec_features
        if add_pos:
            pos_features = feature_extractor.pos(row['EssaySet'], row['EssayText'])
            df.at[index, 'pos_features'] = pos_features
        if add_prompt_overlap:
            prompt_overlap_features = feature_extractor.prompt_overlap(row['EssaySet'], row['EssayText'])
            df.at[index, 'prompt_overlap_features'] = prompt_overlap_features
        if add_weighted_keywords:
            weighted_keywords_features = feature_extractor.weighted_domain_specific_keywords(row['EssaySet'], row['EssayText'])
            df.at[index, 'weighted_keywords_features'] = weighted_keywords_features
        if add_lexical_overlap:
            lexical_overlap_features = feature_extractor.lexical_overlap(row['EssaySet'], row['EssayText'])
            df.at[index, 'lexical_overlap_features'] = lexical_overlap_features
        if add_stylistic_features:
            stylistic_features = feature_extractor.stylistic_features(row['EssayText'])
            df.at[index, 'stylistic_features'] = stylistic_features
        if add_logical_operators:
            logical_operators_features = feature_extractor.logical_operators(row['EssayText'])
            df.at[index, 'logical_operators_features'] = logical_operators_features
        if add_temporal_features:
            temporal_features = feature_extractor.temporal_features(row['EssayText'])
            df.at[index, 'temporal_features'] = temporal_features

        # Update after every 1000 items
        if (num) % 1000 == 0:
            print(f"Processed {num} items.")
        num += 1

    return df

In [24]:
highest_score = {1: 3, 2: 3, 3: 2, 4: 2, 5: 3, 6: 3, 7: 2, 8: 2, 9: 2, 10: 2}
for essay_set in range(1, 11):
    essay_set_df = df[df['EssaySet'] == essay_set].copy(deep=True)
    augment_dataset(essay_set_df, essay_set)

In [23]:
import random
def augment_dataset(essay_set_df, essay_set):
    # dataset augmentation
    additional_training_data = []
    for es in list(range(1, 11)) + ([essay_set]*10):
        es_df = df[df['EssaySet'] == es]
        highest_score_value = highest_score[es]
        highest_score_responses = es_df[(es_df['Score1'] == highest_score_value) & (es_df['Score2'] == highest_score_value) & (es_df['split'] == "train")]

        if len(highest_score_responses) > 0:
            sampled_response = highest_score_responses.sample(n=1, random_state=42)
            if es == essay_set:
                essay_text = sampled_response['EssayText'].values[0]
                jumbled_text = ' '.join(random.sample(essay_text.split(), len(essay_text.split())))
                sampled_response['EssayText'] = jumbled_text
            sampled_response['EssaySet'] = essay_set
            sampled_response['CorrectedSpellingEssayText'] = sampled_response['EssayText']
            sampled_response['Score1'] = 0
            sampled_response['Score2'] = 0
            sampled_response['split_custom'] = sampled_response['split']
            additional_training_data.append(sampled_response)

    pd.concat(additional_training_data, ignore_index=True).to_csv(f'augmented_training_data/augmented_training_data_{essay_set}.csv', index=False)

    return pd.concat([essay_set_df] + additional_training_data, ignore_index=True)


In [503]:
# Save the updated DataFrame to a CSV file
import os
from datetime import datetime

augment_dataset(essay_set_df, essay_set)

# Execute the function to add features to the DataFrame and display the updated DataFrame
add_features_to_df(essay_set_df, feature_extractor, add_word2vec=True, add_doc2vec=True, add_pos=True, add_prompt_overlap=True, add_weighted_keywords=True, add_lexical_overlap=True, add_stylistic_features=True, add_logical_operators=True, add_temporal_features=True)

# Create the folder if it doesn't exist
if not os.path.exists('updated_features'):
    os.makedirs('updated_features')

# Get the current timestamp
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

# Save the updated DataFrame to a CSV file with the timestamp
essay_set_df.to_csv(f'updated_features/updated_features_{timestamp}.csv', index=False)


Processing 3452 items...
Processed 0 items.
Processed 1000 items.
Processed 2000 items.
Processed 3000 items.


In [510]:
# Save the updated DataFrame to a CSV file
import os
from datetime import datetime

# Initialize a dictionary to store DataFrames for each essay set
essay_set_dfs = {}

# Loop through essay sets from 1 to 10
for essay_set in range(1, 11):
    essay_set_df = df[df['EssaySet'] == essay_set].copy(deep=True)

    # Dataset Augmentation
    augment_dataset(essay_set_df, essay_set)

    # Execute the function to add features to the DataFrame
    add_features_to_df(essay_set_df, feature_extractor, add_word2vec=True, add_doc2vec=True, add_pos=True, add_prompt_overlap=True, add_weighted_keywords=True, add_lexical_overlap=True, add_stylistic_features=True, add_logical_operators=True, add_temporal_features=True)
    
    # Store the updated DataFrame in the dictionary
    essay_set_dfs[essay_set] = essay_set_df
    
    # Create the folder if it doesn't exist
    if not os.path.exists('updated_features'):
        os.makedirs('updated_features')
    
    # Get the current timestamp
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    
    # Save the updated DataFrame to a CSV file with the timestamp
    essay_set_df.to_csv(f'updated_features/updated_features_{essay_set}_{timestamp}.csv', index=False)

Processing 2787 items...
Processed 0 items.
Processed 1000 items.
Processed 2000 items.
Processing 2130 items...
Processed 0 items.
Processed 1000 items.
Processed 2000 items.
Processing 3152 items...
Processed 0 items.
Processed 1000 items.
Processed 2000 items.
Processed 3000 items.
Processing 2897 items...
Processed 0 items.
Processed 1000 items.
Processed 2000 items.
Processing 2992 items...
Processed 0 items.
Processed 1000 items.
Processed 2000 items.
Processing 2995 items...
Processed 0 items.
Processed 1000 items.
Processed 2000 items.
Processing 2999 items...
Processed 0 items.
Processed 1000 items.
Processed 2000 items.
Processing 2999 items...
Processed 0 items.
Processed 1000 items.
Processed 2000 items.
Processing 2997 items...
Processed 0 items.
Processed 1000 items.
Processed 2000 items.
Processing 2734 items...
Processed 0 items.
Processed 1000 items.
Processed 2000 items.


In [511]:
import pickle

# Save the dictionary of DataFrames to a pickle file
with open(f'essay_set_dfs/essay_set_dfs_{timestamp}.pkl', 'wb') as f:
    pickle.dump(essay_set_dfs, f)

In [512]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import cohen_kappa_score, accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.ensemble import RandomForestClassifier
import pandas as pd

# features = ['pos_features', 'prompt_overlap_features', 'weighted_keywords_features', 'lexical_overlap_features', 'stylistic_features', 'logical_operators_features', 'temporal_features']
def test_model(essay_set, essay_set_dfs, features, verbose=False):
    essay_set_df = essay_set_dfs[essay_set]

    # prepare data set 
    scaler = MinMaxScaler(feature_range=(0, 1))

    # generate training data 
    training_data = essay_set_df[(essay_set_df['DataSet'] == 'Train')].copy(deep=True)
    training_data = training_data.dropna(subset=['word2vec_features', 'doc2vec_features'])
    X_train = np.vstack(training_data[features].apply(lambda x: np.hstack(x), axis=1))
    X_train = scaler.fit_transform(X_train)
    y_train = training_data['Score1']

    num_features = X_train.shape[1]
    print(f"Number of features in X_train: {num_features}")

    # Define hyperparameter ranges
    max_depths = [50, 75, 100, 150, 200]
    n_estimators_list = [50, 75, 100, 150, 200]

    # Initialize variables to store the best model and its score
    best_kappa_score = -1
    best_params = None
    best_clf = None

    # Initialize a list to store results
    results = []

    # Iterate over all combinations of hyperparameters
    for max_depth in max_depths:
        for n_estimators in n_estimators_list:
            # Train model
            clf = RandomForestClassifier(max_depth=max_depth, n_estimators=n_estimators)
            clf.fit(X_train, y_train)
            
            # Validate model
            val_set = essay_set_df[(essay_set_df['DataSet'] == 'Priva')].copy(deep=True)
            val_set = val_set.dropna(subset=['word2vec_features', 'doc2vec_features'])
            
            X_val = np.vstack(val_set[features].apply(lambda x: np.hstack(x), axis=1).values)
            X_val = scaler.transform(X_val)
            y_val = val_set['Score1']
            
            y_pred = clf.predict(X_val)
            
            # Calculate evaluation metrics
            conf_matrix = confusion_matrix(y_val, y_pred)
            kappa_score = cohen_kappa_score(y_val, y_pred, weights='quadratic', sample_weight=None)
            accuracy = accuracy_score(y_val, y_pred)
            
            # Print the results for the current hyperparameters
            if verbose:
              print(f"Max Depth: {max_depth}, N Estimators: {n_estimators}")
              print(f"Cohen Kappa Score (Weighted): {kappa_score}")
              print(f"Accuracy: {accuracy}")
            
            # Append the results to the list
            results.append({
                'max_depth': max_depth,
                'n_estimators': n_estimators,
                'qwk': kappa_score,
                'accuracy': accuracy
            })
            
            # Update the best model if the current one is better
            if kappa_score > best_kappa_score:
                best_kappa_score = kappa_score
                best_params = (max_depth, n_estimators)
                best_clf = clf

    # Print the best hyperparameters and their score
    print(f"Essay Set: {essay_set}")
    print(f"Best Hyperparameters: Max Depth: {best_params[0]}, N Estimators: {best_params[1]}")
    print(f"Best Cohen Kappa Score (Weighted): {best_kappa_score}")

    # Convert results to DataFrame
    results_df = pd.DataFrame(results)

    # Get the current timestamp
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

    # Save the results to a CSV file
    results_df.to_csv(f'{essay_set}_results_{timestamp}_{"_".join(features)}.csv', index=False)

    return best_clf


In [513]:
features = ['pos_features', 'prompt_overlap_features', 'weighted_keywords_features', 'lexical_overlap_features', 'stylistic_features', 'logical_operators_features', 'temporal_features']

best_models = {}
for essay_set in range(1, 11):
    best_models[essay_set] = test_model(essay_set, essay_set_dfs, features)

Number of features in X_train: 593
Essay Set: 1
Best Hyperparameters: Max Depth: 75, N Estimators: 150
Best Cohen Kappa Score (Weighted): 0.702013385848459
Number of features in X_train: 542
Essay Set: 2
Best Hyperparameters: Max Depth: 100, N Estimators: 200
Best Cohen Kappa Score (Weighted): 0.6251395070526697
Number of features in X_train: 540
Essay Set: 3
Best Hyperparameters: Max Depth: 200, N Estimators: 50
Best Cohen Kappa Score (Weighted): 0.2622528311870077
Number of features in X_train: 210
Essay Set: 4
Best Hyperparameters: Max Depth: 100, N Estimators: 150
Best Cohen Kappa Score (Weighted): 0.6464078034139936
Number of features in X_train: 119
Essay Set: 5
Best Hyperparameters: Max Depth: 50, N Estimators: 75
Best Cohen Kappa Score (Weighted): 0.7046103056902663
Number of features in X_train: 147
Essay Set: 6
Best Hyperparameters: Max Depth: 100, N Estimators: 75
Best Cohen Kappa Score (Weighted): 0.6029569403151103
Number of features in X_train: 479
Essay Set: 7
Best Hyper

In [514]:
features = ['word2vec_features', 'doc2vec_features', 'pos_features', 'prompt_overlap_features', 'weighted_keywords_features', 'lexical_overlap_features', 'stylistic_features', 'logical_operators_features', 'temporal_features']

best_models = {}
for essay_set in range(1, 11):
    best_models[essay_set] = test_model(essay_set, essay_set_dfs, features)

Number of features in X_train: 1193
Essay Set: 1
Best Hyperparameters: Max Depth: 200, N Estimators: 150
Best Cohen Kappa Score (Weighted): 0.7161071764278867
Number of features in X_train: 1142
Essay Set: 2
Best Hyperparameters: Max Depth: 75, N Estimators: 150
Best Cohen Kappa Score (Weighted): 0.5529111649007871
Number of features in X_train: 1140
Essay Set: 3
Best Hyperparameters: Max Depth: 200, N Estimators: 150
Best Cohen Kappa Score (Weighted): 0.2281011000019264
Number of features in X_train: 810
Essay Set: 4
Best Hyperparameters: Max Depth: 100, N Estimators: 75
Best Cohen Kappa Score (Weighted): 0.6232305604033352
Number of features in X_train: 719
Essay Set: 5
Best Hyperparameters: Max Depth: 50, N Estimators: 75
Best Cohen Kappa Score (Weighted): 0.6386815044372447
Number of features in X_train: 747
Essay Set: 6
Best Hyperparameters: Max Depth: 50, N Estimators: 75
Best Cohen Kappa Score (Weighted): 0.5983764319855682
Number of features in X_train: 1079
Essay Set: 7
Best H