### Create a medical terms dictionary from https://www.medicinenet.com/

In [None]:
%pip install selenium
%pip install tqdm

#### Import necessary libraries

In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import re
from tqdm import tqdm
from selenium.common.exceptions import TimeoutException

from textblob import TextBlob

# Dimensions in pixels — presumably the intended browser window size; not used
# in the visible cells, TODO confirm where they are consumed.
width, height = 1440, 990

#### Open the Chrome web browser

In [2]:
# Start a Chrome session with an unmodified ChromeOptions object.
# `driver` is the handle the scraping cell below uses to load pages and query elements.
options = webdriver.ChromeOptions()
driver = webdriver.Chrome(options=options)

#### Web-scrape medical terms to build the glossary

In [3]:

# Scrape the MedicineNet dictionary index, one page per letter, into
# med_words: {letter: [term text, ...]}.
base_url = 'https://www.medicinenet.com/script/main/alphaidx.asp?p='
alphabets = 'abcdefghijklmnopqrstuvwxyz'
med_words = {}

# The page-load timeout does not change between pages, so set it once up front
# instead of on every iteration.
driver.set_page_load_timeout(10)

try:
    for letter in tqdm(alphabets):
        curr_url = base_url + letter + "_dict"

        try:
            driver.get(curr_url)
        except TimeoutException:
            # The page did not finish loading within the timeout: stop loading
            # and scrape whatever is already present in the DOM.
            driver.execute_script("window.stop();")

        elements = driver.find_elements(By.XPATH, '//div[@class="AZ_results"]/ul/li/a')
        med_words[letter] = [element.text for element in elements]
finally:
    # Close the browser even when a page raises an unexpected error, so a
    # failed run does not leave an orphaned Chrome process behind.
    driver.quit()


  0%|          | 0/26 [00:00<?, ?it/s]

100%|██████████| 26/26 [02:52<00:00,  6.62s/it]


In [15]:
# Total number of scraped terms across all letters.
sum(map(len, med_words.values()))

4205

In [16]:
# Display the full letter -> terms mapping.
# NOTE(review): this renders every scraped term; consider previewing a slice
# (e.g. the first few terms per letter) to keep the notebook output small.
med_words

{'a': ['A (adenine)',
  'A-',
  'A-T',
  'a.c.',
  'AAA',
  'AAAS',
  'AAD',
  'AAO',
  'AAP',
  'Aarskog syndrome',
  'Aarskog-Scott syndrome',
  'Aase-Smith syndrome I',
  'Aase-Smith syndrome II',
  'Ab-',
  'Abate',
  'Abatement',
  'Abdomen',
  'Abdomen, acute',
  'Abdominal',
  'Abdominal aorta',
  'Abdominal aortic aneurysm',
  'Abdominal cavity',
  'Abdominal guarding',
  'Abdominal hysterectomy',
  'Abdominal pain',
  'Abducent nerve',
  'Abduction',
  'Abductor muscle',
  'Aberration',
  'Abiotic',
  'Abiotrophy',
  'Ablate',
  'Ablation',
  'Abnormal',
  'Abortifacient',
  'Abortion',
  'Abortive',
  'Abortive polio',
  'ABR test',
  'Abrade',
  'Abrasion',
  'Abs',
  'Abscess',
  'Abscission',
  'Abse',
  'Absence of the breast',
  'Absence of the nipple',
  'Absence seizure',
  'Absent eye',
  'Absinthism',
  'Absolute neutrophil count',
  'Absorb',
  'Absorption',
  'Abstinence',
  'AC joint',
  'Acanthamoeba',
  'Acapnia',
  'Acaricide',
  'ACC',
  'Access',
  'Accessory

#### Write the terms to a file

In [11]:
# Write every scraped term to med_terms.txt, one term per line.
# An existing file is left untouched, so a previously saved glossary is never
# overwritten by a re-run.
file_path = 'med_terms.txt'
try:
    # Mode 'x' creates the file and raises FileExistsError if it is already
    # there, so no separate "open for reading" existence probe is needed.
    # The encoding is explicit so non-ASCII terms are written identically on
    # every platform instead of depending on the locale default.
    with open(file_path, 'x', encoding='utf-8') as file:
        file.writelines(
            term + '\n'
            for terms in med_words.values()
            for term in terms
        )
except FileExistsError:
    # Glossary already saved by an earlier run; nothing to do.
    pass

#### Implement spell check

Sample Usage

In [23]:
# Wrap a deliberately misspelled sentence in a TextBlob; the trailing bare `tb`
# displays the blob, and the next cell calls `tb.correct()` on it.
text = "Why can't yu spel corrctly?"
tb = TextBlob(text)
tb

TextBlob("Why can't yu spel corrctly?")

In [21]:
# Print the result of TextBlob's spelling correction for the sample sentence.
print(tb.correct())

Why can't you spell correctly?


In [26]:
# Noisy sample text used as input for the tokenising / spell-check steps below
# (looks like OCR output of a handwritten note — TODO confirm the source).
# A raw string keeps the pasted text verbatim: in a normal string `\d` is an
# invalid escape sequence (a warning on modern Python) and a backslash at the
# end of a line is silently swallowed as a line continuation.
txt = r'''
1 yeas olf old i patent paseo ‘Vinee >

A reliable ioral; Sa at Chomapape

with the clu cowplaints Bn
ita . ‘ ~~ fog

tle ipsa ae pl se

Geen)

J2aes Sawiduews Ww onset Set | cal «
Dusation > aday of

p” OAV! Ay

Qi vamal vanation : ue |

No Aq4: oy S —_
Rokoving “ae wudieation: N 4
Jupe of five: High grade .
oneewa t
Nl ells & vig" :
IN

ee i 4
Auaation —Stne \d days |
i, ue k weak anoticed.
No expe to onlvonn Kamp

ie
No a Hficourt itand sid

Nofomieel Wee

————————————

Kc
\
\
'''

In [29]:
# Split the noisy text into runs of ASCII letters/digits, discarding
# punctuation and other symbols.
# The cell rebinds `txt` from a string to a list of tokens. The isinstance guard
# makes it safe to re-run: calling re.findall on the already-tokenised list
# would otherwise raise TypeError.
if isinstance(txt, str):
    txt = re.findall(r'[a-zA-Z0-9]+', txt)
txt

['1',
 'yeas',
 'olf',
 'old',
 'i',
 'patent',
 'paseo',
 'Vinee',
 'A',
 'reliable',
 'ioral',
 'Sa',
 'at',
 'Chomapape',
 'with',
 'the',
 'clu',
 'cowplaints',
 'Bn',
 'ita',
 'fog',
 'tle',
 'ipsa',
 'ae',
 'pl',
 'se',
 'Geen',
 'J2aes',
 'Sawiduews',
 'Ww',
 'onset',
 'Set',
 'cal',
 'Dusation',
 'aday',
 'of',
 'p',
 'OAV',
 'Ay',
 'Qi',
 'vamal',
 'vanation',
 'ue',
 'No',
 'Aq4',
 'oy',
 'S',
 'Rokoving',
 'ae',
 'wudieation',
 'N',
 '4',
 'Jupe',
 'of',
 'five',
 'High',
 'grade',
 'oneewa',
 't',
 'Nl',
 'ells',
 'vig',
 'IN',
 'ee',
 'i',
 '4',
 'Auaation',
 'Stne',
 'd',
 'days',
 'i',
 'ue',
 'k',
 'weak',
 'anoticed',
 'No',
 'expe',
 'to',
 'onlvonn',
 'Kamp',
 'ie',
 'No',
 'a',
 'Hficourt',
 'itand',
 'sid',
 'Nofomieel',
 'Wee',
 'Kc']