In [None]:
import os
import re
import sys
from urllib.parse import urlsplit

import requests
from bs4 import BeautifulSoup

### Function to Get HTML of the Medium Article
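The `get_page` function prompts the user to enter a Medium article URL and stores it in the global variable `url`, which the later functions read. It validates the URL, fetches the article's HTML with the `requests` library, and returns it parsed as a `BeautifulSoup` object.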


In [None]:
def get_page():
    """Prompt for a Medium article URL, download it, and return the parsed HTML.

    The entered URL is stored in the module-level ``url`` variable because
    ``collect_text`` and ``save_file`` read it from there.

    Returns:
        BeautifulSoup: the downloaded page parsed with ``html.parser``.

    Raises:
        SystemExit: with code 1 if the URL is not an http(s) URL on
            ``medium.com`` or one of its subdomains.
        requests.HTTPError: if the server responds with an error status.
        requests.Timeout: if the server does not answer within the timeout.
    """
    global url

    # Pasted URLs often carry stray whitespace, which would fail validation.
    url = input("Enter url of a medium article: ").strip()

    # The dots are escaped so that e.g. "mediumXcom" is rejected, and an
    # optional chain of subdomains is allowed (author pages live on
    # "<name>.medium.com").
    if not re.match(r'https?://([\w-]+\.)*medium\.com/', url):
        print('Please enter a valid website, or make sure it is a medium article')
        sys.exit(1)

    # Without a timeout, requests waits forever on an unresponsive server.
    res = requests.get(url, timeout=30)
    res.raise_for_status()  # raises an error for invalid HTTP status
    return BeautifulSoup(res.text, 'html.parser')

### Function to Clean the Extracted HTML Text
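The `clean` function first replaces line-breaking tags (`<br>`, `<br/>`, and `<li>`) with newline characters and then strips all remaining HTML tags from the text it is given. It is a standalone helper: the run cell at the end of this notebook does not call it.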


In [None]:
def clean(text):
    """Convert an HTML string to plain text.

    Line-breaking tags (``<br>``, ``<br/>``, ``<br />`` and ``<li>``) are
    turned into newline characters first; every remaining tag is then removed.

    Args:
        text (str): HTML markup.

    Returns:
        str: ``text`` with the tags above replaced by ``"\\n"`` and all other
        ``<...>`` tags (on a single line) removed.
    """
    replacements = {"<br>": "\n", "<br/>": "\n", "<br />": "\n", "<li>": "\n"}
    # Escape the keys only for building the pattern; the match itself is the
    # literal tag, so it can be looked up in the dict directly.
    pattern = re.compile("|".join(re.escape(tag) for tag in replacements))
    text = pattern.sub(lambda m: replacements[m.group(0)], text)
    # Raw string: "\<" and "\>" are invalid escapes in a normal string literal.
    return re.sub(r'<.*?>', '', text)

### Function to Collect Text from the Article
The `collect_text` function finds every paragraph (`<p>` tag) in the parsed Medium article and joins their text, each followed by a blank line, into a single string that starts with the article URL.


In [None]:
def collect_text(soup):
    """Build the plain-text version of an article from its parsed HTML.

    Args:
        soup: parsed page exposing ``find_all``; every ``<p>`` element found
            contributes its ``.text``.

    Returns:
        str: a ``url: ...`` header line (taken from the global ``url``)
        followed by each paragraph's text, with a blank line after each.
    """
    header = f'url: {url}\n\n'
    paragraphs = soup.find_all('p')
    print(f"paragraphs text = \n {paragraphs}")
    body = "".join(f"{paragraph.text}\n\n" for paragraph in paragraphs)
    return header + body

### Function to Save the Extracted Text to a File
The `save_file` function saves the scraped content into a text file in a directory named `scraped_articles`.


In [None]:
def save_file(text):
    """Write the scraped article text to ``scraped_articles/<name>.txt``.

    ``<name>`` is the last path segment of the global ``url``. The query
    string, fragment and any trailing slash are ignored so that they neither
    produce an empty name nor put characters such as ``?`` into the file name.
    If the URL has no path segment at all, ``article`` is used.

    Args:
        text (str): content to write, encoded as UTF-8.
    """
    out_dir = 'scraped_articles'
    # exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
    os.makedirs(out_dir, exist_ok=True)

    name = urlsplit(url).path.rstrip('/').split('/')[-1] or 'article'
    print(name)
    fname = f'{out_dir}/{name}.txt'

    with open(fname, 'w', encoding='utf-8') as file:
        file.write(text)

    print(f'File saved in directory {fname}')


### Instructions to Run the Script
To run the script, execute the code in the following cell, and enter a valid Medium article URL when prompted.
- Example URL: `https://medium.com/@subashgandyer/papa-what-is-a-neural-network-c5e5cc427c7`


In [None]:
if __name__ == '__main__':
    # Fetch and parse the article, flatten it to text, then write it to disk.
    soup = get_page()
    text = collect_text(soup)
    save_file(text)

### Conclusion
This notebook demonstrates how to scrape a Medium article and save the extracted text to a file. The extracted content is stored in a directory called `scraped_articles`.
