In [1]:
import requests
from bs4 import BeautifulSoup

# URL of the blog article you want to scrape
url = 'https://medium.com/@raghavrastogi75/cons-overwhelming-the-pros-to-make-a-decision-consider-the-cost-of-inaction-28b14aad3120'

# Specify the path to the directory where you want to save the file
# Make sure this directory exists on your system
directory_path = './text_extract/'

# Specify the filename
filename = 'article_content.txt'

# Combine the directory path and filename
full_path = directory_path + filename

# Send a GET request to the article's URL
response = requests.get(url)

# Check if the request was successful
if response.status_code == 200:
    # Parse the HTML content of the page
    soup = BeautifulSoup(response.text, 'html.parser')
    
    # Find all paragraph tags. You might need to adjust this selector
    # depending on the structure of the webpage.
    paragraphs = soup.find_all('p')
    
    # Extract the text from each paragraph and join them into one string
    article_content = ' '.join(paragraph.text for paragraph in paragraphs)
    
    # Split the content into words and limit to the first 1024 words
    words = article_content.split()[:850]
    
    # Join the limited words back into a string
    limited_content = ' '.join(words)
    
    # Open the file at the specified path in write mode ('w') and write the content
    with open(full_path, 'w', encoding='utf-8') as file:
        file.write(limited_content)
        
    print(f'Article content with up to 1024 words saved to {full_path}')
else:
    print(f'Failed to retrieve the webpage. Status code: {response.status_code}')


Article content with up to 1024 words saved to ./text_extract/article_content.txt


In [5]:
from transformers import pipeline

# Initialize the summarization pipeline
summarizer = pipeline("summarization")

# Function to read text from a file
def read_text_from_file(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        text = file.read()
    return text

# Function to write summary to a file
def write_summary_to_file(summary_text, file_path="summary.txt"):
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(summary_text)

# Path to your text file
file_path = './text_extract/article_content.txt'

# Read the text from the file
text = read_text_from_file(file_path)

# Generate summary. You may need to adjust the max_length and min_length according to your needs
summary = summarizer(text, max_length=300, min_length=200, do_sample=False)

# Print the summary
print(summary[0]['summary_text'])

# Optional: Write the summary to a file
write_summary_to_file(summary[0]['summary_text'], "./summary/summary_output.txt")


No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 and revision a4f8f3e (https://huggingface.co/sshleifer/distilbart-cnn-12-6).
Using a pipeline without specifying a model name and revision in production is not recommended.


 There is a segment of people who is a firm believer in intuition . But need we to understand when is it appropriate to decide rationally and when on intuition completely? Intuition works best when you have already made a decision on a similar thing earlier and are faced with another decision . When we are trying to decide based on consequences of our action, things like how would it affect my life if I go abroad, will my life improve eventually if I leave the current job, will it backfire, will I regret this decision, etc., we are only focused on one aspect and missing out another huge factor from this equation . We must rely on our rational capabilities to make a rational decision in these situations . The cost of inaction in many situations would be even more enticing despite the risks of taking a big risk . You are not asking everyone to take a ‘calculated one decision, but a 'Calculated one’ You are the best judge for it for it’s the best


In [8]:
from transformers import pipeline

def generate_image_prompt(text_summary, model_name='EleutherAI/gpt-neo-2.7B', max_length=100, max_new_tokens=None):
    """
    Generates an image prompt from a text summary using a specified transformer model.

    Parameters:
    - text_summary (str): The text summary to generate an image prompt from.
    - model_name (str): Identifier for the pre-trained model on Hugging Face.
    - max_length (int): Maximum total length of the input and output (deprecated if max_new_tokens is set).
    - max_new_tokens (int): Maximum length of the generated tokens.

    Returns:
    - str: The generated image prompt.
    """
    # Initialize the text generation pipeline with the specified model
    generator = pipeline('text-generation', model=model_name, tokenizer=model_name)

    # Generate text based on the summary
    generation_kwargs = {
        "max_length": max_length,
        "num_return_sequences": 1
    }
    if max_new_tokens is not None:
        # If max_new_tokens is specified, use it and remove max_length from kwargs
        generation_kwargs["max_new_tokens"] = max_new_tokens
        del generation_kwargs["max_length"]

    prompt = generator(f"Create a vivid image description based on the following summary: {text_summary}",
                       **generation_kwargs)

    return prompt[0]['generated_text']


# Path to your text file
file_path = './summary/summary_output.txt'

# Read the text from the file
text = read_text_from_file(file_path)

# Generate the image prompt
prompt = generate_image_prompt(text, max_new_tokens=50)  # Example: limiting generation to 50 new tokens
print("Generated Image Prompt:", prompt)

: 