In [1]:
# Awesome Text Only News
# https://github.com/localjo/awesome-text-only-news?tab=readme-ov-file

import requests
from bs4 import BeautifulSoup
import pandas as pd
# import sleep
import time

pd.set_option('display.max_colwidth', None)  # This ensures full text is shown

In [2]:
# Step 1: Fetch the webpage
base_url = "https://lite.cnn.com/"
response = requests.get(base_url)

print(response.status_code)

200


In [3]:
# Step 2: Parse the HTML
soup = BeautifulSoup(response.text, 'html.parser')

In [4]:
def get_article_content(link):
    try:
        article_response = requests.get(link)
        article_soup = BeautifulSoup(article_response.text, 'html.parser')
        
        # This assumes the main article content is in <p> tags, adjust if necessary
        paragraphs = article_soup.find_all('p')
        content = ' '.join([p.get_text() for p in paragraphs])
        return content.strip()
    except Exception as e:
        return f"Error fetching content: {e}"

In [5]:
# Step 3: Extract headlines and links
data = []
for headline in soup.find_all('a'):
    title = headline.get_text().strip()
    link = headline['href']
    # Construct full URL
    
    # Check if link is a relative URL, if so, prepend the base URL
    if link.startswith('/'):
        full_link = base_url + link
    elif link.startswith('http'):  # It's already a full link
        full_link = link
    else:
        # Handle unexpected cases
        full_link = base_url + '/' + link

    # Fetch the article content
    content = get_article_content(full_link)
    
    # Append title, link, and content to the data list
    data.append({"Title": title, "Link": full_link, "Content": content})
    time.sleep(0.2)  # Be nice to the server

# Convert to DataFrame
df = pd.DataFrame(data)

In [6]:
# Step 4: Print first 5 article titles and links for debugging
df.head(5)

Unnamed: 0,Title,Link,Content
0,CNN,https://lite.cnn.com//,Latest Stories \n\n Go to the full CNN experience\n \n \n © 2024 Cable News Network. A Warner Bros. Discovery Company. All Rights Reserved.\n \n\n Terms of Use\n \n\n |\n \n\n Privacy Policy\n \n\n |\n \n\n Ad Choices\n \n\n |\n \n\n Cookie Settings
1,African health officials hopeful mpox shots will start within days as WHO says outbreak is ‘not the new Covid’,https://lite.cnn.com//2024/08/20/africa/argentina-quarantines-mpox-ship-intl-latam/index.html,"By Alex Stambaugh, Larry Madowo and Kathleen Magramo, CNN\n \nUpdated: \n 8:21 PM EDT, Wed August 21, 2024\n \n Source: CNN\n \n African nations could begin vaccinations against mpox within days, according to the continent’s top public health agency, as a World Health Organization official said the spread of a deadlier strain of the virus could be controlled and “was not the new Covid.”\n \n The Democratic Republic of Congo (DRC) is at the epicenter of an mpox outbreak declared a global health emergency last week by WHO, with the deadlier clade Ib strain that is spreading quickly in the country detected in at least four other African nations.\n \n “We didn’t start vaccinations yet. We’ll start in a few days if we are sure that everything is in place. End of next week, vaccines will start to arrive in DRC and other countries,” Africa CDC Director General Jean Kaseya told a briefing on Tuesday.\n \n The viral disease, formerly known as monkeypox, can spread easily between people and from infected animals through close contact such as touching, kissing or sex, as well as through contaminated materials like sheets, clothing and needles, according to WHO. Symptoms include a fever, a painful rash, headache, muscle and back pain, low energy and enlarged lymph nodes.\n \n Around 1,400 mpox infections have been reported across Africa over the past week, bringing the total number of cases on the continent to nearly 19,000 since the start of the year – up more than 100% on the same period last year, according to the Africa CDC. The latest outbreak has killed more than 500 people, the agency’s latest available data shows.\n \n That’s prompted a scramble for vaccines as health officials in Africa work with overseas partners to meet a massive shortfall of doses.\n \n “We need to have vaccines,” Kaseya told NPR last week. “Today, we are just talking about almost 200,000 doses (becoming) available. We need at least 10 million doses. The vaccine is so expensive — we can put it around $100 per dose. There are not so many countries in Africa that can afford the cost of this vaccine.”\n \n The European Union and Danish vaccine maker Bavarian Nordic have so far pledged support, the European Commission said last week. Japan and the United States have also offered doses, Reuters reported, citing the DRC’s health minister.\n \n WHO’s declaration of a global health emergency is the second time in two years that the United Nations health agency has raised the alarm over the spread of mpox, which for decades had been found largely in central and western Africa.\n \n Mpox is characterized by two genetic clades, I and II. A clade is a broad grouping of viruses that has evolved over decades that has distinct genetic and clinical differences.\n \n Clade II was responsible for a global outbreak that was also declared to be a global health emergency from July 2022 to May 2023. But the new outbreak is driven by clade I, which causes more severe disease. The subtype that’s responsible for most of the ongoing spread, clade Ib, is relatively new.\n \n Last week, the first clade lb case outside Africa was confirmed in Sweden in a patient who had recently traveled to the continent.\n \n But with nations worldwide on high alert for the virus, a WHO official on Tuesday played down fears of a new pandemic as he called for a coordinated response to the outbreak.\n \n “Mpox is not the new Covid,” WHO Europe Director Hans Kluge told a press briefing.\n \n While more research is needed on the clade Ib strain, its spread can be controlled, he said.\n \n “We know how to control mpox. And, in the European region, the steps needed to eliminate its transmission altogether,” Kluge said.\n \n “The need for a coordinated response is now greatest in the African region,” he said. “We can, and must, tackle mpox together – across regions and continents.”\n \n Kluge’s comments came as the Philippines and Thailand reported cases of mpox in travelers who had been to Africa. Meanwhile, Argentina’s health ministry said Wednesday that tests carried out on a crew member of a cargo ship that was placed in quarantine were negative for mpox.\n \nThis story has been updated with additional information.\n See Full Web Article \n\n Go to the full CNN experience\n \n \n © 2024 Cable News Network. A Warner Bros. Discovery Company. All Rights Reserved.\n \n\n Terms of Use\n \n\n |\n \n\n Privacy Policy\n \n\n |\n \n\n Ad Choices\n \n\n |\n \n\n Cookie Settings"
2,Oil giant Halliburton hit by apparent cyberattack disrupting systems,https://lite.cnn.com//2024/08/21/business/oil-giant-halliburton-apparent-cyberattack/index.html,"By Matt Egan, CNN\n \nUpdated: \n 8:12 PM EDT, Wed August 21, 2024\n \n Source: CNN\n \n Oil giant Halliburton said Wednesday it’s grappling with a computer system issue reportedly linked to a cyberattack.\n \n A person familiar with the matter told Reuters that Halliburton was hit by a cyberattack that appears to be impacting business operations at the company’s Houston campus and some global networks.\n \n Halliburton would neither confirm nor deny to CNN that it is the victim of a cyberattack, but the oilfield services firm did acknowledge an unspecified “issue.”\n \n “We are aware of an issue affecting certain company systems and are working diligently to assess the cause and potential impact,” a Halliburton spokesperson said in a statement. “We have activated our pre-planned response plan and are working internally and with leading experts to remediate the issue.”\n \n Halliburton declined to describe the nature of the incident.\n \n Representatives for the Energy Department and the American Petroleum Institute were not immediately available to comment.\n \n Cyberattacks have increasingly disrupted operations at businesses.\n \n A ransomware attack on software provider CDK Global caused trouble at thousands of auto dealers across the United States earlier this year. CDK appears to have paid a $25 million ransom to hackers to resolve the massive disruption, sources previously told CNN.\n \n During the spring of 2021, a ransomware attack paralyzed the Colonial Pipeline, causing panic buying that wiped out supplies at gas stations along the East Coast.\n \n Eric Noonan, CEO of security and IT provider CyberSheath, said the apparent cyberattack on Halliburton is another reminder of the vulnerability of companies that operate critical infrastructure like energy.\n \n “Critical infrastructure operators in the United States get to decide how well they do or do not employ cybersecurity controls,” Noonan said. “This is a situation that cannot continue in perpetuity without enormous costs to the American people.”\n See Full Web Article \n\n Go to the full CNN experience\n \n \n © 2024 Cable News Network. A Warner Bros. Discovery Company. All Rights Reserved.\n \n\n Terms of Use\n \n\n |\n \n\n Privacy Policy\n \n\n |\n \n\n Ad Choices\n \n\n |\n \n\n Cookie Settings"
3,‘Seismic shift.’ Kroger’s fight against a federal agency could upend antitrust,https://lite.cnn.com//2024/08/20/business/kroger-albertsons-ftc-lawsuit/index.html,"By Matt Egan, CNN\n \nUpdated: \n 8:04 PM EDT, Wed August 21, 2024\n \n Source: CNN\n \n Supermarket chain Kroger is suing a federal agency in a daring move that aims to weaken how Washington fights mergers that threaten to jack up prices on consumers.\n \n The lawsuit alleges the Federal Trade Commission is violating the US Constitution by using an in-house tribunal to challenge Kroger’s $25 billion mega-merger with Albertsons — the biggest proposed supermarket deal in American history.\n \n The FTC typically enforces antitrust in one of two ways: It can sue companies and take them to directly to court, or it can attempt to use its own in-house administrative law judges to issue a ruling or reach a settlement outside of court. In the case of Kroger, the FTC actually pursued both options, seeking to block the merger in court and make a ruling on it with its in-house administrators.\n \n Companies can challenge the FTC’s administrative decisions in court. But Kroger claimed that tribunal process is unconstitutional.\n \n The Kroger lawsuit, filed Monday in federal court in Cincinnati, relies in part on a landmark Supreme Court ruling in June that further curbed the power of regulatory agencies. The new lawsuit represents a key test case and the latest effort to reshape the administrative state in America.\n \n Experts say a Kroger victory would usher in a seismic shift in how the federal government challenges mergers. US officials fear that weakening regulators’ ability to fight monopolies could hurt consumers by giving corporations too much power to raise prices and close stores.\n \n “It’s huge. If they are successful and appellate courts agree, one of the primary arms for antitrust oversight is literally being cut off at the elbow,” said Christine Bartholomew, a professor of law at the University of Buffalo School of Law. “It would be a radical shift in the way we do antitrust in this country.”\n \n Bartholomew said she’s “troubled” by the lawsuit because if the FTC is no longer able to use its own in-house tribunals, there would likely be less antitrust scrutiny of corporate mergers.\n \n “The risk is more deals, more concentration and potentially higher prices and less consumer choice,” she said.\n \n In February, the FTC sued to block the $25 billion deal between Kroger and Albertsons that was announced in 2022. The agency argued the merger would lift prices, shut stores and wipe out jobs.\n \n Kroger has argued the deal would do the exact opposite, helping the supermarket chains compete against non-union giants including Amazon, Costco and Walmart.\n \n In a statement to CNN, Kroger said its “business model is built on a foundation of bringing customers lower prices and more choices.”\n \n The company said it’s committed to investing $1 billion after the Albertsons merger closes to reduce prices for customers in stores across the country as well as $1.3 billion to enhance the customer experience. “Kroger has a proven track record of lowering prices in previous mergers,” the company said.\n \n The FTC is challenging the Kroger deal on two fronts: in federal court and via an administrative trial.\n \n Next week, Kroger faces the court in Portland, Oregon on the deal. The FTC has asked the judge in that case to temporarily block the Kroger-Albertsons deal while the in-house tribunal weighs the transaction.\n \n In the new lawsuit, Kroger argues that the FTC “clearly violates the Constitution” in two ways: The administrative law judge is not removable by the President of the United States and the case is being fought in the executive branch instead of the courts.\n \n Kroger cites the landmark Supreme Court decision in June where justices limited the power of the Securities and Exchange Commission to enforce rules with in-house reviews instead of jury trials.\n \n “We stand prepared to defend this merger in the upcoming trial in federal court — the appropriate venue for this matter to be heard,” Kroger CEO Rodney McMullen said in a statement. “We are asking the Court to halt what amounts to an unlawful proceeding before the FTC’s own in-house tribunal.”\n \n McMullen added that the merger between Kroger and Albertson’s is “squarely focused on ensuring we bring customers lower prices starting day one while securing the future of good-paying union jobs.”\n \n The FTC declined to comment on the Kroger lawsuit.\n \n A source familiar with the matter dismissed the Kroger lawsuit as a “desperate Hail Mary.”\n \n The move comes as part of a broader effort by corporate America to push back on what some view as regulatory overreach.\n \n Even some Democratic megadonors (ones with business interests at stake) have argued that the FTC’s efforts to fight monopolies have gone too far during the Biden administration.\n \n The Supreme Court has recently ruled in favor of businesses fighting regulations, including overturning the Chevron Doctrine in its blockbuster June decision that weakens the ability of agencies to approve regulations.\n \n “The whole administrative structure is under legal attack right now,” said Rebecca Haw Allensworth, an antitrust law professor at Vanderbilt Law School.\n \n Allensworth said a Kroger victory would amount to a “seismic shift” in how the FTC and other federal agencies operate.\n \n “It would definitely take an arrow out of the quiver of the FTC,” she said.\n \n Noting the recent series of Supreme Court decisions against federal agencies, Allensworth said a Kroger victory can’t be ruled out.\n \n “The law is whatever the Supreme Court says it is. We can’t be sure a claim like this would be rejected,” she said.\n See Full Web Article \n\n Go to the full CNN experience\n \n \n © 2024 Cable News Network. A Warner Bros. Discovery Company. All Rights Reserved.\n \n\n Terms of Use\n \n\n |\n \n\n Privacy Policy\n \n\n |\n \n\n Ad Choices\n \n\n |\n \n\n Cookie Settings"
4,Biden spoke with Netanyahu Wednesday at a critical moment in Gaza talks,https://lite.cnn.com//2024/08/21/politics/biden-netanyahu-call-gaza-ceasefire/index.html,"By Alex Marquardt, Benjamin Brown, Arlette Saenz and Oren Liebermann, CNN\n \nUpdated: \n 7:48 PM EDT, Wed August 21, 2024\n \n Source: CNN\n \n President Joe Biden spoke with Israeli Prime Minister Benjamin Netanyahu on Wednesday about “the ceasefire and hostage release deal and diplomatic efforts to de-escalate regional tensions,” White House spokesperson Emilie Simons posted on X.\n \n Vice President Kamala Harris also joined the call, which came at a critical moment in the Gaza ceasefire negotiations. US Secretary of State Antony Blinken said earlier this week in Israel that it may be the last opportunity for a deal.\n \n Biden made the call from Santa Ynez, California, where he is vacationing this week.\n \n A major sticking point in the ongoing negotiations is the future of the Philadelphi corridor in southern Gaza.\n \n On Wednesday, Israeli public broadcaster Kan reported that Israel’s withdrawal from the 8.7-mile strip of land along the border between Gaza and Egypt would be an element of the proposal agreed to by Netanyahu.\n \n Blinken and other US officials have declined to detail the “bridging proposal” the US laid out last week, which is designed to bring Israel and Hamas closer to a ceasefire agreement. But Hamas insists that the IDF withdraw fully from Gaza and that citizens must be allowed to return home. Israeli officials counter that a solution must be found to better control the Egypt-Gaza border, which has long been used for smuggling.\n \n But the prime minister’s office quickly denied any intention to leave the area, which Israeli troops seized in May.\n \n “Israel will insist on the achievement of all of its objectives for the war, as they have been defined by the Security Cabinet, including that Gaza never again constitutes a security threat to Israel. This requires securing the southern border,” his office said in a statement on Wednesday, describing Kan’s reporting as “incorrect.”\n \n The statement also appeared to contradict comments by Blinken on Tuesday, when he said that Netanyahu had agreed to the bridging proposal and its proposed troop withdrawals.\n \n “The agreement is very clear on the schedule and the locations of IDF withdrawals from Gaza, and Israel has agreed to that,” said Blinken in remarks to reporters before departing Qatar.\n \n Blinken was speaking in Qatar at the end of a short trip to the region as the US makes what it hopes is a final push for all sides to agree to a ceasefire agreement that would halt the fighting in Gaza and free the Israeli hostages held by Hamas.\n \n The top US diplomat said Monday in Israel that the onus would be on Hamas to also agree to the latest proposal to bridge the outstanding issues.\n \n A US senior administration official has previously criticized Netanyahu’s reported comments about not withdrawing as “maximalist statements” that are “not constructive to getting a ceasefire deal across the finish line.”\n \n CIA Director Bill Burns and other senior negotiators are expected to return to Cairo for further high-level talks this weekend.\n \n Meanwhile, the USS Abraham Lincoln carrier strike group arrived in the Middle East on Wednesday, according to US Central Command, bolstering US forces as the region braces for a potential Iranian attack.\n \n The aircraft carrier was ordered to the Middle East in early August in the wake of the killing of Hamas leader Ismail Haniyeh in Tehran. Iranian officials vowed to retaliate against Israel for the assassination.\n \n On August 11, Defense Secretary Lloyd Austin ordered the carrier and its accompanying destroyers to accelerate their transit to the Middle East amid widespread concerns that Iran may be preparing for a large-scale, coordinated attack against Israel with its regional proxies.\n \n On Tuesday, Iran said that its response to Israel must be “carefully calibrated” to avoid derailing the ceasefire negotiations. But Iran’s mission to the UN made it clear that a response was still coming. “The timing, conditions, and manner of Iran’s response will be meticulously orchestrated to ensure that it occurs at a moment of maximum surprise,” said the Iranian mission in a statement to CNN.\n \n The USS Abraham Lincoln carrier strike group joins the USS Theodore Roosevelt carrier strike group. The USS Theodore Roosevelt is expected to remain in the region for several more days before it returns to the US, according to three US defense officials.\n \n The Pentagon previously announced that the USS Abraham Lincoln would replace the USS Theodore Roosevelt.\n \n The US also has the USS Wasp Amphibious Ready Group in the eastern Mediterranean Sea, several destroyers and the USS Georgia, a guided missile submarine.\n \nThis story has been updated with additional developments.\n See Full Web Article \n\n Go to the full CNN experience\n \n \n © 2024 Cable News Network. A Warner Bros. Discovery Company. All Rights Reserved.\n \n\n Terms of Use\n \n\n |\n \n\n Privacy Policy\n \n\n |\n \n\n Ad Choices\n \n\n |\n \n\n Cookie Settings"


In [7]:
OLLAMA_API_URL = "http://localhost:11434/api/generate"

In [9]:
import requests
import json

# Ollama API URL (default server address)
OLLAMA_API_URL = "http://localhost:11434/api/generate"

# Define the function to use Gemma 2 model in Ollama
def generate_response(prompt):
    # Define the payload with the prompt and model name
    payload = {
        "model": "gemma2:2b",  # Specify the model
        "prompt": prompt
    }
    
    try:
        # Send a request to Ollama API with streaming enabled
        response = requests.post(OLLAMA_API_URL, json=payload, stream=True)

        # Initialize an empty string to collect the full response
        full_response = ""

        # Iterate over the streaming chunks of the response
        for chunk in response.iter_lines():
            if chunk:
                # Convert the byte chunk to a string
                chunk_data = json.loads(chunk.decode("utf-8"))
                
                # Accumulate the "response" field
                full_response += chunk_data.get("response", "")
        
        # Return the full response generated by the model
        return full_response
    
    except requests.exceptions.RequestException as e:
        # Handle any exceptions during the request
        return f"Request failed: {e}"
    except json.JSONDecodeError as e:
        # Handle JSON decoding errors
        return f"JSON decode error: {e}"

# Example usage of the function
if __name__ == "__main__":
    # Provide a prompt to the model
    prompt_text = "Explain the concept of machine learning."
    # Generate a response from the Gemma 2 model
    result = generate_response(prompt_text)
    print("Response from Gemma 2:")
    print(result)

Response from Gemma 2:
Imagine teaching a dog a trick. You show them what to do, reward them when they get it right, and correct them when they make mistakes. Over time, the dog learns the task through repetition and feedback.

Machine learning is kind of like that for computers.  Instead of humans giving direct instructions, machine learning uses algorithms (like those "tricks" for dogs) to train computers based on data. 

Here's a breakdown:

**What it is:**

* **Data-driven:** Machine learning relies heavily on massive datasets. The more data it has, the better its predictions become.
* **Algorithmic learning:**  Algorithms are like recipes that teach machines how to analyze and learn from the data. These algorithms use mathematical formulas to identify patterns and make connections in the data. 
* **Goal-oriented:** Machine learning aims to create models that can perform specific tasks or solve problems, such as:
    * Recognizing images
    * Predicting customer behavior
    * Ide

In [12]:
df.shape

(106, 3)

In [13]:
df_sample = df.sample(5)
df_sample

Unnamed: 0,Title,Link,Content
101,Go to the full CNN experience,https://www.cnn.com/,Show all Show all Show all © 2024 Cable News Network. A Warner Bros. Discovery Company. All Rights Reserved. CNN Sans ™ & © 2016 Cable News Network.
39,New data shows US job growth has been far weaker than initially reported,https://lite.cnn.com//2024/08/21/economy/bls-jobs-revisions/index.html,"By Alicia Wallace, CNN\n \nUpdated: \n 12:45 PM EDT, Wed August 21, 2024\n \n Source: CNN\n \n US job growth during much of the past year was significantly weaker than initially estimated, according to new data released Wednesday.\n \n The Bureau of Labor Statistics’ preliminary annual benchmark review of employment data suggests that there were 818,000 fewer jobs in March of this year than were initially reported.\n \nEvery year, the BLS conducts a revision to the data from its monthly survey of businesses’ payrolls, then benchmarks the March employment level to those measured by the Quarterly Census of Employment and Wages program.\n \n The preliminary data marks the largest downward revision since 2009 and shows that the labor market wasn’t quite as red hot as initially thought. However, job growth was still historically strong.\n \n When spread through the prior year, the average monthly job gain from April 2023 through March 2024 was 173,500 versus nearly 242,000, an analysis of BLS data shows.\n \n “It is important for markets to remember that these are not job losses, it is just that the job count was never that high,” wrote Chris Rupkey, chief economist at FwdBonds in a note Wednesday. “The economy apparently did not need those phantom ‘lost’ workers, because robust real consumer expenditures powered very strong [economic] growth in the second half of last year.”\n \n The downward adjustments were limited to the private sector, with nearly half in the professional and business services industry (revised down by 358,000, or 1.6%.) Other sectors showing large negative swings included the information industry (down 68,000, or 2.3%), leisure and hospitality (down 150,000, or 0.9%) and manufacturing (down 115,000, or -0.9%).\n \n The estimates released by the Labor Department on Wednesday — after an uncharacteristic delay of more than half an hour — are preliminary and will not be finalized until February 2025.\n \n While Wednesday’s revision won’t change the existing monthly employment data for now, it serves as another critical gauge for the overall health and activity of the US labor market. Job growth has dropped off more than expected in recent months, making for an even more tenuous situation for the Federal Reserve and its weighing of interest rate cuts.\n \n Fed Chair Jerome Powell is scheduled to deliver a speech Friday in Jackson Hole, Wyoming, at the Kansas City Fed’s annual economic symposium. Economists and analysts say the revisions will likely put further pressure on the central bank to loosen its monetary policy.\n \n “This may be the wake-up call for Powell and [Fed policymakers] that they need to commit to cuts and forward guidance more explicitly,” Michael Block, co-founder and chief strategy officer at AgentSmyth, told CNN.\n \n Wednesday’s preliminary downward revision was expected, economists say, noting the lagged but far more accurate Quarterly Census of Employment and Wages, or QCEW, has shown a slower pace of job gains than the more timely, but less comprehensive, monthly employment surveys and estimates (*more on that methodology and the revision process later).\n \n However, the size of the preliminary revision was a little surprising, said Ryan Sweet, chief US economist at Oxford Economics. He attributed that to how the BLS tries to capture new business formation and the closure of establishments (known as the birth-death model).\n \n The pandemic had a seismic impact on the US economy and the labor market, and its aftershocks still linger to this day. New business applications surged, but with births come deaths, and the BLS’ model has been overstating the new business formation and understating deaths, Sweet told CNN.\n \n To that end, “this is really just a counting issue” and a measurement issue versus a red flag about the health of the labor market, Torsten Slok, chief economist at Apollo Global Management, told CNN.\n \n “160 million people have a job,” Slok said. ”Telling me that over the last 12 months it wasn’t 160 million, it was only 159.2 million is not making too much of a difference to how the Fed and financial markets are thinking about the economy.”\n \n Other economists cautioned that Wednesday’s numbers are still preliminary (the final benchmark revisions will be released alongside the January jobs report in February 2025), and that while the QCEW does pick up some of the impact from the recent surge in immigration, it might not fully reflect undocumented workers.\n \n While this rearview look suggests that job growth was softening a bit sooner than previously thought, the labor market was solid then, Sweet said. And it still has some good fundamentals going for it now, he added, noting that the unemployment rate increased because more people were looking for work (versus higher layoffs) and employment-to-population ratios remain high.\n \n “I think the key is really going to be August employment,” Sweet said, noting the upcoming jobs report due out in a couple weeks.\n \n Economic data is revised often — especially as more comprehensive information becomes available — to provide a clearer, more accurate, picture of the dynamics at play.\n \n One of the most illustrative examples of this is the BLS’ labor market data and, specifically, the all-important jobs report.\n \n The BLS’ monthly snapshot of the labor market is comprised of two surveys: One of households (which provides demographic data and feeds into the all-important unemployment rate) and the other of businesses (designed to measure employment, hours and earnings).\n \n The latter establishment survey is responsible for the monthly estimates of how many jobs were added or lost.\n \n It’s important to note the “estimates” part of this: It is a survey after all, albeit a fairly robust one (the BLS surveys well north of 100,000 businesses and government agencies, representing roughly 629,000 individual worksites).\n \n When the market-moving jobs report is released, that initial estimate is often based on incomplete data and thus will be revised twice further in the two jobs reports that follow as the BLS receives more information.\n \n In addition to the surveying, the BLS also incorporates methodology to try to capture employment activity at new businesses and those that have closed.\n \n Even then, the monthly numbers aren’t final and fully comprehensive.\n \n That’s where the annual benchmark revisions come in. And the first part of that process is what happened Wednesday.\n \n Every year, the BLS conducts annual benchmark revisions to replace these sample-based employment estimates with fuller employment counts as recorded in the QCEW.\n \n The QCEW provides a more comprehensive read on the number of businesses, employees and wages at the state, regional and county level because it derives that data from quarterly tax reports submitted by businesses to their states.\n \n Given that process, the QCEW comes at a substantial lag: The data for the first quarter of 2024 was also released Wednesday and showed that in March 2024, national employment increased to 153.6 million, a 1.3% increase during the past year.\n See Full Web Article \n\n Go to the full CNN experience\n \n \n © 2024 Cable News Network. A Warner Bros. Discovery Company. All Rights Reserved.\n \n\n Terms of Use\n \n\n |\n \n\n Privacy Policy\n \n\n |\n \n\n Ad Choices\n \n\n |\n \n\n Cookie Settings"
11,A tornado sank a luxury sailing boat off the coast of Sicily. Here’s what we know,https://lite.cnn.com//2024/08/20/europe/bayesian-yacht-what-we-know-intl/index.html,"By Barbie Latza Nadeau and Sana Noor Haq, CNN\n \nUpdated: \n 6:40 PM EDT, Wed August 21, 2024\n \n Source: CNN\n \n Emergency workers in southern Italy are still hunting for at least one person who remains missing after a tornado sank a luxury yacht early Monday – prompting an air and naval operation off the coast of Sicily.\n \n Twenty-two people are thought to have been on the yacht when the tragedy struck. Fifteen people were rescued from the wreckage, according to Italy’s Coast Guard. One body was recovered on Monday and five more were found on Wednesday, after divers struggled to reach deep inside the yacht around 50 meters underwater (approximately 150 feet).\n \n Two Americans and four Britons were among the six initially reported missing – including British tech tycoon Mike Lynch, Jonathan Bloomer, chairman of Morgan Stanley International, and Chris Morvillo, a prominent lawyer. The five bodies found Wednesday have not been identified, while the body recovered Monday is thought to be that of the onboard chef Ricardo Thomas.\n \n Here’s what we know.\n \n A small waterspout – a type of tornado – spun over the Mediterranean island early Monday, likely capsizing the sailing boat amid lashings of rain and strong thunderstorms.\n \n The British-flagged yacht, called the “Bayesian,” was anchored about a half a mile from the port of Porticello, on Sicily’s northern coast. The vessel sank after its mast broke in half in the storm, Salvatore Cocina, head of Sicily’s Civil Protection, told CNN on Tuesday.\n \n Eyewitnesses described furious gales and hurricane-like winds that left an avalanche of debris near the pier.\n \n More than a dozen survivors were spotted in the area hanging onto life rafts, according to the captain of a nearby boat, who steadied his ship to avoid colliding with the Bayesian.\n \n “We got this strong hurricane gust and we had to start the engine to keep the ship in an angled position,” Karsten Bower told reporters in Palermo on Monday. “After the storm was over, we noticed that the ship behind us was gone.”\n \n Bower and his crew rescued four injured people, he said, before calling Italy’s Coast Guard – who later rescued the remaining survivors.\n \n One of those rescued – a child – was airlifted to the children’s hospital in Palermo. Eight people were hospitalized in total, according to the mayor’s office.\n \n The girl’s mother, Charlotte, described how she battled to hold onto Sofia, her 1-year-old daughter, as reported by Italian news agency ANSA.\n \n “In two seconds I lost the baby in the sea, then I immediately hugged her again amidst the fury of the waves. I held her tightly, close to me, while the sea was stormy,” she told journalists. “Many were screaming.”\n \n The mother and daughter were later reunited with the father, James, according to a doctor at the local children’s hospital in Palermo.\n \n “The survivors are very tired and are constantly asking about the missing people,” the doctor, Domenico Cipolla, said Monday. “They are talking and crying all the time because they have realized that there is little hope of finding their friends alive.”\n \n Italy’s fire brigade dispatched helicopters to aid in the search, officials said Monday. After an unsuccessful attempt on Monday, the brigade sent divers to try and enter the sunken ship Tuesday, recommencing the operation on Wednesday.\n \n The depth of the wreck means divers can only work there for limited periods of time, according to Marco Tilotta, an inspector for the diving unit of Palermo’s local fire brigade. The Italian fire brigade said Monday its divers had reached the yacht’s hull 49 meters (160 feet) below sea level.\n \n The United Kingdom’s Marine Accident Investigation Branch (MAIB) said it has deployed a team of four inspectors to Palermo which is conducting a preliminary assessment of the scene.\n \n Twenty-two people were on board the Bayesian, which was flying under a British flag and had mostly British passengers and crew, in addition to two Anglo-French, one Irish and one Sri Lankan person, a spokesperson for Italy’s Coast Guard told CNN.\n \n A troupe of high-profile guests are among those missing, including Lynch, the 59-year-old British tech investor who fought a fraud case earlier this year in the United States – which spiraled from the disastrous $11 billion sale of his company to tech firm Hewlett-Packard (HP) in 2011.\n \n His 18-year-old daughter was also named missing. Lynch’s wife, Angela Bacares, survived the accident. Bacares told Italian daily newspaper La Repubblica that she woke up at 4 a.m. local time, as the boat tilted. She and her husband were not initially concerned, she said, but became worried when the windows of the yacht shattered and chaos erupted.\n \n Bacares spoke to the newspaper while sitting in a wheelchair, at a hospital in the Sicilian town of Termini Imerese. She had abrasions on her feet and bandages on other parts of her body, it reported.\n \n Bloomer, the finance tycoon, and Morvillo, a prominent lawyer – and both their wives – are also among the missing, according to Salvatore Cocina, head of Sicily’s Civil Protection.\n \n Morvillo, an American partner at Clifford Chance, was involved in successfully defeating the US fraud case against Lynch in June. Another employee of the firm, Ayla Ronald, and her partner, survived the incident, according to a spokesperson for Clifford Chance.\n \n The body that was recovered from the vessel on Monday was identified as the onboard chef Ricardo Thomas, an Antiguan citizen, Reuters reported.\n \n Built in 2008, the 56-meter (184-foot) yacht was manufactured by Italian company Perini Navi, Reuters reported. According to the Associated Press, the boat has been available for charter for $215,000 (€195,000) per week.\n \n Lynch’s wife is linked to the yacht. The Bayesian is held by the company Revtom Limited, according to records from the maritime information service Equasis. The company’s latest annual return from April lists Bacares as the proprietor.\n \n “Bayesian,” the name given to the vessel, is linked to the statistical theory on which Lynch built his fortune, according to Reuters.\n \n The yacht’s mast stood 72.27 meters (237 feet) high above the designated water line, just short of the world’s tallest mast which is 75.2 meters, according to Guinness World Records. It was the tallest aluminium mast in the world, the Perini Navi website said.\n \n Perini Navi is known for making “good quality boats,” according to Caroline White, deputy editor of BOAT International, a media group serving the superyacht industry.\n \n White told CNN that if the Bayesian dismasted, “it should theoretically become more stable.” “But it might be a different story if you are in the middle of a violent storm with incredibly strong winds pinning you to the water,” she added.\n \n Strong storms across Sicily brought torrential rainfall late Sunday. Initial reports suggest a small waterspout, which developed over the area Monday morning, could have been behind the yacht’s sinking.\n \n Waterspouts – one of several types of tornadoes – are spinning columns of air that form over water, or move from land out to water. They are often accompanied by high winds, high seas, hail and dangerous lightning. While they are most common over tropical oceans, they can form almost anywhere.\n \n Matthew Schanck, chair of the Maritime Search and Rescue Council, told CNN that while waterspouts in Sicily are rare, “there is a risk” they can happen – just not every day.\n \n Waterspouts rely on warm waters to gain energy and the Mediterranean Sea has been very hot, reaching a record daily median of 28.9 degrees Celsius (84 Fahrenheit) last week, according to preliminary data from researchers at the Institute of Marine Sciences in Spain.\n \n Local temperatures have been even higher, with waters around Sicily reaching almost 30 degrees Celsius (86 Fahrenheit), almost 3 degrees more than normal, Italian climatologist Luca Mercalli told CNN.\n \n “Warmer oceans have more energy and more humidity to transfer to the atmosphere, the most important fuels for storms,” he said.\n See Full Web Article \n\n Go to the full CNN experience\n \n \n © 2024 Cable News Network. A Warner Bros. Discovery Company. All Rights Reserved.\n \n\n Terms of Use\n \n\n |\n \n\n Privacy Policy\n \n\n |\n \n\n Ad Choices\n \n\n |\n \n\n Cookie Settings"
67,Prosecutors say this doctor profited off Matthew Perry’s addiction. He’s returning to practice this week,https://lite.cnn.com//2024/08/21/us/matthew-perry-salvador-plasencia-ketamine-doctor/index.html,"By Emma Tucker and Cheri Mossburg, CNN\n \nUpdated: \n 7:25 AM EDT, Wed August 21, 2024\n \n Source: CNN\n \n A doctor who federal prosecutors say profited off actor Matthew Perry by taking advantage of his addiction issues and supplying him with ketamine, the drug that ultimately killed him, will resume seeing patients at his practice this week, his lawyer told CNN on Tuesday.\n \n Dr. Salvador Plasencia, of Santa Monica, California, is among five people charged in connection with the death of the beloved actor. Prosecutors say an underground network of drug sellers and suppliers were responsible for distributing the ketamine that killed Perry.\n \n Perry, who starred as Chandler Bing on “Friends,” died in October 2023 at age 54 from “acute effects of ketamine” and subsequent drowning, according to the Los Angeles County Medical Examiner’s Office autopsy report.\n \n Stefan Sacks, an attorney for Plasencia, said his client will return to his practice at Malibu Canyon Urgent Care “anytime this week.”\n \n Plasencia has pleaded not guilty to one count of conspiracy to distribute ketamine, seven counts of distribution of ketamine and two counts of altering and falsifying documents or records related to the federal investigation.\n \n Three of the five people charged in connection with Perry’s death have reached plea agreements. When asked Tuesday if Plasencia had considered a plea agreement, his attorney said it was “too early” to say because the defense team had yet to receive any of the prosecution’s evidence in the case.\n \n Plasencia was released last week after posting a $100,000 bond and surrendering his passport and DEA license, which had allowed him to prescribe controlled substances. His trial is set for October 8.\n \n The Medical Board of California is aware of the federal charges against Plasencia and another doctor who has been charged in connection with Perry’s death, board spokesperson Alexandria Schembra told CNN on Sunday. Both doctors are under investigation by the board, but no restrictions have been imposed on them, Schembra said.\n \n Plasencia’s medical license is active through October 2024, according to the medical board.\n \n Plasencia’s attorney said the doctor may see patients both in person and via tele-health, “depending on what the patient actually needs.”\n \n Under the judge’s order, Plasencia’s patients must sign a consent form acknowledging the pending federal case against him and that Plasencia cannot prescribe any controlled substances.\n \n The implications of a pending criminal case against a physician on their medical license involves two separate processes – criminal proceedings and professional licensure – moving in parallel, legal experts told CNN.\n \n Licensed physicians who are under investigation by a state medical board are entitled to due process while the medical inquiry is ongoing, which can lead to their license being suspended, revoked or not renewed, according to the Federation of State Medical Boards.\n \n In the investigation, medical boards can take into consideration evidence from criminal proceedings as a factor in enforcing certain regulations on licenses – the standard in California and nationally.\n \n There are restrictions in most states, including California, on considering someone’s criminal background or ongoing cases against them as a factor in their employment or license if the issue has no relation to their professional practice, according to Lindsay Wiley, a professor at the University of California, Los Angeles School of Law.\n \n However, when the allegations are “right at the core of being reasonably related to the job qualification,” like for Plasencia, taking the criminal element into account is “fair game,” she continued.\n \n “The requirements that the implications for your license have to be reasonably related to the charges against you, that’s meant to kind of protect people from overreaching consequences of criminal actions,” Wiley said.\n \n Investigations by state medical boards in the legal licensing process are notoriously slow, Wiley said, but criminal courts can impose sanctions or conditions on a defendant’s release to protect public safety as they await trial.\n \n While Plasencia is allowed to return to work, the judge’s suspension of his DEA license and requiring the consent form were two such court-imposed conditions to protect public safety while the medical board conducts its separate investigation.\n \n About one month before Perry’s death, Plasencia learned the actor was interested in purchasing ketamine and contacted another physician – Dr. Mark Chavez, according to court documents.\n \n Chavez provided Plasencia with the ketamine given to Perry through a fraudulent prescription, according to a document outlining the allegations against him.\n \n “Defendant Plasencia saw this as an opportunity to profit off of Mr. Perry,” US Attorney Martin Estrada has said, adding Plasencia wrote in text messages that he wanted to be the actor’s sole supplier.\n \n According to Estrada, in a September 2023 message, Plasencia wrote: “I wonder how much this moron will pay?”\n \n Over the next several weeks, prosecutors said, Plasencia purchased ketamine from Chavez, sold vials of the drug to Perry’s assistant and taught the assistant how to administer the drug.\n \n Plasencia also went to Perry’s house to drop off ketamine and even injected the drug for Perry in the back of a vehicle in a parking lot, prosecutors said.\n \n On October 12, more than two weeks before Perry’s death, Plasencia “administered a large dose” to Perry that caused Perry’s systolic blood pressure to spike and he froze up, unable to speak or move, prosecutors said.\n \n If convicted, Plasencia would face up to a decade in prison for each ketamine-related count and up to 20 years for each count of records falsification, the Department of Justice said in a news release.\n \nCNN’s Jack Hannah and Taylor Romine contributed to this report.\n See Full Web Article \n\n Go to the full CNN experience\n \n \n © 2024 Cable News Network. A Warner Bros. Discovery Company. All Rights Reserved.\n \n\n Terms of Use\n \n\n |\n \n\n Privacy Policy\n \n\n |\n \n\n Ad Choices\n \n\n |\n \n\n Cookie Settings"
43,See where abortions are banned and legal — and where it’s still in limbo,https://lite.cnn.com//us/abortion-access-restrictions-bans-us-dg/index.html,"By Annette Choi and Devan Cole, CNN\n \nUpdated: \n 11:53 AM EDT, Wed August 21, 2024\n \n Source: CNN\n \n Following the US Supreme Court’s June 2022 decision in Dobbs v. Jackson Women’s Health Organization, which eliminated a constitutional right to abortion nationwide, nearly two dozen US states have banned or limited access to the procedure. States where abortion is most limited report higher rates of maternal and infant mortality, as well as greater economic insecurity.\n \n The fight over abortion is well underway in state legislatures and courts. Most recently, the Iowa Supreme Court upheld a six-week abortion ban passed by the state legislature last year. The ban went into effect on July 29, replacing the previous 22-week limit.\n \n The Supreme Court ruled to allow emergency abortions in Idaho for now and unanimously rejected a lawsuit challenging the Food and Drug Administration’s approach to regulating mifepristone, the first of two drugs used in medication abortion. The abortion pill will continue to be available to patients via mail, without an in-person doctor’s visit.\n \n CNN is tracking these legal challenges as they make their way through the courts. Here’s where abortion access currently stands in the United States.\n See Full Web Article \n\n Go to the full CNN experience\n \n \n © 2024 Cable News Network. A Warner Bros. Discovery Company. All Rights Reserved.\n \n\n Terms of Use\n \n\n |\n \n\n Privacy Policy\n \n\n |\n \n\n Ad Choices\n \n\n |\n \n\n Cookie Settings"


In [17]:
# use generate_response function to generate summaries for each article, prompt cotain instructions for the model and title/content contain the article text
df_sample['Summary'] = df_sample.apply(lambda x: generate_response(f"Write exact tilte text first, add newline, then summarize the content of article in short paragraph in less than 3 sentences preserving key indicators or numbers \n\n titled '{x['Title']}'\n\n{x['Content']}"), axis=1)
df_sample.head(5)

Unnamed: 0,Title,Link,Content,Summary
101,Go to the full CNN experience,https://www.cnn.com/,Show all Show all Show all © 2024 Cable News Network. A Warner Bros. Discovery Company. All Rights Reserved. CNN Sans ™ & © 2016 Cable News Network.,"## Go to the full CNN experience\n\nCNN offers a comprehensive news experience, delivering real-time updates and in-depth analysis on current events across various platforms. Stay informed with breaking news, live coverage, and insightful reporting from around the globe. \n\n"
39,New data shows US job growth has been far weaker than initially reported,https://lite.cnn.com//2024/08/21/economy/bls-jobs-revisions/index.html,"By Alicia Wallace, CNN\n \nUpdated: \n 12:45 PM EDT, Wed August 21, 2024\n \n Source: CNN\n \n US job growth during much of the past year was significantly weaker than initially estimated, according to new data released Wednesday.\n \n The Bureau of Labor Statistics’ preliminary annual benchmark review of employment data suggests that there were 818,000 fewer jobs in March of this year than were initially reported.\n \nEvery year, the BLS conducts a revision to the data from its monthly survey of businesses’ payrolls, then benchmarks the March employment level to those measured by the Quarterly Census of Employment and Wages program.\n \n The preliminary data marks the largest downward revision since 2009 and shows that the labor market wasn’t quite as red hot as initially thought. However, job growth was still historically strong.\n \n When spread through the prior year, the average monthly job gain from April 2023 through March 2024 was 173,500 versus nearly 242,000, an analysis of BLS data shows.\n \n “It is important for markets to remember that these are not job losses, it is just that the job count was never that high,” wrote Chris Rupkey, chief economist at FwdBonds in a note Wednesday. “The economy apparently did not need those phantom ‘lost’ workers, because robust real consumer expenditures powered very strong [economic] growth in the second half of last year.”\n \n The downward adjustments were limited to the private sector, with nearly half in the professional and business services industry (revised down by 358,000, or 1.6%.) Other sectors showing large negative swings included the information industry (down 68,000, or 2.3%), leisure and hospitality (down 150,000, or 0.9%) and manufacturing (down 115,000, or -0.9%).\n \n The estimates released by the Labor Department on Wednesday — after an uncharacteristic delay of more than half an hour — are preliminary and will not be finalized until February 2025.\n \n While Wednesday’s revision won’t change the existing monthly employment data for now, it serves as another critical gauge for the overall health and activity of the US labor market. Job growth has dropped off more than expected in recent months, making for an even more tenuous situation for the Federal Reserve and its weighing of interest rate cuts.\n \n Fed Chair Jerome Powell is scheduled to deliver a speech Friday in Jackson Hole, Wyoming, at the Kansas City Fed’s annual economic symposium. Economists and analysts say the revisions will likely put further pressure on the central bank to loosen its monetary policy.\n \n “This may be the wake-up call for Powell and [Fed policymakers] that they need to commit to cuts and forward guidance more explicitly,” Michael Block, co-founder and chief strategy officer at AgentSmyth, told CNN.\n \n Wednesday’s preliminary downward revision was expected, economists say, noting the lagged but far more accurate Quarterly Census of Employment and Wages, or QCEW, has shown a slower pace of job gains than the more timely, but less comprehensive, monthly employment surveys and estimates (*more on that methodology and the revision process later).\n \n However, the size of the preliminary revision was a little surprising, said Ryan Sweet, chief US economist at Oxford Economics. He attributed that to how the BLS tries to capture new business formation and the closure of establishments (known as the birth-death model).\n \n The pandemic had a seismic impact on the US economy and the labor market, and its aftershocks still linger to this day. New business applications surged, but with births come deaths, and the BLS’ model has been overstating the new business formation and understating deaths, Sweet told CNN.\n \n To that end, “this is really just a counting issue” and a measurement issue versus a red flag about the health of the labor market, Torsten Slok, chief economist at Apollo Global Management, told CNN.\n \n “160 million people have a job,” Slok said. ”Telling me that over the last 12 months it wasn’t 160 million, it was only 159.2 million is not making too much of a difference to how the Fed and financial markets are thinking about the economy.”\n \n Other economists cautioned that Wednesday’s numbers are still preliminary (the final benchmark revisions will be released alongside the January jobs report in February 2025), and that while the QCEW does pick up some of the impact from the recent surge in immigration, it might not fully reflect undocumented workers.\n \n While this rearview look suggests that job growth was softening a bit sooner than previously thought, the labor market was solid then, Sweet said. And it still has some good fundamentals going for it now, he added, noting that the unemployment rate increased because more people were looking for work (versus higher layoffs) and employment-to-population ratios remain high.\n \n “I think the key is really going to be August employment,” Sweet said, noting the upcoming jobs report due out in a couple weeks.\n \n Economic data is revised often — especially as more comprehensive information becomes available — to provide a clearer, more accurate, picture of the dynamics at play.\n \n One of the most illustrative examples of this is the BLS’ labor market data and, specifically, the all-important jobs report.\n \n The BLS’ monthly snapshot of the labor market is comprised of two surveys: One of households (which provides demographic data and feeds into the all-important unemployment rate) and the other of businesses (designed to measure employment, hours and earnings).\n \n The latter establishment survey is responsible for the monthly estimates of how many jobs were added or lost.\n \n It’s important to note the “estimates” part of this: It is a survey after all, albeit a fairly robust one (the BLS surveys well north of 100,000 businesses and government agencies, representing roughly 629,000 individual worksites).\n \n When the market-moving jobs report is released, that initial estimate is often based on incomplete data and thus will be revised twice further in the two jobs reports that follow as the BLS receives more information.\n \n In addition to the surveying, the BLS also incorporates methodology to try to capture employment activity at new businesses and those that have closed.\n \n Even then, the monthly numbers aren’t final and fully comprehensive.\n \n That’s where the annual benchmark revisions come in. And the first part of that process is what happened Wednesday.\n \n Every year, the BLS conducts annual benchmark revisions to replace these sample-based employment estimates with fuller employment counts as recorded in the QCEW.\n \n The QCEW provides a more comprehensive read on the number of businesses, employees and wages at the state, regional and county level because it derives that data from quarterly tax reports submitted by businesses to their states.\n \n Given that process, the QCEW comes at a substantial lag: The data for the first quarter of 2024 was also released Wednesday and showed that in March 2024, national employment increased to 153.6 million, a 1.3% increase during the past year.\n See Full Web Article \n\n Go to the full CNN experience\n \n \n © 2024 Cable News Network. A Warner Bros. Discovery Company. All Rights Reserved.\n \n\n Terms of Use\n \n\n |\n \n\n Privacy Policy\n \n\n |\n \n\n Ad Choices\n \n\n |\n \n\n Cookie Settings","## New data shows US job growth has been far weaker than initially reported \n\nNew data released by the Bureau of Labor Statistics reveals a significant downward revision to initial estimates of job growth in the US. A preliminary annual benchmark review indicates there were 818,000 fewer jobs than previously reported in March 2024. The revised estimates reflect a slowdown in job gains compared to earlier reports, with an average monthly gain of 173,500 from April 2023 to March 2024. While job growth was still historically strong, the revisions suggest the labor market wasn't as robust as initially perceived.\n\n\n"
11,A tornado sank a luxury sailing boat off the coast of Sicily. Here’s what we know,https://lite.cnn.com//2024/08/20/europe/bayesian-yacht-what-we-know-intl/index.html,"By Barbie Latza Nadeau and Sana Noor Haq, CNN\n \nUpdated: \n 6:40 PM EDT, Wed August 21, 2024\n \n Source: CNN\n \n Emergency workers in southern Italy are still hunting for at least one person who remains missing after a tornado sank a luxury yacht early Monday – prompting an air and naval operation off the coast of Sicily.\n \n Twenty-two people are thought to have been on the yacht when the tragedy struck. Fifteen people were rescued from the wreckage, according to Italy’s Coast Guard. One body was recovered on Monday and five more were found on Wednesday, after divers struggled to reach deep inside the yacht around 50 meters underwater (approximately 150 feet).\n \n Two Americans and four Britons were among the six initially reported missing – including British tech tycoon Mike Lynch, Jonathan Bloomer, chairman of Morgan Stanley International, and Chris Morvillo, a prominent lawyer. The five bodies found Wednesday have not been identified, while the body recovered Monday is thought to be that of the onboard chef Ricardo Thomas.\n \n Here’s what we know.\n \n A small waterspout – a type of tornado – spun over the Mediterranean island early Monday, likely capsizing the sailing boat amid lashings of rain and strong thunderstorms.\n \n The British-flagged yacht, called the “Bayesian,” was anchored about a half a mile from the port of Porticello, on Sicily’s northern coast. The vessel sank after its mast broke in half in the storm, Salvatore Cocina, head of Sicily’s Civil Protection, told CNN on Tuesday.\n \n Eyewitnesses described furious gales and hurricane-like winds that left an avalanche of debris near the pier.\n \n More than a dozen survivors were spotted in the area hanging onto life rafts, according to the captain of a nearby boat, who steadied his ship to avoid colliding with the Bayesian.\n \n “We got this strong hurricane gust and we had to start the engine to keep the ship in an angled position,” Karsten Bower told reporters in Palermo on Monday. “After the storm was over, we noticed that the ship behind us was gone.”\n \n Bower and his crew rescued four injured people, he said, before calling Italy’s Coast Guard – who later rescued the remaining survivors.\n \n One of those rescued – a child – was airlifted to the children’s hospital in Palermo. Eight people were hospitalized in total, according to the mayor’s office.\n \n The girl’s mother, Charlotte, described how she battled to hold onto Sofia, her 1-year-old daughter, as reported by Italian news agency ANSA.\n \n “In two seconds I lost the baby in the sea, then I immediately hugged her again amidst the fury of the waves. I held her tightly, close to me, while the sea was stormy,” she told journalists. “Many were screaming.”\n \n The mother and daughter were later reunited with the father, James, according to a doctor at the local children’s hospital in Palermo.\n \n “The survivors are very tired and are constantly asking about the missing people,” the doctor, Domenico Cipolla, said Monday. “They are talking and crying all the time because they have realized that there is little hope of finding their friends alive.”\n \n Italy’s fire brigade dispatched helicopters to aid in the search, officials said Monday. After an unsuccessful attempt on Monday, the brigade sent divers to try and enter the sunken ship Tuesday, recommencing the operation on Wednesday.\n \n The depth of the wreck means divers can only work there for limited periods of time, according to Marco Tilotta, an inspector for the diving unit of Palermo’s local fire brigade. The Italian fire brigade said Monday its divers had reached the yacht’s hull 49 meters (160 feet) below sea level.\n \n The United Kingdom’s Marine Accident Investigation Branch (MAIB) said it has deployed a team of four inspectors to Palermo which is conducting a preliminary assessment of the scene.\n \n Twenty-two people were on board the Bayesian, which was flying under a British flag and had mostly British passengers and crew, in addition to two Anglo-French, one Irish and one Sri Lankan person, a spokesperson for Italy’s Coast Guard told CNN.\n \n A troupe of high-profile guests are among those missing, including Lynch, the 59-year-old British tech investor who fought a fraud case earlier this year in the United States – which spiraled from the disastrous $11 billion sale of his company to tech firm Hewlett-Packard (HP) in 2011.\n \n His 18-year-old daughter was also named missing. Lynch’s wife, Angela Bacares, survived the accident. Bacares told Italian daily newspaper La Repubblica that she woke up at 4 a.m. local time, as the boat tilted. She and her husband were not initially concerned, she said, but became worried when the windows of the yacht shattered and chaos erupted.\n \n Bacares spoke to the newspaper while sitting in a wheelchair, at a hospital in the Sicilian town of Termini Imerese. She had abrasions on her feet and bandages on other parts of her body, it reported.\n \n Bloomer, the finance tycoon, and Morvillo, a prominent lawyer – and both their wives – are also among the missing, according to Salvatore Cocina, head of Sicily’s Civil Protection.\n \n Morvillo, an American partner at Clifford Chance, was involved in successfully defeating the US fraud case against Lynch in June. Another employee of the firm, Ayla Ronald, and her partner, survived the incident, according to a spokesperson for Clifford Chance.\n \n The body that was recovered from the vessel on Monday was identified as the onboard chef Ricardo Thomas, an Antiguan citizen, Reuters reported.\n \n Built in 2008, the 56-meter (184-foot) yacht was manufactured by Italian company Perini Navi, Reuters reported. According to the Associated Press, the boat has been available for charter for $215,000 (€195,000) per week.\n \n Lynch’s wife is linked to the yacht. The Bayesian is held by the company Revtom Limited, according to records from the maritime information service Equasis. The company’s latest annual return from April lists Bacares as the proprietor.\n \n “Bayesian,” the name given to the vessel, is linked to the statistical theory on which Lynch built his fortune, according to Reuters.\n \n The yacht’s mast stood 72.27 meters (237 feet) high above the designated water line, just short of the world’s tallest mast which is 75.2 meters, according to Guinness World Records. It was the tallest aluminium mast in the world, the Perini Navi website said.\n \n Perini Navi is known for making “good quality boats,” according to Caroline White, deputy editor of BOAT International, a media group serving the superyacht industry.\n \n White told CNN that if the Bayesian dismasted, “it should theoretically become more stable.” “But it might be a different story if you are in the middle of a violent storm with incredibly strong winds pinning you to the water,” she added.\n \n Strong storms across Sicily brought torrential rainfall late Sunday. Initial reports suggest a small waterspout, which developed over the area Monday morning, could have been behind the yacht’s sinking.\n \n Waterspouts – one of several types of tornadoes – are spinning columns of air that form over water, or move from land out to water. They are often accompanied by high winds, high seas, hail and dangerous lightning. While they are most common over tropical oceans, they can form almost anywhere.\n \n Matthew Schanck, chair of the Maritime Search and Rescue Council, told CNN that while waterspouts in Sicily are rare, “there is a risk” they can happen – just not every day.\n \n Waterspouts rely on warm waters to gain energy and the Mediterranean Sea has been very hot, reaching a record daily median of 28.9 degrees Celsius (84 Fahrenheit) last week, according to preliminary data from researchers at the Institute of Marine Sciences in Spain.\n \n Local temperatures have been even higher, with waters around Sicily reaching almost 30 degrees Celsius (86 Fahrenheit), almost 3 degrees more than normal, Italian climatologist Luca Mercalli told CNN.\n \n “Warmer oceans have more energy and more humidity to transfer to the atmosphere, the most important fuels for storms,” he said.\n See Full Web Article \n\n Go to the full CNN experience\n \n \n © 2024 Cable News Network. A Warner Bros. Discovery Company. All Rights Reserved.\n \n\n Terms of Use\n \n\n |\n \n\n Privacy Policy\n \n\n |\n \n\n Ad Choices\n \n\n |\n \n\n Cookie Settings","## A tornado sank a luxury sailing boat off the coast of Sicily, leaving 22 people missing. \n\nA small waterspout caused the sinking of the 56-meter ""Bayesian"" yacht in Sicilian waters. The incident left 22 passengers missing, including prominent figures like finance tycoon Jonathan Bloomer and lawyer Michael Morvillo. The yacht was a Perini Navi creation, known for its quality, and had been available for charter at $215,000 per week. While rare, waterspouts can occur in the warm Mediterranean waters this time of year due to rising temperatures. \n"
67,Prosecutors say this doctor profited off Matthew Perry’s addiction. He’s returning to practice this week,https://lite.cnn.com//2024/08/21/us/matthew-perry-salvador-plasencia-ketamine-doctor/index.html,"By Emma Tucker and Cheri Mossburg, CNN\n \nUpdated: \n 7:25 AM EDT, Wed August 21, 2024\n \n Source: CNN\n \n A doctor who federal prosecutors say profited off actor Matthew Perry by taking advantage of his addiction issues and supplying him with ketamine, the drug that ultimately killed him, will resume seeing patients at his practice this week, his lawyer told CNN on Tuesday.\n \n Dr. Salvador Plasencia, of Santa Monica, California, is among five people charged in connection with the death of the beloved actor. Prosecutors say an underground network of drug sellers and suppliers were responsible for distributing the ketamine that killed Perry.\n \n Perry, who starred as Chandler Bing on “Friends,” died in October 2023 at age 54 from “acute effects of ketamine” and subsequent drowning, according to the Los Angeles County Medical Examiner’s Office autopsy report.\n \n Stefan Sacks, an attorney for Plasencia, said his client will return to his practice at Malibu Canyon Urgent Care “anytime this week.”\n \n Plasencia has pleaded not guilty to one count of conspiracy to distribute ketamine, seven counts of distribution of ketamine and two counts of altering and falsifying documents or records related to the federal investigation.\n \n Three of the five people charged in connection with Perry’s death have reached plea agreements. When asked Tuesday if Plasencia had considered a plea agreement, his attorney said it was “too early” to say because the defense team had yet to receive any of the prosecution’s evidence in the case.\n \n Plasencia was released last week after posting a $100,000 bond and surrendering his passport and DEA license, which had allowed him to prescribe controlled substances. His trial is set for October 8.\n \n The Medical Board of California is aware of the federal charges against Plasencia and another doctor who has been charged in connection with Perry’s death, board spokesperson Alexandria Schembra told CNN on Sunday. Both doctors are under investigation by the board, but no restrictions have been imposed on them, Schembra said.\n \n Plasencia’s medical license is active through October 2024, according to the medical board.\n \n Plasencia’s attorney said the doctor may see patients both in person and via tele-health, “depending on what the patient actually needs.”\n \n Under the judge’s order, Plasencia’s patients must sign a consent form acknowledging the pending federal case against him and that Plasencia cannot prescribe any controlled substances.\n \n The implications of a pending criminal case against a physician on their medical license involves two separate processes – criminal proceedings and professional licensure – moving in parallel, legal experts told CNN.\n \n Licensed physicians who are under investigation by a state medical board are entitled to due process while the medical inquiry is ongoing, which can lead to their license being suspended, revoked or not renewed, according to the Federation of State Medical Boards.\n \n In the investigation, medical boards can take into consideration evidence from criminal proceedings as a factor in enforcing certain regulations on licenses – the standard in California and nationally.\n \n There are restrictions in most states, including California, on considering someone’s criminal background or ongoing cases against them as a factor in their employment or license if the issue has no relation to their professional practice, according to Lindsay Wiley, a professor at the University of California, Los Angeles School of Law.\n \n However, when the allegations are “right at the core of being reasonably related to the job qualification,” like for Plasencia, taking the criminal element into account is “fair game,” she continued.\n \n “The requirements that the implications for your license have to be reasonably related to the charges against you, that’s meant to kind of protect people from overreaching consequences of criminal actions,” Wiley said.\n \n Investigations by state medical boards in the legal licensing process are notoriously slow, Wiley said, but criminal courts can impose sanctions or conditions on a defendant’s release to protect public safety as they await trial.\n \n While Plasencia is allowed to return to work, the judge’s suspension of his DEA license and requiring the consent form were two such court-imposed conditions to protect public safety while the medical board conducts its separate investigation.\n \n About one month before Perry’s death, Plasencia learned the actor was interested in purchasing ketamine and contacted another physician – Dr. Mark Chavez, according to court documents.\n \n Chavez provided Plasencia with the ketamine given to Perry through a fraudulent prescription, according to a document outlining the allegations against him.\n \n “Defendant Plasencia saw this as an opportunity to profit off of Mr. Perry,” US Attorney Martin Estrada has said, adding Plasencia wrote in text messages that he wanted to be the actor’s sole supplier.\n \n According to Estrada, in a September 2023 message, Plasencia wrote: “I wonder how much this moron will pay?”\n \n Over the next several weeks, prosecutors said, Plasencia purchased ketamine from Chavez, sold vials of the drug to Perry’s assistant and taught the assistant how to administer the drug.\n \n Plasencia also went to Perry’s house to drop off ketamine and even injected the drug for Perry in the back of a vehicle in a parking lot, prosecutors said.\n \n On October 12, more than two weeks before Perry’s death, Plasencia “administered a large dose” to Perry that caused Perry’s systolic blood pressure to spike and he froze up, unable to speak or move, prosecutors said.\n \n If convicted, Plasencia would face up to a decade in prison for each ketamine-related count and up to 20 years for each count of records falsification, the Department of Justice said in a news release.\n \nCNN’s Jack Hannah and Taylor Romine contributed to this report.\n See Full Web Article \n\n Go to the full CNN experience\n \n \n © 2024 Cable News Network. A Warner Bros. Discovery Company. All Rights Reserved.\n \n\n Terms of Use\n \n\n |\n \n\n Privacy Policy\n \n\n |\n \n\n Ad Choices\n \n\n |\n \n\n Cookie Settings","## Prosecutors say this doctor profited off Matthew Perry’s addiction. He’s returning to practice this week \n\nA California doctor, Dr. Salvador Plasencia, accused of exploiting actor Matthew Perry's drug addiction by supplying him with the deadly ketamine that ultimately led to his death, will resume practicing medicine this week despite pending federal charges. The investigation into Perry's death revealed an alleged network of suppliers and a case-specific plea agreement situation has been delayed due to evidence delays. While Plasencia faces up to 20 years in prison for record falsification, he maintains his innocence, awaiting trial on October 8th. His medical license remains active through 2024 while under investigation by the Medical Board of California. \n"
43,See where abortions are banned and legal — and where it’s still in limbo,https://lite.cnn.com//us/abortion-access-restrictions-bans-us-dg/index.html,"By Annette Choi and Devan Cole, CNN\n \nUpdated: \n 11:53 AM EDT, Wed August 21, 2024\n \n Source: CNN\n \n Following the US Supreme Court’s June 2022 decision in Dobbs v. Jackson Women’s Health Organization, which eliminated a constitutional right to abortion nationwide, nearly two dozen US states have banned or limited access to the procedure. States where abortion is most limited report higher rates of maternal and infant mortality, as well as greater economic insecurity.\n \n The fight over abortion is well underway in state legislatures and courts. Most recently, the Iowa Supreme Court upheld a six-week abortion ban passed by the state legislature last year. The ban went into effect on July 29, replacing the previous 22-week limit.\n \n The Supreme Court ruled to allow emergency abortions in Idaho for now and unanimously rejected a lawsuit challenging the Food and Drug Administration’s approach to regulating mifepristone, the first of two drugs used in medication abortion. The abortion pill will continue to be available to patients via mail, without an in-person doctor’s visit.\n \n CNN is tracking these legal challenges as they make their way through the courts. Here’s where abortion access currently stands in the United States.\n See Full Web Article \n\n Go to the full CNN experience\n \n \n © 2024 Cable News Network. A Warner Bros. Discovery Company. All Rights Reserved.\n \n\n Terms of Use\n \n\n |\n \n\n Privacy Policy\n \n\n |\n \n\n Ad Choices\n \n\n |\n \n\n Cookie Settings","# See Where Abortions Are Banned and Legal — and Where It's Still in Limbo\n\nThis article from CNN provides an overview of the evolving legal landscape surrounding abortion access in the US following the Dobbs v. Jackson Women's Health Organization ruling. The article highlights key developments such as:\n\nStates have implemented varying restrictions on abortion, with some completely banning it while others maintain limited or ""in limbo"" status. This has resulted in a significant increase in maternal and infant mortality rates. Additionally, the article discusses legal battles over abortion access, including a recent Iowa Supreme Court ruling upholding a six-week abortion ban and the Supreme Court's decision to allow emergency abortions in Idaho. Despite challenges, medication abortion via mail will continue due to the FDA’s approach to regulating mifepristone.\n"


In [4]:
# https://ai.stanford.edu/~amaas/data/sentiment/
# read positive reviews from aclImdb folder, train folder, pos folder, list all txt files and sampling 100 files
import os
import random

# Define the path to the positive reviews
pos_reviews_path = "aclImdb/train/pos"
neg_reviews_path = "aclImdb/train/neg"

# List all the files in the positive reviews folder
pos_files = os.listdir(pos_reviews_path)
neg_files = os.listdir(neg_reviews_path)
print(len(pos_files))
print(len(neg_files))

# Sample 100 files randomly
random.seed(0)
sampled_pos_files = random.sample(pos_files, 10)
sampled_neg_files = random.sample(neg_files, 10)
print(sampled_pos_files[:5])
print(sampled_neg_files[:5])

12500
12500
['8650_10.txt', '9723_9.txt', '6588_9.txt', '10388_7.txt', '2173_10.txt']
['11559_4.txt', '3299_1.txt', '2973_2.txt', '4415_4.txt', '11194_1.txt']


In [7]:
import pandas as pd
import os

pd.set_option('display.max_colwidth', None)  # None means no limit on column width

# set dataframe id, rating, review, label
# Create an empty list to store the data
data = []

# Iterate over the sampled positive files
for file in sampled_pos_files:
    # Read the content of the file
    with open(os.path.join(pos_reviews_path, file), "r", encoding="utf-8") as f:
        review = f.read()
    
    # Extract unique_id and rating from the file name
    unique_id, rating = file.split('_')  # Assuming file format is {unique_id}_{rating}.txt
    rating = rating.split('.')[0]  # Remove the .txt extension
    
    # Append the data in the format [id, rating, review, label]
    data.append([unique_id, rating, review, "positive"])  # Assuming the label is "positive"
   
for file in sampled_neg_files:
    # Read the content of the file
    with open(os.path.join(neg_reviews_path, file), "r", encoding="utf-8") as f:
        review = f.read()
    
    # Extract unique_id and rating from the file name
    unique_id, rating = file.split('_')  # Assuming file format is {unique_id}_{rating}.txt
    rating = rating.split('.')[0]  # Remove the .txt extension
    
    # Append the data in the format [id, rating, review, label]
    data.append([unique_id, rating, review, "negative"])  # Assuming the label is "positive"

# Create a DataFrame from the collected data
df_reviews = pd.DataFrame(data, columns=["id", "rating", "review", "label"])
df_reviews

Unnamed: 0,id,rating,review,label
0,8650,10,Kalifornia is the story of a writer and his girlfriend photographer who are looking for someone to help pay gas money and take turns at the wheel for a cross country road trip to famous murder sights. Ironically a serial killer and his girlfriend answer the post. Kalifornia is a diamond in the rough and a very intriguing journey with a serial killer. Great performances all around by the leads with Pitt in particular being exceptional. Check it out!!,positive
1,9723,9,"I really, really enjoyed watching this movie! At first, seeing its poster I thought it was just another easy romantic comedy ... but it is simply more than this! I personally believe that this idea (that I'm sure a good part of the viewers had just before they saw the movie) it's yet another important part of the big concept of this movie itself (or even of its marketing strategy)! What I mean is: Nowadays we are slaves to images! To impressions! I went to the cinema to view this film having the wrong impression, the wrong expectations, and at the end I felt how superficial I could be! To exemplify it comes to my mind the sequence near the end in which Sidney buys the plane ticket to go back to New York and as he is asked to 'give an autograph', meaning to sign for the ticket, he believes that just because he got on TV thanks to the scandal at the awards he is now some kind of celebrity. And this is just, I believe, the climax of this main theme around which the movies revolves. Above this, I believe the movie also offers us a solution to get along with this, illustrated throughout the movie by Sidney's attitude: don't become too serious about yourself or about anybody else ... ""even saints were people in the beginning"" ... as Sophie once says in the movie. The saints of the moment are the stars. We attribute them an 'aura' of perfection, of eternal happiness, but the reality is much less than that. Even the saints of any religion are images, ideal models of how to behave and how to live your life. Even they were not for real ... they became 'for real' after they died and we looked back at them. And that's the catch: we need our saints! we need our stars! We strive for them as if it wasn't for them we wouldn't have anything to strive for. And television and all other media are means to create and capture our strivings. We desperately need benchmarks in regard to which to measure ourselves. And that's how we got in the cinema to watch this movie in the first place: to see if we can fit the benchmark, or if the benchmark is to small for us. This time it was larger than we expected.",positive
2,6588,9,"After viewing several episodes of this series, I have come to the conclusion that television producers are completely devoid of any form of originality. Here is an old science fiction standby, ingeniously wrapped in the form of a truly original concept - and still they can only -almost - make it work.<br /><br />The dialog is good! The male actors are reasonably proficient at their professions. Most of the characters are well drawn, with special kudos to the hero and his more than likeable side-kick. And most of the episode plots come across as palatable. So what could be wrong? How about the, the female characters and the cosmeticly perfect actresses who are chosen to portray them. <br /><br />The producers insist on portraying the female characters in this - almost good - series, in a manner that makes the end product appear to be a misplaced cheerleader. Why, I ask, why?<br /><br />The episodes all fall flat whenever the female guest star or recurring character comes on screen. These actresses are all totally unbelievable in their roles, and you don't actually have to see them to know they are incapable of their acting assignments. A blind person could tell. Just listen to them talk. They deliver their dialog with all the drama and effect of a 16 year old at the high school prom. Who would believe these women are Phd scientist, senators, corporate executives and medical doctors?<br /><br />In a nut shell, if the producers have their choice of a Stockard Channing or a Morgan Fairchild, guess who they'll choose - every time? And of course, the series suffers for it. Too bad!",positive
3,10388,7,"I'm a huge fan of the Dukes of Hazzard TV show. And I really enjoyed this flick. I enjoyed myself here a lot more than I did with other summer blockbusters.<br /><br />It's funny hearing people rail against this movie with excuses like ""lame plot"" and ""it's much cruder than the show."" Does ANYONE remember the crudeness of the humor in the pilot episode? Daisy makes incest jokes and Bo says that Luke had probably fathered half the kids in the orphanage. The only reason it was cleaned up is because it changed to and earlier time slot.<br /><br />And as far as the plot goes. It was the perfect Dukes plot. In fact as a remake it probably stays truer to the source material than any TV show that has migrated to the big screen.<br /><br />While Sean William Scott and Johnny Knoxville aren't EXACTLY like their small screen versions, they do a great job and work very well together. I wasn't too keen on Burt's Boss Hogg though. And I would have like a little bit more incompetence from Sheriff Roscoe. In the movie Roscoe is a little... scary.<br /><br />And who didn't have a smile on their face as the General Lee is racing through the streets of Atlanta and the back roads of Hazzard?<br /><br />Folks, allow yourself to enjoy a movie that is just an excuse for nostalgia, bikinis and car chases, you won't be sorry. It's just a great dumb movie!",positive
4,2173,10,"It is often only after years pass that we can look back and see those stars who are truly stars. As that French film critic, whose name escapes me, said: ""There is no Garbo. There is no Dietrich. There is only Louise Brooks""; and there is, thank heavens! Louise Brooks! This is the third of her European masterpieces. But it is also an exceptional film for being one, if not the, first French talkie, for following a script written by famed René Clair, for reportedly being finished (the direction, that is) by Georg Pabst, and for incorporating the voice of Edith Piaf before she was well known! So much talent working on and in a film, how couldn't it turn out to be a masterpiece?! And that's what this film is. It's a shame Louise Brooks was blackballed by Hollywood when she came back to the States--so much talent cast so arrogantly by the wayside! In the film, in addition to getting to watch Louise Brooks in action, it's great to see pictures of Paris ca. 1930 and to hear Piaf's young voice. I never get tired of this film!",positive
5,4241,10,"I loved this movie!!! The characters were people that you could feel for. The young man back from the service still in love with the girl he left behind. Tom Drake is always perfect in the romantic lead as well as Donna Reed as the love of his life. The looks he gives her as if he has been starved for the sight of her as well as her hesitation and confusion as too her feelings for him were played very well. The rest of the quirky characters at the store were perfect as they tried to bring them together. The most touching scene however, was the young couple at his great grandfather's house. I laughed in parts, cried in some and thoroughly enjoyed watching this movie. In fact I've re-watched it about 5 times. A definite must see for total romantics.",positive
6,901,8,"Well... Vivah is quite a good, but typical Sooraj Barjatya's movie. It shows different aspects of Indian families, wedding ceremonies, etc. The movie is more than good, but not extremely good.<br /><br />The big plus point of the movie is direction, music and Shahid and Amrita's acting (especially the last scene). The movie starts with a common factor of Indian movie--- A girl being disliked by her step-mother. Story is the movie is a bit common, but it is good, good enough to make the movie a blockbuster.<br /><br />Minus point is common story of Bollywood movie. Overall I would say it is two times must watch movie, if you want to see typical Indian family values and of-course love!",positive
7,10980,7,"I saw this film a couple of weeks ago, and it's been stuck in my head ever since. It stars two spellbinding characters in what is unfortunately a mediocre documentary. To get the true story of the Beales, I had to wade through all of the DVD's bonus material and commentaries and search the web.<br /><br />Although the Maysles and their fans (not to mention Edith and Edie themselves) bristle at the suggestion that this film is exploitative, this is exploitation in the truest sense of the word. Very little effort is every made to explain the Beales or how they came to the condition they were in - the Maysles approach seems to be to just turn the camera on and wait for Edith and Edie to say something outrageous. The sound, even on the Criterion re-release is poor and difficult to follow. Although I appreciate this film was made somewhat early in the history of documentary film, it's ironic to compare it to Geraldo Rivera's (!) far superior series on the sexual abuse of mentally retarded patients at Willowbrook State School in Staten Island from 1972, four years before Grey Gardens was shot.<br /><br />To paraphrase a review in the New Yorker, there were many things Edith and Edie needed in their lives, and a documentary wasn't one of them.<br /><br />As for Edith and Edie, the thing I kept thinking while watching the film was ""where the hell is their family""? They were living in dangerous, unhealthy, unsafe conditions. How is it that Jackie O, married to one of the richest men on Earth (or the wealthy Bouvier family themselves) couldn't afford to get Edith and Edie a decent home? Or at the very least hire a part-time housekeeper or caregiver to come in and keep an eye on them both? It's shameful and a lasting disgrace to the entire Bouvier family.<br /><br />Although this review may sound negative I would strongly recommend Grey Gardens to anyone who enjoys documentaries. Perhaps someday someone will come along and do a documentary about this documentary - bringing in the rich backstory (and afterstory) of the Beales and the whole subsection of Hamptons society in the 1970's.",positive
8,5212,7,"STAR RATING: ***** Saturday Night **** Friday Night *** Friday Morning ** Sunday Night * Monday Morning <br /><br />A notably bad actor, getting by on his (now fading) looks rather than any strong dramatic talent, Richard Gere has always occupied a rather curious position in the American Hollywood scene, always a sure bet in leading man roles who still holds a notable presence today. But nowadays he seems to have settled more into these sort of direct to DVD/limited release roles and as such maybe seems to be more settled in his forte now.<br /><br />He has to draw on some stern matter here as hardened, cynical case worker Earl Babbage, one such worker assigned to a few hundred sex offenders in one area of the US, who along with his new protégé Allison Allthrop (Claire Danes) must take to his latest case, delving into the abduction of a young woman while trying to forgive himself for a case he failed on ages ago.<br /><br />This is a certain dive into the darker side of humanity, treading on material definitely not for the squeamish or those looking for light viewing. And as such it's a pretty strong, compelling film, unflinching and not constrained by it's direct to DVD budget. The only thing really pulling it back is the overly used jittery, fast cutting camera sequences used in the more dramatic moments that look a bit corny after a while. But it's still some of the solidest material I've seen Gere in, relentlessly getting darker and more over the edge as it goes on. ***",positive
9,4277,10,"""Ko to tamo peva"" is one of the best films I ever saw. A tragicomedy with very deep implications on the fate of humankind shown through the eyes of seemingly very plain and common people from a God-forsaken Serbian province just before the start of the World War II. I saw it in a small movie theater in Russia where the film had had a very limited distribution, and I had no chance to come across it ever since. It is such a pity that this excellent film is almost forgotten now. I searched for a VHS or DVD copy of it many times, and alas - could find none. I would be most grateful to other fans of this little gem of movie-making for a suggestion of the ways to purchase a copy.",positive


In [13]:
# input review text to generate sentiment analysis using generate_response function
df_reviews['Sentiment'] = df_reviews.apply(lambda x: generate_response(f"Analyze the sentiment of the following review and return only positive or negative string: \n\n {x['review']}"), axis=1)

In [14]:
df_reviews

Unnamed: 0,id,rating,review,label,Sentiment
0,8650,10,Kalifornia is the story of a writer and his girlfriend photographer who are looking for someone to help pay gas money and take turns at the wheel for a cross country road trip to famous murder sights. Ironically a serial killer and his girlfriend answer the post. Kalifornia is a diamond in the rough and a very intriguing journey with a serial killer. Great performances all around by the leads with Pitt in particular being exceptional. Check it out!!,positive,positive \n
1,9723,9,"I really, really enjoyed watching this movie! At first, seeing its poster I thought it was just another easy romantic comedy ... but it is simply more than this! I personally believe that this idea (that I'm sure a good part of the viewers had just before they saw the movie) it's yet another important part of the big concept of this movie itself (or even of its marketing strategy)! What I mean is: Nowadays we are slaves to images! To impressions! I went to the cinema to view this film having the wrong impression, the wrong expectations, and at the end I felt how superficial I could be! To exemplify it comes to my mind the sequence near the end in which Sidney buys the plane ticket to go back to New York and as he is asked to 'give an autograph', meaning to sign for the ticket, he believes that just because he got on TV thanks to the scandal at the awards he is now some kind of celebrity. And this is just, I believe, the climax of this main theme around which the movies revolves. Above this, I believe the movie also offers us a solution to get along with this, illustrated throughout the movie by Sidney's attitude: don't become too serious about yourself or about anybody else ... ""even saints were people in the beginning"" ... as Sophie once says in the movie. The saints of the moment are the stars. We attribute them an 'aura' of perfection, of eternal happiness, but the reality is much less than that. Even the saints of any religion are images, ideal models of how to behave and how to live your life. Even they were not for real ... they became 'for real' after they died and we looked back at them. And that's the catch: we need our saints! we need our stars! We strive for them as if it wasn't for them we wouldn't have anything to strive for. And television and all other media are means to create and capture our strivings. We desperately need benchmarks in regard to which to measure ourselves. And that's how we got in the cinema to watch this movie in the first place: to see if we can fit the benchmark, or if the benchmark is to small for us. This time it was larger than we expected.",positive,positive \n
2,6588,9,"After viewing several episodes of this series, I have come to the conclusion that television producers are completely devoid of any form of originality. Here is an old science fiction standby, ingeniously wrapped in the form of a truly original concept - and still they can only -almost - make it work.<br /><br />The dialog is good! The male actors are reasonably proficient at their professions. Most of the characters are well drawn, with special kudos to the hero and his more than likeable side-kick. And most of the episode plots come across as palatable. So what could be wrong? How about the, the female characters and the cosmeticly perfect actresses who are chosen to portray them. <br /><br />The producers insist on portraying the female characters in this - almost good - series, in a manner that makes the end product appear to be a misplaced cheerleader. Why, I ask, why?<br /><br />The episodes all fall flat whenever the female guest star or recurring character comes on screen. These actresses are all totally unbelievable in their roles, and you don't actually have to see them to know they are incapable of their acting assignments. A blind person could tell. Just listen to them talk. They deliver their dialog with all the drama and effect of a 16 year old at the high school prom. Who would believe these women are Phd scientist, senators, corporate executives and medical doctors?<br /><br />In a nut shell, if the producers have their choice of a Stockard Channing or a Morgan Fairchild, guess who they'll choose - every time? And of course, the series suffers for it. Too bad!",positive,negative \n
3,10388,7,"I'm a huge fan of the Dukes of Hazzard TV show. And I really enjoyed this flick. I enjoyed myself here a lot more than I did with other summer blockbusters.<br /><br />It's funny hearing people rail against this movie with excuses like ""lame plot"" and ""it's much cruder than the show."" Does ANYONE remember the crudeness of the humor in the pilot episode? Daisy makes incest jokes and Bo says that Luke had probably fathered half the kids in the orphanage. The only reason it was cleaned up is because it changed to and earlier time slot.<br /><br />And as far as the plot goes. It was the perfect Dukes plot. In fact as a remake it probably stays truer to the source material than any TV show that has migrated to the big screen.<br /><br />While Sean William Scott and Johnny Knoxville aren't EXACTLY like their small screen versions, they do a great job and work very well together. I wasn't too keen on Burt's Boss Hogg though. And I would have like a little bit more incompetence from Sheriff Roscoe. In the movie Roscoe is a little... scary.<br /><br />And who didn't have a smile on their face as the General Lee is racing through the streets of Atlanta and the back roads of Hazzard?<br /><br />Folks, allow yourself to enjoy a movie that is just an excuse for nostalgia, bikinis and car chases, you won't be sorry. It's just a great dumb movie!",positive,positive \n
4,2173,10,"It is often only after years pass that we can look back and see those stars who are truly stars. As that French film critic, whose name escapes me, said: ""There is no Garbo. There is no Dietrich. There is only Louise Brooks""; and there is, thank heavens! Louise Brooks! This is the third of her European masterpieces. But it is also an exceptional film for being one, if not the, first French talkie, for following a script written by famed René Clair, for reportedly being finished (the direction, that is) by Georg Pabst, and for incorporating the voice of Edith Piaf before she was well known! So much talent working on and in a film, how couldn't it turn out to be a masterpiece?! And that's what this film is. It's a shame Louise Brooks was blackballed by Hollywood when she came back to the States--so much talent cast so arrogantly by the wayside! In the film, in addition to getting to watch Louise Brooks in action, it's great to see pictures of Paris ca. 1930 and to hear Piaf's young voice. I never get tired of this film!",positive,positive \n
5,4241,10,"I loved this movie!!! The characters were people that you could feel for. The young man back from the service still in love with the girl he left behind. Tom Drake is always perfect in the romantic lead as well as Donna Reed as the love of his life. The looks he gives her as if he has been starved for the sight of her as well as her hesitation and confusion as too her feelings for him were played very well. The rest of the quirky characters at the store were perfect as they tried to bring them together. The most touching scene however, was the young couple at his great grandfather's house. I laughed in parts, cried in some and thoroughly enjoyed watching this movie. In fact I've re-watched it about 5 times. A definite must see for total romantics.",positive,positive \n
6,901,8,"Well... Vivah is quite a good, but typical Sooraj Barjatya's movie. It shows different aspects of Indian families, wedding ceremonies, etc. The movie is more than good, but not extremely good.<br /><br />The big plus point of the movie is direction, music and Shahid and Amrita's acting (especially the last scene). The movie starts with a common factor of Indian movie--- A girl being disliked by her step-mother. Story is the movie is a bit common, but it is good, good enough to make the movie a blockbuster.<br /><br />Minus point is common story of Bollywood movie. Overall I would say it is two times must watch movie, if you want to see typical Indian family values and of-course love!",positive,positive \n
7,10980,7,"I saw this film a couple of weeks ago, and it's been stuck in my head ever since. It stars two spellbinding characters in what is unfortunately a mediocre documentary. To get the true story of the Beales, I had to wade through all of the DVD's bonus material and commentaries and search the web.<br /><br />Although the Maysles and their fans (not to mention Edith and Edie themselves) bristle at the suggestion that this film is exploitative, this is exploitation in the truest sense of the word. Very little effort is every made to explain the Beales or how they came to the condition they were in - the Maysles approach seems to be to just turn the camera on and wait for Edith and Edie to say something outrageous. The sound, even on the Criterion re-release is poor and difficult to follow. Although I appreciate this film was made somewhat early in the history of documentary film, it's ironic to compare it to Geraldo Rivera's (!) far superior series on the sexual abuse of mentally retarded patients at Willowbrook State School in Staten Island from 1972, four years before Grey Gardens was shot.<br /><br />To paraphrase a review in the New Yorker, there were many things Edith and Edie needed in their lives, and a documentary wasn't one of them.<br /><br />As for Edith and Edie, the thing I kept thinking while watching the film was ""where the hell is their family""? They were living in dangerous, unhealthy, unsafe conditions. How is it that Jackie O, married to one of the richest men on Earth (or the wealthy Bouvier family themselves) couldn't afford to get Edith and Edie a decent home? Or at the very least hire a part-time housekeeper or caregiver to come in and keep an eye on them both? It's shameful and a lasting disgrace to the entire Bouvier family.<br /><br />Although this review may sound negative I would strongly recommend Grey Gardens to anyone who enjoys documentaries. Perhaps someday someone will come along and do a documentary about this documentary - bringing in the rich backstory (and afterstory) of the Beales and the whole subsection of Hamptons society in the 1970's.",positive,Negative \n
8,5212,7,"STAR RATING: ***** Saturday Night **** Friday Night *** Friday Morning ** Sunday Night * Monday Morning <br /><br />A notably bad actor, getting by on his (now fading) looks rather than any strong dramatic talent, Richard Gere has always occupied a rather curious position in the American Hollywood scene, always a sure bet in leading man roles who still holds a notable presence today. But nowadays he seems to have settled more into these sort of direct to DVD/limited release roles and as such maybe seems to be more settled in his forte now.<br /><br />He has to draw on some stern matter here as hardened, cynical case worker Earl Babbage, one such worker assigned to a few hundred sex offenders in one area of the US, who along with his new protégé Allison Allthrop (Claire Danes) must take to his latest case, delving into the abduction of a young woman while trying to forgive himself for a case he failed on ages ago.<br /><br />This is a certain dive into the darker side of humanity, treading on material definitely not for the squeamish or those looking for light viewing. And as such it's a pretty strong, compelling film, unflinching and not constrained by it's direct to DVD budget. The only thing really pulling it back is the overly used jittery, fast cutting camera sequences used in the more dramatic moments that look a bit corny after a while. But it's still some of the solidest material I've seen Gere in, relentlessly getting darker and more over the edge as it goes on. ***",positive,positive \n
9,4277,10,"""Ko to tamo peva"" is one of the best films I ever saw. A tragicomedy with very deep implications on the fate of humankind shown through the eyes of seemingly very plain and common people from a God-forsaken Serbian province just before the start of the World War II. I saw it in a small movie theater in Russia where the film had had a very limited distribution, and I had no chance to come across it ever since. It is such a pity that this excellent film is almost forgotten now. I searched for a VHS or DVD copy of it many times, and alas - could find none. I would be most grateful to other fans of this little gem of movie-making for a suggestion of the ways to purchase a copy.",positive,positive \n


In [15]:
from crawl4ai import WebCrawler
crawler = WebCrawler()
crawler.warmup()
result = crawler.run(url="https://cdn.fnguide.com/SVO2/ASP/SVD_main.asp?pGB=1&gicode=A005930&cID=&MenuYn=Y&ReportGB=&NewMenuID=11&stkGb=&strResearchYN=")
print(result.markdown)

[LOG] 🚀 Initializing LocalSeleniumCrawlerStrategy
[LOG] 🌤️  Warming up the WebCrawler
[LOG] 🌞 WebCrawler is ready to crawl
[LOG] 🚀 Crawling done for https://cdn.fnguide.com/SVO2/ASP/SVD_main.asp?pGB=1&gicode=A005930&cID=&MenuYn=Y&ReportGB=&NewMenuID=11&stkGb=&strResearchYN=, success: True, time taken: 0.4092729091644287 seconds
[LOG] 🚀 Content extracted for https://cdn.fnguide.com/SVO2/ASP/SVD_main.asp?pGB=1&gicode=A005930&cID=&MenuYn=Y&ReportGB=&NewMenuID=11&stkGb=&strResearchYN=, success: True, time taken: 0.1468198299407959 seconds
[LOG] 🔥 Extracting semantic blocks for https://cdn.fnguide.com/SVO2/ASP/SVD_main.asp?pGB=1&gicode=A005930&cID=&MenuYn=Y&ReportGB=&NewMenuID=11&stkGb=&strResearchYN=, Strategy: NoExtractionStrategy
[LOG] 🚀 Extraction done for https://cdn.fnguide.com/SVO2/ASP/SVD_main.asp?pGB=1&gicode=A005930&cID=&MenuYn=Y&ReportGB=&NewMenuID=11&stkGb=&strResearchYN=, time taken: 0.14783191680908203 seconds.
본문 바로가기 메뉴 바로가기

# 상장기업분석

리포트 조회 통합 모바일앱 리타민 출시리포트 조회 통합 모바일앱 리타민

In [None]:
from crawl4ai import WebCrawler
from crawl4ai.extraction_strategy import LLMExtractionStrategy
from pydantic import BaseModel, Field

def get_crawler():
    crawler = WebCrawler(verbose=True)
    crawler.warmup()
    return crawler

class PageSummary(BaseModel):
    title: str = Field(..., description="Title of the page.")
    summary: str = Field(..., description="Summary of the page.")
    brief_summary: str = Field(..., description="Brief summary of the page.")
    keywords: list = Field(..., description="Keywords assigned to the page.")

def summary_extraction(url, model_name='openai/gpt-4o'):
    result = get_crawler().run(
        url=url,
        word_count_threshold=1,
        extraction_strategy=LLMExtractionStrategy(
            provider= model_name, 
            api_token = os.getenv('OPENAI_API_KEY'), 
            schema=PageSummary.model_json_schema(),
            extraction_type="schema",
            apply_chunking=False,
            instruction=(
                "From the crawled content, extract the following details: "
                "1. Title of the page "
                "2. Summary of the page, which is a detailed summary "
                "3. Brief summary of the page, which is a paragraph text "
                "4. Keywords assigned to the page, which is a list of keywords. "
                'The extracted JSON format should look like this: '
                '{ "title": "Page Title", "summary": "Detailed summary of the page.", '
                '"brief_summary": "Brief summary in a paragraph.", "keywords": ["keyword1", "keyword2", "keyword3"] }'
            )
        ),
        bypass_cache=True,
    )
    result = result.extracted_content.encode('utf-8', errors='ignore').decode("unicode_escape")
    result_json = json.loads(result)
    return result_json