In [1]:
from pattern.web import Wikipedia, plaintext
from pattern.en import sentiment

# Fetch the text of the Wikipedia article
article = Wikipedia().article('Artificial Intelligence')
text = plaintext(article.string)

# Analyze the sentiment of the text
sentiment_score = sentiment(text)

print('Sentiment score:', sentiment_score)

Sentiment score: (0.0413736939997445, 0.4914729770696167)


In [2]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from pattern.web import Wikipedia, plaintext

# Create a list of (title, sentiment) tuples
# data = [
#     ("Healthcare", "positive"),
#     ("Warfare", "negative"),
#     ("Ethical Concerns", "negative"),
#     ("Improve Life", "positive"),
#     # Add more articles here...
# ]

data = [
    ("Artificial Intelligence", "positive"),
    ("Machine Learning", "positive"),
    ("Deep Learning", "positive"),
    ("Computer Science", "negative"),
    # Add more articles here...
]

# Fetch the text of the Wikipedia articles and associate it with the sentiment
data = [(plaintext(Wikipedia().article(title).string), sentiment) for title, sentiment in data]

# Split the data into the texts and the sentiments
texts, sentiments = zip(*data)

# Create a pipeline that converts the texts to a bag-of-words representation and then applies a Naive Bayes classifier
model = make_pipeline(CountVectorizer(), MultinomialNB())

# Train the model on the data
model.fit(texts, sentiments)

# Classify a new article
new_article = plaintext(Wikipedia().article('Artificial Intelligence').string)
print(model.predict([new_article]))

['positive']


In [7]:
from pattern.web import Crawler, DEPTH
from pattern.en import sentiment

class PoliticoCrawler(Crawler):
    def visit(self, link, source=None):
        print("visiting: %s from: %s" % (link.url, link.referrer))
        if source:  # If the source is HTML
            s = sentiment(source)
            print("sentiment: %s" % str(s))

    def fail(self, link):
        print("failed: %s" % link.url)

# Create a new crawler.
crawler = PoliticoCrawler(links=["https://www.bbc.com/"], domains=["bbc.com"], delay=1)

while len(crawler.visited) < 100:  # Crawler.visited is a dictionary of all URL's visited so far.
    crawler.crawl(cached=True, throttle=5)

visiting: https://www.bbc.com/ from: 
sentiment: (0.10392193982190648, 0.42923732772108264)
visiting: https://www.bbc.com/news from: https://www.bbc.com/
sentiment: (0.08329913676238326, 0.37323594321864184)
visiting: https://www.bbc.com/sport from: https://www.bbc.com/
sentiment: (0.1162070755820756, 0.3307037557037555)
visiting: https://www.bbc.com/business from: https://www.bbc.com/
sentiment: (0.05951350087637523, 0.42287673821007177)
visiting: https://www.bbc.com/innovation from: https://www.bbc.com/
sentiment: (0.06766936896672382, 0.4077020297709952)
visiting: https://www.bbc.com/culture from: https://www.bbc.com/
sentiment: (0.14561828211386496, 0.38211232627022096)
visiting: https://www.bbc.com/travel from: https://www.bbc.com/
sentiment: (0.14852557163509694, 0.4231848931013276)
visiting: https://www.bbc.com/future-planet from: https://www.bbc.com/
sentiment: (0.07119074859989344, 0.4354118104118101)
visiting: https://www.bbc.com/video from: https://www.bbc.com/
sentiment: (0

In [12]:
from pattern.web import Wikipedia, plaintext
from pattern.vector import Document, Model, KNN

# Create a model and a k-nearest neighbors classifier.
model = Model()
knn = KNN()

# Fetch the content of the Wikipedia pages of the 3 major religions.
wikipedia = Wikipedia(language="en")
religions = ["Christianity", "Islam", "Hinduism", "Secularism"]
for religion in religions:
    print(f"Fetching content for {religion}...")
    article = wikipedia.search(religion)
    if article:
        text = plaintext(article.string)
        doc = Document(text, type=religion)
        model.append(doc)

# Train the classifier on the documents in the model.
for document in model:
    knn.train(document)

# Classify a new document.
new_doc = Document('Holy spirit')
print(knn.classify(new_doc))

Fetching content for Christianity...
Fetching content for Islam...
Fetching content for Hinduism...
Fetching content for Secularism...
Christianity


In [13]:
from pattern.web import Crawler, DEPTH, plaintext
from pattern.vector import Document

class BBC_Crawler(Crawler):
    def visit(self, link, source=None):
        print("visiting: %s from: %s" % (link.url, link.referrer))
        if source:  # If the source is HTML
            text = plaintext(source)
            doc = Document(text)
            classification = knn.classify(doc)
            print(f"This page is classified as: {classification}")

    def fail(self, link):
        print("failed: %s" % link.url)

# Create a new crawler.
crawler = BBC_Crawler(links=["https://www.bbc.com/"], domains=["bbc.com"], delay=1)

while len(crawler.visited) < 100:  # Crawler.visited is a dictionary of all URL's visited so far.
    crawler.crawl(cached=True, throttle=5)

visiting: https://www.bbc.com/ from: 
This page is classified as: Christianity
visiting: https://cloud.email.bbc.com/US_Election_Unspun_newsletter_signup?&at_bbc_team=studios&at_medium=display&at_objective=acquisition&at_ptr_type=&at_ptr_name=bbc.comhp&at_format=Module&at_link_origin=homepage&at_campaign=uselectionunspun&at_campaign_type=owned & from: https://www.bbc.com/
This page is classified as: Christianity
visiting: https://www.bbc.com/news from: https://www.bbc.com/
This page is classified as: Christianity
visiting: https://cloud.email.bbc.com/InHistory_newsletter_signup?&at_bbc_team=studios&at_medium=display&at_objective=acquisition&at_ptr_type=&at_ptr_name=bbc.comhp&at_format=Module&at_link_origin=homepage&at_campaign=inhistory&at_campaign_type=owned& from: https://www.bbc.com/
This page is classified as: Islam
visiting: https://www.bbc.com/sport from: https://www.bbc.com/
This page is classified as: Christianity
visiting: https://shop.bbc.com/ from: https://www.bbc.com/
This 

KeyboardInterrupt: 

In [2]:
from pattern.web import Google, URL, plaintext, Crawler, DEPTH
from pattern.vector import Document, Model, KNN
from config import API_KEY
from urllib.parse import urlparse

class CustomCrawler(Crawler):
    def visit(self, link, source=None):
        print(f"Visiting: {link.url} from: {link.referrer}")
        if source:  # If the source is HTML
            text = plaintext(source)
            doc = Document(text)
            model.append(doc)
            knn.train(doc)

    def fail(self, link):
        print(f"Failed: {link.url}")

# Create a model and a k-nearest neighbors classifier.
model = Model()
knn = KNN()

# Search the web for articles related to the 3 major religions.
google = Google(license=API_KEY)  # Your license key here.
religions = ["Christian", "Islamic", "Personal"]
for religion in religions:
    search_term = f"{religion} banking services"
    print(f"Searching for {search_term}...")
    for i, result in enumerate(google.search(search_term)):
        if i >= 10:  # Limit to 10 results per religion.
            break
        # Create a new crawler for each search result.
        crawler = CustomCrawler(links=[result.url], delay=1)
        # Crawl the first 10 links of each search result.
        for _ in range(10):
            crawler.crawl(cached=True, throttle=5)

# Classify a new document.
new_doc = Document('Holy spirit')
print(knn.classify(new_doc))

Searching for Christian banking services...
Visiting: https://www.adelfibanking.com/ from: 
Visiting: https://www.adelfibanking.com/#content-anchor from: https://www.adelfibanking.com/
Visiting: https://www.adelfibanking.com/personal/resources/security-and-fraud-prevention from: https://www.adelfibanking.com/
Visiting: https://www.adelfibanking.com/logout from: https://www.adelfibanking.com/
Visiting: https://www.adelfibanking.com/askbetter from: https://www.adelfibanking.com/
Visiting: https://www.adelfibanking.com/personal/loans-credit-card/auto-loans from: https://www.adelfibanking.com/
Visiting: https://www.adelfibanking.com/personal/checking-savings/checking from: https://www.adelfibanking.com/
Visiting: https://www.adelfibanking.com/about-us from: https://www.adelfibanking.com/
Visiting: https://www.adelfibanking.com/atms-locations from: https://www.adelfibanking.com/
Visiting: https://www.adelfibanking.com/membership-impact from: https://www.adelfibanking.com/
Visiting: https://

KeyboardInterrupt: 