<a href="https://colab.research.google.com/github/sumyuck/ML-learning/blob/main/nlc/NLC_p_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import re

# Example text
text = "Contact us at support@example.com or call us at 123-456-7890."

# Pattern matching: find all email addresses in the text.
# The pattern spells out the characters allowed on each side of the "@"
# instead of using `\S+@\S+`, which would also swallow punctuation glued to
# the address (e.g. the final "." in "write to me@example.com.").
# The group is non-capturing so re.findall returns the whole address.
email_pattern = r'[\w.+-]+@[\w-]+(?:\.[\w-]+)+'
emails = re.findall(email_pattern, text)
print("Found email addresses:", emails)

# Text extraction: extract the first phone number of the form ddd-ddd-dddd.
# The \b anchors keep the pattern from matching inside a longer run of digits.
phone_pattern = r'\b\d{3}-\d{3}-\d{4}\b'
phone_number = re.search(phone_pattern, text)
if phone_number:
  print("Found phone number:", phone_number.group(0))
else:
  print("Phone number not found.")

Found email addresses: ['support@example.com']
Found phone number: 123-456-7890


In [None]:
import nltk

from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.tokenize import sent_tokenize, word_tokenize

# Fetch the NLTK resources this cell relies on, one after another
# (nltk.download reports when a package is already up to date).
for resource in ('punkt', 'wordnet', 'averaged_perceptron_tagger', 'punkt_tab'):
    nltk.download(resource)

text = "NLTK is a powerful library for natural language processing. It helps with tokenization, stemming, and lemmatization."

# Tokenization: split the text into sentences and into word-level tokens
sentences = sent_tokenize(text)
words = word_tokenize(text)

print("Word tokens:", words)
print("Sentence tokens:", sentences)

# Stemming: run every token through the Porter stemmer
stemmer = PorterStemmer()
stemmed_words = list(map(stemmer.stem, words))
print("Stemmed words:", stemmed_words)

# Lemmatization: lemmatize() is called with the word only, so no
# part-of-speech information is supplied for any token
lemmatizer = WordNetLemmatizer()
lemmatized_words = list(map(lemmatizer.lemmatize, words))
print("Lemmatized words:", lemmatized_words)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


Word tokens: ['NLTK', 'is', 'a', 'powerful', 'library', 'for', 'natural', 'language', 'processing', '.', 'It', 'helps', 'with', 'tokenization', ',', 'stemming', ',', 'and', 'lemmatization', '.']
Sentence tokens: ['NLTK is a powerful library for natural language processing.', 'It helps with tokenization, stemming, and lemmatization.']
Stemmed words: ['nltk', 'is', 'a', 'power', 'librari', 'for', 'natur', 'languag', 'process', '.', 'it', 'help', 'with', 'token', ',', 'stem', ',', 'and', 'lemmat', '.']
Lemmatized words: ['NLTK', 'is', 'a', 'powerful', 'library', 'for', 'natural', 'language', 'processing', '.', 'It', 'help', 'with', 'tokenization', ',', 'stemming', ',', 'and', 'lemmatization', '.']


In [None]:
import spacy

# Load spaCy's "en_core_web_sm" pipeline
nlp = spacy.load("en_core_web_sm")

text = "Apple is looking at buying U.K. startup for $1 billion."

# Run the pipeline over the text to obtain an annotated document
doc = nlp(text)

# Named Entity Recognition (NER): one line per entity, "<text> (<label>)"
print("Named Entities:")
for ent in doc.ents:
    entity_text, entity_label = ent.text, ent.label_
    print(f"{entity_text} ({entity_label})")

# Dependency parsing: one line per token, formatted as
# "<text> (<part of speech>) - <dependency relation> -> <head token text>"
print("\nDependency Parsing:")
for token in doc:
    head_text = token.head.text
    print(f"{token.text} ({token.pos_}) - {token.dep_} -> {head_text}")

Named Entities:
Apple (ORG)
U.K. (GPE)
$1 billion (MONEY)

Dependency Parsing:
Apple (PROPN) - nsubj -> looking
is (AUX) - aux -> looking
looking (VERB) - ROOT -> looking
at (ADP) - prep -> looking
buying (VERB) - pcomp -> at
U.K. (PROPN) - nsubj -> startup
startup (VERB) - ccomp -> buying
for (ADP) - prep -> startup
$ (SYM) - quantmod -> billion
1 (NUM) - compound -> billion
billion (NUM) - pobj -> for
. (PUNCT) - punct -> looking


In [None]:
from textblob import TextBlob
from googletrans import Translator

text = "TextBlob is a great library for sentiment analysis and translation."

# Sentiment analysis: TextBlob exposes the result as `.sentiment`
blob = TextBlob(text)
sentiment = blob.sentiment
print(f"Sentiment of the text: {sentiment}")

# Translation (English -> Spanish) via googletrans.
# Translate `text` from this cell: the name `text_to_translate` is not defined
# anywhere above, so on a fresh kernel (Restart & Run All) it raises NameError.
# NOTE(review): some newer googletrans releases make translate() a coroutine;
# confirm the installed version returns the result synchronously.
translator = Translator()
translated_text = translator.translate(text, dest='es')
# Print only the translated string (`.text`), not the whole result object.
print(f"Translated text (Spanish): {translated_text.text}")

Sentiment of the text: Sentiment(polarity=0.8, subjectivity=0.75)
Translated text (Spanish): Translated(src=en, dest=es, text=¿Hola, cómo estás?, pronunciation=None, extra_data="{'confiden...")
