In [None]:
import nltk;
import string;
from nltk.corpus import stopwords;
from nltk.tokenize import word_tokenize, sent_tokenize;
from nltk.probability import FreqDist;
# Fetch the NLTK data this cell needs. Recent NLTK releases load the Punkt
# tokenizer models from the 'punkt_tab' package, so requesting only 'punkt'
# leaves word_tokenize/sent_tokenize raising LookupError on a fresh install.
# Both are requested so the cell runs regardless of the installed release.
for resource in ('punkt', 'punkt_tab', 'stopwords'):
    nltk.download(resource)

paragraph = """Technology has transformed the way we live, work, and communicate. 
From smartphones to artificial intelligence, innovation continues to shape our future. 
The internet connects people across the globe in an instant. 
Automation and machine learning are changing industries and creating new opportunities. 
While technology can sometimes feel overwhelming, it offers incredible tools for solving real-world problems."""

# Normalize: lowercase first, then strip every ASCII punctuation character.
text_lower = paragraph.lower()
text_clean = text_lower.translate(str.maketrans('', '', string.punctuation))
print("Text in lowercase with removed punctuation is\n", text_clean)

# Word tokens come from the cleaned text; sentence tokens need the original
# paragraph because sentence boundaries depend on the punctuation.
words = word_tokenize(text_clean)
print("\nWord tokenization\n", words)
sentences = sent_tokenize(paragraph)
print("\nSentence tokenization\n", sentences)

# Compare NLTK's tokenizer with plain whitespace splitting on the lowercased
# text that still contains punctuation.
word = word_tokenize(text_lower)
print("\nWord tokenization\n", word)
split = text_lower.split()
print("\n Split using python function\n", split)

# Drop English stop words from the punctuation-free tokens.
stop_words = set(stopwords.words('english'))
filtered_words = [token for token in words if token not in stop_words]
print("\nText after removing stop words is\n", filtered_words)

# Count how often each remaining token occurs.
freq_dist = FreqDist(filtered_words)
print("\nWord Frequency Counts\n")
# The loop variable is deliberately not called `word`: that name holds the
# token list built above and must not be rebound to a single string.
for token, freq in freq_dist.items():
    print(f"{token}: {freq}")

In [None]:
import nltk;
import re;
from nltk.corpus import stopwords;
from nltk.tokenize import word_tokenize;
from nltk.stem import PorterStemmer;
from nltk.stem import WordNetLemmatizer;
# Download the NLTK data packages requested by this cell, in the same order.
for package in ('punkt', 'stopwords', 'wordnet'):
    nltk.download(package)

paragraph = """Technology is evolving faster than ever before. 
Smart devices are becoming essential parts of our daily lives. 
Artificial Intelligence helps businesses automate tasks and make smarter decisions. 
Virtual Reality is changing the way we experience games, movies, and education. 
Even healthcare is being transformed through wearable tech and data-driven diagnostics."""

# Keep only runs of ASCII letters from the lowercased paragraph.
alpha_pattern = re.compile(r'\b[a-zA-Z]+\b')
words_alpha = alpha_pattern.findall(paragraph.lower())
print("Words with only alphabets (lowercase):\n", words_alpha)

# Remove English stop words.
stop_words = set(stopwords.words('english'))
filtered_words = list(filter(lambda token: token not in stop_words, words_alpha))
print("\nFiltered Words (after removing stopwords):\n", filtered_words)

# Stem every remaining word with the Porter algorithm.
porter_stemmer = PorterStemmer()
stemmed_words = list(map(porter_stemmer.stem, filtered_words))
print("\nStemmed Words (using PorterStemmer):\n", stemmed_words)

# Lemmatize the same filtered words with WordNet (default part of speech).
lemmatizer = WordNetLemmatizer()
lemmatized_words = list(map(lemmatizer.lemmatize, filtered_words))
print("\nLemmatized Words (using WordNetLemmatizer):\n", lemmatized_words)

In [None]:
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer;
import numpy as np;

# Three short documents used for both vectorizers below.
texts = [
    "New tech innovations change the world every day.",
    "The best smartphone of 2025 is here, offering amazing features.",
    "Breaking news: Natural disaster causes widespread damage across the region."
]

# Bag-of-words: raw term counts per document.
vectorizer = CountVectorizer()
X_count = vectorizer.fit_transform(texts)
count_feature_names = vectorizer.get_feature_names_out()
print("Bag of Words Representation:")
print(X_count.toarray())
print("Feature Names (Words in BoW):", count_feature_names)

# TF-IDF weights for the same documents.
tfidf_vectorizer = TfidfVectorizer()
X_tfidf = tfidf_vectorizer.fit_transform(texts)
print("TF-IDF Scores: ")
print(X_tfidf.toarray())
tfidf_feature_names = tfidf_vectorizer.get_feature_names_out()
print("Feature Names (Words in TF-IDF): ", tfidf_feature_names)

# For every document, report the three highest-scoring terms.
print("\nTop 3 Keywords (TF-IDF Scores) for each Text:")
for i, text in enumerate(texts):
    print(f"\nText {i + 1}: {text}")
    tfidf_scores = X_tfidf[i].toarray().flatten()
    # Ascending argsort, reversed, then truncated: indices of the 3 largest scores.
    descending = tfidf_scores.argsort()[::-1]
    top_indices = descending[:3]
    top_keywords = []
    for idx in top_indices:
        top_keywords.append((tfidf_feature_names[idx], tfidf_scores[idx]))
    print("Top 3 Keywords (Word, TF-IDF Score):")
    for keyword, score in top_keywords:
        print(f"'{keyword}': {score:.4f}")


In [None]:
import nltk;
import re;
from sklearn.feature_extraction.text import TfidfVectorizer;
from sklearn.metrics.pairwise import cosine_similarity;
# nltk.word_tokenize (used by preprocess_text in this cell) loads the Punkt
# models from the 'punkt_tab' package on recent NLTK releases; requesting only
# 'punkt' leaves it raising LookupError on a fresh install. Request both so
# the cell runs regardless of the installed release.
for resource in ('punkt', 'punkt_tab'):
    nltk.download(resource)

# The two passages whose similarity is measured further down.
text_ai = """Artificial Intelligence (AI) is revolutionizing industries by automating tasks and improving decision-making. 
It uses machine learning algorithms to process large datasets and make predictions. AI applications range from self-driving cars to voice assistants, 
and it continues to evolve rapidly."""
text_blockchain = """Blockchain is a decentralized ledger technology that ensures secure transactions without intermediaries. 
It is most commonly known for being the backbone of cryptocurrencies like Bitcoin. Blockchain’s transparency and security make it a powerful tool for various industries, 
from finance to supply chains."""

def preprocess_text(text):
    """Lowercase ``text``, strip punctuation, and split it into tokens.

    Every character that is neither a word character nor whitespace is
    deleted from the lowercased text before it is handed to
    ``nltk.word_tokenize``.

    Args:
        text: The raw string to normalize.

    Returns:
        The token sequence produced by ``nltk.word_tokenize``.
    """
    without_punctuation = re.sub(r'[^\w\s]', '', text.lower())
    return nltk.word_tokenize(without_punctuation)
# Tokenize both passages up front, then show the results.
tokens_ai = preprocess_text(text_ai)
tokens_blockchain = preprocess_text(text_blockchain)

print("Tokens for AI:", tokens_ai, "\n")
print("Tokens for Blockchain:", tokens_blockchain)

def jaccard_similarity(tokens1, tokens2):
    """Return the Jaccard similarity of two token collections.

    Both inputs are reduced to sets, so duplicate tokens and ordering are
    ignored. The score is ``|A ∩ B| / |A ∪ B|``.

    Args:
        tokens1: Iterable of hashable tokens.
        tokens2: Iterable of hashable tokens.

    Returns:
        A float between 0.0 and 1.0. When both inputs are empty the union is
        empty as well; 0.0 is returned in that case instead of raising
        ``ZeroDivisionError``.
    """
    set1, set2 = set(tokens1), set(tokens2)
    union = set1 | set2
    if not union:
        # Nothing to compare: avoid dividing by zero.
        return 0.0
    return len(set1 & set2) / len(union)

# Set-overlap similarity on the preprocessed tokens.
jaccard_sim = jaccard_similarity(tokens_ai, tokens_blockchain)
print("\nJaccard Similarity:", jaccard_sim)

# TF-IDF cosine similarity on the raw passages.
vectorizer = TfidfVectorizer()
texts = [text_ai, text_blockchain]
tfidf_matrix = vectorizer.fit_transform(texts)
cosine_sim = cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])
# cosine_similarity returns a 2-D array (1x1 here); pull out the single score
# once so the comparison below is scalar-vs-scalar instead of relying on the
# truth value of a one-element array.
cosine_score = cosine_sim[0][0]
print("\nCosine Similarity:", cosine_score)

if jaccard_sim > cosine_score:
    print("\nJaccard Similarity gives better insights in this case.")
else:
    print("\nCosine Similarity gives better insights in this case.")


In [None]:
from textblob import TextBlob;
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer;
from wordcloud import WordCloud;
import matplotlib.pyplot as plt;

# Sample product review analysed by both sentiment tools below.
review = "I recently bought the XYZ smartphone, and I am really impressed with its performance! The battery lasts all day, and the camera quality is outstanding. The display is vibrant and smooth, making it perfect for gaming and media consumption. Although the price is a bit high, I think it's totally worth it for all the features it offers. Highly recommend it!"

# TextBlob: report polarity first, then subjectivity.
blob = TextBlob(review)
textblob_polarity = blob.sentiment.polarity
print("TextBlob Polarity:", textblob_polarity)
textblob_subjectivity = blob.sentiment.subjectivity
print("TextBlob Subjectivity:", textblob_subjectivity)

# VADER: score the same review and print the returned scores.
analyzer = SentimentIntensityAnalyzer()
vader_sentiment = analyzer.polarity_scores(review)
print("\nVADER Sentiment Scores:", vader_sentiment)
def classify_review(polarity_score, threshold=0.1):
    """Map a polarity score to a sentiment label.

    Args:
        polarity_score: Numeric polarity, where larger values mean more
            positive sentiment.
        threshold: Non-negative width of the neutral band on either side of
            zero. Defaults to 0.1, the cut-off this function originally
            hard-coded, so existing single-argument calls are unaffected.

    Returns:
        "Positive" when the score is strictly above ``threshold``,
        "Negative" when it is strictly below ``-threshold``, and "Neutral"
        otherwise (including scores exactly on either boundary).
    """
    if polarity_score > threshold:
        return "Positive"
    if polarity_score < -threshold:
        return "Negative"
    return "Neutral"
# Label the review from its TextBlob polarity.
classification = classify_review(textblob_polarity)
print("\nReview Classification (TextBlob):", classification)

# Build one text blob out of the collected reviews for the word cloud.
positive_reviews = [review]
positive_reviews_text = " ".join(positive_reviews)

# Configure the cloud first, then populate it; generate() fills this same object.
wordcloud = WordCloud(
    width=800,
    height=400,
    background_color='white',
)
wordcloud.generate(positive_reviews_text)

# Render the cloud without axes.
plt.figure(figsize=(10, 5))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()