In [2]:
import re
import pandas as pd
from langdetect import detect
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
from langdetect import DetectorFactory

# Ensure consistent results with langdetect by fixing its seed before any
# call to detect()
DetectorFactory.seed = 0

# word_tokenize / sent_tokenize need the Punkt tokenizer data. Fetch it here so
# this cell also runs on a fresh environment instead of relying on another cell
# having downloaded it first; quiet=True keeps the cell output unchanged.
for _resource in ('punkt', 'punkt_tab'):
    nltk.download(_resource, quiet=True)

# Sample texts standing in for rows that would normally be read from a CSV
data = dict(
    Text=[
        "Hello, how are you?",
        "Bonjour, comment ça va?",
        "नमस्ते, आप कैसे हैं?",
    ],
)

# Single-column DataFrame ('Text') built from the sample data
df = pd.DataFrame.from_dict(data)

# Language detection function
def detect_language(text):
    """Return the language code that ``detect`` reports for ``text``.

    Args:
        text: The text whose language should be identified.

    Returns:
        Whatever ``detect(text)`` returns, or the string ``"unknown"`` when
        ``detect`` raises an ordinary exception.
    """
    try:
        return detect(text)
    except Exception:
        # Only ordinary errors fall back to "unknown". A bare ``except`` would
        # also swallow KeyboardInterrupt / SystemExit, making a long
        # ``DataFrame.apply`` run impossible to interrupt.
        return "unknown"  # In case the language cannot be detected

# Word count function (tokenizes text into words)
def word_count(text):
    """Return how many tokens ``word_tokenize`` produces for ``text``."""
    return len(word_tokenize(text))

# Sentence count function (tokenizes text into sentences)
def sentence_count(text):
    """Return how many sentences ``sent_tokenize`` finds in ``text``."""
    return len(sent_tokenize(text))

# Derived columns, in display order: each one is produced by applying a single
# function to every entry of the 'Text' column.
derived_columns = {
    'language': detect_language,
    'word_count': word_count,
    'sentence_count': sentence_count,
    'word_tokens': word_tokenize,
    'sentence_tokens': sent_tokenize,
}
for column_name, transform in derived_columns.items():
    df[column_name] = df['Text'].apply(transform)

# Display the processed DataFrame (optional)
print(df)


                      Text language  word_count  sentence_count  \
0      Hello, how are you?       en           6               1   
1  Bonjour, comment ça va?       fr           6               1   
2     नमस्ते, आप कैसे हैं?       hi           6               1   

                        word_tokens            sentence_tokens  
0      [Hello, ,, how, are, you, ?]      [Hello, how are you?]  
1  [Bonjour, ,, comment, ça, va, ?]  [Bonjour, comment ça va?]  
2     [नमस्ते, ,, आप, कैसे, हैं, ?]     [नमस्ते, आप कैसे हैं?]  


In [2]:
import nltk
from nltk.stem import SnowballStemmer
from nltk.tokenize import word_tokenize

# Download the Punkt tokenizer data that word_tokenize relies on
nltk.download('punkt')
nltk.download('punkt_tab')

# Snowball stemmer configured for German
stemmer = SnowballStemmer("german")

# Accept a sentence input from the user
sentence = input("Enter a German sentence: ")

# Tokenize with the German Punkt model. word_tokenize defaults to the English
# model, whose sentence-boundary rules (e.g. abbreviations) do not match German.
words = word_tokenize(sentence, language="german")

# Stem every token; punctuation tokens pass through the stemmer as well
stemmed_words = [stemmer.stem(word) for word in words]

# Output the original and stemmed words side by side
print("\nOriginal and Stemmed Words:")
for original, stemmed in zip(words, stemmed_words):
    print(f"Original: {original}, Stemmed: {stemmed}")





[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\felin\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\felin\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


Enter a German sentence:  Ich laufe gerne im Park.



Original and Stemmed Words:
Original: Ich, Stemmed: ich
Original: laufe, Stemmed: lauf
Original: gerne, Stemmed: gern
Original: im, Stemmed: im
Original: Park, Stemmed: park
Original: ., Stemmed: .


In [3]:
import nltk
from nltk.tokenize import word_tokenize
from nltk import pos_tag

# Fetch the tokenizer data and the English POS-tagger model, in this order
for resource in ('punkt', 'averaged_perceptron_tagger_eng', 'punkt_tab'):
    nltk.download(resource)

# Read a sentence from the user and split it into word tokens
sentence = input("Please enter a sentence: ")
words = word_tokenize(sentence)

# Tag each token with its part of speech and show the (token, tag) pairs
pos_tags = pos_tag(words)
print(pos_tags)


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\felin\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     C:\Users\felin\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping taggers\averaged_perceptron_tagger_eng.zip.
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\felin\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


Please enter a sentence:  "The quick brown fox jumps over the lazy dog."


[('``', '``'), ('The', 'DT'), ('quick', 'JJ'), ('brown', 'NN'), ('fox', 'NN'), ('jumps', 'VBZ'), ('over', 'IN'), ('the', 'DT'), ('lazy', 'JJ'), ('dog', 'NN'), ('.', '.'), ("''", "''")]


In [1]:
# Option 2: Word Embeddings (Word2Vec)

from gensim.models import Word2Vec
from nltk.tokenize import word_tokenize, sent_tokenize
import nltk

# Punkt tokenizer data for sent_tokenize / word_tokenize. punkt_tab is fetched
# too, matching the other tokenizing cells, so this cell does not depend on a
# download made elsewhere.
nltk.download('punkt')
nltk.download('punkt_tab')

# Sample paragraph
text = """
Natural language processing (NLP) is a field of artificial intelligence. 
It focuses on the interaction between computers and humans through language.
"""

# Tokenize into sentences, then into lower-cased words
sentences = sent_tokenize(text)
tokenized_sentences = [word_tokenize(sentence.lower()) for sentence in sentences]

# Train Word2Vec model. The seed is stated explicitly (1 is gensim's default)
# and training is limited to one worker thread: gensim documents that
# multi-threaded training is not deterministically reproducible.
model = Word2Vec(
    sentences=tokenized_sentences,
    vector_size=100,
    window=5,
    min_count=1,
    workers=1,
    seed=1,
)

# Access the word vector and its nearest neighbours. Both lookups use `word`
# and both sit behind the vocabulary check, because most_similar raises
# KeyError for a word the model has not seen.
word = 'language'
if word in model.wv:
    print(f"Vector for '{word}':\n", model.wv[word])

    # Find similar words
    print(f"\nWords similar to '{word}':")
    print(model.wv.most_similar(word))
else:
    print(f"'{word}' not found in the vocabulary.")


Vector for 'language':
 [-5.3646474e-04  2.3810801e-04  5.1115868e-03  9.0194996e-03
 -9.3105817e-03 -7.1176412e-03  6.4591132e-03  8.9828968e-03
 -5.0157169e-03 -3.7675719e-03  7.3834234e-03 -1.5338082e-03
 -4.5329770e-03  6.5577170e-03 -4.8615970e-03 -1.8221879e-03
  2.8838872e-03  9.8978775e-04 -8.2891919e-03 -9.4487956e-03
  7.3142950e-03  5.0697206e-03  6.7561041e-03  7.6594559e-04
  6.3553466e-03 -3.4018364e-03 -9.5542887e-04  5.7787406e-03
 -7.5193024e-03 -3.9333673e-03 -7.5123105e-03 -9.3613472e-04
  9.5457397e-03 -7.3263012e-03 -2.3354592e-03 -1.9433126e-03
  8.0798836e-03 -5.9388769e-03  4.6167683e-05 -4.7591492e-03
 -9.6119754e-03  5.0150724e-03 -8.7612551e-03 -4.3907915e-03
 -4.3744913e-05 -3.0390202e-04 -7.6636383e-03  9.6157677e-03
  4.9826694e-03  9.2392573e-03 -8.1592631e-03  4.4972054e-03
 -4.1479236e-03  8.2272000e-04  8.5027330e-03 -4.4630114e-03
  4.5193853e-03 -6.7860056e-03 -3.5441772e-03  9.4036534e-03
 -1.5820283e-03  3.2915483e-04 -4.1401330e-03 -7.6809111e-03


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\felin\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [4]:
from sklearn.feature_extraction.text import CountVectorizer
# Small corpus: one string per document
documents = [
    "This is the first document.",
    "This document is the second document.",
    "And this is the third one.",
    "Is this the first document?",
]

# Learn the vocabulary and build the document-term count matrix in one step
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(documents)
feature_names = vectorizer.get_feature_names_out()

# Show the dense count matrix followed by the vocabulary that labels its columns
print("Bag-of-Words Matrix:", X.toarray(), sep="\n")
print("Vocabulary (Feature Names):", feature_names)





Bag-of-Words Matrix:
[[0 1 1 1 0 0 1 0 1]
 [0 2 0 1 0 1 1 0 1]
 [1 0 0 1 1 0 1 1 1]
 [0 1 1 1 0 0 1 0 1]]
Vocabulary (Feature Names): ['and' 'document' 'first' 'is' 'one' 'second' 'the' 'third' 'this']


In [5]:
import nltk

# Download the 'wordnet' resource (WordNet data for the lemmatizer created below)
nltk.download('wordnet')

# Import the lemmatizer class (placed after the download call in this cell)
from nltk.stem import WordNetLemmatizer

# Module-level lemmatizer instance shared by lemmatize_word()
lemmatizer = WordNetLemmatizer()

def lemmatize_word(word, pos=None):
    """Lemmatize ``word`` with the module-level ``lemmatizer``.

    Args:
        word: The word to lemmatize.
        pos: Optional part-of-speech tag passed through to the lemmatizer.
            When it is falsy (``None`` or ``''``) the lemmatizer is called
            without a ``pos`` argument, so its own default applies.

    Returns:
        The value returned by ``lemmatizer.lemmatize``.
    """
    # Forward ``pos`` only when the caller actually supplied one
    extra_args = {'pos': pos} if pos else {}
    return lemmatizer.lemmatize(word, **extra_args)

def main():
    """Run the interactive lemmatizer menu until the user picks option 4.

    The menu is printed on every pass. Options 1-3 prompt for a word and print
    its lemma via ``lemmatize_word`` (no POS tag, adjective, verb). Any other
    input prints an error message and shows the menu again.
    """
    menu_lines = (
        "\n--- Lemmatizer Menu ---",
        "1. Lemmatize a word without POS tag",
        "2. Lemmatize a word with POS tag (adjective)",
        "3. Lemmatize a word with POS tag (verb)",
        "4. Exit",
    )
    # choice -> (POS tag handed to lemmatize_word, label printed before the lemma)
    actions = {
        '1': (None, "Lemmatized word"),
        '2': ('a', "Lemmatized word (as adjective)"),
        '3': ('v', "Lemmatized word (as verb)"),
    }

    while True:
        # Display the menu
        for line in menu_lines:
            print(line)

        choice = input("Enter your choice (1/2/3/4): ")

        if choice == '4':
            print("Exiting...")
            return

        if choice not in actions:
            print("Invalid choice, please try again.")
            continue

        pos_tag_code, label = actions[choice]
        word = input("Enter the word to lemmatize: ")
        print(f"{label}: {lemmatize_word(word, pos=pos_tag_code)}")

# Start the interactive lemmatizer menu when this code runs as the main module
if __name__ == "__main__":
    main()





[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\felin\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!



--- Lemmatizer Menu ---
1. Lemmatize a word without POS tag
2. Lemmatize a word with POS tag (adjective)
3. Lemmatize a word with POS tag (verb)
4. Exit


Enter your choice (1/2/3/4):  1
Enter the word to lemmatize:  studies


Lemmatized word: study

--- Lemmatizer Menu ---
1. Lemmatize a word without POS tag
2. Lemmatize a word with POS tag (adjective)
3. Lemmatize a word with POS tag (verb)
4. Exit


Enter your choice (1/2/3/4):  2
Enter the word to lemmatize:  running


Lemmatized word (as adjective): running

--- Lemmatizer Menu ---
1. Lemmatize a word without POS tag
2. Lemmatize a word with POS tag (adjective)
3. Lemmatize a word with POS tag (verb)
4. Exit


Enter your choice (1/2/3/4):  3
Enter the word to lemmatize:  running


Lemmatized word (as verb): run

--- Lemmatizer Menu ---
1. Lemmatize a word without POS tag
2. Lemmatize a word with POS tag (adjective)
3. Lemmatize a word with POS tag (verb)
4. Exit


Enter your choice (1/2/3/4):  4


Exiting...


In [1]:
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.tokenize import word_tokenize
from collections import Counter

# Download required resources, in this order: Punkt tokenizer data, the VADER
# lexicon, and the punkt_tab data for tokenization
for resource_name in ('punkt', 'vader_lexicon', 'punkt_tab'):
    nltk.download(resource_name)

# Shared analyzer instance used by analyze_sentiment()
sia = SentimentIntensityAnalyzer()

# Basic emotion dictionary
# Maps each emotion label to the lower-case keywords that count towards it
emotion_dict = dict(
    anger=['angry', 'mad', 'rage'],
    joy=['happy', 'joyful', 'excited'],
    sadness=['sad', 'unhappy', 'depressed'],
    fear=['scared', 'afraid', 'terrified'],
    surprise=['surprised', 'shocked', 'amazed'],
    disgust=['disgusted', 'gross'],
)

# Sentiment Analysis using VADER
def analyze_sentiment(text):
    """Classify ``text`` as 'POSITIVE', 'NEGATIVE' or 'NEUTRAL'.

    The label is derived from the 'compound' value of
    ``sia.polarity_scores(text)``: at least 0.05 is positive, at most -0.05 is
    negative, anything in between is neutral.
    """
    compound = sia.polarity_scores(text)['compound']
    if compound >= 0.05:
        return 'POSITIVE'
    if compound <= -0.05:
        return 'NEGATIVE'
    return 'NEUTRAL'

# Emotion Detection based on keywords
def analyze_emotion(text):
    """Return the emotion whose keywords occur most often in ``text``.

    The text is lower-cased and tokenized, then each emotion in
    ``emotion_dict`` is scored by the total number of occurrences of its
    keywords. Returns 'Neutral' when no keyword occurs at all; on a tie the
    emotion listed first in ``emotion_dict`` wins.
    """
    tokens = word_tokenize(text.lower())
    occurrences = Counter(tokens)

    scores = {}
    for emotion, keywords in emotion_dict.items():
        scores[emotion] = sum(occurrences[keyword] for keyword in keywords)

    # No keyword hit at all means there is no emotion to report
    if not max(scores.values()) > 0:
        return 'Neutral'
    return max(scores, key=scores.get)

# Function to analyze both sentiment and emotion
def analyze_text(text):
    """Return ``(sentiment, emotion)`` for ``text``.

    The sentiment comes from ``analyze_sentiment`` and the emotion from
    ``analyze_emotion``, evaluated in that order.
    """
    return analyze_sentiment(text), analyze_emotion(text)

# Menu-driven program
def menu():
    """Run the interactive sentiment / emotion menu until the user picks 4.

    The option list is printed once, before the loop. Each pass reads a choice;
    options 1-3 prompt for a text and print the sentiment, the emotion, or
    both. Any other input prints an error message and asks again.
    """
    for line in (
        "\nWelcome to Sentiment and Emotion Analyzer",
        "1. Analyze Sentiment",
        "2. Analyze Emotion",
        "3. Analyze Both Sentiment and Emotion",
        "4. Exit",
    ):
        print(line)

    while True:
        choice = input("\nEnter your choice (1/2/3/4): ")

        if choice == '4':
            print("Exiting the program. Goodbye!")
            return

        if choice == '1':
            text = input("\nEnter text to analyze sentiment: ")
            print(f"Sentiment: {analyze_sentiment(text)}")
            continue

        if choice == '2':
            text = input("\nEnter text to analyze emotion: ")
            print(f"Emotion: {analyze_emotion(text)}")
            continue

        if choice == '3':
            text = input("\nEnter text to analyze both sentiment and emotion: ")
            sentiment, emotion = analyze_text(text)
            print(f"Sentiment: {sentiment}, Emotion: {emotion}")
            continue

        print("Invalid choice. Please select a valid option.")

# Start the interactive sentiment/emotion menu when this code runs as the main module
if __name__ == "__main__":
    menu()




[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\felin\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\felin\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\felin\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!



Welcome to Sentiment and Emotion Analyzer
1. Analyze Sentiment
2. Analyze Emotion
3. Analyze Both Sentiment and Emotion
4. Exit

Enter your choice (1/2/3/4): 1

Enter text to analyze sentiment: I am happy to visit you .
Sentiment: POSITIVE

Enter your choice (1/2/3/4): 4
Exiting the program. Goodbye!
