In [28]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize,sent_tokenize
from nltk.stem import WordNetLemmatizer, PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
import string

# Shared NLP resources consulted by preprocess_text.
stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

# Read the whole input document into memory as a single string.
file_path = 'Freedom of expression.txt'
with open(file_path, 'r') as file:
    text_data = file.read()
def preprocess_text(text):
    """Normalize *text* into a space-separated string of stemmed tokens.

    The text is lower-cased and split with ``word_tokenize``. Tokens found
    in the module-level ``stop_words`` set and tokens made up solely of
    punctuation characters are dropped; every remaining token is
    lemmatized and then stemmed.

    Args:
        text: Raw input text.

    Returns:
        The processed tokens joined by single spaces (an empty string when
        nothing survives the filtering).
    """
    punctuation_chars = set(string.punctuation)
    processed_tokens = []
    for word in word_tokenize(text.lower()):
        if word in stop_words:
            continue
        # `word in string.punctuation` is a substring test on one long
        # string, so multi-character punctuation tokens such as "``",
        # "''", "..." or "--" slip through it. Checking every character
        # filters those as well (and still rejects the empty token).
        if all(char in punctuation_chars for char in word):
            continue
        processed_tokens.append(stemmer.stem(lemmatizer.lemmatize(word)))
    return ' '.join(processed_tokens)
# Clean the document once; both vectorizers below consume the same string.
preprocessed_text = preprocess_text(text_data)

# --- Bag of words: raw term counts for the single document ---
vectorizer = CountVectorizer()
X = vectorizer.fit_transform([preprocessed_text])
bow_array = X.toarray()
vocabulary = vectorizer.get_feature_names_out()
print("Vocabulary:\n", vocabulary)
print("\nBag of Words (Vectorization):\n", bow_array)

# --- TF-IDF weights over the same single-document corpus ---
# NOTE(review): only one document is fitted here, so the inverse-document-
# frequency part has nothing to compare against -- confirm that is intended.
tfidf_vectorizer = TfidfVectorizer()
X_tfidf = tfidf_vectorizer.fit_transform([preprocessed_text])
tfidf_array = X_tfidf.toarray()
tfidf_vocabulary = tfidf_vectorizer.get_feature_names_out()
print("TF-IDF Vocabulary:", tfidf_vocabulary)
print("\nTF-IDF Array:", tfidf_array)

Vocabulary:
 ['1996' '2003' '2005' '2011' '2012' '302' '408' '430' '736' '85' 'absenc'
 'abus' 'accur' 'across' 'act' 'action' 'address' 'administr' 'affair'
 'affect' 'agenc' 'allow' 'along' 'also' 'anticip' 'apart' 'applic' 'argu'
 'argument' 'art' 'assert' 'assist' 'associ' 'assum' 'attain' 'attempt'
 'attribut' 'autocrat' 'autonomi' 'avail' 'avoid' 'balanc' 'basic' 'becom'
 'benefit' 'best' 'boast' 'break' 'bring' 'brought' 'cent' 'challeng'
 'check' 'civil' 'commit' 'commun' 'compet' 'comprehend' 'compris'
 'concept' 'conclud' 'conclus' 'confront' 'consider' 'constitut'
 'contemporari' 'control' 'core' 'correctli' 'corrupt' 'countri' 'crisi'
 'current' 'curtail' 'de' 'deliveri' 'democraci' 'democrat' 'depend'
 'deriv' 'desir' 'develop' 'directli' 'discov' 'discuss' 'dissemin'
 'divers' 'easili' 'econom' 'effect' 'el' 'elect' 'emerg' 'enhanc'
 'enshrin' 'equal' 'erupt' 'especi' 'even' 'exampl' 'exercis' 'expect'
 'expens' 'express' 'face' 'facilit' 'fact' 'fail' 'fear' 'final' 'fin

In [33]:
# Keyword lists that drive the rule-based labels in label_sentiment.
positive_keywords = ['freedom', 'good', 'rights', 'express', 'benefits']
negative_keywords = ['abuse', 'control', 'violence', 'intimidated']

# Load the document again and split it into sentences to be labeled.
with open("Freedom of expression.txt", 'r') as file:
    content = file.read()
sentences = sent_tokenize(content)
def label_sentiment(text):
    """Label *text* as "positive", "negative" or "neutral" by keyword lookup.

    The lookup is a case-insensitive substring search of the lower-cased
    text against the module-level ``positive_keywords`` and
    ``negative_keywords`` lists. Positive keywords are checked first, so a
    text containing keywords from both lists is labeled "positive".

    Args:
        text: The sentence (or any string) to label.

    Returns:
        One of the strings "positive", "negative" or "neutral".
    """
    lowered = text.lower()

    def mentions_any(keywords):
        # True when at least one keyword occurs anywhere in the text.
        return any(keyword in lowered for keyword in keywords)

    if mentions_any(positive_keywords):
        return "positive"
    if mentions_any(negative_keywords):
        return "negative"
    return "neutral"
# Label every sentence, then report each sentence next to its label.
labels = list(map(label_sentiment, sentences))
for sentence, label in zip(sentences, labels):
    print(f"Sentence:'{sentence}' => Label: '{label}'")

Sentence:'Freedom of expression is one of the basic tenets of human rights across the globe.' => Label: 'positive'
Sentence:'It comprises of the freedom to express oneself without the fear of being intimidated, free delivery of the speech, liberated press, freedom to disseminate and receive information and the right to silence as well as free sharing of opinions (Temperman 2011, p.736).' => Label: 'positive'
Sentence:'Freedom of expression or speech is valued across the world even though there have been, relentless attempts to curtail this provision by some illiberal and autocratic states.' => Label: 'positive'
Sentence:'It is also prudent to mention that other sub-elements of human rights such as the right to take part in elections and the freedoms of association and thought are under the umbrella of freedoms of speech and expression.' => Label: 'positive'
Sentence:'Also, the social rights of an individual are directly impacted by the freedom of expression rights.' => Label: 'positive