### Import important libraries

In [None]:
pip install nltk

In [None]:
import re
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer

### Sample Text

In [None]:
sample_text = '''
Python is a high-level, general-purpose programming language. Its design philosophy emphasizes code readability with 
the use of significant indentation.Python is dynamically typed and garbage-collected. It supports multiple programming
paradigms, including structured (particularly procedural), object-oriented and functional programming. It is often 
described as a "batteries included" language due to its comprehensive standard library.
'''

### Tokenization

In [None]:
import nltk
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

In [None]:
sentences = sent_tokenize(sample_text)
sentences

In [None]:
words = [word_tokenize(sentence) for sentence in sentences]
words

### Lowercasing and Removing Special Characters

In [12]:
# Clean words using regex
cleaned_words = [[re.sub(r'[^a-zA-Z0-9]', '', word.lower()) for word in sentences] for sentence in words]
cleaned_words

[['pythonisahighlevelgeneralpurposeprogramminglanguage',
  'itsdesignphilosophyemphasizescodereadabilitywiththeuseofsignificantindentationpythonisdynamicallytypedandgarbagecollected',
  'itsupportsmultipleprogrammingparadigmsincludingstructuredparticularlyproceduralobjectorientedandfunctionalprogramming',
  'itisoftendescribedasabatteriesincludedlanguageduetoitscomprehensivestandardlibrary'],
 ['pythonisahighlevelgeneralpurposeprogramminglanguage',
  'itsdesignphilosophyemphasizescodereadabilitywiththeuseofsignificantindentationpythonisdynamicallytypedandgarbagecollected',
  'itsupportsmultipleprogrammingparadigmsincludingstructuredparticularlyproceduralobjectorientedandfunctionalprogramming',
  'itisoftendescribedasabatteriesincludedlanguageduetoitscomprehensivestandardlibrary'],
 ['pythonisahighlevelgeneralpurposeprogramminglanguage',
  'itsdesignphilosophyemphasizescodereadabilitywiththeuseofsignificantindentationpythonisdynamicallytypedandgarbagecollected',
  'itsupportsmultiplepro

### Removing Stopwords

In [None]:
stop_words = set(stopwords.words('english'))
stop_words

In [14]:
filtered_words = [[word for word in sentences if word not in stop_words] for sentence in cleaned_words]
filtered_words

[['\nPython is a high-level, general-purpose programming language.',
  'Its design philosophy emphasizes code readability with \nthe use of significant indentation.Python is dynamically typed and garbage-collected.',
  'It supports multiple programming\nparadigms, including structured (particularly procedural), object-oriented and functional programming.',
  'It is often \ndescribed as a "batteries included" language due to its comprehensive standard library.'],
 ['\nPython is a high-level, general-purpose programming language.',
  'Its design philosophy emphasizes code readability with \nthe use of significant indentation.Python is dynamically typed and garbage-collected.',
  'It supports multiple programming\nparadigms, including structured (particularly procedural), object-oriented and functional programming.',
  'It is often \ndescribed as a "batteries included" language due to its comprehensive standard library.'],
 ['\nPython is a high-level, general-purpose programming language.

### Stemming and Lemmatization

In [15]:
stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()

In [17]:
stemmed_words = [[stemmer.stem(word) for word in sentences] for sentence in filtered_words]
stemmed_words

[['\npython is a high-level, general-purpose programming language.',
  'its design philosophy emphasizes code readability with \nthe use of significant indentation.python is dynamically typed and garbage-collected.',
  'it supports multiple programming\nparadigms, including structured (particularly procedural), object-oriented and functional programming.',
  'it is often \ndescribed as a "batteries included" language due to its comprehensive standard library.'],
 ['\npython is a high-level, general-purpose programming language.',
  'its design philosophy emphasizes code readability with \nthe use of significant indentation.python is dynamically typed and garbage-collected.',
  'it supports multiple programming\nparadigms, including structured (particularly procedural), object-oriented and functional programming.',
  'it is often \ndescribed as a "batteries included" language due to its comprehensive standard library.'],
 ['\npython is a high-level, general-purpose programming language.

In [19]:
lemmatized_words = [[lemmatizer.lemmatize(word) for word in sentences] for sentence in filtered_words]
lemmatized_words

[['\nPython is a high-level, general-purpose programming language.',
  'Its design philosophy emphasizes code readability with \nthe use of significant indentation.Python is dynamically typed and garbage-collected.',
  'It supports multiple programming\nparadigms, including structured (particularly procedural), object-oriented and functional programming.',
  'It is often \ndescribed as a "batteries included" language due to its comprehensive standard library.'],
 ['\nPython is a high-level, general-purpose programming language.',
  'Its design philosophy emphasizes code readability with \nthe use of significant indentation.Python is dynamically typed and garbage-collected.',
  'It supports multiple programming\nparadigms, including structured (particularly procedural), object-oriented and functional programming.',
  'It is often \ndescribed as a "batteries included" language due to its comprehensive standard library.'],
 ['\nPython is a high-level, general-purpose programming language.

### Printing Processed Sentences

In [20]:
print("Original Sentences:")
for sentence in sentences:
    print(sentence)

Original Sentences:

Python is a high-level, general-purpose programming language.
Its design philosophy emphasizes code readability with 
the use of significant indentation.Python is dynamically typed and garbage-collected.
It supports multiple programming
paradigms, including structured (particularly procedural), object-oriented and functional programming.
It is often 
described as a "batteries included" language due to its comprehensive standard library.


In [21]:
print("\nProcessed Sentences (Lemmatized):")
for sentence in lemmatized_words:
    print(' '.join(sentence))


Processed Sentences (Lemmatized):

Python is a high-level, general-purpose programming language. Its design philosophy emphasizes code readability with 
the use of significant indentation.Python is dynamically typed and garbage-collected. It supports multiple programming
paradigms, including structured (particularly procedural), object-oriented and functional programming. It is often 
described as a "batteries included" language due to its comprehensive standard library.

Python is a high-level, general-purpose programming language. Its design philosophy emphasizes code readability with 
the use of significant indentation.Python is dynamically typed and garbage-collected. It supports multiple programming
paradigms, including structured (particularly procedural), object-oriented and functional programming. It is often 
described as a "batteries included" language due to its comprehensive standard library.

Python is a high-level, general-purpose programming language. Its design philoso