# **Implement POS tagging on a text corpus using an NLP library (like NLTK or spaCy). Analyze the grammatical structure of sentences using syntactic/dependency parsing.**

In [None]:
import spacy

In [None]:
# Load spaCy's small English pipeline by package name. The model package has
# to be installed in the kernel's environment beforehand, e.g. with
# `python -m spacy download en_core_web_sm`.
nlp = spacy.load(name="en_core_web_sm")

In [None]:
# Sample corpus: a three-sentence paragraph about data science. Adjacent string
# literals are concatenated by Python, so the value is one single-line string.
text = (
    "Data science is a multidisciplinary field that combines mathematics, "
    "statistics, computer science, and domain expertise to extract actionable "
    "insights from vast amounts of raw data. "
    "It involves systematically collecting, cleaning, and analyzing structured "
    "and unstructured datasets to uncover hidden patterns and predict future "
    "trends. "
    "By leveraging advanced technologies like machine learning, artificial "
    "intelligence, and cloud computing, data science empowers organizations "
    "across sectors like healthcare and finance to make evidence-based "
    "strategic decisions"
)

In [None]:
# Run the loaded pipeline over the raw text; the resulting `doc` is what the
# tagging, parsing and visualization cells below iterate over.
doc = nlp(text)

## POS tagging

In [None]:
# One row per token: surface text, coarse POS, fine-grained tag, dependency
# label, and spaCy's human-readable explanation of the fine-grained tag.
for token in doc:
    tag_meaning = spacy.explain(token.tag_)
    print(
        f"{token.text:15}  POS: {token.pos_:10}  TAG: {token.tag_:10}  "
        f"DEP: {token.dep_}  Description: {tag_meaning}"
    )

Data             POS: NOUN        TAG: NN          DEP: compound  Description: noun, singular or mass
science          POS: NOUN        TAG: NN          DEP: nsubj  Description: noun, singular or mass
is               POS: AUX         TAG: VBZ         DEP: ROOT  Description: verb, 3rd person singular present
a                POS: DET         TAG: DT          DEP: det  Description: determiner
multidisciplinary  POS: ADJ         TAG: JJ          DEP: amod  Description: adjective (English), other noun-modifier (Chinese)
field            POS: NOUN        TAG: NN          DEP: attr  Description: noun, singular or mass
that             POS: PRON        TAG: WDT         DEP: nsubj  Description: wh-determiner
combines         POS: VERB        TAG: VBZ         DEP: relcl  Description: verb, 3rd person singular present
mathematics      POS: NOUN        TAG: NNS         DEP: dobj  Description: noun, plural
,                POS: PUNCT       TAG: ,           DEP: punct  Description: punctuation mar

## Dependency Parsing

In [None]:
# Show each token as "child <--relation-- head": the token's text, its
# dependency label, and the text of the token it attaches to.
ARC_ROW = "{:12} <--{:10}-- {}"
for token in doc:
    print(ARC_ROW.format(token.text, token.dep_, token.head.text))

Data         <--compound  -- science
science      <--nsubj     -- is
is           <--ROOT      -- is
a            <--det       -- field
multidisciplinary <--amod      -- field
field        <--attr      -- is
that         <--nsubj     -- combines
combines     <--relcl     -- field
mathematics  <--dobj      -- combines
,            <--punct     -- mathematics
statistics   <--conj      -- mathematics
,            <--punct     -- statistics
computer     <--compound  -- science
science      <--conj      -- statistics
,            <--punct     -- science
and          <--cc        -- science
domain       <--compound  -- expertise
expertise    <--conj      -- science
to           <--aux       -- extract
extract      <--advcl     -- combines
actionable   <--amod      -- insights
insights     <--dobj      -- extract
from         <--prep      -- insights
vast         <--amod      -- amounts
amounts      <--pobj      -- from
of           <--prep      -- amounts
raw          <--amod      -- data
da

## Sentence-wise grammar parsing

In [None]:
# Go through the document sentence by sentence: print the sentence text, then
# one indented row per token (text, POS, dependency label, head), and finish
# each sentence with a blank line.
for sent in doc.sents:
    print(f"Sentence: {sent.text}")
    for token in sent:
        head_word = token.head.text
        print(f"  {token.text:15} {token.pos_:10} {token.dep_:10} --> {head_word}")
    print()

Sentence: Data science is a multidisciplinary field that combines mathematics, statistics, computer science, and domain expertise to extract actionable insights from vast amounts of raw data.
  Data            NOUN       compound   --> science
  science         NOUN       nsubj      --> is
  is              AUX        ROOT       --> is
  a               DET        det        --> field
  multidisciplinary ADJ        amod       --> field
  field           NOUN       attr       --> is
  that            PRON       nsubj      --> combines
  combines        VERB       relcl      --> field
  mathematics     NOUN       dobj       --> combines
  ,               PUNCT      punct      --> mathematics
  statistics      NOUN       conj       --> mathematics
  ,               PUNCT      punct      --> statistics
  computer        NOUN       compound   --> science
  science         NOUN       conj       --> statistics
  ,               PUNCT      punct      --> science
  and             CCONJ      cc

## Visualization

In [None]:
from spacy import displacy

In [None]:
# Draw the dependency-arc diagram for the parsed document inline in the
# notebook output.
displacy.render(
    doc,
    style="dep",
    jupyter=True,
)