# Using Indic NLP Library
[https://github.com/anoopkunchukuttan/indic_nlp_library](https://github.com/anoopkunchukuttan/indic_nlp_library)

## Morphological Analysis

In [1]:
from indicnlp.morph import unsupervised_morph 

In [2]:
# Build the unsupervised morphological analyzer for the "bn" language code
# (presumably Bengali, matching the sample words analysed in this notebook).
morph = unsupervised_morph.UnsupervisedMorphAnalyzer("bn")

In [25]:
# Space-separated Bengali word forms, laid out one group of related forms per
# line. Adjacent literals are joined into a single string with no newlines.
# This string is the shared input for the morphological analyzer and both
# stemmers.
# NOTE(review): this cell's execution count (25) is higher than those of the
# cells that consume `word_token` (5 and 18), so their saved outputs may
# predate this word list. Confirm with Restart Kernel -> Run All.
text = (
    u"করা করেছিলাম করেছি করতে করেছিল "
    u"হয়েছে হয়েছিল হয় হওয়ার হবে "
    u"আবিষ্কৃত আবিষ্কার "
    u"অভিষিক্ত অভিষেক অভিষেকের "
    u"আমি আমার আমাদের "
    u"তুমি তোমার তোমাদের "
    u"বসা বসেছিল বসে বসি বসেছিলাম বস বসার"
)

In [26]:
# Tokenise on any run of whitespace. Unlike split(" "), this never yields
# empty tokens for repeated, leading or trailing spaces, does not glue
# newlines onto words, and returns [] (not [""]) for an empty string.
# For the single-space-separated sample text the resulting list is unchanged.
word_token = text.split()

In [5]:
# Analyse every token; the result list stays positionally aligned with
# `word_token` (one analysis per word, in the same order).
word_morph = [morph.morph_analyze(token) for token in word_token]

In [6]:
import pandas as pd

In [7]:
# One row per token: the word next to the analyzer's output for it.
# The column order is spelled out instead of relying on dict/key ordering.
indic = pd.DataFrame(
    {"1_Word": word_token, "2_Morpheme": word_morph},
    columns=["1_Word", "2_Morpheme"],
)

In [8]:
# Bare last expression so the notebook renders the word/morpheme frame as a table.
indic

Unnamed: 0,1_Word,2_Morpheme
0,করা,[করা]
1,করেছিলাম,"[করে, ছিলাম]"
2,করেছি,"[করেছ, ি]"
3,করতে,"[কর, তে]"
4,করেছিল,[করেছিল]
5,হয়েছে,"[হয়ে, ছে]"
6,হয়েছিল,"[হয়ে, ছিল]"
7,হয়,[হয়]
8,হওয়ার,[হওয়ার]
9,হবে,[হবে]


## Transliteration

In [9]:
from indicnlp.transliterate.unicode_transliterate import ItransTransliterator

In [10]:
# Latin-script sample words, space-separated; this same string is the input
# to each transliteration call in the notebook.
bangla_text = (
    "ami apni tumi "
    "tomar tomader "
    "amar apnar apnader "
    "akash"
)

In [11]:
# Transliterate the Latin-script sample with ItransTransliterator.from_itrans,
# passing "bn" as the language code (the input is treated as ITRANS, going by
# the method name — TODO confirm the sample actually follows ITRANS spelling).
text_trans = ItransTransliterator.from_itrans(bangla_text, "bn")

In [12]:
# Show the transliterated text itself. A single-argument print() call works
# on both Python 2 and Python 3 and prints the characters directly, without
# the u'...' repr wrapper or an escape/unescape round-trip.
print(text_trans)

u'অমি অপ্নি তুমি তোমর্ তোমদের্ অমর্ অপ্নর্ অপ্নদের্ অকশ্'


# Using Silpa
[https://github.com/libindic/Silpa-Flask](https://github.com/libindic/Silpa-Flask)

## Transliteration

In [13]:
from transliteration import getInstance 

In [14]:
# Obtain the transliterator object from the `transliteration` module's
# getInstance() factory; it is used for the transliterate() call below.
trans = getInstance()

In [15]:
# Transliterate the same Latin-script sample, passing "bn_IN" as the target
# (presumably a Bengali/India locale code — confirm against the Silpa docs).
# This rebinds `text_trans`, replacing the value from the Indic NLP section.
# NOTE(review): the saved output of this run is mostly blank, so most of the
# words were apparently not transliterated — verify the arguments/input scheme.
text_trans = trans.transliterate(bangla_text, "bn_IN")

Found  6  rules.
Found  6  rules.
Found  6  rules.
Found  6  rules.
Found  6  rules.
Found  6  rules.
Found  6  rules.
Found  6  rules.
Found  6  rules.


In [16]:
# Show the transliterated text itself. A single-argument print() call works
# on both Python 2 and Python 3 and prints the characters directly, without
# the u'...' repr wrapper or an escape/unescape round-trip.
print(text_trans)

u'ওমী     অমোর    '


# Using BengaliStemmer
[https://github.com/gdebasis/BengaliStemmer](https://github.com/gdebasis/BengaliStemmer)

## Stemming

In [17]:
import rbs

In [18]:
# Stem every token with rbs.stemWord; results stay positionally aligned with
# `word_token`. The second argument is always True (its meaning is not
# evident from this notebook — TODO confirm against the rbs module).
word_stem1 = [rbs.stemWord(token, True) for token in word_token]

In [19]:
# One row per token: the word next to the stem produced by rbs.
# The column order is spelled out instead of relying on dict/key ordering.
bs1 = pd.DataFrame(
    {"1_Word": word_token, "2_Stem": word_stem1},
    columns=["1_Word", "2_Stem"],
)

In [20]:
# Bare last expression so the notebook renders the word/stem frame as a table.
bs1

Unnamed: 0,1_Word,2_Stem
0,করা,কর
1,করেছিলাম,করেছিলাম
2,করেছি,করেছ
3,করতে,করত
4,করেছিল,করেছিল
5,হয়েছে,হয়েছ
6,হয়েছিল,হয়েছিল
7,হয়,হ
8,হওয়ার,হ
9,হবে,হব


# Using BanglaStemmer
[https://github.com/rafi-kamal/Bangla-Stemmer](https://github.com/rafi-kamal/Bangla-Stemmer)

## Stemming

In [21]:
import jnius_config

# Set the Java classpath used by jnius: the current directory plus one more
# entry. "path to class" looks like a placeholder — replace it with the
# directory or jar that holds the compiled RuleFileParser class.
# This runs before the cell that does `from jnius import autoclass`;
# presumably the classpath must be configured before jnius is imported
# (TODO confirm against the pyjnius documentation).
jnius_config.set_classpath(".", "path to class")

In [22]:
from jnius import autoclass

In [23]:
# Look up the Java class named "RuleFileParser" through jnius' autoclass so it
# can be instantiated from Python.
cls = autoclass("RuleFileParser")

In [24]:
# Instantiate the class with no constructor arguments; this object's
# stemOfWord() method is what the stemming loop below calls.
stemmer = cls()

In [27]:
# Stem every token with the Java stemmer; results stay positionally aligned
# with `word_token` (one stem per word, in the same order).
word_stem2 = [stemmer.stemOfWord(token) for token in word_token]

In [28]:
# One row per token: the word next to the stem produced by the Java stemmer.
# The column order is spelled out instead of relying on dict/key ordering.
bs2 = pd.DataFrame(
    {"1_Word": word_token, "2_Stem": word_stem2},
    columns=["1_Word", "2_Stem"],
)

In [29]:
# Bare last expression so the notebook renders the word/stem frame as a table.
bs2

Unnamed: 0,1_Word,2_Stem
0,করা,ক
1,করেছিলাম,করেছিলাম
2,করেছি,করে
3,করতে,কর
4,করেছিল,কর
5,হয়েছে,হ
6,হয়েছিল,হ
7,হয়,হ
8,হওয়ার,হওয়
9,হবে,হব


# Using Avro
[https://github.com/kaustavdm/pyAvroPhonetic](https://github.com/kaustavdm/pyAvroPhonetic)

## Transliteration

In [30]:
from pyavrophonetic import avro

In [31]:
# Transliterate the same Latin-script sample with avro.parse; the result is
# kept in `trans_text` (distinct from the `text_trans` used in earlier sections).
trans_text = avro.parse(bangla_text)

In [32]:
# Show the transliterated text itself. A single-argument print() call works
# on both Python 2 and Python 3 and prints the characters directly, without
# the u'...' repr wrapper or an escape/unescape round-trip.
print(trans_text)

u'আমি আপ্নি তুমি তমার তমাদের আমার আপ্নার আপ্নাদের আকাশ'
