In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'tagger:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F4643750%2F7905730%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240322%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240322T110439Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D6fefebc951529f8520e88e72bfed553e24d52f5ecc200d866b198d24cd0363a3af410640d08b376f8ffcb1faac3adf5847c0e8c2f39d1bebff20823c176f711f671205dfe567d1598c4d40b86047299c65ca30ddfd167154b7e5ed2522291a6921c3f9d506a366fce2fc65f44d01b071d8eb222e18dd5c4abb9eb8c2080bae017c5eb154dc6be851b0169e248631f3c3170ac87c35903ad2510c027d619b57cf6740c9c15093de3d451b893c679849164f1bc6c0148d71bfaeef68b9eae2c1375b89d353a4f692f3219bf5187ea15f69c17e895112b7ef599ac6aa73bc3db0b477411a8d2b133b696f6c672f8e207a7a0845860ff20e673dbc8cd7de1e7c0f56'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    directory, download_url_encoded = data_source_mapping.split(':')
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = int(50 * dl / int(total_length))
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            if filename.endswith('.zip'):
              with ZipFile(tfile) as zfile:
                zfile.extractall(destination_path)
            else:
              with tarfile.open(tfile.name) as tarfile:
                tarfile.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError as e:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError as e:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/tagger/nlpp.csv


In [None]:
import pandas as pd
import numpy as np

In [None]:
file_path = '/kaggle/input/tagger/nlpp.csv'
data = pd.read_csv(file_path)
data[:20]

Unnamed: 0,word,pos
0,ఆ,DEM
1,తరువాత,NST
2,హైదరాబాదుకు,NNP
3,బదిలీ,NN
4,అయ్యాడు,VM
5,</s>,END
6,<s>,START
7,ఈ,DEM
8,గ్రామంలో,NN
9,ప్రజల,NN


In [None]:
data.columns

Index(['word', 'pos'], dtype='object')

In [None]:
data['pos']

0       DEM
1       NST
2       NNP
3        NN
4        VM
       ... 
9061     NN
9062     VM
9063     RB
9064     VM
9065    END
Name: pos, Length: 9066, dtype: object

In [None]:
words = list(data['word'])
pos = list(data['pos'])

In [None]:
print(words[:10])
print(pos[:10])

['ఆ', 'తరువాత', 'హైదరాబాదుకు', 'బదిలీ', 'అయ్యాడు', '</s>', '<s>', 'ఈ', 'గ్రామంలో', 'ప్రజల']
['DEM', 'NST', 'NNP', 'NN', 'VM', 'END', 'START', 'DEM', 'NN', 'NN']


In [None]:
print(len(words))
print(len(pos))

9066
9066


In [None]:
telugu_sen = []
telugu_pos = []
temp = ["<s>"]
temp_pos = ["START"]

for i in range(9066):
    temp.append(words[i])
    temp_pos.append(pos[i])
    if words[i] == '</s>':
        telugu_sen.append(temp)
        telugu_pos.append(temp_pos)
        temp = []
        temp_pos = []

In [None]:
print(telugu_sen[:50])
print(telugu_pos[:2])

[['<s>', 'ఆ', 'తరువాత', 'హైదరాబాదుకు', 'బదిలీ', 'అయ్యాడు', '</s>'], ['<s>', 'ఈ', 'గ్రామంలో', 'ప్రజల', 'ప్రధాన', 'వృత్తి', 'వ్యవసాయం', '</s>'], ['<s>', 'కుత్బుల్లాపూర్ై\x80\x8c', 'ఆంధ్ర', 'ప్రదేశ్', 'రాష్ట్రములోని', 'రంగారెడ్డి', 'జిల్లాకు', 'చెందిన', 'ఒక', 'మండలము', '</s>'], ['<s>', 'జ్ఞానపీఠ', 'పురస్కారం', 'గ్రహీత', 'విశ్వనాథ', 'సత్యనారాయణ', '</s>'], ['<s>', 'ఈ', 'గ్రామము', 'కోస్గి', 'నుంచి', 'మద్దూరు', 'వెళ్ళు', 'మార్గములో', 'కలదు', '</s>'], ['<s>', 'ఇక్కడ', 'కేవలం', 'ఐదవ', 'తరగతి', 'వరకు', 'మాత్రమే', 'పాఠశాల', 'సౌకర్యం', 'ఉంది', '</s>'], ['<s>', 'వికీపీడియా', 'సభ్యులు', 'రవిచంద్ర', 'మరియు', 'కాసుబాబు', 'మరియు', 'వికీపీడియా', 'అజ్ఞాత', 'సభ్యులు', 'కృతిపై', 'ఆధారితం', '</s>'], ['<s>', 'రైలు', 'రవాణా', 'వ్యవస్థ', 'పరిమాణం', 'క్రమంలో', 'దేశాల', 'జాబితా', 'ఇక్కడ', 'ఇవ్వబడింది', '</s>'], ['<s>', 'హిందూ', 'సంఘం', 'ఒక', 'కులాల', 'కూటమి', '</s>'], ['<s>', 'దయచేసి', 'ఏదో', 'ఒక', 'పేజీకి', 'లింకు', 'పెట్టండి', '</s>'], ['<s>', 'జిల్లాలో', 'ముఖ్యమైన', 'గ్రామాలలో', 'ఇది', 'ఒకటి', '</s>'], ['<s>'

In [None]:
tags = list(data['pos'].unique())
print(tags)
print(len(tags))

['DEM', 'NST', 'NNP', 'NN', 'VM', 'END', 'START', 'JJ', 'PSP', 'QO', 'CC', 'WQ', 'PRP', 'QC', 'RB', 'SYM', 'INTF', 'UT', 'RP', 'QF', 'RDP', 'CL']
22


In [None]:
word = list(data['word'].unique())
print(len(word))

2269


In [None]:
pos_to_no = {}
no_to_pos = {}

for i, tag in enumerate(tags):
    no_to_pos[i] = tag
    pos_to_no[tag] = i

print(no_to_pos)
print(pos_to_no)

{0: 'DEM', 1: 'NST', 2: 'NNP', 3: 'NN', 4: 'VM', 5: 'END', 6: 'START', 7: 'JJ', 8: 'PSP', 9: 'QO', 10: 'CC', 11: 'WQ', 12: 'PRP', 13: 'QC', 14: 'RB', 15: 'SYM', 16: 'INTF', 17: 'UT', 18: 'RP', 19: 'QF', 20: 'RDP', 21: 'CL'}
{'DEM': 0, 'NST': 1, 'NNP': 2, 'NN': 3, 'VM': 4, 'END': 5, 'START': 6, 'JJ': 7, 'PSP': 8, 'QO': 9, 'CC': 10, 'WQ': 11, 'PRP': 12, 'QC': 13, 'RB': 14, 'SYM': 15, 'INTF': 16, 'UT': 17, 'RP': 18, 'QF': 19, 'RDP': 20, 'CL': 21}


In [None]:
# now -- row, next -- col

In [None]:
transition_matrix = {}
emission_matrix = {}
pos_sum = {}
for i in range(len(telugu_sen)):
    for j in range(len(telugu_sen[i]) - 1):

        word, pos = telugu_sen[i][j], telugu_pos[i][j]
        nextWord, nextPos = telugu_sen[i][j + 1], telugu_pos[i][j + 1]

        #transition matrix
        if (pos, nextPos) in transition_matrix:
            transition_matrix[(pos, nextPos)] +=  transition_matrix[(pos, nextPos)] + 1
        else:
             transition_matrix[(pos, nextPos)] = 1

        # denominatinor
        if pos not in pos_sum:
            pos_sum[pos] = 1
        else:
            pos_sum[pos] += pos_sum[pos] + 1

        #emision matrix
        if (pos, word) in emission_matrix:
            emission_matrix[(pos, word)] += emission_matrix[(pos, word)] + 1
        else:
            emission_matrix[(pos, word)] = 1



In [None]:
print(transition_matrix)

{('START', 'DEM'): 5444517870735015415413993718908291383295, ('DEM', 'NST'): 8191, ('NST', 'NNP'): 16383, ('NNP', 'NN'): 21661481985318866090456360813617841433097164651373566993519371723551728967231450179999800047688590453885868835635965404913860607, ('NN', 'VM'): 94758184344525691842589080106353915726128296943157752144717531617800961467674370503593652882607817257720198406807316479868870852301929589321550737002025216015896910157522577243058183937475491017166931103132108688408987234729983, ('VM', 'END'): 483067190377157293086918986366498418037365916213304374832154406431439892786195053067024220822740322245307952003937772147170634832630373456967863584183385093587122601852927, ('DEM', 'NN'): 6427752177035961102167848369364650410088811975131171341205503, ('NN', 'NN'): 1045306387854636870253908275374199688241601463566149140121811569475858326273063486923488404947071351458226156437261030596473650602742134325330586401164671803947983875433477677836620888459189622000953024159077961130480630942134

In [None]:
# handling UNKOWN token
# for key, values in emission_matrix.items():


In [None]:
# converting counts to probabilities
for key, values in transition_matrix.items():
    prePos, nextPos = key
    transition_matrix[(prePos, nextPos)] = transition_matrix[(prePos, nextPos)] / pos_sum[prePos]

for key, values in emission_matrix.items():
    pos, word = key
    emission_matrix[(pos, word)] = emission_matrix[(pos, word)] / pos_sum[pos]

In [None]:
print(transition_matrix)

{('START', 'DEM'): 4.064936359238081e-261, ('DEM', 'NST'): 4.861137212844907e-63, ('NST', 'NNP'): 7.0060646792704706e-46, ('NNP', 'NN'): 3.454467422037778e-77, ('NN', 'VM'): 0.0, ('VM', 'END'): 1.221974545399842e-150, ('DEM', 'NN'): 3.814697265625e-06, ('NN', 'NN'): 0.0, ('NN', 'JJ'): 0.0, ('JJ', 'NN'): 1.7763568394002505e-15, ('NN', 'END'): 0.0, ('START', 'NNP'): 5.720889335234188e-247, ('NNP', 'NNP'): 1.695830344760954e-167, ('NN', 'NNP'): 0.0, ('VM', 'JJ'): 1.4582244039112795e-303, ('START', 'NN'): 1.90109156629516e-211, ('VM', 'PSP'): 8.288793e-317, ('PSP', 'NNP'): 2.2026824808563262e-13, ('START', 'NST'): 8.406091369059067e-286, ('NST', 'NN'): 6.310887241768095e-30, ('NN', 'QO'): 0.0, ('QO', 'NN'): 0.0624997764825288, ('NN', 'PSP'): 0.0, ('PSP', 'PSP'): 2.1316282072803157e-14, ('PSP', 'NN'): 2.3282353822651368e-10, ('NNP', 'CC'): 2.6126808781413032e-200, ('CC', 'NN'): 1.734723475976807e-18, ('NN', 'CC'): 0.0, ('CC', 'NNP'): 1.5777157919109474e-30, ('NN', 'NST'): 0.0, ('NST', 'VM')

In [None]:
text = "నేను నీతో ఆడుతున్నాను"
text = text.split()

def tagPos(text, i, n, value, posSeq, preTag, maxValue, maxPos):
    if i == n:
#         print(value[0], maxValue[0], posSeq)
        if value[0] > maxValue[0]:
#             print("changed")
            print(value[0], maxValue[0], posSeq)
#             print(posSeq)
            maxValue[0] = value[0]
            maxPos[0] = posSeq
        return


    for tag in tags:

        if (tag, text[i]) in emission_matrix:
            e_m = emission_matrix[(tag, text[i])]
        # how to consider un_known words
        else:
            e_m = -1

        if (preTag, tag) in transition_matrix:
            t_m = transition_matrix[(preTag, tag)]
        else:
            t_m = -1

        posSeq.append(tag)
        temp = value[0]
        value[0] =  e_m * t_m * value[0]
#         if (e_m == -1 or t_m == -1) and value == 1:
#             value[0] = -1 * value[0]

        tagPos(text, i + 1, n, value, posSeq, tag, maxValue, maxPos)
        value[0] = temp
        posSeq.pop()



maxPos = [["k"]]
maxValue = [-1e9]
preTag  = 'START'
value = [1]

tagPos(text, 0, len(text), value, [], preTag, maxValue, maxPos)

print(maxPos)


-0.0 -1000000000.0 ['NN', 'NN', 'NN']
[[]]


In [None]:
vocab = set(data['word'])
# print(vocab)
if "నిన్ను" in vocab:
    print("yes")
else:
    print("no")

no


**VITERBI**

In [None]:
dp = []
for i in range(22):
    temp = []
    for j in range(len(text) + 1):
        temp.append(-1)
    dp.append(temp)
pos = []

#dp created with 22 * 3 tables

for i in range(len(text) + 1):
    for j in range(22):
        tag = no_to_pos[j]

        if i < len(text) and (tag, text[i]) in emission_matrix:
            e_m = emission_matrix[(tag, text[i])]
        else:
            e_m = -1

        if  i < len(text) and ('START', tag) in transition_matrix:
            t_m = transition_matrix[(preTag, tag)]
        else:
            t_m = -1

        if i == 0:
            dp[j][i] = e_m * t_m
        else:

            maxValue = -1e9
            for k in range(22):

                if (no_to_pos[k], tag) in transition_matrix:
                    t_g = transition_matrix[(no_to_pos[k], tag)]
                else:
                    t_g = -1

                if dp[k][i - 1] * t_g > maxValue:
                    maxValue = dp[k][i - 1]


            if i == len(text):
                break

            dp[j][i] = e_m * maxValue


pos = []
for i in range(len(text)):
    value = -1e9
    tag = -1
    for j in range(22):
        if dp[j][i] > value:
            value = dp[j][i]
            tag = j
    pos.append(no_to_pos[j])

print(dp)
print(pos)


[[-4.064936359238081e-261, -1, -3.057371614216545e-297, -1], [-8.406091369059067e-286, -1, -3.057371614216545e-297, -1], [-5.720889335234188e-247, -1, -3.057371614216545e-297, -1], [-1.90109156629516e-211, -1, -3.057371614216545e-297, -1], [-3.131512315903125e-294, -1, -3.057371614216545e-297, -1], [1, -1, -3.057371614216545e-297, -1], [1, 3.057371614216545e-297, -3.057371614216545e-297, -1], [-4.0083367125518366e-292, -1, -3.057371614216545e-297, -1], [-7.466108948025751e-301, -1, -3.057371614216545e-297, -1], [-7.466108948025751e-301, -1, -3.057371614216545e-297, -1], [-6.263025378417145e-294, -1, -3.057371614216545e-297, -1], [-2.4464199189995978e-296, -1, -3.057371614216545e-297, -1], [0.0, -1, -3.057371614216545e-297, -1], [-2.3144937738879828e-299, -1, -3.057371614216545e-297, -1], [-4.892914499088676e-296, -1, -3.057371614216545e-297, -1], [1, -1, -3.057371614216545e-297, -1], [-2.2398326844077253e-300, -1, -3.057371614216545e-297, -1], [-2.3144937738879828e-299, -1, -3.05737161

In [None]:
pip install sklearn-crfsuite

Collecting sklearn-crfsuite
  Downloading sklearn_crfsuite-0.3.6-py2.py3-none-any.whl.metadata (3.8 kB)
Collecting python-crfsuite>=0.8.3 (from sklearn-crfsuite)
  Downloading python_crfsuite-0.9.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.2 kB)
Downloading sklearn_crfsuite-0.3.6-py2.py3-none-any.whl (12 kB)
Downloading python_crfsuite-0.9.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m15.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: python-crfsuite, sklearn-crfsuite
Successfully installed python-crfsuite-0.9.10 sklearn-crfsuite-0.3.6
Note: you may need to restart the kernel to use updated packages.


In [None]:
import sklearn_crfsuite
from sklearn_crfsuite import metrics

In [None]:
def get_features(sentence, i):
    word = sentence[i]

    if len(word) >= 2:
        prefix_1 = word[:2]
        suffix_1 = word[-2:]
    else:
        prefix_1 = ""
        suffix_1 = ""

    if len(word) >= 4:
        prefix_2 = word[:4]
        suffix_2 = word[-4:]
    else:
        prefix_2 = ""
        suffix_2 = ""

    features = {
        'word': word,
        'is_first' : i == 0,
        'is_last' : i == len(sentence) - 1,

        'prefix_1' : prefix_1,
        'prefix_2' : prefix_2,

        'suffix_1' : suffix_1,
        'suffix_2' : suffix_2,

        'pre_word' : '' if i == 0 else sentence[i - 1],
        'next_word' : '' if i == len(sentence) - 1 else sentence[i + 1]
    }

    return features

In [None]:
X = []
Y = []

for j in range(len(telugu_sen)):
    x_sen = []
    y_sen = []

    cut_sen = telugu_sen[i][1:len(telugu_sen[i]) - 1]
    cut_pos = telugu_pos[i][1:len(telugu_pos[i]) - 1]
    for i in range(len(cut_sen)):
        x_sen.append(get_features(cut_sen, i))
        y_sen.append(cut_pos[i])

    X.append(x_sen)
    Y.append(y_sen)

In [None]:
print(X[0])
print(Y[0])

[{'word': 'హిందూ', 'is_first': True, 'is_last': False, 'prefix_1': 'హి', 'prefix_2': 'హింద', 'suffix_1': 'దూ', 'suffix_2': 'ిందూ', 'pre_word': '', 'next_word': 'సంఘం'}, {'word': 'సంఘం', 'is_first': False, 'is_last': False, 'prefix_1': 'సం', 'prefix_2': 'సంఘం', 'suffix_1': 'ఘం', 'suffix_2': 'సంఘం', 'pre_word': 'హిందూ', 'next_word': 'ఒక'}, {'word': 'ఒక', 'is_first': False, 'is_last': False, 'prefix_1': 'ఒక', 'prefix_2': '', 'suffix_1': 'ఒక', 'suffix_2': '', 'pre_word': 'సంఘం', 'next_word': 'కులాల'}, {'word': 'కులాల', 'is_first': False, 'is_last': False, 'prefix_1': 'కు', 'prefix_2': 'కులా', 'suffix_1': 'ాల', 'suffix_2': 'ులాల', 'pre_word': 'ఒక', 'next_word': 'కూటమి'}, {'word': 'కూటమి', 'is_first': False, 'is_last': True, 'prefix_1': 'కూ', 'prefix_2': 'కూటమ', 'suffix_1': 'మి', 'suffix_2': 'ూటమి', 'pre_word': 'కులాల', 'next_word': ''}]
['NNP', 'NN', 'JJ', 'NN', 'NN']


In [None]:
# crf = sklearn_crfsuite.CRF(
#     algorithm='lbfgs',
#     c1=0.1,
#     c2=0.1,
#     max_iterations=100,
#     all_possible_transitions=True
# )
# crf.fit(X, Y)

# GETTING SOME ERROR

In [None]:
import pycrfsuite


trainer = pycrfsuite.Trainer(verbose=False)
for x, y in zip(X, Y):
    trainer.append(x, y)

trainer.set_params({
    'c1': 1.0,
    'c2': 1e-3,
    'max_iterations': 50,
    'feature.possible_transitions': True
})
trainer.train('teluguPos.crfsuite')

In [None]:
tagger = pycrfsuite.Tagger()
tagger.open('teluguPos.crfsuite')

features = [get_features(text, i) for i in range(len(text))]
tags = tagger.tag(features)
print(list(zip(text, tags)))

[('నేను', 'NN'), ('నీతో', 'NN'), ('ఆడుతున్నాను', 'VM')]


In [None]:
print(features)
print(len(features))

[{'word': 'నేను', 'is_first': True, 'is_last': False, 'prefix_1': 'నే', 'prefix_2': 'నేను', 'suffix_1': 'ను', 'suffix_2': 'నేను', 'pre_word': '', 'next_word': 'నీతో'}, {'word': 'నీతో', 'is_first': False, 'is_last': False, 'prefix_1': 'నీ', 'prefix_2': 'నీతో', 'suffix_1': 'తో', 'suffix_2': 'నీతో', 'pre_word': 'నేను', 'next_word': 'ఆడుతున్నాను'}, {'word': 'ఆడుతున్నాను', 'is_first': False, 'is_last': True, 'prefix_1': 'ఆడ', 'prefix_2': 'ఆడుత', 'suffix_1': 'ను', 'suffix_2': 'నాను', 'pre_word': 'నీతో', 'next_word': ''}]
3


In [None]:
tagger.set(features)
prop = tagger.probability(['VM','PSP','NNP'])
print(prop)

0.006910925672248036


In [None]:
! pip install gradio

In [None]:
import gradio as gr

def tagger(sentence):
    text = sentence.split()
    features = [get_features(text, i) for i in range(len(text))]
    tags = tagger.tag(features)
    return 'hello'

demo = gr.Interface(fn=tagger, inputs="text", outputs="text")
demo.launch()