In [2]:
import fasttext
import pandas as pd
import csv
model = fasttext.load_model('model.bin', label_prefix='__label__', encoding='utf-8')

In [3]:
### Read our train data into a dataframe
train = pd.read_csv("train_90pct.csv")
copy = train.copy()
copy.head()
test = pd.read_csv("train_10pct.csv")
test_copy = test.copy()
test_copy.head()
predict = pd.read_csv("test.csv")
predict_copy = predict.copy()
predict.head()

Unnamed: 0,id,text
0,id02310,"Still, as I urged our leaving Ireland with suc..."
1,id24541,"If a fire wanted fanning, it could readily be ..."
2,id00134,And when they had broken down the frail door t...
3,id27757,While I was thinking how I should possibly man...
4,id04081,I am not sure to what limit his knowledge may ...


In [4]:
with open('cleaned.txt', 'w+') as output:
    with open('train_90pct.csv', 'r+') as input:
        reader = csv.reader(input, delimiter=',', quotechar='"')
        next(reader, None)
        for row in reader:
            output.write(row[1] + ' __label__' + row[2] + '\n')

In [5]:
with open('clean.txt', 'w+') as output:
    with open('train_10pct.csv', 'r+') as input:
        reader = csv.reader(input, delimiter=',', quotechar='"')
        next(reader, None)
        for row in reader:
            output.write(row[1] + ' __label__' + row[2] + '\n')

In [6]:
classifier = fasttext.supervised('cleaned.txt', 'model')

In [7]:
# If we had a test file, we could measure the precision with: 

result = classifier.test('clean.txt')
print ('P@1:', result.precision)
print ('R@1:', result.recall)
print ('Number of examples:', result.nexamples)

P@1: 0.793
R@1: 0.793
Number of examples: 2000


In [8]:
text = [text for text in predict_copy['text']]
text[:3]

['Still, as I urged our leaving Ireland with such inquietude and impatience, my father thought it best to yield.',
 'If a fire wanted fanning, it could readily be fanned with a newspaper, and as the government grew weaker, I have no doubt that leather and iron acquired durability in proportion, for, in a very short time, there was not a pair of bellows in all Rotterdam that ever stood in need of a stitch or required the assistance of a hammer.',
 'And when they had broken down the frail door they found only this: two cleanly picked human skeletons on the earthen floor, and a number of singular beetles crawling in the shadowy corners.']

In [9]:
labels = classifier.predict_proba(text, k=1)
print (labels)

[[('MWS', 0.658203)], [('EAP', 0.806641)], [('HPL', 0.925781)], [('EAP', 0.773438)], [('EAP', 0.666016)], [('EAP', 0.912109)], [('EAP', 0.982422)], [('MWS', 0.726562)], [('EAP', 0.992188)], [('EAP', 0.994141)], [('HPL', 0.5625)], [('HPL', 0.888672)], [('MWS', 0.791016)], [('HPL', 0.458984)], [('EAP', 0.898438)], [('MWS', 0.992188)], [('EAP', 0.533203)], [('EAP', 0.996094)], [('EAP', 0.65625)], [('EAP', 0.859375)], [('EAP', 0.759766)], [('HPL', 0.361328)], [('EAP', 0.623047)], [('EAP', 0.53125)], [('MWS', 0.576172)], [('MWS', 0.820312)], [('EAP', 0.566406)], [('EAP', 0.966797)], [('MWS', 0.800781)], [('MWS', 0.65625)], [('MWS', 0.878906)], [('EAP', 0.941406)], [('HPL', 0.964844)], [('EAP', 0.544922)], [('EAP', 0.513672)], [('EAP', 0.517578)], [('HPL', 0.519531)], [('MWS', 0.980469)], [('EAP', 0.431641)], [('EAP', 0.953125)], [('EAP', 0.691406)], [('EAP', 0.974609)], [('EAP', 0.951172)], [('EAP', 0.806641)], [('EAP', 0.648438)], [('MWS', 0.628906)], [('HPL', 0.947266)], [('EAP', 0.826172

In [10]:
final = pd.DataFrame(labels, columns = ['FastText Prediction'])
final.head()

Unnamed: 0,FastText Prediction
0,"(MWS, 0.658203)"
1,"(EAP, 0.806641)"
2,"(HPL, 0.925781)"
3,"(EAP, 0.773438)"
4,"(EAP, 0.666016)"
