In [1]:
import pandas as pd
import pickle
import numpy as np
import json
import csv
import fasttext
from sklearn.model_selection import train_test_split

In [19]:
train_raw = pd.read_json("data/train.jsonl", lines=True, encoding="utf-8")
test_raw = pd.read_json("data/test.jsonl", lines=True, encoding="utf-8")


In [20]:
train_raw['conext_string'] = train_raw.context.apply(lambda x: ' '.join(x[::-1][:3]))
test_raw['conext_string'] = test_raw.context.apply(lambda x: ' '.join(x[::-1][:3]))

In [21]:
train_raw.head(10)

Unnamed: 0,label,response,context,conext_string
0,SARCASM,@USER @USER @USER I don't get this .. obviousl...,[A minor child deserves privacy and should be ...,@USER If your child isn't named Barron ... #Be...
1,SARCASM,@USER @USER trying to protest about . Talking ...,[@USER @USER Why is he a loser ? He's just a P...,@USER @USER having to make up excuses of why y...
2,SARCASM,@USER @USER @USER He makes an insane about of ...,[Donald J . Trump is guilty as charged . The e...,@USER I ’ ll remember to not support you at th...
3,SARCASM,@USER @USER Meanwhile Trump won't even release...,[Jamie Raskin tanked Doug Collins . Collins lo...,@USER But not half as stupid as Schiff looks ....
4,SARCASM,@USER @USER Pretty Sure the Anti-Lincoln Crowd...,[Man ... y ’ all gone “ both sides ” the apoca...,@USER They already did . Obama said many times...
5,SARCASM,@USER @USER @USER -> per your tag line : never...,[Donald Trump tapped into voters ’ populist sh...,@USER because these privileged white boys are ...
6,SARCASM,@USER @USER he does ! It excites him then he k...,[@USER @USER Coo-Coo . Keep on supporting fema...,@USER @USER do you masturbate to these videos ...
7,SARCASM,"Oh look , it's the #racist @USER offering soli...","[Hi , I'm Dennis , I'll be looking after lily'...",@USER Dennis please pass on my love and solida...
8,SARCASM,@USER @USER @USER As they are the biggest bull...,[Tips for children and young people from @USER...,@USER @USER @USER Please forward on to the Soc...
9,SARCASM,@USER @USER @USER responds to facts by tossing...,[The response of Sanders ' team to his quote f...,"@USER Careful , Bernie ’ s supporters get trig..."


In [22]:
train_raw['Target'] = train_raw['label'].apply(lambda x: '__label__'+x)
train_raw['all_string'] = train_raw['response'] + ". " + train_raw['conext_string']
test_raw['all_string'] = test_raw['response'] + ". " + test_raw['conext_string']

In [23]:
train_raw['all_string'] = train_raw['all_string'].apply(lambda x: x.lower())
test_raw['all_string'] = test_raw['all_string'].apply(lambda x: x.lower())

In [29]:
X_train, X_test, y_train, y_test = train_test_split(train_raw[['all_string']], train_raw['Target'], test_size=0.33, random_state=100)

In [30]:
train_all = X_train
train_all['Target'] = y_train
test_all = X_test
test_all['Target'] = y_test

In [31]:
train_all

Unnamed: 0,all_string,Target
2417,@user @user @user anybody else think marc mart...,__label__SARCASM
1516,@user so u r not only ignorant of basic econmc...,__label__SARCASM
3342,"demning leaders = "" diversity is our strength ...",__label__NOT_SARCASM
3750,"@user @user @user ahh good catch , cant rememb...",__label__NOT_SARCASM
579,"@user woo hoo i made the cut . lol you know , ...",__label__SARCASM
...,...,...
4149,@user @user ok great ! the girls and i will se...,__label__NOT_SARCASM
1890,@user @user @user don't disagree with dave ......,__label__SARCASM
350,@user @user or how about they are substantiate...,__label__SARCASM
79,@user delayed as in the had to use @user to ge...,__label__SARCASM


In [32]:
train_all[['Target', 'all_string']].to_csv('train.txt', header=None, index=None, sep=' ', quoting=csv.QUOTE_NONE, escapechar = ' ')
test_all[['Target', 'all_string']].to_csv('test.txt', header=None, index=None, sep=' ', quoting=csv.QUOTE_NONE, escapechar = ' ')

In [33]:
model = fasttext.train_supervised('train.txt', lr=0.0075, dim=50, epoch=100, ws=10, wordNgrams=2, minn=5)

In [34]:
print(model.words)



In [35]:
len(model.words)

24852

In [36]:
def print_results(N, p, r):
    print("N\t" + str(N))
    print("P@{}\t{:.10f}".format(1, p))
    print("R@{}\t{:.10f}".format(1, r))

print_results(*model.test('test.txt'))

N	1650
P@1	0.6515151515
R@1	0.6515151515


In [39]:
test_pred=test_all

def predict_label(row):
    return model.predict(row['all_string'])
test_pred['predictions'] = test_pred.apply(predict_label,axis=1)
test_pred['Pred_label'] = test_pred['predictions'].apply(lambda x:x[0][0])
test_pred['Pred_prob'] = test_pred['predictions'].apply(lambda x: x[1][0] if (x[0][0] == '__label__SARCASM') else 1-x[1][0])
test_pred['True_prob'] = test_pred['Target'].apply(lambda x: 1 if x == '__label__SARCASM' else 0)

In [40]:
from sklearn.metrics import precision_recall_fscore_support
precision_recall_fscore_support(test_pred['Target'], test_pred['Pred_label'])

(array([0.6844584 , 0.63079961]),
 array([0.5382716 , 0.76071429]),
 array([0.60262612, 0.68969239]),
 array([810, 840], dtype=int64))

In [43]:
test_pred=test_raw

def predict_label(row):
    return model.predict(row['response'])
test_pred['predictions'] = test_pred.apply(predict_label,axis=1)
test_pred['Pred_label'] = test_pred['predictions'].apply(lambda x:x[0][0])
test_pred['Pred_prob'] = test_pred['predictions'].apply(lambda x:x[1][0])
test_pred['Pred_label2'] = test_pred['Pred_label'].apply(lambda x:x.replace("__label__", ""))

In [44]:
test_pred

Unnamed: 0,id,response,context,conext_string,all_string,predictions,Pred_label,Pred_prob,Pred_label2
0,twitter_1,"@USER @USER @USER My 3 year old , that just fi...","[Well now that ’ s problematic AF <URL>, @USER...",@USER @USER @USER No .. he actually in the gif...,"@user @user @user my 3 year old , that just fi...","((__label__SARCASM,), [0.5334812998771667])",__label__SARCASM,0.533481,SARCASM
1,twitter_2,@USER @USER How many verifiable lies has he to...,[Last week the Fake News said that a section o...,@USER The mainstream media doesn't report the ...,@user @user how many verifiable lies has he to...,"((__label__SARCASM,), [0.544791042804718])",__label__SARCASM,0.544791,SARCASM
2,twitter_3,@USER @USER @USER Maybe Docs just a scrub of a...,[@USER Let ’ s Aplaud Brett When he deserves i...,@USER @USER He did try keep korkmaz in in the ...,@user @user @user maybe docs just a scrub of a...,"((__label__SARCASM,), [0.500037670135498])",__label__SARCASM,0.500038,SARCASM
3,twitter_4,@USER @USER is just a cover up for the real ha...,[Women generally hate this president . What's ...,@USER I've hated him before he was placed in o...,@user @user is just a cover up for the real ha...,"((__label__NOT_SARCASM,), [0.5302925109863281])",__label__NOT_SARCASM,0.530293,NOT_SARCASM
4,twitter_5,@USER @USER @USER The irony being that he even...,"[Dear media Remoaners , you excitedly sharing ...",@USER @USER Quite an articulate and considered...,@user @user @user the irony being that he even...,"((__label__SARCASM,), [0.5540772676467896])",__label__SARCASM,0.554077,SARCASM
...,...,...,...,...,...,...,...,...,...
1795,twitter_1796,@USER @USER @USER is definitely the best out t...,[I have been a business customer of MWeb @USER...,"@USER @USER It ’ s time for @USER , 24/7 frien...",@user @user @user is definitely the best out t...,"((__label__NOT_SARCASM,), [0.5591954588890076])",__label__NOT_SARCASM,0.559195,NOT_SARCASM
1796,twitter_1797,@USER @USER Ye let her out run wild and infect...,[A woman refuses to have her temperature taken...,@USER Disgusting i hope this world burns A wom...,@user @user ye let her out run wild and infect...,"((__label__NOT_SARCASM,), [0.5153915882110596])",__label__NOT_SARCASM,0.515392,NOT_SARCASM
1797,twitter_1798,"@USER @USER @USER Thanks for that , I would ha...",[The reason big government wants @USER out is ...,@USER @USER @USER Tell Obama that . Throughout...,"@user @user @user thanks for that , i would ha...","((__label__SARCASM,), [0.5189510583877563])",__label__SARCASM,0.518951,SARCASM
1798,twitter_1799,@USER @USER @USER Yes also #found this on #new...,[Happy #musicmonday and #thanks for #all your ...,@USER @USER @USER I totally agree ! Music is s...,@user @user @user yes also #found this on #new...,"((__label__NOT_SARCASM,), [0.5063786506652832])",__label__NOT_SARCASM,0.506379,NOT_SARCASM


In [45]:
test_pred[['id', 'Pred_label2']].to_csv('answer.txt', header=None, index=None, sep=',', quoting=csv.QUOTE_NONE, escapechar = ' ')