In [29]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported automatically before each cell runs.
%load_ext autoreload
%autoreload 2

In [24]:
import pandas as pd
import numpy as np
from collections import Counter
from nltk import word_tokenize, pos_tag
from tqdm import tqdm, tqdm_notebook
import pickle
import os

from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import precision_recall_fscore_support, accuracy_score, confusion_matrix

In [31]:
# Directory of the corpus to analyse. Alternatives used with this notebook:
#   '../data/davidson/'   '../data/wiki_talk/'
path = '../data/zeerak_naacl/'

# Read the train/dev/test CSVs from the selected directory in one pass.
# (A debug.csv was also read at some point; that load is disabled.)
train, dev, test = (
    pd.read_csv(f'{path}{split}.csv', encoding='utf-8')
    for split in ('train', 'dev', 'test')
)

In [32]:
set(test['label'])

{'none', 'racism', 'sexism'}

In [33]:
# Fix labels for davidson: fold 'offensive_language' into 'neither'.
# Every other label value is left exactly as it was.
train['label'] = train['label'].mask(train['label'] == 'offensive_language', 'neither')
test['label'] = test['label'].mask(test['label'] == 'offensive_language', 'neither')

In [34]:
# Fix labels for zeerak: keep 'none' and collapse every other label
# into the single positive class 'hate_speech'.
train['label'] = train['label'].where(train['label'] == 'none', 'hate_speech')
test['label'] = test['label'].where(test['label'] == 'none', 'hate_speech')

In [35]:
# Bag-of-words features: the vocabulary is learned from the training tweets
# only, then reused unchanged to encode the test tweets.
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(train['tweet'])
X_test = vectorizer.transform(test['tweet'])

# Targets as plain arrays of label strings.
y_train = train['label'].values
y_test = test['label'].values

In [36]:
# Fit the logistic-regression baseline on the bag-of-words features.
clf = LogisticRegression()
clf.fit(X_train, y_train)

y_train_pred = clf.predict(X_train)
y_test_pred = clf.predict(X_test)

# Report weighted-average precision/recall/F1 plus accuracy for each split.
# The held-out split is headed "Test"; it was previously printed under a
# second "Training" heading, which made the two reports indistinguishable.
for split_name, y_true, y_pred in (('Training', y_train, y_train_pred),
                                   ('Test', y_test, y_test_pred)):
    p, r, f1, s = precision_recall_fscore_support(y_true, y_pred, average='weighted')
    acc = accuracy_score(y_true, y_pred)
    print(f"{split_name}\n\tPrecision: {p:.2f}\n\tRecall: {r:.2f}\n\tF1-Score: {f1:.2f}\n\tAccuracy: {acc:.2f}")

Training
	Precision: 0.93
	Recall: 0.93
	F1-Score: 0.93
	Accuracy: 0.93
Training
	Precision: 0.84
	Recall: 0.84
	F1-Score: 0.84
	Accuracy: 0.84


In [37]:
coef_df = pd.DataFrame(columns=['word', 'coef'])

In [38]:
# One row per vocabulary entry; row label i holds the coefficient of feature
# column i (first row of clf.coef_).
coef_df['coef'] = clf.coef_[0]

# vectorizer.vocabulary_ maps token -> feature column index. Invert it once and
# fill the whole 'word' column in a single assignment instead of issuing one
# .loc write per token, which is very slow for a vocabulary of this size.
index_to_word = {i: w for w, i in vectorizer.vocabulary_.items()}
coef_df['word'] = [index_to_word[i] for i in coef_df.index]

HBox(children=(IntProgress(value=0, max=14128), HTML(value='')))




In [39]:
# Order tokens from most negative to most positive coefficient and renumber
# the rows 0..n-1 so the row label is the rank.
coef_df = (
    coef_df
    .sort_values(by='coef', ascending=True)
    .reset_index(drop=True)
)

In [40]:
# Davidson output
#coef_df.to_csv('../output/davidson_log_reg_coef.csv', encoding='utf-8', index=False)

In [41]:
# Zeerak output
coef_df.to_csv('../output/zeerak_log_reg_coef.csv', encoding='utf-8', index=False)

### Error Analysis

In [42]:
# Test splits and pickled test-set predictions used by the error analysis.
d_path = '../data/davidson/'
z_path = '../data/zeerak_naacl/'
o2_path = '../output/davidson_2way_gradrev_0.5/'
o3_path = '../output/davidson-zeerak_davidson_2018-04-25T10-55/'
o4_path = '../output/davidson-zeerak_davidson_2way_gradrev_0.5/'

d_test = pd.read_csv(f'{d_path}test.csv', encoding='utf-8')
# The following cell attaches `d_pred` to d_test, so it must be defined here;
# with this load commented out the notebook only ran on leftover kernel state.
# NOTE(review): o2_path is the run named in the previously commented-out load;
# confirm it is the run behind the recorded "Davidson 2-way" output.
d_pred = pd.read_pickle(f'{o2_path}test_preds.pkl')
base_pred = pd.read_pickle(f'{o3_path}test_preds.pkl')

z_test = pd.read_csv(f'{z_path}test.csv', encoding='utf-8')
# The following cell also needs `z_pred`, which was never defined. The recorded
# "Davidson-Zeerak" confusion matrix is identical to the one later reported for
# the o3 baseline predictions, so those are the predictions used here.
z_pred = base_pred
ca_pred = pd.read_pickle(f'{o4_path}test_preds.pkl')

In [43]:
# Davidson: attach predictions, keep only the columns needed for inspection,
# and binarise the gold label (1 = 'hate_speech', 0 = anything else).
# The label column is replaced by plain assignment so it becomes an integer
# column; writing ints through `.loc[:, 'label']` is done in place by recent
# pandas and leaves an object-dtype column that sklearn metrics reject.
d_test['pred'] = d_pred
d_test = d_test[['tweet', 'label', 'pred']].copy()
d_test['label'] = (d_test['label'] == 'hate_speech').astype(int)
# 1 where the prediction matches the gold label, 0 otherwise.
d_test['correct'] = (d_test['label'] == d_test['pred']).astype(int)

# Zeerak: same preparation, with 0 = 'none' and 1 = any other label.
z_test['pred'] = z_pred
z_test = z_test[['tweet', 'label', 'pred']].copy()
z_test['label'] = (z_test['label'] != 'none').astype(int)
z_test['correct'] = (z_test['label'] == z_test['pred']).astype(int)

In [44]:
Counter(d_test['label'])

Counter({0: 2318, 1: 146})

In [45]:
# Print the four confusion-matrix cells for each prepared test frame.
for heading, frame in (("Davidson 2-way:", d_test), ("Davidson-Zeerak:", z_test)):
    print(heading)
    tn, fp, fn, tp = confusion_matrix(frame.label, frame.pred).ravel()
    print(f"\ttn: {tn}\n\tfp: {fp}\n\tfn: {fn}\n\ttp: {tp}")

Davidson 2-way:
	tn: 2246
	fp: 72
	fn: 94
	tp: 52
Davidson-Zeerak:
	tn: 990
	fp: 95
	fn: 426
	tp: 59


In [46]:
# Pull out 10 examples of fp and fn for each dataset

In [47]:
def get_error_samples(df):
    """Split the misclassified rows of ``df`` into false positives and false negatives.

    ``df`` must have a ``correct`` column (0 marks a wrong prediction) and a
    ``pred`` column (predicted class, 0 or 1).

    Returns a ``(false_positives, false_negatives)`` pair of independent copies:
    wrong rows predicted 1, and wrong rows predicted 0.
    """
    is_wrong = df['correct'].eq(0)
    false_positives = df.loc[is_wrong & df['pred'].eq(1)].copy()
    false_negatives = df.loc[is_wrong & df['pred'].eq(0)].copy()
    return false_positives, false_negatives

In [48]:
d_fp, d_fn = get_error_samples(d_test)
z_fp, z_fn = get_error_samples(z_test)

In [49]:
out_path = '../output/error_analysis/'
d_fname = '{}davidson_grad_rev_{}.csv'
z_fname = '{}davidson-zeerak_{}.csv'

In [50]:
d_fp.to_csv(d_fname.format(out_path, 'fp'), encoding='utf-8', index=False)
d_fn.to_csv(d_fname.format(out_path, 'fn'), encoding='utf-8', index=False)
z_fp.to_csv(z_fname.format(out_path, 'fp'), encoding='utf-8', index=False)
z_fn.to_csv(z_fname.format(out_path, 'fn'), encoding='utf-8', index=False)

In [None]:
d_fn.loc[662, 'tweet']

In [51]:
d_fp.loc[2161].tweet

'<MENTION> 1 2 3 4 how many niggers are in my store i knowwwww your stealing 😂😂😂 that vine still be having me weak'

In [52]:
msk = ['africa' in t for t in z_fp.tweet]
z_fp[msk].tweet

1501    # mkr the fat south africans are feeling hopeful
Name: tweet, dtype: object

#### Cross-domain baseline vs constrained attention (Davidson-Zeerak)

In [53]:
# Inputs for this section: the two test splits, plus the output directories
# that hold pickled test-set predictions. Only o3_path and o4_path are read
# in this cell; the other output paths are defined but not used here.
d_path = '../data/davidson/'
z_path = '../data/zeerak_naacl/'
o2_path = '../output/davidson_2way_gradrev_0.5/'
o3_path = '../output/davidson-zeerak_davidson_2018-04-25T10-55/'
o4_path = '../output/davidson-zeerak_davidson_2way_gradrev_0.5/'
o5_path = '../output/zeerak-davidson_zeerak_2way_all/'
o6_path = '../output/zeerak-davidson_zeerak_2way_gradrev/'

d_test = pd.read_csv(f'{d_path}test.csv', encoding='utf-8')
# d_pred = pd.read_pickle(f'{o2_path}test_preds.pkl')
# Predictions stored under the name used for the "baseline" column below.
base_pred = pd.read_pickle(f'{o3_path}test_preds.pkl')

z_test = pd.read_csv(f'{z_path}test.csv', encoding='utf-8')
# base_z_pred = pd.read_pickle(f'{o3_path}test_preds.pkl')
# Predictions stored under the name used for the "constrained attention"
# column below.
ca_pred = pd.read_pickle(f'{o4_path}test_preds.pkl')

In [54]:
# Attach both prediction sets to the Zeerak test split and keep only the
# columns needed for the side-by-side comparison.
z_test['base_pred'] = base_pred
z_test['ca_pred'] = ca_pred
z_test = z_test[['tweet', 'label', 'base_pred', 'ca_pred']].copy()

# Binarise the gold label: 0 = 'none', 1 = any other label. Plain column
# assignment yields an integer column; writing ints through `.loc[:, 'label']`
# is done in place by recent pandas and leaves an object-dtype column that
# sklearn metrics reject.
z_test['label'] = (z_test['label'] != 'none').astype(int)

# Per-model correctness flags: 1 where the prediction equals the gold label.
z_test['base_correct'] = (z_test['label'] == z_test['base_pred']).astype(int)
z_test['ca_correct'] = (z_test['label'] == z_test['ca_pred']).astype(int)

In [55]:
Counter(z_test['label'])

Counter({0: 1085, 1: 485})

In [56]:
# Print the four confusion-matrix cells for each model's predictions on z_test.
for heading, predictions in (
    ("Davidson-Zeerak baseline:", z_test.base_pred),
    ("Davidson-Zeerak constrained attention:", z_test.ca_pred),
):
    print(heading)
    tn, fp, fn, tp = confusion_matrix(z_test.label, predictions).ravel()
    print(f"\ttn: {tn}\n\tfp: {fp}\n\tfn: {fn}\n\ttp: {tp}")

Davidson-Zeerak baseline:
	tn: 990
	fp: 95
	fn: 426
	tp: 59
Davidson-Zeerak constrained attention:
	tn: 1068
	fp: 17
	fn: 466
	tp: 19


Find examples, for both hate speech and non-hate speech, that the baseline classified correctly but constrained attention got wrong.

In [57]:
def get_error_samples(df, baseline_col='base_pred', model_col='ca_pred'):
    """Return the rows the baseline got right but the compared model got wrong.

    This is not the full set of the model's errors: rows that both models
    misclassify are excluded.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a binary ``label`` column (0/1) plus the two prediction
        columns named by ``baseline_col`` and ``model_col``.
    baseline_col : str, default 'base_pred'
        Column holding the baseline's 0/1 predictions.
    model_col : str, default 'ca_pred'
        Column holding the compared model's 0/1 predictions.

    Returns
    -------
    (fp_ex, fn_ex) : tuple of pandas.DataFrame
        ``fp_ex``: label 0, baseline predicted 0, model predicted 1.
        ``fn_ex``: label 1, baseline predicted 1, model predicted 0.
        Both are independent copies of the selected rows.
    """
    label = df['label']
    baseline = df[baseline_col]
    model = df[model_col]

    # New false positives: a true negative for the baseline, flagged by the model.
    fp_msk = (label == 0) & (baseline == 0) & (model == 1)
    fp_ex = df[fp_msk].copy()

    # New false negatives: a true positive for the baseline, missed by the model.
    fn_msk = (label == 1) & (baseline == 1) & (model == 0)
    fn_ex = df[fn_msk].copy()

    return fp_ex, fn_ex

In [58]:
z_fp, z_fn = get_error_samples(z_test)

In [59]:
out_path = '../output/error_analysis/'
out_fname = '{}davidson-zeerak_comparison_{}.csv'

In [60]:
z_fp.to_csv(out_fname.format(out_path, 'fp'), encoding='utf-8', index=False)
z_fn.to_csv(out_fname.format(out_path, 'fn'), encoding='utf-8', index=False)

In [65]:
print(len(z_fp))
print(len(z_fn))

10
54


In [67]:
# Show full tweet text instead of truncating long cells. `None` is the
# supported "no limit" value; the old negative sentinel (-1) is deprecated and
# rejected by current pandas.
pd.set_option('display.max_colwidth', None)

In [68]:
z_fn

Unnamed: 0,tweet,label,base_pred,ca_pred,base_correct,ca_correct
9,<MENTION> <MENTION> and no i m not sexist women are to be treated as any other person on this planet,1,1,0,1,0
36,<MENTION> islam declared war on all humanity 1400 years ago while individual muslims may or may not be terrorist islam is terrorist,1,1,0,1,0
57,<MENTION> islam is a pile of manure that destroys human freedom human life human expression and creates endless divisiveness,1,1,0,1,0
71,<MENTION> compared to the sewer and violence and poverty of muslim pakistan americans live in opulence,1,1,0,1,0
208,<MENTION> <MENTION> i actually wish they would just start using egalitarian so we can just let feminist mean the misandrist hypocrites,1,1,0,1,0
212,<MENTION> i am better i have no problem with hinduism christianity tao judaism buddhism or atheism but islam hates all non muslims,1,1,0,1,0
276,<MENTION> leave it to some microbrain to get sanctimonious by finding a way to play the race card,1,1,0,1,0
311,<MENTION> saw this this morning <URL>,1,1,0,1,0
344,<MENTION> here is a map of israel inside all of the muslim nations that want to destroy israel and murder them <URL>,1,1,0,1,0
393,<MENTION> <MENTION> sometimes you have to go to the hadiths to clarify the quran,1,1,0,1,0


In [69]:
z_fp

Unnamed: 0,tweet,label,base_pred,ca_pred,base_correct,ca_correct
306,for many of us our job is part of who we are because we ve had to fight so hard to get where we are,0,0,1,1,0
476,<MENTION> your problem is that race baiting is the oxygen you breath you can t live without it your identity depends on it,0,0,1,1,0
601,<MENTION> you made a false equivalency i called it on you and you threatened financial consequences,0,0,1,1,0
1075,<MENTION> most of my oreilly books are ebooks p those are all so old but yes i usually quite like them,0,0,1,1,0
1149,psycho eyes you are a truly horrible person # mkr # mykitchenrules,0,0,1,1,0
1263,<MENTION> stop lying my hadiths come for the muslim students association of usc anyone can independently verify them,0,0,1,1,0
1269,she can t hear you you little troll # mkr,0,0,1,1,0
1369,<MENTION> <MENTION> <MENTION> isis boko haram al queda taliban iran hanging kurds egyptians killing copts etc etc etc,0,0,1,1,0
1415,# mkr always needs an evil team for ratings kat andrew are that evil team they will intentionally survive,0,0,1,1,0
1482,perpetuating a culture where abuse is tolerated and targets of abuse are mocked for speaking out against their abusers,0,0,1,1,0


#### Cross-domain baseline vs constrained attention (Zeerak-Davidson)

In [76]:
# Inputs for this section: the two test splits, plus the output directories
# that hold pickled test-set predictions. Only o5_path and o6_path are read
# in this cell; the other output paths are defined but not used here.
d_path = '../data/davidson/'
z_path = '../data/zeerak_naacl/'
o2_path = '../output/davidson_2way_gradrev_0.5/'
o3_path = '../output/davidson-zeerak_davidson_2018-04-25T10-55/'
o4_path = '../output/davidson-zeerak_davidson_2way_gradrev_0.5/'
o5_path = '../output/zeerak-davidson_zeerak_2way_all/'
o6_path = '../output/zeerak-davidson_zeerak_2way_gradrev/'

d_test = pd.read_csv(f'{d_path}test.csv', encoding='utf-8')
# d_pred = pd.read_pickle(f'{o2_path}test_preds.pkl')
# Predictions stored under the name used for the "baseline" column below.
base_pred = pd.read_pickle(f'{o5_path}test_preds.pkl')

z_test = pd.read_csv(f'{z_path}test.csv', encoding='utf-8')
# base_z_pred = pd.read_pickle(f'{o3_path}test_preds.pkl')
# Predictions stored under the name used for the "constrained attention"
# column below.
ca_pred = pd.read_pickle(f'{o6_path}test_preds.pkl')

In [77]:
# Attach both prediction sets to the Davidson test split and keep only the
# columns needed for the side-by-side comparison.
d_test['base_pred'] = base_pred
d_test['ca_pred'] = ca_pred
d_test = d_test[['tweet', 'label', 'base_pred', 'ca_pred']].copy()

# Binarise the gold label: 0 = 'neither', 1 = any other label. Plain column
# assignment yields an integer column; writing ints through `.loc[:, 'label']`
# is done in place by recent pandas and leaves an object-dtype column that
# sklearn metrics reject.
d_test['label'] = (d_test['label'] != 'neither').astype(int)

# Per-model correctness flags: 1 where the prediction equals the gold label.
d_test['base_correct'] = (d_test['label'] == d_test['base_pred']).astype(int)
d_test['ca_correct'] = (d_test['label'] == d_test['ca_pred']).astype(int)

In [78]:
Counter(d_test['label'])

Counter({1: 2085, 0: 379})

In [80]:
# Print the four confusion-matrix cells for each model's predictions on d_test.
for heading, predictions in (
    ("Zeerak-Davidson baseline:", d_test.base_pred),
    ("Zeerak-Davidson constrained attention:", d_test.ca_pred),
):
    print(heading)
    tn, fp, fn, tp = confusion_matrix(d_test.label, predictions).ravel()
    print(f"\ttn: {tn}\n\tfp: {fp}\n\tfn: {fn}\n\ttp: {tp}")

Zeerak-Davidson baseline:
	tn: 277
	fp: 102
	fn: 966
	tp: 1119
Zeerak-Davidson constrained attention:
	tn: 330
	fp: 49
	fn: 1352
	tp: 733


Find examples, for both hate speech and non-hate speech, that the baseline classified correctly but constrained attention got wrong.

In [81]:
def get_error_samples(df):
    """Select rows where ``base_pred`` matches ``label`` but ``ca_pred`` does not.

    Returns a pair of independent copies ``(fp_ex, fn_ex)``:
    ``fp_ex`` has label 0, base_pred 0, ca_pred 1;
    ``fn_ex`` has label 1, base_pred 1, ca_pred 0.
    """
    def _select(gold, other):
        # Rows with this gold label where base_pred equals it and ca_pred is `other`.
        keep = df['label'].eq(gold) & df['base_pred'].eq(gold) & df['ca_pred'].eq(other)
        return df.loc[keep].copy()

    return _select(0, 1), _select(1, 0)

In [82]:
d_fp, d_fn = get_error_samples(d_test)

In [83]:
out_path = '../output/error_analysis/'
out_fname = '{}zeerak-davidson_comparison_{}.csv'

In [84]:
d_fp.to_csv(out_fname.format(out_path, 'fp'), encoding='utf-8', index=False)
d_fn.to_csv(out_fname.format(out_path, 'fn'), encoding='utf-8', index=False)

In [85]:
print(len(d_fp))
print(len(d_fn))

27
525


In [86]:
# Show full tweet text instead of truncating long cells. `None` is the
# supported "no limit" value; the old negative sentinel (-1) is deprecated and
# rejected by current pandas.
pd.set_option('display.max_colwidth', None)

In [87]:
d_fn

Unnamed: 0,tweet,label,base_pred,ca_pred,base_correct,ca_correct
8,and girl that pussy ain t shit to me baby girl roll up and get high,1,1,0,1,0
11,<MENTION> bitch i might be aesthetic <URL>,1,1,0,1,0
13,side bitches are basically new relationships started an addition to the problems you already have with your primary woman,1,1,0,1,0
17,<MENTION> bitch don t kill my vibe,1,1,0,1,0
19,<MENTION> this shit sorry than a hoe,1,1,0,1,0
20,you hate football your a faggot,1,1,0,1,0
22,thats the only thing a nigga ask for <MENTION> its some choosable bitches in here tonite,1,1,0,1,0
26,same bitches they be all in my face bout how they couldn t do it be the same bitches all on his dick 😒,1,1,0,1,0
29,niggas sleepin with bitches grandmas lmfao maury fake af,1,1,0,1,0
36,<MENTION> <MENTION> <MENTION> then how yall got here b stfu negro,1,1,0,1,0


In [88]:
d_fp

Unnamed: 0,tweet,label,base_pred,ca_pred,base_correct,ca_correct
66,<MENTION> rick scott vs charlie crist is the worst gubernatorial election in america and it s the face of our ugly future <URL>,0,0,1,1,0
106,<MENTION> go the gulls first grade should be a cracker physical,0,0,1,1,0
228,<MENTION> # sagittarius can be compassionate towards other people when they want to be especially towards the ones they care abo,0,0,1,1,0
313,<MENTION> hes behind the plate for the game tonight here in az im looking to buy a red yellow balkamania shirt i can tear off there,0,0,1,1,0
377,<MENTION> have a great day you lovely fellow apes <URL>,0,0,1,1,0
434,<MENTION> like charlie chan style never saw that either hmm makes me think of the cockneys in the killzone series,0,0,1,1,0
473,<MENTION> klain s real sole job is to dampen down the mushrooming bad pr and political fallout heading into nov election,0,0,1,1,0
556,<MENTION> that the yankees have won four straight shane greene starts and 7 of 9 speaks to his stability in a season of starters emergen,0,0,1,1,0
565,<MENTION> <MENTION> jay i think once someone has alzheimer s suffers from alcoholism and is mildly retarded people shouldn t mak,0,0,1,1,0
887,<MENTION> curious george the curious little monkey or deranged serial cat killer the story the man with the yellow hat doesn t want you,0,0,1,1,0


#### In-domain racism/sexism baseline vs bilstm

In [30]:
# Choose which in-domain comparison to run by (un)commenting one group of three
# paths: the test CSV, the BiLSTM test predictions, and the logistic-regression
# test predictions.

# Racism variant (disabled):
# data_path = '../data/zeerak_naacl/racism_test.csv'
# bilstm_preds_path = '../output/zeerak_racism_all/test_preds.pkl'
# logreg_preds_path = '../output/logreg_preds_racism_racism.pkl'

# Sexism variant (active):
data_path = '../data/zeerak_naacl/sexism_test.csv'
bilstm_preds_path = '../output/sexism_zeerak_naacl_zeerak_sexism_all/test_preds.pkl'
logreg_preds_path = '../output/logreg_preds_sexism_sexism.pkl'

data = pd.read_csv(data_path)
bilstm_preds = pd.read_pickle(bilstm_preds_path)
# NOTE(review): unpickling executes arbitrary code from the file; only load
# prediction files produced by this project.
with open(logreg_preds_path, 'rb') as f:
    logreg_preds = pickle.load(f)

# Display the available columns of the loaded test CSV.
data.columns

Index(['tweet', 'label', 'mentions', 'hashtags', 'slurs', 'original_tweet',
       'racism', 'sexism', 'none', 'tweet_id', 'user_screen_name',
       'tweet_unk_slur', 'tweet_no_slur', 'tweet_pos_slur'],
      dtype='object')

In [31]:
# Attach both models' predictions and keep only the columns needed for the
# side-by-side comparison.
data['bilstm_preds'] = bilstm_preds
data['logreg_preds'] = logreg_preds
data = data[['tweet', 'label', 'bilstm_preds', 'logreg_preds']].copy()

# Binarise the gold label: 0 = 'none', 1 = any other label. Plain column
# assignment yields an integer column; writing ints through `.loc[:, 'label']`
# is done in place by recent pandas and leaves an object-dtype column that
# sklearn metrics reject.
data['label'] = (data['label'] != 'none').astype(int)

# Class balance of the binarised gold labels.
Counter(data['label'])

Counter({0: 542, 1: 197})

In [32]:
# Print the four confusion-matrix cells for each model's predictions on data.
for heading, predictions in (
    ("Logistic regression:", data.logreg_preds),
    ("BiLSTM:", data.bilstm_preds),
):
    print(heading)
    tn, fp, fn, tp = confusion_matrix(data.label, predictions).ravel()
    print(f"\ttn: {tn}\n\tfp: {fp}\n\tfn: {fn}\n\ttp: {tp}")

Logistic regression:
	tn: 528
	fp: 14
	fn: 80
	tp: 117
BiLSTM:
	tn: 490
	fp: 52
	fn: 64
	tp: 133


Find examples, for both hate speech and non-hate speech, that the logistic-regression baseline classified correctly but the BiLSTM got wrong.

In [33]:
def get_error_samples(df, baseline_colname='logreg_preds', model_colname='bilstm_preds'):
    """Return the rows the baseline got right but the compared model got wrong.

    This is not the full set of the model's errors: rows that both models
    misclassify are excluded.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a binary ``label`` column (0/1) plus the two prediction
        columns named by ``baseline_colname`` and ``model_colname``.
    baseline_colname : str, default 'logreg_preds'
        Column holding the baseline's 0/1 predictions.
    model_colname : str, default 'bilstm_preds'
        Column holding the compared model's 0/1 predictions.

    Returns
    -------
    (fp_ex, fn_ex) : tuple of pandas.DataFrame
        ``fp_ex``: label 0, baseline predicted 0, model predicted 1.
        ``fn_ex``: label 1, baseline predicted 1, model predicted 0.
        Both are independent copies of the selected rows.
    """
    # New false positives: a true negative for the baseline, flagged by the model.
    fp_msk = (df['label'] == 0) & (df[baseline_colname] == 0) & (df[model_colname] == 1)
    fp_ex = df[fp_msk].copy()

    # New false negatives: a true positive for the baseline, missed by the model.
    fn_msk = (df['label'] == 1) & (df[baseline_colname] == 1) & (df[model_colname] == 0)
    fn_ex = df[fn_msk].copy()

    return fp_ex, fn_ex

In [34]:
d_fp, d_fn = get_error_samples(data)

In [35]:
# Show full tweet text instead of truncating long cells. `None` is the
# supported "no limit" value; the old negative sentinel (-1) is deprecated and
# rejected by current pandas.
pd.set_option('display.max_colwidth', None)

In [36]:
d_fn

Unnamed: 0,tweet,label,bilstm_preds,logreg_preds
13,lmao <MENTION> mitt romney needs to run for a 3rd time in 2016 not sexist but hillary clinton can t run this country,1,0,1
141,<MENTION> i dunno knowing what i know now or in today s environment i would not date any girl from same school,1,0,1
183,<MENTION> <MENTION> <MENTION> sorry bitch you did your research in your own asshole,1,0,1
248,<MENTION> <MENTION> <MENTION> worked in a virtually all female environment not sexist but comes with it s own problems sadly,1,0,1
284,<MENTION> but logistically speaking the vagina is a tough character it can accommodate items of size and girth quite well,1,0,1
475,<MENTION> <MENTION> <MENTION> <MENTION> it s a great website use it omg you are right it is great <URL>,1,0,1
517,<MENTION> why is kat over there looking smug she can t even cook # mkr,1,0,1
648,<MENTION> on # internationalwomensday women who inspire me <MENTION> <MENTION> <MENTION> <MENTION> <MENTION> <MENTION>,1,0,1
653,<MENTION> relax afterward < so after a standard 8 hour shift of child rearing and housework you take over or when she s dead,1,0,1
733,<MENTION> on halloween girls should go out with no makeup on now that would be scary,1,0,1


In [37]:
d_fp

Unnamed: 0,tweet,label,bilstm_preds,logreg_preds
34,<MENTION> wtf kat and andre are cheaters that is so unfair they cannot give someone 1 they should be gone # mkr # katandandre,0,1,0
36,forgot how annoying annie is this is what happens when the series goes for ten years # mkr,0,1,0
65,<MENTION> yeah men never talk about shit they know nothing about like birth control or sexism for example,0,1,0
86,<MENTION> having male employees truly sucks because they undermine your authority based on your gender and tweet about you to strangers,0,1,0
94,# mkr are these two for real,0,1,0
108,no seriously read that in dwight s voice,0,1,0
113,<MENTION> peace is important but freedom is even more important,0,1,0
127,<MENTION> <MENTION> # islamicstatemedia <URL>,0,1,0
131,<MENTION> looks neat i need to start keeping my eyes open for cfps,0,1,0
154,the judges are scoring strategically to get kat out of their sight # mkr,0,1,0


In [38]:
out_path = '../output/error_analysis/'
out_fname = '{}zeerak_sexism_comparison_{}.csv'

In [39]:
d_fp.to_csv(out_fname.format(out_path, 'fp'), encoding='utf-8', index=False)
d_fn.to_csv(out_fname.format(out_path, 'fn'), encoding='utf-8', index=False)

In [20]:
# Show full tweet text instead of truncating long cells. `None` is the
# supported "no limit" value; the old negative sentinel (-1) is deprecated and
# rejected by current pandas.
pd.set_option('display.max_colwidth', None)