# REDHEN DATA

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import pandas as pd
import numpy as np
import pickle
from fastai.nlp import *
from sklearn.linear_model import LogisticRegression

In [3]:
data = pd.read_excel('/home/spriyanshu723/Isla Vista - All Excerpts - 1_2_2019.xlsx')
data.head()

Unnamed: 0,StoryID,Excerpt,CodesApplied_Combined,ACCOUNT,ACCOUNT_Cultural,ACCOUNT_Individual,ACCOUNT_Other,COMMUNITYRECOVERY,EVENT,GRIEF,...,SOCIALSUPPORT,THREAT,THREAT_Assessment,TRAUMA,TRAUMA_Physical,TRAUMA_Psychological,TRAUMA_Individual,TRAUMA_Community,TRAUMA_Societal,VICTIMS
0,NI2599,"Are guns the problem, video\ngames, the increa...","ACCOUNTABILITY, ACCOUNT- Culture/societal risk...",1,1,0,0,99,0,0,...,0,0,99,0,0,0,99,99,99,0
1,NI2599,"May 23, 2014, in Isla Vista, California. 22-ye...","EVENT, TRAUMA, TRAUMA- Physical",0,0,0,0,99,1,0,...,0,0,99,1,1,0,99,99,99,0
2,NI2951,A 22-year-old student last Friday killed six p...,EVENT,0,0,0,0,99,1,0,...,0,0,99,0,0,0,99,99,99,0
3,NI2951,A 22-year-old student last Friday killed six p...,"ACCOUNTABILITY, ACCOUNT- Culture/societal risk...",1,1,0,0,99,0,0,...,0,0,99,0,0,0,99,99,99,0
4,NI2951,Elliot Rodger was not a typical man � few of u...,"PERPETRATOR, ACCOUNTABILITY, ACCOUNT- Individu...",1,0,1,0,99,0,0,...,0,0,99,0,0,0,99,99,99,0


In [4]:
data.columns

Index(['StoryID', 'Excerpt', 'CodesApplied_Combined', 'ACCOUNT',
       'ACCOUNT_Cultural', 'ACCOUNT_Individual', 'ACCOUNT_Other',
       'COMMUNITYRECOVERY', 'EVENT', 'GRIEF', 'GRIEF_Individual',
       'GRIEF_Community', 'GRIEF_Societal', 'HERO', 'INVESTIGATION', 'JOURNEY',
       'JOURNEY_Mental', 'JOURNEY_Physical', 'LEGAL', 'MEDIA', 'MISCELLANEOUS',
       'MOURNING', 'MOURNING_Individual', 'MOURNING_Community',
       'MOURNING_Societal', 'PERPETRATOR', 'PHOTO', 'POLICY', 'POLICY_Guns',
       'POLICY_InfoSharing', 'POLICY_MentalHealth', 'POLICY_Other',
       'POLICY_VictimAdv', 'POLICY_OtherAdv', 'POLICY_Practice',
       'PRIVATESECTOR', 'RACECULTURE', 'RESOURCES', 'SAFETY',
       'SAFETY_Community', 'SAFETY_Individual', 'SAFETY_SchoolOrg',
       'SAFETY_Societal', 'SOCIALSUPPORT', 'THREAT', 'THREAT_Assessment',
       'TRAUMA', 'TRAUMA_Physical', 'TRAUMA_Psychological',
       'TRAUMA_Individual', 'TRAUMA_Community', 'TRAUMA_Societal', 'VICTIMS'],
      dtype='object')

In [5]:
data.shape

(8131, 53)

In [6]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8131 entries, 0 to 8130
Data columns (total 53 columns):
StoryID                  8131 non-null object
Excerpt                  8130 non-null object
CodesApplied_Combined    8128 non-null object
ACCOUNT                  8131 non-null int64
ACCOUNT_Cultural         8131 non-null int64
ACCOUNT_Individual       8131 non-null int64
ACCOUNT_Other            8131 non-null int64
COMMUNITYRECOVERY        8131 non-null int64
EVENT                    8131 non-null int64
GRIEF                    8131 non-null int64
GRIEF_Individual         8131 non-null int64
GRIEF_Community          8131 non-null int64
GRIEF_Societal           8131 non-null int64
HERO                     8131 non-null int64
INVESTIGATION            8131 non-null int64
JOURNEY                  8131 non-null int64
JOURNEY_Mental           8131 non-null int64
JOURNEY_Physical         8131 non-null int64
LEGAL                    8131 non-null int64
MEDIA                    8131 non-n

## DATA ANALYSIS 

### NUMBER OF WORDS CALCULATION 

In [7]:
data['word_count'] = data['Excerpt'].apply(lambda x: len(str(x).split(" ")))
data[['Excerpt','word_count']].head()

Unnamed: 0,Excerpt,word_count
0,"Are guns the problem, video\ngames, the increa...",50
1,"May 23, 2014, in Isla Vista, California. 22-ye...",138
2,A 22-year-old student last Friday killed six p...,22
3,A 22-year-old student last Friday killed six p...,57
4,Elliot Rodger was not a typical man � few of u...,44


### NUMBER OF CHARACTER CALCULATION 

In [8]:
data['char_count'] = data['Excerpt'].str.len() ## this also includes spaces
data[['Excerpt','char_count']].head()

Unnamed: 0,Excerpt,char_count
0,"Are guns the problem, video\ngames, the increa...",279.0
1,"May 23, 2014, in Isla Vista, California. 22-ye...",848.0
2,A 22-year-old student last Friday killed six p...,121.0
3,A 22-year-old student last Friday killed six p...,358.0
4,Elliot Rodger was not a typical man � few of u...,238.0


## BASIC PRE-PROCESSING 

### REMOVING PUNCTUATION 

In [9]:
data['Excerpt'] = data['Excerpt'].str.replace('[^\w\s]','')
data['Excerpt'].head()

0    Are guns the problem video\ngames the increase...
1    May 23 2014 in Isla Vista California 22yearold...
2    A 22yearold student last Friday killed six peo...
3    A 22yearold student last Friday killed six peo...
4    Elliot Rodger was not a typical man  few of us...
Name: Excerpt, dtype: object

### SPELLING CORRECTION 

In [13]:
from textblob import TextBlob
data['Excerpt'][:5].apply(lambda x: str(TextBlob(x).correct()))

0    Are guns the problem video\ngames the increase...
1    May 23 2014 in Sea Vista California 22yearold ...
2    A 22yearold student last Friday killed six peo...
3    A 22yearold student last Friday killed six peo...
4    Allot Lodger was not a typical man  few of us ...
Name: Excerpt, dtype: object

### PREPROCESSING 

In [24]:
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/spriyanshu723/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [25]:
stopword = nltk.corpus.stopwords.words('english')

In [26]:
#stop_words = stopwords.words("english")
#stemmer = SnowballStemmer("english")
ps = nltk.PorterStemmer()
stop_words = stopword
stemmer = ps

In [27]:
TEXT_CLEANING_RE = "@\S+|https?:\S+|http?:\S|[^A-Za-z0-9]+"

In [28]:
def preprocess(text, stem=False):
    # Remove link,user and special characters
    text = re.sub(TEXT_CLEANING_RE, ' ', str(text).lower()).strip()
    tokens = []
    for token in text.split():
        if token not in stop_words:
            if stem:
                tokens.append(stemmer.stem(token))
            else:
                tokens.append(token)
    return " ".join(tokens)

In [29]:
data['Excerpt_pre'] = data['Excerpt'].apply(lambda x: preprocess(x))
data.head()

Unnamed: 0,StoryID,Excerpt,CodesApplied_Combined,ACCOUNT,ACCOUNT_Cultural,ACCOUNT_Individual,ACCOUNT_Other,COMMUNITYRECOVERY,EVENT,GRIEF,...,TRAUMA,TRAUMA_Physical,TRAUMA_Psychological,TRAUMA_Individual,TRAUMA_Community,TRAUMA_Societal,VICTIMS,word_count,char_count,Excerpt_pre
7992,NI3079,Elliot Rodger Wangs roommate stabbed Wang and ...,EVENT,0,0,0,0,99,1,0,...,0,0,0,99,99,99,0,34,215.0,elliot rodger wangs roommate stabbed wang two ...
4686,NI2689,Because the tragedy unfolded at the start of a...,RESOURCES,0,0,0,0,99,0,0,...,0,0,0,99,99,99,0,32,228.0,tragedy unfolded start holiday weekend school ...
6130,NI2400,Gunrelated restraining orders Also passed in r...,"POLICY, POLICY- Guns, POLICY- Mental health",0,0,0,0,99,0,0,...,0,0,0,99,99,99,0,122,734.0,gunrelated restraining orders also passed resp...
5876,NI1393,Linder said she will also remember Weiss for h...,"VICTIMS, GRIEF/LOSS",0,0,0,0,99,0,1,...,0,0,0,99,99,99,1,100,499.0,linder said also remember weiss epic shot pool...
4126,NI3259,I mean I know how this goes We all do Werent y...,"ACCOUNTABILITY, ACCOUNT- Culture/societal risk...",1,1,0,0,99,0,0,...,0,0,0,99,99,99,0,85,425.0,mean know goes werent sort expecting father on...


### TOKENIZATION 

In [30]:
def tokenization(text):
    text = re.split('\W+', text)
    return text

In [31]:
data['Excerpt_tokenized'] = data['Excerpt_pre'].apply(lambda x: tokenization(x.lower()))
data.head()

Unnamed: 0,StoryID,Excerpt,CodesApplied_Combined,ACCOUNT,ACCOUNT_Cultural,ACCOUNT_Individual,ACCOUNT_Other,COMMUNITYRECOVERY,EVENT,GRIEF,...,TRAUMA_Physical,TRAUMA_Psychological,TRAUMA_Individual,TRAUMA_Community,TRAUMA_Societal,VICTIMS,word_count,char_count,Excerpt_pre,Excerpt_tokenized
7992,NI3079,Elliot Rodger Wangs roommate stabbed Wang and ...,EVENT,0,0,0,0,99,1,0,...,0,0,99,99,99,0,34,215.0,elliot rodger wangs roommate stabbed wang two ...,"[elliot, rodger, wangs, roommate, stabbed, wan..."
4686,NI2689,Because the tragedy unfolded at the start of a...,RESOURCES,0,0,0,0,99,0,0,...,0,0,99,99,99,0,32,228.0,tragedy unfolded start holiday weekend school ...,"[tragedy, unfolded, start, holiday, weekend, s..."
6130,NI2400,Gunrelated restraining orders Also passed in r...,"POLICY, POLICY- Guns, POLICY- Mental health",0,0,0,0,99,0,0,...,0,0,99,99,99,0,122,734.0,gunrelated restraining orders also passed resp...,"[gunrelated, restraining, orders, also, passed..."
5876,NI1393,Linder said she will also remember Weiss for h...,"VICTIMS, GRIEF/LOSS",0,0,0,0,99,0,1,...,0,0,99,99,99,1,100,499.0,linder said also remember weiss epic shot pool...,"[linder, said, also, remember, weiss, epic, sh..."
4126,NI3259,I mean I know how this goes We all do Werent y...,"ACCOUNTABILITY, ACCOUNT- Culture/societal risk...",1,1,0,0,99,0,0,...,0,0,99,99,99,0,85,425.0,mean know goes werent sort expecting father on...,"[mean, know, goes, werent, sort, expecting, fa..."


### STOP WORDS REMOVAL 

In [32]:
stopword = nltk.corpus.stopwords.words('english')

In [33]:
def remove_stopwords(text):
    text = [word for word in text if word not in stopword]
    return text

In [34]:
data['Excerpt_nonstop'] = data['Excerpt_tokenized'].apply(lambda x: remove_stopwords(x))
data.head()

Unnamed: 0,StoryID,Excerpt,CodesApplied_Combined,ACCOUNT,ACCOUNT_Cultural,ACCOUNT_Individual,ACCOUNT_Other,COMMUNITYRECOVERY,EVENT,GRIEF,...,TRAUMA_Psychological,TRAUMA_Individual,TRAUMA_Community,TRAUMA_Societal,VICTIMS,word_count,char_count,Excerpt_pre,Excerpt_tokenized,Excerpt_nonstop
7992,NI3079,Elliot Rodger Wangs roommate stabbed Wang and ...,EVENT,0,0,0,0,99,1,0,...,0,99,99,99,0,34,215.0,elliot rodger wangs roommate stabbed wang two ...,"[elliot, rodger, wangs, roommate, stabbed, wan...","[elliot, rodger, wangs, roommate, stabbed, wan..."
4686,NI2689,Because the tragedy unfolded at the start of a...,RESOURCES,0,0,0,0,99,0,0,...,0,99,99,99,0,32,228.0,tragedy unfolded start holiday weekend school ...,"[tragedy, unfolded, start, holiday, weekend, s...","[tragedy, unfolded, start, holiday, weekend, s..."
6130,NI2400,Gunrelated restraining orders Also passed in r...,"POLICY, POLICY- Guns, POLICY- Mental health",0,0,0,0,99,0,0,...,0,99,99,99,0,122,734.0,gunrelated restraining orders also passed resp...,"[gunrelated, restraining, orders, also, passed...","[gunrelated, restraining, orders, also, passed..."
5876,NI1393,Linder said she will also remember Weiss for h...,"VICTIMS, GRIEF/LOSS",0,0,0,0,99,0,1,...,0,99,99,99,1,100,499.0,linder said also remember weiss epic shot pool...,"[linder, said, also, remember, weiss, epic, sh...","[linder, said, also, remember, weiss, epic, sh..."
4126,NI3259,I mean I know how this goes We all do Werent y...,"ACCOUNTABILITY, ACCOUNT- Culture/societal risk...",1,1,0,0,99,0,0,...,0,99,99,99,0,85,425.0,mean know goes werent sort expecting father on...,"[mean, know, goes, werent, sort, expecting, fa...","[mean, know, goes, werent, sort, expecting, fa..."


### STEMMING 

In [35]:
ps = nltk.PorterStemmer()

In [36]:
def stemming(text):
    text = [ps.stem(word) for word in text]
    return text

In [37]:
import sys
sys.setrecursionlimit(15000)

In [38]:
data['Excerpt_stemmed'] = data['Excerpt_nonstop'].apply(lambda x: stemming(x))
data.head()

Unnamed: 0,StoryID,Excerpt,CodesApplied_Combined,ACCOUNT,ACCOUNT_Cultural,ACCOUNT_Individual,ACCOUNT_Other,COMMUNITYRECOVERY,EVENT,GRIEF,...,TRAUMA_Individual,TRAUMA_Community,TRAUMA_Societal,VICTIMS,word_count,char_count,Excerpt_pre,Excerpt_tokenized,Excerpt_nonstop,Excerpt_stemmed
7992,NI3079,Elliot Rodger Wangs roommate stabbed Wang and ...,EVENT,0,0,0,0,99,1,0,...,99,99,99,0,34,215.0,elliot rodger wangs roommate stabbed wang two ...,"[elliot, rodger, wangs, roommate, stabbed, wan...","[elliot, rodger, wangs, roommate, stabbed, wan...","[elliot, rodger, wang, roommat, stab, wang, tw..."
4686,NI2689,Because the tragedy unfolded at the start of a...,RESOURCES,0,0,0,0,99,0,0,...,99,99,99,0,32,228.0,tragedy unfolded start holiday weekend school ...,"[tragedy, unfolded, start, holiday, weekend, s...","[tragedy, unfolded, start, holiday, weekend, s...","[tragedi, unfold, start, holiday, weekend, sch..."
6130,NI2400,Gunrelated restraining orders Also passed in r...,"POLICY, POLICY- Guns, POLICY- Mental health",0,0,0,0,99,0,0,...,99,99,99,0,122,734.0,gunrelated restraining orders also passed resp...,"[gunrelated, restraining, orders, also, passed...","[gunrelated, restraining, orders, also, passed...","[gunrel, restrain, order, also, pass, respons,..."
5876,NI1393,Linder said she will also remember Weiss for h...,"VICTIMS, GRIEF/LOSS",0,0,0,0,99,0,1,...,99,99,99,1,100,499.0,linder said also remember weiss epic shot pool...,"[linder, said, also, remember, weiss, epic, sh...","[linder, said, also, remember, weiss, epic, sh...","[linder, said, also, rememb, weiss, epic, shot..."
4126,NI3259,I mean I know how this goes We all do Werent y...,"ACCOUNTABILITY, ACCOUNT- Culture/societal risk...",1,1,0,0,99,0,0,...,99,99,99,0,85,425.0,mean know goes werent sort expecting father on...,"[mean, know, goes, werent, sort, expecting, fa...","[mean, know, goes, werent, sort, expecting, fa...","[mean, know, goe, werent, sort, expect, father..."


###  LAMMITIZATION 

In [39]:
wn = nltk.WordNetLemmatizer()

In [40]:
def lemmatizer(text):
    text = [wn.lemmatize(word) for word in text]
    return text

In [41]:
import nltk
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to
[nltk_data]     /home/spriyanshu723/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [43]:
data['Excerpt_lemmatized'] = data['Excerpt_nonstop'].apply(lambda x: lemmatizer(x))
data.head()

Unnamed: 0,StoryID,Excerpt,CodesApplied_Combined,ACCOUNT,ACCOUNT_Cultural,ACCOUNT_Individual,ACCOUNT_Other,COMMUNITYRECOVERY,EVENT,GRIEF,...,TRAUMA_Community,TRAUMA_Societal,VICTIMS,word_count,char_count,Excerpt_pre,Excerpt_tokenized,Excerpt_nonstop,Excerpt_stemmed,Excerpt_lemmatized
7992,NI3079,Elliot Rodger Wangs roommate stabbed Wang and ...,EVENT,0,0,0,0,99,1,0,...,99,99,0,34,215.0,elliot rodger wangs roommate stabbed wang two ...,"[elliot, rodger, wangs, roommate, stabbed, wan...","[elliot, rodger, wangs, roommate, stabbed, wan...","[elliot, rodger, wang, roommat, stab, wang, tw...","[elliot, rodger, wangs, roommate, stabbed, wan..."
4686,NI2689,Because the tragedy unfolded at the start of a...,RESOURCES,0,0,0,0,99,0,0,...,99,99,0,32,228.0,tragedy unfolded start holiday weekend school ...,"[tragedy, unfolded, start, holiday, weekend, s...","[tragedy, unfolded, start, holiday, weekend, s...","[tragedi, unfold, start, holiday, weekend, sch...","[tragedy, unfolded, start, holiday, weekend, s..."
6130,NI2400,Gunrelated restraining orders Also passed in r...,"POLICY, POLICY- Guns, POLICY- Mental health",0,0,0,0,99,0,0,...,99,99,0,122,734.0,gunrelated restraining orders also passed resp...,"[gunrelated, restraining, orders, also, passed...","[gunrelated, restraining, orders, also, passed...","[gunrel, restrain, order, also, pass, respons,...","[gunrelated, restraining, order, also, passed,..."
5876,NI1393,Linder said she will also remember Weiss for h...,"VICTIMS, GRIEF/LOSS",0,0,0,0,99,0,1,...,99,99,1,100,499.0,linder said also remember weiss epic shot pool...,"[linder, said, also, remember, weiss, epic, sh...","[linder, said, also, remember, weiss, epic, sh...","[linder, said, also, rememb, weiss, epic, shot...","[linder, said, also, remember, wei, epic, shot..."
4126,NI3259,I mean I know how this goes We all do Werent y...,"ACCOUNTABILITY, ACCOUNT- Culture/societal risk...",1,1,0,0,99,0,0,...,99,99,0,85,425.0,mean know goes werent sort expecting father on...,"[mean, know, goes, werent, sort, expecting, fa...","[mean, know, goes, werent, sort, expecting, fa...","[mean, know, goe, werent, sort, expect, father...","[mean, know, go, werent, sort, expecting, fath..."


### CLEANING 

In [44]:
def clean_text(text):
    text_lc = "".join([word.lower() for word in text if word not in string.punctuation]) # remove puntuation
    text_rc = re.sub('[0-9]+', '', text_lc)
    tokens = re.split('\W+', text_rc)    # tokenization
    text = [ps.stem(word) for word in tokens if word not in stopword]  # remove stopwords and stemming
    return text

In [46]:
data['Excerpt_clean'] = data['Excerpt_pre'].apply(lambda x: clean_text(x))
data.head()

Unnamed: 0,StoryID,Excerpt,CodesApplied_Combined,ACCOUNT,ACCOUNT_Cultural,ACCOUNT_Individual,ACCOUNT_Other,COMMUNITYRECOVERY,EVENT,GRIEF,...,TRAUMA_Societal,VICTIMS,word_count,char_count,Excerpt_pre,Excerpt_tokenized,Excerpt_nonstop,Excerpt_stemmed,Excerpt_lemmatized,Excerpt_clean
7992,NI3079,Elliot Rodger Wangs roommate stabbed Wang and ...,EVENT,0,0,0,0,99,1,0,...,99,0,34,215.0,elliot rodger wangs roommate stabbed wang two ...,"[elliot, rodger, wangs, roommate, stabbed, wan...","[elliot, rodger, wangs, roommate, stabbed, wan...","[elliot, rodger, wang, roommat, stab, wang, tw...","[elliot, rodger, wangs, roommate, stabbed, wan...","[elliot, rodger, wang, roommat, stab, wang, tw..."
4686,NI2689,Because the tragedy unfolded at the start of a...,RESOURCES,0,0,0,0,99,0,0,...,99,0,32,228.0,tragedy unfolded start holiday weekend school ...,"[tragedy, unfolded, start, holiday, weekend, s...","[tragedy, unfolded, start, holiday, weekend, s...","[tragedi, unfold, start, holiday, weekend, sch...","[tragedy, unfolded, start, holiday, weekend, s...","[tragedi, unfold, start, holiday, weekend, sch..."
6130,NI2400,Gunrelated restraining orders Also passed in r...,"POLICY, POLICY- Guns, POLICY- Mental health",0,0,0,0,99,0,0,...,99,0,122,734.0,gunrelated restraining orders also passed resp...,"[gunrelated, restraining, orders, also, passed...","[gunrelated, restraining, orders, also, passed...","[gunrel, restrain, order, also, pass, respons,...","[gunrelated, restraining, order, also, passed,...","[gunrel, restrain, order, also, pass, respons,..."
5876,NI1393,Linder said she will also remember Weiss for h...,"VICTIMS, GRIEF/LOSS",0,0,0,0,99,0,1,...,99,1,100,499.0,linder said also remember weiss epic shot pool...,"[linder, said, also, remember, weiss, epic, sh...","[linder, said, also, remember, weiss, epic, sh...","[linder, said, also, rememb, weiss, epic, shot...","[linder, said, also, remember, wei, epic, shot...","[linder, said, also, rememb, weiss, epic, shot..."
4126,NI3259,I mean I know how this goes We all do Werent y...,"ACCOUNTABILITY, ACCOUNT- Culture/societal risk...",1,1,0,0,99,0,0,...,99,0,85,425.0,mean know goes werent sort expecting father on...,"[mean, know, goes, werent, sort, expecting, fa...","[mean, know, goes, werent, sort, expecting, fa...","[mean, know, goe, werent, sort, expect, father...","[mean, know, go, werent, sort, expecting, fath...","[mean, know, goe, werent, sort, expect, father..."


In [47]:
data.columns

Index(['StoryID', 'Excerpt', 'CodesApplied_Combined', 'ACCOUNT',
       'ACCOUNT_Cultural', 'ACCOUNT_Individual', 'ACCOUNT_Other',
       'COMMUNITYRECOVERY', 'EVENT', 'GRIEF', 'GRIEF_Individual',
       'GRIEF_Community', 'GRIEF_Societal', 'HERO', 'INVESTIGATION', 'JOURNEY',
       'JOURNEY_Mental', 'JOURNEY_Physical', 'LEGAL', 'MEDIA', 'MISCELLANEOUS',
       'MOURNING', 'MOURNING_Individual', 'MOURNING_Community',
       'MOURNING_Societal', 'PERPETRATOR', 'PHOTO', 'POLICY', 'POLICY_Guns',
       'POLICY_InfoSharing', 'POLICY_MentalHealth', 'POLICY_Other',
       'POLICY_VictimAdv', 'POLICY_OtherAdv', 'POLICY_Practice',
       'PRIVATESECTOR', 'RACECULTURE', 'RESOURCES', 'SAFETY',
       'SAFETY_Community', 'SAFETY_Individual', 'SAFETY_SchoolOrg',
       'SAFETY_Societal', 'SOCIALSUPPORT', 'THREAT', 'THREAT_Assessment',
       'TRAUMA', 'TRAUMA_Physical', 'TRAUMA_Psychological',
       'TRAUMA_Individual', 'TRAUMA_Community', 'TRAUMA_Societal', 'VICTIMS',
       'word_count', 'char_

In [48]:
data.to_csv('redhen_preprocessed.csv')