### This module creates feature files that contain linguistic features<br>The linguistic features used are:
SentimentScore - compound sentiment score of the statement<br>
CountPunc - count of punctuation characters<br>
Readability - automated readability index of the statement<br>
CountWord - count of whitespace-separated words<br>

In [1]:
#make necessary imports
import os
import random
import glob
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer 
import string
import textstat as ts

# Switch the working directory to the sibling "datasets" folder so that the
# relative dataset paths used further down are resolved from there.
os.chdir("../datasets")

In [2]:
#utility function to save the file in csv
def savetocsv(df, path):
    """Write the dataframe ``df`` to ``path`` in CSV format.

    pandas' default CSV options are used, so the dataframe index is written
    as the first (unnamed) column. Nothing is returned.
    """
    df.to_csv(path_or_buf=path)

In [3]:
#utility function to get the file in form of a pandas dataframe
def getframe(path):
    """Load a headerless TSV file as a (statement, label) dataframe.

    Column 1 of the file holds the textual truth rating and column 2 the
    statement; all other columns are discarded. Duplicate
    (rating, statement) rows are removed, then the rating is collapsed into
    a binary label: 0 for 'false', 'pants-fire' and 'barely-true', and 1 for
    every other value.

    Parameters
    ----------
    path : str
        Location of the tab-separated input file.

    Returns
    -------
    pandas.DataFrame
        Columns ``statement`` and ``label`` (integer 0/1), in that order.
        The index keeps the original row positions; it is not reset after
        duplicates are removed.
    """
    negative_ratings = ('false', 'pants-fire', 'barely-true')
    raw = pd.read_csv(path, sep='\t', header=None)
    # Pick the two needed columns by position instead of dropping a
    # hard-coded list of all the others, so a file with a different number
    # of extra columns still loads.
    pairs = raw[[1, 2]].drop_duplicates()
    # Derive the label from a boolean mask rather than overwriting the
    # string column with ints in place: the label gets a real integer dtype
    # and no mixed-type assignment is needed.
    label = (~pairs[1].isin(negative_ratings)).astype(int)
    return pd.DataFrame({'statement': pairs[2], 'label': label})

In [4]:
#features - SentimentScore
def SentimentScore(df):
    """Add a ``SentimentScore`` column to ``df`` and return ``df``.

    For every entry of the ``statement`` column the value stored is the
    ``'compound'`` entry of ``SentimentIntensityAnalyzer.polarity_scores``.
    The dataframe is modified in place; the same object is returned.
    """
    # Create the analyzer once and reuse it for all rows; building a new
    # one per statement repeats its initialization for every row.
    analyzer = SentimentIntensityAnalyzer()
    df['SentimentScore'] = df['statement'].map(
        lambda text: analyzer.polarity_scores(text)['compound']
    )
    return df

In [5]:
#features - count of punctuations
def CountPunc(df):
    """Add a ``CountPunc`` column to ``df`` and return ``df``.

    Each value is the number of characters of the corresponding
    ``statement`` that occur in ``string.punctuation``. The dataframe is
    modified in place.
    """
    marks = string.punctuation

    def punctuation_total(text):
        # One tally per character that is a punctuation mark.
        return sum(1 for ch in text if ch in marks)

    df['CountPunc'] = df['statement'].apply(punctuation_total)
    return df

In [6]:
#features - readability
def Readability(df):
    """Add a ``Readability`` column to ``df`` and return ``df``.

    Each value is the result of ``ts.automated_readability_index`` applied
    to the corresponding ``statement``. The dataframe is modified in place.
    """
    statements = df['statement']
    scores = statements.map(ts.automated_readability_index)
    df['Readability'] = scores
    return df

In [7]:
#features - countword
def CountWord(df):
    """Add a ``CountWord`` column to ``df`` and return ``df``.

    Each value is the number of whitespace-separated tokens in the
    corresponding ``statement``. The dataframe is modified in place.
    """
    def word_total(text):
        tokens = text.split()
        return len(tokens)

    df['CountWord'] = df['statement'].map(word_total)
    return df

In [8]:
#utility function to add the linguistic features
def addfeatures(df):
    """Run every linguistic feature extractor over ``df`` and return the result.

    The extractors are applied in a fixed order: SentimentScore, CountPunc,
    Readability, CountWord. Each one receives the dataframe returned by the
    previous one.
    """
    extractors = (SentimentScore, CountPunc, Readability, CountWord)
    for extract in extractors:
        df = extract(df)
    return df

In [9]:
#utility function to create files containing linguistic features
def createfiles(path):
    """Build the feature dataframe for one input file and save it as CSV.

    The file at ``path`` is loaded with ``getframe`` and extended with
    ``addfeatures``. The result is written to
    ``liar_tweaked/<name>featuredata.csv``, where ``<name>`` is the input
    file name without its extension, e.g. ``liar_original/train.tsv`` is
    written to ``liar_tweaked/trainfeaturedata.csv``.

    Parameters
    ----------
    path : str
        Path of the tab-separated input file.

    Returns
    -------
    pandas.DataFrame
        The dataframe that was written to disk.
    """
    df = addfeatures(getframe(path))
    # Take the name from the path's last component rather than from a fixed
    # character offset and the first '.' in the whole path; the old slicing
    # was only right for paths that start with the 13-character
    # 'liar_original' prefix and contain no earlier dot.
    name = os.path.splitext(os.path.basename(path))[0]
    out_dir = 'liar_tweaked'
    # Make sure the target directory exists so the write cannot fail on a
    # fresh checkout.
    os.makedirs(out_dir, exist_ok=True)
    savetocsv(df, os.path.join(out_dir, name + 'featuredata.csv'))
    return df

In [10]:
# Generate the feature CSV for each of the three input files (train, test,
# valid). Every call also returns the dataframe it wrote; the calls are kept
# as separate statements so the final one remains the last expression here.
createfiles('liar_original/train.tsv')
createfiles('liar_original/test.tsv')
createfiles('liar_original/valid.tsv')

Unnamed: 0,statement,label,SentimentScore,CountPunc,Readability,CountWord
0,We have less Americans working now than in the...,0,0.0000,1,3.4,10
1,"When Obama was sworn into office, he DID NOT u...",0,0.0000,6,13.8,26
2,Says Having organizations parading as being so...,0,-0.3400,1,20.1,32
3,Says nearly half of Oregons children are poor.,1,-0.4767,1,5.6,8
4,On attacks by Republicans that various program...,1,-0.4404,5,17.6,33
...,...,...,...,...,...,...
1279,"For the first time in more than a decade, impo...",1,0.0000,4,8.6,20
1280,Says Donald Trump has bankrupted his companies...,1,0.0000,2,8.8,14
1281,"John McCain and George Bush have ""absolutely n...",1,-0.3597,3,9.7,13
1282,A new poll shows 62 percent support the presid...,0,0.7096,6,10.7,33
