-
Notifications
You must be signed in to change notification settings - Fork 3
/
LIWC-22-cli_Example.py
214 lines (149 loc) · 10.1 KB
/
LIWC-22-cli_Example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Ryan L. Boyd
# 2022-03-17
# _ _______ _______ ___ ___ _ _ ______ _
# | | |_ _\ \ / / ____| |__ \|__ \ | (_) | ____| | |
# | | | | \ \ /\ / | | ______ ) | ) ______ ___| |_ | |__ __ ____ _ _ __ ___ _ __ | | ___
# | | | | \ \/ \/ /| | |______/ / / |______/ __| | | | __| \ \/ / _` | '_ ` _ \| '_ \| |/ _ \
# | |____ _| |_ \ /\ / | |____ / /_ / /_ | (__| | | | |____ > | (_| | | | | | | |_) | | __/
# |______|_____| \/ \/ \_____| |____|____| \___|_|_| |______/_/\_\__,_|_| |_| |_| .__/|_|\___|
# | |
# |_|
"""
This is an example script that demonstrates how to make a call to the LIWC-22 command line interface (CLI)
from Python. Briefly described, what we want to do is launch the CLI application as a subprocess, then wait
for that subprocess to finish.
This is a very crude example script, so please feel free to improve/innovate on this example :) """
# Make sure that you have the LIWC-22.exe GUI running — it is required for the CLI to function correctly :)
# Make sure that you have the LIWC-22.exe GUI running — it is required for the CLI to function correctly :)
# Make sure that you have the LIWC-22.exe GUI running — it is required for the CLI to function correctly :)
# Make sure that you have the LIWC-22.exe GUI running — it is required for the CLI to function correctly :)
import subprocess
# ______ _ _ _ _ _ _________ _________ ______ _ _
# | ____| | | | | (_| | | | |__ __\ \ / |__ __| | ____(_| |
# | |__ ___ | | __| | ___ _ __ __ ___| |_| |__ | | \ V / | | | |__ _| | ___ ___
# | __/ _ \| |/ _` |/ _ | '__| \ \ /\ / | | __| '_ \ | | > < | | | __| | | |/ _ / __|
# | | | (_) | | (_| | __| | \ V V /| | |_| | | | | | / . \ | | | | | | | __\__ \
# |_| \___/|_|\__,_|\___|_| \_/\_/ |_|\__|_| |_| |_| /_/ \_\ |_| |_| |_|_|\___|___/
# Where the plain-text inputs live, and where the finished results table should be written.
inputFolderTXT = "C:/Users/Ryan/Datasets/TED - English Only - TXT Files/"
outputLocation = "C:/Users/Ryan/Datasets/TED Talk TXT Files - Analyzed.csv"
# Build the argument list for a standard "Word Count" (wc) LIWC analysis: read the texts from
# the input folder, analyze them, and save the output to the specified location.
cmd_to_execute = ["LIWC-22-cli"]
cmd_to_execute += ["--mode", "wc"]
cmd_to_execute += ["--input", inputFolderTXT]
cmd_to_execute += ["--output", outputLocation]
# Launch the CLI as a subprocess and wait for it to finish.
subprocess.call(cmd_to_execute)
# We will see the following in the terminal as it begins working:
#
# Picked up JAVA_TOOL_OPTIONS: -Dfile.encoding=UTF-8
# Processing:
# - [folder] C:\Users\Ryan\Datasets\TED - English Only - TXT Files
# [=================== ] 47.75%; Number of Texts Analyzed: 1304; Total Words Analyzed: 2.62M
# A thing of beauty, to be sure. What if we want to process our texts using an older LIWC dictionary,
# or an external dictionary file? This can be done easily as well.
# The --dictionary argument selects which dictionary is used for processing:
# LIWC2001, LIWC2007, LIWC2015, or LIWC22.
liwcDict = "LIWC2015"
# The absolute path to an external dictionary file can be given instead, in which case
# LIWC will load that dictionary for processing:
#liwcDict = "C:/Users/Ryan/Dictionaries/Personal Values Dictionary.dicx"
# Point the output at a new file so that the results of the previous run are not overwritten.
outputLocation = "C:/Users/Ryan/Datasets/TED Talk TXT Files - Analyzed (LIWC2015).csv"
# Word Count analysis of the TXT folder again, this time with the dictionary choice included.
cmd_to_execute = (
    ["LIWC-22-cli"]
    + ["--mode", "wc"]
    + ["--dictionary", liwcDict]
    + ["--input", inputFolderTXT]
    + ["--output", outputLocation]
)
# Launch the CLI as a subprocess and wait for it to finish.
subprocess.call(cmd_to_execute)
# _____ _______ __ ______ _ _
# / ____|/ ____\ \ / / | ____(_| |
# | | | (___ \ \ / / | |__ _| | ___
# | | \___ \ \ \/ / | __| | | |/ _ \
# | |____ ____) | \ / | | | | | __/
# \_____|_____/ \/ |_| |_|_|\___|
# Beautiful. Now, let's do the same thing, but analyzing a CSV file full of the same texts.
# This time the texts come from a single CSV file instead of a folder of TXT files.
inputFileCSV = "C:/Users/Ryan/Datasets/TED Talk - English Transcripts.csv"
outputLocation = "C:/Users/Ryan/Datasets/TED Talk CSV File - Analyzed.csv"
# Because the input is a CSV file, the command also carries column indices:
#   --row-id-indices : the column(s) holding the text identifiers (not required, but it makes
#                      the data easier to merge later)
#   --column-indices : the column(s) holding the actual text to analyze
# In this CSV file, the first column has the text identifiers and the second column contains the text.
# For more complex datasets, use the --help argument with LIWC-22 to learn more about the options.
cmd_to_execute = ["LIWC-22-cli", "--mode", "wc"]
cmd_to_execute.extend(["--input", inputFileCSV])
cmd_to_execute.extend(["--row-id-indices", "1", "--column-indices", "2"])
cmd_to_execute.extend(["--output", outputLocation])
# Launch the CLI as a subprocess and wait for it to finish.
subprocess.call(cmd_to_execute)
# We will see the following in the terminal as LIWC does its magic:
# Picked up JAVA_TOOL_OPTIONS: -Dfile.encoding=UTF-8
# Processing:
# - [file] C:\Users\Ryan\Datasets\TED Talk - English Transcripts.csv
# [========================================] 100.00%; Number of Rows Analyzed: 2737; Total Words Analyzed: 5.40M
# Done. Please examine results in C:\Users\Ryan\Datasets\TED Talk CSV File - Analyzed.csv
# _ _____ _ _
# /\ | | / ____| | (_)
# / \ _ __ __ _| |_ _ _______ | (___ | |_ _ __ _ _ __ __ _
# / /\ \ | '_ \ / _` | | | | |_ / _ \ \___ \| __| '__| | '_ \ / _` |
# / ____ \| | | | (_| | | |_| |/ | __/ ____) | |_| | | | | | | (_| |
# /_/ \_|_| |_|\__,_|_|\__, /___\___| |_____/ \__|_| |_|_| |_|\__, |
# __/ | __/ |
# |___/ |___/
# What if we want to simply pass a string to the CLI for analysis? This is possible. As described in the
# Help section of the liwc.app website, this is generally not recommended as it will not be very performant.
#
# Also, of serious importance! Most command lines/terminals have a limit on the length of any string that they
# will parse. This means that you likely cannot analyze very long texts (e.g., a long paper, speech,
# or book) by passing the text directly into the console. Instead, you will likely need to process your
# data directly from the disk.
#
# However, if you insist...
# The text to analyze, handed to the CLI directly as a command-line argument.
inputString = 'This is some text that I would like to analyze. After it has finished, I will say "Thank you, LIWC!"'
# For this run, the result is saved as a newline-delimited JSON file (.ndjson).
outputLocation = "C:/Users/Ryan/Datasets/LIWC-22 Results from String.ndjson"
# Pass "console" as the input and supply the text itself through --console-text.
cmd_to_execute = ["LIWC-22-cli", "--mode", "wc", "--input", "console"]
cmd_to_execute += ["--console-text", inputString]
cmd_to_execute += ["--output", outputLocation]
# Launch the CLI as a subprocess and wait for it to finish.
subprocess.call(cmd_to_execute)
# The results from this analysis:
#{"Segment": 1,"WC": 20,"Analytic": 3.8,"Clout": 40.06,"Authentic": 28.56,"Tone": 99,"WPS": 10,"BigWords": 10,
#"Dic": 100, "Linguistic": 80,"function": 70,"pronoun": 30,"ppron": 15,"i": 10,"we": 0,"you": 5,"shehe": 0,"they": 0,
#"ipron": 15,"det": 15,"article": 0,"number": 0,"prep": 15,"auxverb": 20,"adverb": 0,"conj": 5,"negate": 0,
#"verb": 35,"adj": 0,"quantity": 5,"Drives": 5,"affiliation": 0,"achieve": 5,"power": 0,"Cognition": 15,
#"allnone": 0,"cogproc": 15,"insight": 5,"cause": 0,"discrep": 10,"tentat": 0,"certitude": 0,"differ": 0,
#"memory": 0,"Affect": 15,"tone_pos": 15,"tone_neg": 0,"emotion": 10,"emo_pos": 10,"emo_neg": 0,"emo_anx": 0,
#"emo_anger": 0,"emo_sad": 0,"swear": 0,"Social": 20,"socbehav": 15,"prosocial": 5,"polite": 5,"conflict": 0,"moral": 0,
#"comm": 15,"socrefs": 5,"family": 0,"friend": 0,"female": 0,"male": 0,"Culture": 5,"politic": 0,"ethnicity": 0,
#"tech": 5,"Lifestyle": 0,"leisure": 0,"home": 0,"work": 0,"money": 0,"relig": 0,"Physical": 0,"health": 0,"illness": 0,
#"wellness": 0,"mental": 0,"substances": 0,"sexual": 0,"food": 0,"death": 0,"need": 0,"want": 0,"acquire": 0,"lack": 0,
#"fulfill": 0,"fatigue": 0,"reward": 0,"risk": 0,"curiosity": 0,"allure": 0,"Perception": 0,"attention": 0,"motion": 0,
#"space": 0,"visual": 0,"auditory": 0,"feeling": 0,"time": 10,"focuspast": 0,"focuspresent": 10,"focusfuture": 5,
#"Conversation": 0,"netspeak": 0,"assent": 0,"nonflu": 0,"filler": 0,
#"AllPunc": 30,"Period": 5,"Comma": 10,"QMark": 0,"Exclam": 5,"Apostro": 0,"OtherP": 10}
# And, lastly — what if we want to get the output directly from the command line or terminal as a json string?
# Why, we can do that too!
import json

# The text to analyze; the results will be printed to the console rather than written to a file.
inputString = ("This is some text that I would like to analyze. After it has finished,"
               " we will get results in the console. Hooray!")
outputLocation = "console"
cmd_to_execute = ["LIWC-22-cli",
                  "--mode", "wc",
                  "--input", "console",
                  "--console-text", inputString,
                  "--output", outputLocation]
# Let's go ahead and run this analysis. We do this somewhat differently than what we've been doing, however:
# check_output() captures what the CLI prints (and raises CalledProcessError on a non-zero exit status).
# This will end up giving us a list, where each element is a line of output (as bytes) from the console.
#
# NOTE: the command is an argument list, so it is run WITHOUT shell=True, exactly like the subprocess.call()
# invocations earlier in this script. Combining an argument list with shell=True is not portable: on POSIX
# only the first element is treated as the command, and the remaining elements never reach LIWC-22-cli.
results = subprocess.check_output(cmd_to_execute).strip().splitlines()
# In this case, the item that we want to parse from a json to a Python dictionary is in results[1], so we will
# go right ahead and parse that to a dictionary now (json.loads accepts the bytes line directly):
results_json = json.loads(results[1])