In [1]:
import logging
import socket
import urllib
import subprocess
import os
import time
from joblib import Parallel, delayed

logging.basicConfig(level='INFO')
logger = logging.getLogger("/home/jara/prstuff/thesis/")

if not 'SentiStrengthCom.jar' in os.listdir('.'):
	logger.warning("You need 'SentiStrengthCom.jar' to use this wrapper!")
	logger.warning("because this version is not freely available, it was not packaged with this wrapper :-( ")
	logger.warning("get it from http://sentistrength.wlv.ac.uk/ by emailing Professor Thelwall")

class sentistrength():

    def __init__(self,language, port=8181):
        self.language = language
        self.sentistrength = ""
        self.port = port 

    def __del__(self):
        if self.sentistrength:
            os.killpg(self.sentistrength.pid,15)

    def run_server(self, language):
        if language!=self.language and self.sentistrength:
            logger.warning("wrong language running, trying to switch")
            os.killpg(self.sentistrength.pid,15)
            time.sleep(1)
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            sock.connect(('0.0.0.0',self.port))
        except ConnectionRefusedError:
            try:
                logger.info("server not found, trying to launch server")
                self.sentistrength = subprocess.Popen(["java -jar SentiStrengthCom.jar sentidata ./%s/ listen 8181 trinary" %language], shell=True, preexec_fn=os.setsid)
                time.sleep(1)
                sock.connect(('0.0.0.0',self.port))
                self.language = language
            except:
                raise Exception("unable to start server, is there a process already running? ")
        return sock
    

    def get_sentiment(self, string_to_code, language="EN"):
        url_encoded = urllib.parse.quote(string_to_code)
        request_string = "GET /%s HTTP/1.0 \r\n\r\n" %url_encoded
        sock = self.run_server(language)
        sock.sendall(str.encode(request_string,'UTF-8'))
        response = sock.recv(4096)
        resp_string = response.decode()
        positive, negative, score = resp_string.split()
        return {'positive':positive,'negative':negative,'neutral':score}

class multisent():
    def __init__(self, language, startport=8222, cores=-2, batchsize=1000):
        self.language  = language
        self.cores     = cores
        self.instances = []
        self.status    = "initialized"
        self.startport = startport
        self.batchsize = batchsize

    def __del__(self):
        self.stop_all()

    def _top_port(self):
        return max([instance['port'] for instance in self.instances]+[self.startport-1]) 
    
    def get_status(self):
        no_instances = len(self.instances)
        if not no_instances:
            if self.status!='initialized': 
                self.status='stopped'
        else:
            self.status = 'Running {no_instances} instances'.format(**locals())
        return self.status    

    def check_instances(self):
        if not self.instances:
            print('No instances to check')
        for instance in self.instances:
            port  = instance.get('port','UNKNOWN')
            pid   = instance.get('pid','UNKNOWN')
            works = check_exists(instance['port']) and "WORKS" or "FAILED"
            print("Instance {pid:5} at port {port:5} status {works:8}".format(**locals()))

    def start_server(self, port=None, attempts=5):
        if not port:
            port = self._top_port()+1
        if check_exists(port): 
            logger.info("server at {port} already exists!".format(**locals()))
            self.start_server(port+1)
            return 
        instance = subprocess.Popen(["java -jar SentiStrengthCom.jar sentidata ./%s/ listen %s trinary" %(self.language,port)], 
                                    shell=True, preexec_fn=os.setsid)
        while not check_exists(port):
            time.sleep(1)
            attempts -= 1
            if not attempts: 
                logger.warn('failed to start {language} server at port {port}'.format(**locals()))
                return False
        instance = {'instance':instance, 'pid':instance.pid, 'language':self.language,'port':port}
        logger.info("started instance {pid} at port {port}".format(**instance))
        self.instances.append(instance)
        return True

    def stop_server(self, port=None,pid=None):
        if port and pid:
            logger.warn("this function requires EITHER a port OR a pid, ignores pid if both")
        if port:
            instance = [instance for instance in self.instances if instance['port']==port]
        elif pid:
            instance = [instance for instance in self.instances if instance['pid']==pid]
        else:
            instance = self.instances

        if not instance:
            logger.warn("Instance not found!")
            return False
        instance = instance[0]
        
        os.killpg(instance['instance'].pid, 15)
        time.sleep(1)
        if not check_exists(instance['port']):
            logger.info('Stopped {pid} instance at port {port}'.format(**instance))
            self.instances.remove(instance)
            return True
        else:
            logger.warn('Unable to stop {pid} instance running at {port}!!'.format(**instance))
            return False

    def _loop_over(self, looped_iterable, fixed_iterable):
        iterator = 0
        for item in fixed_iterable:
            if iterator==len(looped_iterable):
                iterator=0
            yield looped_iterable[iterator], item
            iterator +=1
    
    def _batch_up(self, iterable):
        batch = []
        for num, item in enumerate(iterable):
            batch.append(item)
            if not (num+1) % self.batchsize :
                yield batch
                batch = []
        if batch: yield batch

    def start_all(self):
        if self.cores < 0:
            no_servers = os.cpu_count() + (self.cores+1)
        else:
            no_servers = self.cores
        logger.info('Starting {no_servers} servers in {self.language}'.format(**locals()))
        for i in range(no_servers):
            self.start_server()
        self.get_status()

    def stop_all(self):
        while self.instances:
            instance = self.instances[0]
            self.stop_server(pid=instance['pid'])

    def run_batch(self, texts):
        if not self.instances: 
            logger.info('No servers found, starting servers')
            self.start_all()
        ports = [instance['port'] for instance in self.instances]
        return Parallel(n_jobs=min(self.cores,len(ports)), backend='threading')(delayed(query_instance)(port,text) for port,text in self._loop_over(ports, texts))

    def run_stream(self, texts):
        for batch in self._batch_up(texts):
            for item in self.run_batch(batch):
                yield item

def query_instance(port, string_to_code):
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sock.connect(('0.0.0.0',port))
    except:
        raise Exception("unable to reach server")
    url_encoded = urllib.parse.quote(string_to_code)
    request_string = "GET /%s HTTP/1.0 \r\n\r\n" %url_encoded
    sock.sendall(str.encode(request_string,'UTF-8'))
    response = sock.recv(4096)
    resp_string = response.decode()
    positive, negative, score = resp_string.split()
    return {'positive':positive,'negative':negative,'neutral':score}


def check_exists(port):
    try:
        query_instance(port,'test string')
    except:
        return False
    return True

In [2]:
senti = sentistrength("EN")
res = senti.get_sentiment('I love using sentistrength.')
print(res)

INFO:/home/jara/prstuff/thesis/:server not found, trying to launch server


{'neutral': '1', 'negative': '-1', 'positive': '3'}


In [3]:
ms = multisent('EN')
texts = ['This is great!!'] * 10000
res   = ms.run_batch(texts) # push all texts and get the results in one go
print(res[0])

INFO:/home/jara/prstuff/thesis/:No servers found, starting servers
INFO:/home/jara/prstuff/thesis/:Starting 1 servers in EN
INFO:/home/jara/prstuff/thesis/:started instance 11403 at port 8222


{'neutral': '1', 'negative': '-1', 'positive': '4'}


In [None]:
#####news artikelen########

In [24]:
#import cs & make list for titles and process it
import csv
titlelist=[]
processedlist_title=[]
with open("/home/jara/prstuff/thesis/output_orgnews/douwe.csv", encoding="utf-8",mode="r",newline="") as csvfile:
    reader = csv.reader(csvfile, delimiter=",")
    for row in reader:
        title=row[6]
        title_processed=title.lower().replace("!"," ").replace(")"," ").replace("|","").replace("."," ").replace("?"," ").replace("("," ").replace("’"," ").replace('"'," ").replace("#"," ").replace(":"," ")
        titlelist.append(title)
        processedlist_title.append(title)

In [25]:
#processedlist_title.append(article_processed)

In [26]:
#look at porcessedlist_title
print(processedlist_title[0])
len(processedlist_title)

_score


47

In [28]:
processedlist_title =processedlist_title[1:]
len(processedlist_title)
#print(processedlist_title)

46

In [37]:
sentiments_list=[]
neg=[]
pos=[]
senti = sentistrength("NL")
for t in processedlist_title:
    pi = senti.get_sentiment(t)
    print(pi)
    sentiments_list.append(pi)

INFO:/home/jara/prstuff/thesis/:server not found, trying to launch server


{'neutral': '0', 'negative': '-1', 'positive': '1'}
{'neutral': '0', 'negative': '-1', 'positive': '1'}
{'neutral': '0', 'negative': '-1', 'positive': '1'}
{'neutral': '0', 'negative': '-1', 'positive': '1'}
{'neutral': '0', 'negative': '-1', 'positive': '1'}
{'neutral': '0', 'negative': '-1', 'positive': '1'}
{'neutral': '0', 'negative': '-1', 'positive': '1'}
{'neutral': '0', 'negative': '-1', 'positive': '1'}
{'neutral': '0', 'negative': '-1', 'positive': '1'}
{'neutral': '0', 'negative': '-1', 'positive': '1'}
{'neutral': '0', 'negative': '-1', 'positive': '1'}
{'neutral': '0', 'negative': '-1', 'positive': '1'}
{'neutral': '0', 'negative': '-1', 'positive': '1'}
{'neutral': '0', 'negative': '-1', 'positive': '1'}
{'neutral': '0', 'negative': '-1', 'positive': '1'}
{'neutral': '0', 'negative': '-1', 'positive': '1'}
{'neutral': '0', 'negative': '-1', 'positive': '1'}
{'neutral': '0', 'negative': '-1', 'positive': '1'}
{'neutral': '0', 'negative': '-1', 'positive': '1'}
{'neutral': 

In [38]:
#len moet 3 zijn
len(pi)

3

In [39]:
#print(sentiments_list)

In [40]:
print(sentiments_list[:2])
len(sentiments_list)

[{'neutral': '0', 'negative': '-1', 'positive': '1'}, {'neutral': '0', 'negative': '-1', 'positive': '1'}]


46

In [41]:
sentiments_list2=[list(col) for col in zip(*[d.values() for d in sentiments_list])]

In [42]:
#print(sentiments_list2)

In [43]:
pos_list=sentiments_list2[1]
neg_list=sentiments_list2[0]
len(pos_list)
#len(neg_list)

46

In [44]:
pos_list_int = list(map(int, pos_list))
neg_list_int = list(map(int, neg_list))

In [45]:
print(pos_list_int)

[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1]


In [46]:
sentiment3 = [a + b for a, b in zip(neg_list_int, pos_list_int)]

In [47]:
x=sum(sentiment3)/len(sentiment3)
print(x)

-1.0
