In [None]:
#Libraries
import gspread
import json
import re
import string
import os
#import nltk #if running for the first time you will need to download the nltk data with nltk.download()

#Aliased libraries
import pandas as pd
import matplotlib.pyplot as plt

#Modules
#from nltk.corpus import stopwords
from oauth2client.service_account import ServiceAccountCredentials
from math import pi
from watson_developer_cloud import NaturalLanguageUnderstandingV1
from watson_developer_cloud.natural_language_understanding_v1 \
  import Features, EntitiesOptions, KeywordsOptions, SentimentOptions, EmotionOptions

# Watson credentials: read the API key and service URL from the local secrets file
with open('Authorization/watson_client_secret.json') as json_key:
    watson_key = json.load(json_key)

apikey, watson_url = watson_key['apikey'], watson_key['url']

# Natural Language Understanding client, pinned to a fixed API version
natural_language_understanding = NaturalLanguageUnderstandingV1(
    version='2018-10-29', url=watson_url, iam_apikey=apikey)

# Authorize Google Sheets access with the service-account key file
scope = [
    'https://spreadsheets.google.com/feeds',
    'https://www.googleapis.com/auth/drive',
]
credentials = ServiceAccountCredentials.from_json_keyfile_name(
    'Authorization/google_client_secret.json', scope)
gc = gspread.authorize(credentials)

# First sheet of the responses spreadsheet
wks = gc.open("watsonForms (Responses)").sheet1

In [None]:
# Extract values and put into dataframe
data = wks.get_all_records()
if not data:
    # data[0] below would otherwise fail with an opaque IndexError
    raise ValueError("The worksheet returned no records; nothing to analyse.")

# Make the dataframe, keeping the column order of the first record
df = pd.DataFrame(data, columns=list(data[0].keys()))

# Rename the question columns (positions 1-3) to something easier.
# A fresh list is assigned to df.columns rather than writing into
# df.columns.values: that mutates the Index's backing array in place, which
# pandas does not support (an Index is meant to be immutable) and which can
# leave lookups by the new names out of sync.
column_names = list(df.columns)
column_names[1] = 'q1_text'
column_names[2] = 'q2_text'
column_names[3] = 'q3_text'
df.columns = column_names

#show the data frame
df

In [None]:
#watson analysis of text columns
def _watson_row(item, name):
    """Analyse one text with Watson and return the results as a flat dict.

    Parameters
    ----------
    item : the text sent to the service.
    name : column prefix, already including its trailing underscore.

    Returns
    -------
    dict mapping ``name + suffix`` to a value, always with the same ten keys
    in the same order. Fields that could not be obtained hold the string
    "Null":
      * request failed or usage/sentiment missing -> every field is "Null";
      * only language/emotion missing -> those six fields are "Null",
        usage and sentiment are kept.
    """
    suffixes = ('usage_units', 'feats_used', 'senti', 'senti_label', 'lang',
                'sad', 'joy', 'fear', 'disgust', 'anger')
    row = {name + suffix: "Null" for suffix in suffixes}

    try:
        response = natural_language_understanding.analyze(
            text=item,
            features=Features(
                sentiment=SentimentOptions(),
                emotion=EmotionOptions(),
            )).result

        sentiment = response["sentiment"]["document"]
        found = {
            name + 'usage_units': response["usage"]["text_units"],
            name + 'feats_used': response["usage"]["features"],
            name + 'senti': sentiment["score"],
            name + 'senti_label': sentiment["label"],
        }

        try:
            emotion = response["emotion"]["document"]["emotion"]
            # The dict literal is built completely before update() runs, so a
            # single missing key leaves all six of these fields as "Null".
            found.update({
                name + 'lang': response["language"],
                name + 'sad': emotion["sadness"],
                name + 'joy': emotion["joy"],
                name + 'fear': emotion["fear"],
                name + 'disgust': emotion["disgust"],
                name + 'anger': emotion["anger"],
            })
        except KeyError:
            # Language/emotion data absent: keep usage and sentiment only.
            pass
    except Exception:
        # Deliberate best-effort: one bad response must not abort the whole
        # column. Unlike a bare ``except``, this still lets KeyboardInterrupt
        # and SystemExit through.
        return row

    row.update(found)
    return row


def analysis(textcol, name):
    """Run Watson sentiment/emotion analysis on every text and save it to CSV.

    One row per element of ``textcol`` is appended to
    ``Data/<name>_watson_Analysis.csv``; the header is written only when the
    file does not exist yet. Each row is written as soon as it is analysed,
    so results obtained before an interruption are kept.

    NOTE: rows are always appended, so calling this again with the same
    ``name`` adds a second copy of the rows to the existing file.

    Parameters
    ----------
    textcol : iterable of texts to analyse (e.g. a DataFrame column).
    name : label used as the column prefix and in the output file name.

    Returns
    -------
    None
    """
    name = str(name) + '_'
    csv_path = 'Data/' + name + 'watson_Analysis.csv'

    # Without the directory the very first write would fail.
    os.makedirs('Data', exist_ok=True)

    for item in textcol:
        outputData = pd.DataFrame([_watson_row(item, name)])
        # Appending to a missing file creates it, so only the header differs.
        write_header = not os.path.isfile(csv_path)
        outputData.to_csv(csv_path, mode='a', header=write_header)

In [None]:
# Run the Watson analysis once for each question column
for question in ('q1', 'q2', 'q3'):
    analysis(df[question + '_text'], question)

In [None]:
#Load Data
# The per-question CSVs use the string "Null" for values that could not be
# obtained. It is not one of pandas' default NA markers, so it is declared
# here; otherwise a single failed row turns a score column into strings and
# numeric operations such as mean() stop working.
df1 = pd.read_csv('Data/q1_watson_Analysis.csv', index_col=0, na_values=['Null']).reset_index(drop=True)
df2 = pd.read_csv('Data/q2_watson_Analysis.csv', index_col=0, na_values=['Null']).reset_index(drop=True)
df3 = pd.read_csv('Data/q3_watson_Analysis.csv', index_col=0, na_values=['Null']).reset_index(drop=True)

# df is reassigned below, so drop any result columns a previous run of this
# cell already attached; re-running then no longer duplicates them.
analysis_columns = list(df1.columns) + list(df2.columns) + list(df3.columns)
df = pd.concat([df.drop(columns=analysis_columns, errors='ignore'), df1, df2, df3], axis=1)

In [None]:
# Show the current contents of df as this cell's output
df

In [None]:
#calculate mean values for q emotions
# Series.mean() is used instead of calling the unbound DataFrame.mean on a
# Series, which is not a supported call. pd.to_numeric(errors='coerce') turns
# non-numeric placeholders (e.g. the "Null" marker) into NaN, which mean()
# skips, so one failed row does not break the average.
q1_means = {
    col: pd.to_numeric(df[col], errors='coerce').mean()
    for col in ('q1_senti', 'q1_sad', 'q1_joy', 'q1_fear', 'q1_disgust', 'q1_anger')
}
mean_senti = q1_means['q1_senti']
mean_sad = q1_means['q1_sad']
mean_joy = q1_means['q1_joy']
mean_fear = q1_means['q1_fear']
mean_disg = q1_means['q1_disgust']
mean_ang = q1_means['q1_anger']

#create data for visualization
means = [mean_senti, mean_sad, mean_joy, mean_fear, mean_disg, mean_ang]

print(means)

In [None]:
# Data for the radar chart: a single row holding the Q1 mean of each emotion
cols1 = ['Questions', 'Sadness', 'Joy', 'Fear', 'Disgust', 'Anger']
emotion_means = {
    'Questions': ['Q1'],
    'Sadness': [mean_sad],
    'Joy': [mean_joy],
    'Fear': [mean_fear],
    'Disgust': [mean_disg],
    'Anger': [mean_ang],
}
dfp = pd.DataFrame(emotion_means, columns=cols1)

# Every column after 'Questions' becomes one axis of the chart
categories = dfp.columns.tolist()[1:]
N = len(categories)

# Values of the first row; the first value is repeated at the end so the
# outline closes into a loop
first_row = dfp.loc[0].drop('Questions')
values = first_row.values.flatten().tolist()
values = values + values[:1]

# Angle of each axis (the full circle divided by the number of variables),
# closed in the same way as the values
angles = [n / float(N) * 2 * pi for n in range(N)]
angles = angles + angles[:1]

# Initialise the spider plot
ax = plt.subplot(111, polar=True)

# Draw one axis per variable and label it with the category name
ax.set_xticks(angles[:-1])
ax.set_xticklabels(categories, color='grey', size=15)
ax.tick_params(axis='x', pad=30)

# Radial tick labels and range
ax.set_rlabel_position(0)
ax.set_yticks([.25, .5, .75])
ax.set_yticklabels([".25", ".5", ".75"], color="grey", size=10)
ax.set_ylim(0, 1)

# Outline of the data, then the filled area
ax.plot(angles, values, linewidth=1, linestyle='solid')
ax.fill(angles, values, 'b', alpha=0.1)

plt.show()


In [None]:
# Sort by an emotional value, highest first, and print the responses in that order
dfs = df.sort_values('q1_anger', ascending=False)
for response_text in dfs['q1_text']:
    print(response_text)
# The matching anger scores, in the same order
print(dfs['q1_anger'])