# Aspect-based sentiment classification via [PyABSA](https://github.com/yangheng95/PyABSA)
More usages see [here](https://github.com/yangheng95/PyABSA/tree/release/demos/aspect_polarity_classification)

## Other References to PyABSA
```
https://www.youtube.com/watch?v=-UEU-HGjUyQ
```

# iLab2 Survey Comment - Inference Examples

## Checking Available Checkpoints
### PyABSA will check the latest available checkpoints before and load the latest checkpoint from Google Drive. To view available checkpoints, you can use the following code and load the checkpoint by name:

In [1]:
from pyabsa import available_checkpoints

# The results of available_checkpoints() depend on the PyABSA version
checkpoint_map = available_checkpoints()  # show available checkpoints of PyABSA of current version 

This script could only be used to manage NVIDIA GPUs,but no GPU found in your device




Version 1.16.16 of pyabsa is outdated. Version 1.16.18 was released 3 days ago.
[31mcheck release notes at https://github.com/yangheng95/PyABSA/blob/release/release-note.json[0m
[33mThere may be some checkpoints available for early versions of PyABSA, see [0m


  if max_ver == 'N.A.' or StrictVersion(min_ver) <= StrictVersion(__version__) <= StrictVersion(max_ver):


## Perform aspect term detection and then predict sentiment

In [2]:
import pandas as pd
import numpy as np

In [3]:
df_comments = pd.read_csv('Data/Response_Comments_Metrics.csv')


In [4]:
df_comments.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 385708 entries, 0 to 385707
Data columns (total 9 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   RESP_ID         385708 non-null  int64  
 1   RESP_SURVEY_ID  385708 non-null  int64  
 2   RESP_Q_ID       385708 non-null  int64  
 3   RESP_POINTS     66641 non-null   float64
 4   resp_len        385708 non-null  int64  
 5   avg_word_len    385708 non-null  float64
 6   max_word_len    385708 non-null  int64  
 7   word_count      385708 non-null  int64  
 8   RESP_COMMENT    385708 non-null  object 
dtypes: float64(2), int64(6), object(1)
memory usage: 26.5+ MB


In [None]:
from pyabsa.functional import ABSADatasetList
from pyabsa.functional import ATEPCCheckpointManager

df_comments = pd.read_csv('Data/Response_Comments_Metrics.csv')

df_comments.RESP_COMMENT = df_comments.RESP_ID.astype(str) + ' ' + df_comments.RESP_COMMENT
inference_source = list(df_comments.RESP_COMMENT[(df_comments.word_count>1) & (df_comments.word_count<11) & (df_comments.resp_len>df_comments.word_count)])
# inference_source = list(df_comments.RESP_COMMENT[(df_comments.word_count>10) & (df_comments.word_count<21) & (df_comments.resp_len>df_comments.word_count)])
aspect_extractor = ATEPCCheckpointManager.get_aspect_extractor(checkpoint='multilingual-256-2')
atepc_result = aspect_extractor.extract_aspect(
    inference_source=inference_source,
    save_result=True,
    # print_result=True,  # print the result
    pred_sentiment=True,  # Predict the sentiment of extracted aspect terms
)

In [46]:
len(atepc_result)

50867

In [None]:
database_username=input("SQL login:")

In [None]:
import matplotlib.pyplot as plt
import getpass
database_password=getpass.getpass("Password:")

In [56]:
# Import dataframe into MySQL
import sqlalchemy
database_ip       = '192.168.1.142'
database_name     = 'DAI_Survey_EDA'
database_connection = sqlalchemy.create_engine('mssql://{0}:{1}@{2}:1433/{3}?DRIVER=SQL+Server+Native+Client+11.0'.format(database_username, database_password, database_ip, database_name))

  database_connection = sqlalchemy.create_engine('mssql://{0}:{1}@{2}:1433/{3}?DRIVER=SQL+Server+Native+Client+11.0'.format(database_username, database_password, database_ip, database_name))


In [58]:
df_out = pd.DataFrame({'resp_id':[],'aspect':[],'sentiment':[],'confidence':[]})
df_comment = pd.DataFrame({'resp_id':[],'fmt_comment':[]})

for ex_id, result in enumerate(atepc_result):
    resp_id, comment = result['sentence'].split(' ', 1)
    text_printing = comment  # result['sentence']
    for i in range(len(result['aspect'])):
            if result['sentiment'][i] == 'Negative':
                # aspect_info = '<b>{}&nbsp;</b><span style="background-color:Tomato;">{}({})</span>'.format(
                aspect_info = '<b>{}&nbsp;</b><span style="background-color:Tomato;">({})</span>'.format(
                    result['aspect'][i],
                    result['sentiment'][i],
                    # round(result['confidence'][i], 3),
                )
            elif result['sentiment'][i] == 'Neutral':
                aspect_info = '<b>{}&nbsp;</b><span style="background-color:Yellow;">({})</span>'.format(
                    result['aspect'][i],
                    result['sentiment'][i],
                    # round(result['confidence'][i], 3),
                )
            else:
                aspect_info = '<b>{}&nbsp;</b><span style="background-color:#98fb98;">({})</span>'.format(
                    result['aspect'][i],
                    result['sentiment'][i],
                    # round(result['confidence'][i], 3),
                )

            text_printing = text_printing.replace(result['aspect'][i], aspect_info)

            dict_out = {'resp_id': [resp_id], 'aspect': [result['aspect'][i]], 'sentiment':[result['sentiment'][i]], 'confidence':[result['confidence'][i]]}
            # df_out = df_out.append(pd.DataFrame(dict_out))
            # df_out = pd.concat([df_out, pd.DataFrame(dict_out)], axis=0)
            df_out = pd.DataFrame(dict_out)
            df_out.to_sql(con=database_connection, name='df_out', if_exists='append', index=False)
    # df_comment = df_comment.append(pd.DataFrame({'resp_id':[resp_id], 'fmt_comment':[text_printing]}))
    # df_comment = pd.concat([df_comment, pd.DataFrame({'resp_id':[resp_id], 'fmt_comment':[text_printing]})], axis=0)
    df_comment = pd.DataFrame({'resp_id':[resp_id], 'fmt_comment':[text_printing[0:4000]]})
    df_comment.to_sql(con=database_connection, name='fmt_comment', if_exists='append', index=False)
#     if len(df_out) == 500:
#         # write to CSVs
#         df_out.to_csv('output/absa_{}.csv'.format(resp_id),          # Export pandas DataFrame as CSV
#                     header = True
#                     ,index = False)
#         df_comment.to_csv('output/fmt_comment_{}.csv'.format(resp_id),          # Export pandas DataFrame as CSV
#                         header = True
#                         ,index = False)
#         df_out = pd.DataFrame({'resp_id':[],'aspect':[],'sentiment':[],'confidence':[]})
#         df_comment = pd.DataFrame({'resp_id':[],'fmt_comment':[]})
        

# # Write last batch
# if len(df_out) < 500:
#     # write to CSVs
#     df_out.to_csv('output/absa_{}.csv'.format(resp_id),          # Export pandas DataFrame as CSV
#                 header = True
#                 ,index = False)
#     df_comment.to_csv('output/fmt_comment_{}.csv'.format(resp_id),          # Export pandas DataFrame as CSV
#                     header = True
#                     ,index = False)

In [52]:
len(text_printing[0:3999])

3999

In [54]:
len(atepc_result)

50867

In [32]:
df_out

Unnamed: 0,resp_id,aspect,sentiment,confidence
0,22675154,sessions,Positive,0.999886


## Reproduce fmt_comment file with different formatting

In [6]:
import pandas as pd
import numpy as np

df_comments = pd.read_csv('Data/Response_Comments_Metrics.csv')

In [7]:
df_aspects = pd.read_csv('output/absa_dummy_01.csv', delimiter='\t')

In [None]:
database_username=input("SQL login:")

In [None]:
import matplotlib.pyplot as plt
import getpass
database_password=getpass.getpass("Password:")

In [10]:
# Import dataframe into MySQL
import sqlalchemy
database_ip       = '192.168.1.142'
database_name     = 'DAI_Survey_EDA'
database_connection = sqlalchemy.create_engine('mssql://{0}:{1}@{2}:1433/{3}?DRIVER=SQL+Server+Native+Client+11.0'.format(database_username, database_password, database_ip, database_name))

  database_connection = sqlalchemy.create_engine('mssql://{0}:{1}@{2}:1433/{3}?DRIVER=SQL+Server+Native+Client+11.0'.format(database_username, database_password, database_ip, database_name))


In [11]:
df_comments.head()

Unnamed: 0,RESP_ID,RESP_SURVEY_ID,RESP_Q_ID,RESP_POINTS,resp_len,avg_word_len,max_word_len,word_count,RESP_COMMENT
0,13926609,164973,1053,,90,6.0,11,13,Learning new experiences with animation and te...
1,13926622,164973,3131,4.0,102,4.0,10,19,I learned a lot of new techniques and had expe...
2,13926628,164973,3707,3.0,132,5.0,13,22,Goals of immersion and data visualisation were...
3,14109824,187612,1053,,125,4.5,11,22,The interaction between each of my classmate. ...
4,13926629,164973,3708,4.0,46,4.5,9,8,"Came in everyday, very happy with group effort"


## Process df_aspects as JSON

In [13]:
json_aspects = df_aspects.groupby(['RESP_ID']) \
    .apply(lambda x: x[['aspect','sentiment']].to_dict('records')) \
    .reset_index() \
    .rename(columns={0:'asp_sent'}) \
    .to_json(orient='records')

In [14]:
import json

j = json.loads(json_aspects)

In [16]:
for ex_id, result in enumerate(j):
    resp_id = result['RESP_ID']
    # print(resp_id)
    text_printing = df_comments.loc[df_comments.RESP_ID==resp_id][['RESP_COMMENT']].values[0][0]
    
    for asp in enumerate(result['asp_sent']):
        # print(asp[1]['aspect'], asp[1]['sentiment'])
        
        # for asp in df_aspects.itertuples():
        # resp_id = asp.resp_id
        if asp[1]['sentiment'] == 'Negative':
            aspect_info = '<span style="background-color:Tomato;"><b>{}&nbsp;</b></span>'.format(
                asp[1]['aspect']
            )
        elif asp[1]['sentiment'] == 'Neutral':
            aspect_info = '<span style="background-color:Yellow;"><b>{}&nbsp;</b></span>'.format(
                asp[1]['aspect']
            )
        else:
            aspect_info = '<span style="background-color:#98fb98;"><b>{}&nbsp;</b></span>'.format(
                asp[1]['aspect']
            )

        text_printing = text_printing.replace(asp[1]['aspect'], aspect_info)

    out_comments = pd.DataFrame({'resp_id':[resp_id], 'fmt_comment':[text_printing[0:4000]]})
    # print(out_comments)
    out_comments.to_sql(con=database_connection, name='fmt_comment', if_exists='append', index=False)

In [34]:
out_comments.head()

Unnamed: 0,resp_id,fmt_comment
0,14123987,"Ongoing marks and feedback for <span style=""ba..."


In [32]:
df_comments.loc[df_comments.RESP_ID==resp_id][['RESP_COMMENT']].values[0][0]

str

In [None]:
j = json.loads(json_aspects)
for ex_id, result in enumerate(j):
    resp_id = result['resp_id']
    print(resp_id)
    for asp in enumerate(result['asp_sent']):
        print(asp[1]['aspect'], asp[1]['sentiment'])

In [66]:
import json

print(json.dumps(json.loads(json_aspects), indent=2, sort_keys=True))

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [None]:
for ex_id, result in enumerate(json_aspects):
    print(ex_id, result)