In [None]:
# Mount Google Drive into the Colab runtime at /content/drive; the data and
# API-key paths used later in this notebook live under its MyDrive folder.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
# Base paths on the mounted Drive. They are absolute (rooted at the
# /content/drive mount point) so they resolve the same way regardless of the
# notebook's current working directory.
DATA_PATH = '/content/drive/MyDrive/fyp-code/codes/data/ecpe/'      # input CSVs are read from here
DEST_PATH = '/content/drive/MyDrive/fyp-code/codes/data/subtasks/'  # result CSVs are written here

In [None]:
# usual import
import pandas as pd
import numpy as np
from tqdm import tqdm
import requests
import os
import re

# to call the file that contains the api keys
base_folder = '/content/drive/MyDrive/fyp-code/codes'
training_path = os.path.join(base_folder, "deepai_api_key.zip") 
!unzip $training_path

from deepai_api_key import API_KEY

Archive:  /content/drive/MyDrive/fyp-code/codes/deepai_api_key.zip
 extracting: deepai_api_key.py       


## Import Short Text

In [None]:
# Read the short-text CSV and keep only the text column, as a one-column DataFrame.
short_data = pd.read_csv(DATA_PATH + 'ecpe_cleaned_short_data.csv').loc[:, ['text_cleaned_ecpe']]
short_data.head()

Unnamed: 0,text_cleaned_ecpe
0,I get to spend New Year is home again alone an...
1,"Depressed and lonely /: Stuck in a deep, never..."
2,Learning to pretend to have a good time had be...
3,So far he stop texting meafter I said somethin...
4,*sigh* ?? I have not cried so muchI am in so m...


## Import Long Text

In [None]:
# Read the long-text CSV and keep only the text column, as a one-column DataFrame.
long_data = pd.read_csv(DATA_PATH + 'ecpe_cleaned_long_data.csv').loc[:, ['text_cleaned_ecpe']]
long_data.head()

Unnamed: 0,text_cleaned_ecpe
0,Just another night. Another night of feeling l...
1,Is it possible to fake depression? I have been...
2,Imagine being attractive Imagine what it would...
3,"Best moment to have anxiety It is 3:30am, I am..."
4,"hi, I am a 21 year-old male from the uk, over ..."


## Test the API call first

In [None]:
# Smoke-test the summarization endpoint with a trivially short input.
r = requests.post(
    "https://api.deepai.org/api/summarization",
    headers={'api-key': API_KEY},
    data={'text': 'hello'},
)
payload = r.json()
print(payload)

# An empty summary is shown as a dash.
text = payload['output']
if text == '':
    text = '-'
text

{'id': 'a46a152f-34ab-48e8-949f-40269e123ef1', 'output': ''}


'-'

In [None]:
# Second smoke test: summarize one row of the long-text dataset.
r = requests.post(
    "https://api.deepai.org/api/summarization",
    headers={'api-key': API_KEY},
    data={'text': long_data['text_cleaned_ecpe'][1]},
)
payload = r.json()
print(payload)

# An empty summary is shown as a dash.
text = payload['output']
if text == '':
    text = '-'
text

{'id': '26b7ec1a-0686-412f-8dc2-e77bcc5dd1f5', 'output': 'Is it possible to fake depression?\nI am just trying to pity myself giving myself depression, but I do not have it.'}


'Is it possible to fake depression?\nI am just trying to pity myself giving myself depression, but I do not have it.'

## Helper function to summarize text

In [None]:
def get_summarized_text(text, timeout=60):
    """Summarize ``text`` with the DeepAI summarization endpoint.

    Parameters
    ----------
    text : str
        Text to summarize. Anything that is not a non-blank string (``None``,
        a NaN from pandas, ``''``, whitespace only) is answered with ``'-'``
        without contacting the API.
    timeout : float, optional
        Seconds to wait for the server before ``requests`` gives up.

    Returns
    -------
    str
        The ``output`` field of the API reply, or the placeholder ``'-'``
        when that field is empty.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status code.
    KeyError
        If the JSON reply carries no ``output`` field; the message includes
        the full reply so the cause is visible.
    """
    # Nothing to summarize: skip the network round trip entirely.
    if not isinstance(text, str) or not text.strip():
        return '-'

    response = requests.post(
        "https://api.deepai.org/api/summarization",
        data={'text': text},
        headers={'api-key': API_KEY},
        # Without a timeout a single stalled connection would block the
        # whole batch loop indefinitely.
        timeout=timeout,
    )
    # Surface HTTP-level failures as such instead of as a confusing
    # KeyError on the parsed body further down.
    response.raise_for_status()

    payload = response.json()
    if 'output' not in payload:
        raise KeyError(
            "DeepAI reply has no 'output' field: {!r}".format(payload)
        )

    summary = payload['output']
    # Empty (or null) summaries are stored as a dash placeholder.
    return summary if summary else '-'

## Apply the DeepAI text summarization API to our text data

In [None]:
# Sanity check: run the helper on a single row of the long-text data.
summary = get_summarized_text(long_data['text_cleaned_ecpe'][10])
summary

'it seems like the thing to do.\nI am still going to school, though Hi. I am new to the forums.\nit seems like the thing to do.\nI started having anxiety attacks about a year ago and they have increased in frequency the last few months.\nI have a hard time even driving anymore (especially on the freeway) and increasingly rely on my girlfriend for menial things like going to the store.\nIt seems like quite a few of you have been dealing with these sorts of things for much longer and I am sorry if my posting here so soon seems insensitive, but its already getting to the point where I know I cannot handle living like this for years on end.'

### Short Data

In [None]:
# Summarize every short text in row order, so the i-th summary belongs to the
# i-th row of short_data.
short_texts = short_data['text_cleaned_ecpe'].tolist()
short_data_summaries = []

for sentence in tqdm(short_texts):
    summary = get_summarized_text(sentence)
    short_data_summaries += [summary]

100%|██████████| 843/843 [09:42<00:00,  1.45it/s]


In [None]:
# Pair each short text with its summary in a two-column DataFrame.
summary_short_df = pd.DataFrame({
    'text_cleaned': short_data['text_cleaned_ecpe'],
    'text_summarized': short_data_summaries,
})
summary_short_df.head()

Unnamed: 0,text_cleaned,text_summarized
0,I get to spend New Year is home again alone an...,-
1,"Depressed and lonely /: Stuck in a deep, never...",-
2,Learning to pretend to have a good time had be...,-
3,So far he stop texting meafter I said somethin...,-
4,*sigh* ?? I have not cried so muchI am in so m...,-


### Long Data

In [None]:
# Summarize every long text in row order, so the i-th summary belongs to the
# i-th row of long_data.
long_texts = long_data['text_cleaned_ecpe'].tolist()
long_data_summaries = []

for sentence in tqdm(long_texts):
    summary = get_summarized_text(sentence)
    long_data_summaries += [summary]

100%|██████████| 1437/1437 [16:31<00:00,  1.45it/s]


In [None]:
# Pair each long text with its summary in a two-column DataFrame.
summary_long_df = pd.DataFrame({
    'text_cleaned': long_data['text_cleaned_ecpe'],
    'text_summarized': long_data_summaries,
})
summary_long_df.head()

Unnamed: 0,text_cleaned,text_summarized
0,Just another night. Another night of feeling l...,You would give your life for this person and t...
1,Is it possible to fake depression? I have been...,Is it possible to fake depression?\nI am just ...
2,Imagine being attractive Imagine what it would...,-
3,"Best moment to have anxiety It is 3:30am, I am...",The middle of the night is the best moment for...
4,"hi, I am a 21 year-old male from the uk, over ...","hi, I am a 21 year-old male from the uk, over ..."


## Save the dataframes to CSV

In [None]:
# Create the destination folder if it is missing: to_csv does not create
# parent directories, and failing here would discard the summaries collected
# by the long-running API loops.
os.makedirs(DEST_PATH, exist_ok=True)

# index=False keeps the row index out of the files.
summary_short_df.to_csv(
    os.path.join(DEST_PATH, 'subtasks_text_summarization_extractive_short_data.csv'),
    index=False,
)
summary_long_df.to_csv(
    os.path.join(DEST_PATH, 'subtasks_text_summarization_extractive_long_data.csv'),
    index=False,
)