In [1]:
from flask import Flask

app = Flask(__name__)

@app.route("/")
def hello():
    return "Hello, World!"

In [2]:
def read_lines_from_txt(filename):
    """
    Reads a file named filename and returns lines of text as list
    Args:
      filename: string of target filepath
    Returns: list of string, one string per line as a list item
    """
    with open(filename, 'r') as f:
        return f.readlines()


def seprate_label_from_sentence(line,line_number):
    """
    seprate line with delimeter "\t


    """
    if (line[:3] != "###") and (line[:2] != "\n"):

        lst = line.split("\t")
        lst[1] = lst[1].replace("\n","")
        return {"target": lst[0],
                "text": lst[1].lower(),
                "line_number":line_number
                }

    else:
        pass


def extract_para(file_line_list):



    """
    will create dictionary object which contains all paragraph as indexed object
    eg. extract_para(train_txt_lines)[0] is a paragraph dict object extracted from txt file
    eg. extract_para(train_txt_lines)[0]["entire_para"][0] is a paragraph's {text,target} object

    return dict:
    {
    total_para:
    0:
    1:
    .
    .
    total_para
    }
    ----------
    index
    {
    entire_para: list of dic object containing target, text
    total_lines:total lines in para

    }


    :param file_line_list: txt file path
    :Note para is seprated by ###
    :return: dictionary conataining total_para, indexed para dic object
    """
    all_para = {"total_para": 0}
    i = 0
    line_number = 0
    para = {"entire_para": [],
            "total_lines": 0}

    for line in file_line_list:
        if line[:3] == '###':
            line_number = 0 # reset line number for new para
            all_para[i] = para
            all_para["total_para"] += 1
            para["total_lines"] = len(para["entire_para"]) - 1 # None line is present in list at the end
            para = {"entire_para": [],
                    "total_lines": 0}
            i += 1
            # print("para: \n")
            # print(para)
            # print(" all para: \n")
            # print(all_para)


        else:
            if line != None :

                txt_n_labels = seprate_label_from_sentence(line,line_number)
                # print(txt_n_labels) # will return label and text dict for line

                line_number += 1
                para["entire_para"].append(txt_n_labels)


    return all_para

# def extract_para(file_line_list):
#   all_para =  {}
#   i = 0
#   total = 0
#   para = {}

#   for line in file_line_list:
#     if line == r'###\d+\n':
#       all_para[i] = para
#       para = {}
#       i++
#     else:
#       tex_n_label = seprate_label_from_sentence(line = line,total_line = j)

#       para["target"]

def make_list_of_all_labeled_sentences(data_dic):

    """
    :param data_dic: data dictionary
    :return: list of all data which contains object
    """
    dev_deta_lines = []
    for index in range(len(data_dic) - 1):

        for line in range(data_dic[index]["total_lines"]):
          obj = data_dic[index]["entire_para"][line]
          obj["total_lines"] = data_dic[index]["total_lines"]
          dev_deta_lines.append(obj)

    return dev_deta_lines

from sklearn.metrics import accuracy_score, precision_recall_fscore_support


  # Calculate model accuracy

def accuracy_of_model(y_true,y_pred):
  """


  Args:
      y_true: true labels in the form of a 1D array
      y_pred: predicted labels in the form of a 1D array
  """
  model_accuracy = accuracy_score(y_true, y_pred) * 100
  # Calculate model precision, recall and f1 score using "weighted average
  model_precision, model_recall, model_f1, _ = precision_recall_fscore_support(y_true, y_pred, average="weighted")
  model_results = {"accuracy": model_accuracy,
                  "precision": model_precision,
                  "recall": model_recall,
                  "f1": model_f1}
  return model_results



In [3]:
from tensorflow import keras

In [4]:
trained_model = keras.models.load_model("../saved-models/90_percent_model_2_lakh_para")

In [5]:
random_med_para = 'This RCT examined the efficacy of a manualized social intervention for children with HFASDs. Participants were randomly assigned to treatment or wait-list conditions. Treatment included instruction and therapeutic activities targeting social skills, face-emotion recognition, interest expansion, and interpretation of non-literal language. A response-cost program was applied to reduce problem behaviors and foster skills acquisition. Significant treatment effects were found for five of seven primary outcome measures (parent ratings and direct child measures). Secondary measures based on staff ratings (treatment group only) corroborated gains reported by parents. High levels of parent, child and staff satisfaction were reported, along with high levels of treatment fidelity. Standardized effect size estimates were primarily in the medium and large ranges and favored the treatment group.'


In [6]:
import pandas as pd
import spacy
from spacy.lang.en import English

def para_to_formatted_inp(para = random_med_para):
  """
  function return list of dic obj containe text line number total lines
  """
 
  nlp = English()
  sentencizer = nlp.add_pipe("sentencizer")
  doc = nlp(para)
  abstract_lines = [str(sent) for sent in list(doc.sents)] # return detected sentences from doc in string type (not spaCy token type)
  total_lines = len(abstract_lines)
  temp_list = []

  i = 0
  for line in abstract_lines:
    temp_dict = {}
    temp_dict["text"] = line
    temp_dict["line_number"] = i
    temp_dict["total_lines"] = total_lines
    i = i+1
    temp_list.append(temp_dict)



  return temp_list
lines = para_to_formatted_inp()
pd.DataFrame(lines)

Unnamed: 0,text,line_number,total_lines
0,This RCT examined the efficacy of a manualized...,0,8
1,Participants were randomly assigned to treatme...,1,8
2,Treatment included instruction and therapeutic...,2,8
3,A response-cost program was applied to reduce ...,3,8
4,Significant treatment effects were found for f...,4,8
5,Secondary measures based on staff ratings (tre...,5,8
6,"High levels of parent, child and staff satisfa...",6,8
7,Standardized effect size estimates were primar...,7,8


In [7]:
line_number_list = [line["line_number"] for line in lines]
total_line_list = [line["total_lines"] for line in lines]


In [8]:
import tensorflow as tf

In [9]:
def split_sen_to_char(text):
    return " ".join(list(text))
one_hot_line_number = tf.one_hot(line_number_list,depth = 15)
one_hot_total_lines = tf.one_hot(total_line_list,depth = 20)
line_list = [line["text"] for line in lines]
splitted_lines_in_chars = [split_sen_to_char(line["text"]) for line in lines]

In [10]:
predected = trained_model.predict(x = (one_hot_line_number,
                                       one_hot_total_lines,
                                       tf.constant(line_list),
                                       tf.constant(splitted_lines_in_chars)))



In [11]:
predected = tf.argmax(predected,axis = 1)

In [12]:
classes_ = ['BACKGROUND', 'CONCLUSIONS', 'METHODS', 'OBJECTIVE', 'RESULTS']
      
test_abstract_pred_classes = [classes_[i] for i in predected]
test_abstract_pred_classes

['OBJECTIVE',
 'METHODS',
 'METHODS',
 'METHODS',
 'RESULTS',
 'METHODS',
 'RESULTS',
 'RESULTS']

In [13]:
for i, line in enumerate(line_list):
  print(f"{test_abstract_pred_classes[i]}: {line}")

OBJECTIVE: This RCT examined the efficacy of a manualized social intervention for children with HFASDs.
METHODS: Participants were randomly assigned to treatment or wait-list conditions.
METHODS: Treatment included instruction and therapeutic activities targeting social skills, face-emotion recognition, interest expansion, and interpretation of non-literal language.
METHODS: A response-cost program was applied to reduce problem behaviors and foster skills acquisition.
RESULTS: Significant treatment effects were found for five of seven primary outcome measures (parent ratings and direct child measures).
METHODS: Secondary measures based on staff ratings (treatment group only) corroborated gains reported by parents.
RESULTS: High levels of parent, child and staff satisfaction were reported, along with high levels of treatment fidelity.
RESULTS: Standardized effect size estimates were primarily in the medium and large ranges and favored the treatment group.


In [14]:
from tensorflow.python.ops.math_ops import Range
def transform_para(predictions=test_abstract_pred_classes,lines_list = line_list):
  temp_dic = {
      "BACKGROUND":[],
      "OBJECTIVE":[],
      "METHODS":[],
      "RESULTS":[],
      "CONCLUSIONS":[],
  }
  i = 0
  for i in range(len(lines_list )- 1):
    pred_class = predictions[i]
    # print(pred_class)
    temp_dic[pred_class].append(lines_list[i])




  return temp_dic


transform_para()



{'BACKGROUND': [],
 'OBJECTIVE': ['This RCT examined the efficacy of a manualized social intervention for children with HFASDs.'],
 'METHODS': ['Participants were randomly assigned to treatment or wait-list conditions.',
  'Treatment included instruction and therapeutic activities targeting social skills, face-emotion recognition, interest expansion, and interpretation of non-literal language.',
  'A response-cost program was applied to reduce problem behaviors and foster skills acquisition.',
  'Secondary measures based on staff ratings (treatment group only) corroborated gains reported by parents.'],
 'RESULTS': ['Significant treatment effects were found for five of seven primary outcome measures (parent ratings and direct child measures).',
  'High levels of parent, child and staff satisfaction were reported, along with high levels of treatment fidelity.'],
 'CONCLUSIONS': []}