In [None]:
%run paths.ipynb
%run preprocess.ipynb
%run rouge.ipynb

**PARSE RAW DATA TO TEXT FILES**

In [None]:
# Instantiate the raw-data parser. The class is not defined in this notebook;
# it is expected to come from one of the notebooks %run above
# (presumably preprocess.ipynb — TODO confirm).
p = parseNeuralSum()

In [None]:
# Parse every raw CNN split (train, test, validation — in that order) into its
# document, output and summary folders. One row per split:
# (raw input folder, documents folder, output folder, summary folder).
cnn_split_folders = [
    (CONFIG.RAW_CNN_TRAIN_FOLDER, CONFIG.CNN_TRAIN_DOCUMENTS_FOLDER,
     CONFIG.CNN_TRAIN_OUTPUT_FOLDER, CONFIG.CNN_TRAIN_SUMMARY_FOLDER),
    (CONFIG.RAW_CNN_TEST_FOLDER, CONFIG.CNN_TEST_DOCUMENTS_FOLDER,
     CONFIG.CNN_TEST_OUTPUT_FOLDER, CONFIG.CNN_TEST_SUMMARY_FOLDER),
    (CONFIG.RAW_CNN_VALIDATION_FOLDER, CONFIG.CNN_VALIDATION_DOCUMENTS_FOLDER,
     CONFIG.CNN_VALIDATION_OUTPUT_FOLDER, CONFIG.CNN_VALIDATION_SUMMARY_FOLDER),
]
for raw_folder, documents_folder, labels_folder, summary_folder in cnn_split_folders:
    p.parse_folder_1(
        input_folder=raw_folder,
        target_document_folder=documents_folder,
        target_output_folder=labels_folder,
        target_summary_folder=summary_folder,
    )

**CONVERT TEXT DATA TO WORD EMBEDDING IDS**

In [None]:
# Build the GloVe helper from the embeddings file named in CONFIG.GLOVE_FILENAME.
# (The testing/training sections instead construct it from the cache written by
# g.dump_all below, via the dump_filename= keyword.)
g = NeuralSumGlove(glove_filename=CONFIG.GLOVE_FILENAME)

In [None]:
# Convert each CNN split (train, test, validation — in that order) to word
# embedding ids. One row per split: (documents folder, output folder, dump file).
for documents_folder, labels_folder, dump_path in (
    (CONFIG.CNN_TRAIN_DOCUMENTS_FOLDER, CONFIG.CNN_TRAIN_OUTPUT_FOLDER, CONFIG.CNN_TRAIN_DUMP),
    (CONFIG.CNN_TEST_DOCUMENTS_FOLDER, CONFIG.CNN_TEST_OUTPUT_FOLDER, CONFIG.CNN_TEST_DUMP),
    (CONFIG.CNN_VALIDATION_DOCUMENTS_FOLDER, CONFIG.CNN_VALIDATION_OUTPUT_FOLDER, CONFIG.CNN_VALIDATION_DUMP),
):
    g.convert_to_indices_(
        document_folder=documents_folder,
        output_folder=labels_folder,
        dump_file=dump_path,
    )

In [None]:
# Persist the GloVe helper to CONFIG.GLOVE_CACHE_FILENAME; the training and
# testing sections reload it from this same path (dump_filename=...).
g.dump_all(CONFIG.GLOVE_CACHE_FILENAME)

**PAD EMBEDDING IDS DATA**

In [None]:
# Data handler used below to load the id dumps, pad them, and write padded dumps.
d = NeuralSumDataHandler()

In [None]:
# Pad the train and test id dumps and write the padded versions back to disk.
# Only these two splits are processed here; the validation dump is not padded.
for index_dump, padded_dump_path in (
    (CONFIG.CNN_TRAIN_DUMP, CONFIG.CNN_PADDED_TRAIN_DUMP),
    (CONFIG.CNN_TEST_DUMP, CONFIG.CNN_PADDED_TEST_DUMP),
):
    d.load_dump(index_dump)
    d.pad(output=True)
    d.padded_dump(padded_dump_path)

**COMPUTE ROUGE SCORES FOR REINFORCE TRAINING**

In [None]:
%%time
# Compute the ROUGE-based scores for the training split from its document and
# reference-summary folders, then dump them to CONFIG.CNN_TRAIN_ROUGE_DUMP.
# The training section reloads this dump with r.load(...).
# NOTE: %%time is a cell magic and must remain the first line of the cell.
r = RougeNeuralSum()
r.computeRefreshScores(CONFIG.CNN_TRAIN_DOCUMENTS_FOLDER, CONFIG.CNN_TRAIN_SUMMARY_FOLDER)
r.dump(CONFIG.CNN_TRAIN_ROUGE_DUMP)

**RUN FROM HERE FOR TRAINING**

In [42]:
%run paths.ipynb
%run preprocess.ipynb
%run model.ipynb
%run utils.ipynb
%run rouge.ipynb

In [75]:
# Load the padded training data written by the preprocessing section and batch it.
d = NeuralSumDataHandler()
d.load_padded_dump(CONFIG.CNN_PADDED_TRAIN_DUMP)
# 20 is presumably the batch size; it must stay in sync with batch_size=20
# passed to EncoderDecoder in the next cell.
d.make_batches(20)

# Reload the GloVe helper from the cache written by g.dump_all (no need to
# re-read the original GloVe file).
g = NeuralSumGlove(dump_filename=CONFIG.GLOVE_CACHE_FILENAME)

# Reload the precomputed training ROUGE scores used by train_reinforce below.
r = RougeNeuralSum()
r.load(CONFIG.CNN_TRAIN_ROUGE_DUMP)

# Make GPU 0 the current CUDA device (hard-coded device index).
torch.cuda.set_device(0)

In [44]:
# Build the encoder-decoder model on top of the loaded GloVe vectors.
# Sizes are hard-coded here; batch_size=20 mirrors d.make_batches(20) above, and
# the same hyperparameters are needed again when restoring the saved weights.
m = EncoderDecoder(g.vectors, word_emb_size=g.dim, sen_emb_size=150, doc_emb_size=200, sen_len=50, batch_size=20, output_dim=1)
# Move the model to the GPU; as the last expression of the cell this also
# displays the module structure.
m.cuda()

EncoderDecoder(
  (sentenceEncoder): LSTMSentenceEncoderParallel(
    (embeddings): Embedding(400001, 100, padding_idx=400000)
    (sentenceEncoder): LSTM(100, 150, batch_first=True)
  )
  (documentEncoder): LSTM(150, 200, batch_first=True)
  (documentDecoder): LSTM(150, 200, batch_first=True)
  (classifier): Linear(in_features=200, out_features=1, bias=True)
)

In [None]:
# Train the model for one iteration using the batched training data and the
# precomputed ROUGE summary scores loaded into `r` above.
train_reinforce(m, batches=d.batches, lines=d.lines, scores=r.summary_scores, iterations=1)

tensor([0.8418], device='cuda:0') 3420

In [74]:
# Save the trained weights (state dict only) to a path relative to the notebook's
# working directory. NOTE(review): the commented-out loader in the testing
# section refers to '../parameters/parameters', a different file name — confirm
# which checkpoint is meant to be evaluated.
torch.save(m.state_dict(), '../parameters/best_params')

**RUN FROM HERE FOR TESTING**

In [None]:
%run paths.ipynb
%run preprocess.ipynb
%run model.ipynb
%run utils.ipynb
%run rouge.ipynb

In [None]:
# Load the padded test data written by the preprocessing section and batch it
# (20 presumably is the batch size; keep it in sync with the model's batch_size).
d = NeuralSumDataHandler()
d.load_padded_dump(CONFIG.CNN_PADDED_TEST_DUMP)
d.make_batches(20)
# Reload the GloVe helper from the cache written during preprocessing.
g = NeuralSumGlove(dump_filename=CONFIG.GLOVE_CACHE_FILENAME)

In [None]:
# Rebuild the model and restore the trained weights so the testing section can be
# run on a fresh kernel. With this cell commented out, `m` was only defined if the
# training section had been executed earlier in the same session.
# The hyperparameters must match the ones used when the weights were trained.
m = EncoderDecoder(g.vectors, word_emb_size=g.dim, sen_emb_size=150, doc_emb_size=200, sen_len=50, batch_size=20, output_dim=1)
# Load the checkpoint that the training section writes with torch.save
# ('../parameters/best_params'); the previous commented-out code pointed at
# '../parameters/parameters', which this notebook never produces.
m.load_state_dict(torch.load('../parameters/best_params'))
# Move the restored model to the GPU (also displays the module structure).
m.cuda()

In [None]:
# Evaluate the model on the batched test data loaded above.
# `m` must already be defined by an earlier cell.
compute_accuracies(m, d.lines, d.output, d.batches)

In [None]:
# Generate summaries for the test documents (written under
# CONFIG.PREDICTED_SUMMARY_FOLDER, per the write_folder argument), then score
# them with ROUGE against the reference summaries of the test split.
get_summaries(m, d.lines, d.batches, doc_folder=CONFIG.CNN_TEST_DOCUMENTS_FOLDER, write_folder=CONFIG.PREDICTED_SUMMARY_FOLDER)
# `output` is indexed below; element [1] holds the ROUGE metrics dictionary.
output = computeRouge(CONFIG.PREDICTED_SUMMARY_FOLDER, CONFIG.CNN_TEST_SUMMARY_FOLDER)

In [None]:
# Display the ROUGE metrics dictionary (same expression as the next cell).
output[1]

In [72]:
# Display the ROUGE metrics dictionary: f-score / precision / recall per n-gram
# order, each with *_cb and *_ce variants.
output[1]

{'rouge_1_f_score': 0.28506,
 'rouge_1_f_score_cb': 0.27835,
 'rouge_1_f_score_ce': 0.29201,
 'rouge_1_precision': 0.21359,
 'rouge_1_precision_cb': 0.20829,
 'rouge_1_precision_ce': 0.2192,
 'rouge_1_recall': 0.46002,
 'rouge_1_recall_cb': 0.44929,
 'rouge_1_recall_ce': 0.47023,
 'rouge_2_f_score': 0.0953,
 'rouge_2_f_score_cb': 0.08971,
 'rouge_2_f_score_ce': 0.10115,
 'rouge_2_precision': 0.07068,
 'rouge_2_precision_cb': 0.06646,
 'rouge_2_precision_ce': 0.07517,
 'rouge_2_recall': 0.15691,
 'rouge_2_recall_cb': 0.14778,
 'rouge_2_recall_ce': 0.16641,
 'rouge_3_f_score': 0.05155,
 'rouge_3_f_score_cb': 0.04718,
 'rouge_3_f_score_ce': 0.05655,
 'rouge_3_precision': 0.0381,
 'rouge_3_precision_cb': 0.03479,
 'rouge_3_precision_ce': 0.0419,
 'rouge_3_recall': 0.08567,
 'rouge_3_recall_cb': 0.07843,
 'rouge_3_recall_ce': 0.09363,
 'rouge_4_f_score': 0.03318,
 'rouge_4_f_score_cb': 0.02969,
 'rouge_4_f_score_ce': 0.03714,
 'rouge_4_precision': 0.02451,
 'rouge_4_precision_cb': 0.0219,
 

In [53]:
# Show the predicted summary for file 1001, for comparison with the reference
# summary of the same file in the next cell.
get_file(1001, CONFIG.PREDICTED_SUMMARY_FOLDER)

@entity1 , @entity2 amid growing scrutiny over whether a 73 - year - old volunteer deputy who killed a suspect during a sting operation was qualified to be policing the streets , a new report raises a troubling allegation
@entity15 claims he meant to use his taser but accidentally fired his handgun at @entity23 instead
claims that the volunteer deputy 's records had been falsified emerged " almost immediately " from multiple sources after @entity15 killed @entity23 on april 2 , reporter @entity18 said



In [54]:
# Show the reference (gold) summary for the same file 1001 from the test split.
get_file(1001, CONFIG.CNN_TEST_SUMMARY_FOLDER)

@entity153 in @entity154 says @entity15 never trained with them
" he met every requirement , and all he did was give of himself , " his attorney says
@entity11 newspaper : three supervisors who refused to sign forged records on @entity15 were reassigned

