# Import News List

In [None]:
# Path handed to NewsList().importPickle() further down in this cell.
file_name = 'Data/NewsData_0_1000000fix'
# Upper bound of the news_list[:last_index] slice: only the first
# last_index imported items are kept.
# NOTE(review): the file name suggests a 0..1,000,000 range while this
# limit is 10,000,000 - confirm the extra zero is intentional.
last_index = 10000000

from data import NewsList

def news_calibrate(news_list):
    """Clamp and rescale the bias values of every news item in place.

    Each item's ``Bias`` and every entry of its ``Sentence_Bias`` sequence
    is limited to the range [-300, 300] and then divided by 150, so all
    calibrated values end up in [-2, 2].

    Args:
        news_list: Iterable of objects exposing a numeric ``Bias`` attribute
            and an index-assignable ``Sentence_Bias`` sequence.

    Returns:
        The same ``news_list`` object; its items are modified in place.
    """
    limit = 300
    scale = 150

    def _clamp_and_scale(value):
        # Explicit comparisons (rather than min/max) so that a NaN passes
        # through unchanged, exactly as the article-level code always did.
        if value > limit:
            value = limit
        elif value < -limit:
            value = -limit
        return value / scale

    for news in news_list:
        news.Bias = _clamp_and_scale(news.Bias)
        # The clamped value must be the one that gets scaled; scaling the
        # raw value instead would silently undo the clamp.
        for index, bias in enumerate(news.Sentence_Bias):
            news.Sentence_Bias[index] = _clamp_and_scale(bias)
    return news_list

# Load the stored news items.
# NOTE(review): importPickle presumably unpickles file_name - only load
# files from a trusted source.
news_list = NewsList().importPickle(file_name)
print(f'{len(news_list)} News Imported')

# Keep at most last_index items.
news_list = news_list[:last_index]
print(f'{len(news_list)} News will be used')

# Report the raw (uncalibrated) article-level bias range.
bias = [item.Bias for item in news_list]
print(f'Maximum Bias : {max(bias)}')
print(f'Minimum Bias : {min(bias)}')

# Clamp and rescale every bias value in place.
print('Calibrating Data')
news_list = news_calibrate(news_list)

# Build RNN Data

In [None]:
# Fixed token-sequence length: every tokenized text is padded or truncated
# to this many entries via pad(), and the RNN Input layer uses it as its shape.
max_length = 100

from tqdm import tqdm
from numpy import array
from tensorflow.keras.preprocessing.text import Tokenizer

def pad(a, max_len):
    """Return a new list holding ``a`` fitted to exactly ``max_len`` items.

    Sequences longer than ``max_len`` are truncated; shorter ones are
    right-padded with ``0``. The input is never modified and the result is
    always a fresh list, so callers can keep using ``a`` afterwards.

    Args:
        a: Sequence supporting ``len()`` and slicing (e.g. a list of ids).
        max_len: Target length.

    Returns:
        A list of length ``max_len`` (for non-negative ``max_len``).
    """
    head = list(a[:max_len])
    # A non-positive multiplier yields an empty list, so no padding is added
    # once the sequence already has max_len items.
    return head + [0] * (max_len - len(head))

# Flatten all articles into two parallel lists: every entry of Content
# becomes one input text and every entry of Sentence_Bias one target.
# NOTE(review): assumes Content and Sentence_Bias have equal length per
# article, otherwise inputs and targets drift out of alignment - confirm.
rnn_x, rnn_y = [], []
for news in tqdm(news_list):
    rnn_x += news.Content
    rnn_y += news.Sentence_Bias

# Fit the vocabulary on the texts and convert them to integer sequences.
# NOTE(review): num_words duplicates rnn_max_features (defined in the
# "Run RNN Model" cell); the two values must stay in sync.
tokenizer = Tokenizer(num_words=100000)
tokenizer.fit_on_texts(rnn_x)
rnn_x_list = tokenizer.texts_to_sequences(rnn_x)

# Bring every sequence to max_length entries and stack them into arrays.
rnn_x_array = array(list(map(lambda seq: pad(seq, max_length), rnn_x_list)))
rnn_x = rnn_x_array
rnn_y = array(rnn_y)

# Build CNN Data

In [None]:
from tqdm import tqdm
from math import ceil, sqrt
from numpy import array

def square(a, side):
    """Lay the values of ``a`` out row by row in a ``side`` x ``side`` grid.

    Cells not covered by ``a`` hold the mean of ``a`` (``0`` when ``a`` is
    empty).

    Args:
        a: Sequence of numbers supporting ``len()``.
        side: Number of rows and of columns in the result.

    Returns:
        A list of ``side`` independent row lists with ``side`` entries each.

    Raises:
        IndexError: If ``a`` has more than ``side * side`` entries.
    """
    count = len(a)
    filler = sum(a) / count if count else 0
    grid = [[filler for _ in range(side)] for _ in range(side)]
    for position, value in enumerate(a):
        row, column = divmod(position, side)
        grid[row][column] = value
    return grid

print('Count Maximum Sentence')
# Largest number of Content entries found in any single article (0 when
# there are no articles).
max_sentence = max((len(news.Content) for news in news_list), default=0)
# Smallest square side whose area can hold max_sentence values.
# NOTE(review): the grid is later filled from Sentence_Bias, so this assumes
# len(Sentence_Bias) never exceeds len(Content) - confirm.
cnn_side = ceil(sqrt(max_sentence))
print(f'Maximum Sentences Count : {max_sentence}')

# One cnn_side x cnn_side grid of sentence biases per article as input,
# with the article-level bias as the regression target.
cnn_x = [square(news.Sentence_Bias, cnn_side) for news in tqdm(news_list)]
cnn_y = [news.Bias for news in news_list]

# Append a trailing axis of size 1 so each grid has shape
# (cnn_side, cnn_side, 1), matching the CNN Input layer.
cnn_x = array(cnn_x)
cnn_x = cnn_x.reshape((len(cnn_x), cnn_side, cnn_side, 1))
cnn_y = array(cnn_y)

# Run RNN Model

In [None]:
print('Run RNN Model')
# Passed as epochs= to rnn_model.fit().
rnn_epoch = 1
# Passed as batch_size= to rnn_model.fit().
rnn_batch = 64
# First argument of the Embedding layer (vocabulary size).
# NOTE(review): the Tokenizer in the "Build RNN Data" cell hard-codes
# num_words=100000; keep both values in sync.
rnn_max_features = 100000

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Embedding, Bidirectional, LSTM, Dropout, Dense
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import binary_accuracy

def rms(y_true, y_pred):
    """Return the root of the mean squared difference of the two tensors.

    Passed to ``compile(metrics=...)`` so Keras reports it per epoch under
    the history keys ``rms`` and ``val_rms``.
    """
    squared_error = tf.square(y_true - y_pred)
    return tf.sqrt(tf.reduce_mean(squared_error))

# Sentence-level regressor: token ids -> embedding -> bidirectional LSTM ->
# single linear output, trained against the calibrated sentence bias.
rnn_model: Sequential = Sequential([
    Input(shape=(max_length,)),
    Embedding(rnn_max_features, 100),
    Bidirectional(LSTM(100, return_sequences=False)),
    Dropout(rate=0.2),
    # Output layer. There is deliberately no Dropout after it: dropout on
    # the single regression output zeroes a fraction of the predictions and
    # rescales the rest during training only, which biases the fitted
    # output relative to what the model returns at inference time.
    Dense(units=1),
])
rnn_model.compile(
    loss=MeanSquaredError(),
    optimizer=Adam(learning_rate=0.0001),
    # NOTE(review): binary_accuracy is kept because its history keys are
    # read when plotting and exporting results, but the targets are
    # continuous, so its value is of limited meaning - confirm it is wanted.
    metrics=[binary_accuracy, rms],
)
rnn_history = rnn_model.fit(
    rnn_x,
    rnn_y,
    batch_size=rnn_batch,
    epochs=rnn_epoch,
    validation_split=0.2,
)


# Run CNN Model

In [None]:
print('Run CNN Model')
# Passed as epochs= to cnn_model.fit().
cnn_epoch = 10
# Passed as batch_size= to cnn_model.fit().
cnn_batch = 64

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dropout, Dense
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import binary_accuracy

# Article-level regressor: square grid of sentence biases -> one small
# convolution -> max pooling -> single linear output, trained against the
# calibrated article bias.
cnn_model: Sequential = Sequential([
    Input((cnn_side, cnn_side, 1)),
    Conv2D(filters=2, kernel_size=(2, 2)),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(rate=0.2),
    Flatten(),
    # Output layer. There is deliberately no Dropout after it: dropout on
    # the single regression output zeroes a fraction of the predictions and
    # rescales the rest during training only, which biases the fitted
    # output relative to what the model returns at inference time.
    Dense(units=1),
])
cnn_model.compile(
    loss=MeanSquaredError(),
    optimizer=Adam(learning_rate=0.001),
    # `rms` is the custom metric defined in the "Run RNN Model" cell.
    # NOTE(review): binary_accuracy is kept because its history keys are
    # read when plotting and exporting results, but the targets are
    # continuous, so its value is of limited meaning - confirm it is wanted.
    metrics=[binary_accuracy, rms],
)
cnn_history = cnn_model.fit(
    cnn_x,
    cnn_y,
    batch_size=cnn_batch,
    epochs=cnn_epoch,
    validation_split=0.2,
)


# Predict

In [None]:
print('Predict')
import numpy as np

# Positions (into cnn_x / news_list) of the articles to run through the CNN.
PredictX = [814219, 42159, 141318, 48937, 248414]
# One predict() call per article; each grid gets a leading batch axis of
# size 1 first, so every entry of PredictY is that single-sample result.
PredictY = [
    cnn_model.predict(np.expand_dims(np.array(cnn_x[item]), axis=0))
    for item in PredictX
]


In [None]:
import matplotlib.pyplot as plt
from datetime import datetime
from os import makedirs

# 2x2 grid of training curves: left column RNN, right column CNN; top row
# binary accuracy, bottom row RMS error.
# NOTE: the top-row axes keep their historical names rnn_loss / cnn_loss,
# but the data drawn on them is binary accuracy, so they are labelled as
# such (they used to be labelled "loss", which did not match the curves).
fig, ((rnn_loss, cnn_loss), (rnn_acc, cnn_acc)) = plt.subplots(nrows=2, ncols=2, constrained_layout=True)

for title_prefix, metrics, top_ax, bottom_ax in (
    ('RNN', rnn_history.history, rnn_loss, rnn_acc),
    ('CNN', cnn_history.history, cnn_loss, cnn_acc),
):
    top_ax.plot(metrics['binary_accuracy'], 'y', label='train bin acc')
    top_ax.plot(metrics['val_binary_accuracy'], 'r', label='val bin acc')
    top_ax.set_xlabel('epoch')
    top_ax.set_ylabel('binary accuracy')
    top_ax.title.set_text(title_prefix + ' Binary Accuracy')
    # Without a legend the per-line labels above are never displayed.
    top_ax.legend()

    bottom_ax.plot(metrics['rms'], 'b', label='train rms')
    bottom_ax.plot(metrics['val_rms'], 'g', label='val rms')
    bottom_ax.set_xlabel('epoch')
    bottom_ax.set_ylabel('RMSD')
    bottom_ax.title.set_text(title_prefix + ' RMSD')
    bottom_ax.legend()



# Each run gets its own result directory named after the current timestamp.
# NOTE(review): str(datetime.now()) contains spaces and colons, which are
# not valid in directory names on every platform - confirm the target OS.
n = str(datetime.now())
makedirs('./result/' + n)

# The figure object created by plt.subplots() is bound to `fig` (lower
# case); the previous `Fig` was an undefined name.
fig.savefig('./result/' + n + '/plot.png', dpi=1000)

# Persist both trained models next to the plot.
rnn_model.save('./result/' + n + '/rnn_model.h5')
cnn_model.save('./result/' + n + '/cnn_model.h5')

# Per-epoch metric series, one list per metric: the six RNN series first,
# then the same six for the CNN, in the order given by the key tuple below.
history = [
    run.history[key]
    for run in (rnn_history, cnn_history)
    for key in (
        'loss',
        'val_loss',
        'binary_accuracy',
        'val_binary_accuracy',
        'rms',
        'val_rms',
    )
]

from csv import writer

# One CSV row per metric series (see `history` for the row order).
# newline='' is what the csv module expects of files it writes to; without
# it extra blank lines can appear on platforms that translate line endings.
with open('./result/' + n + '/history.csv', mode='w', newline='') as f:
    history_csv = writer(f)
    for line in history:
        history_csv.writerow(line)

# Predictions first, then the title and the stored bias of each predicted
# article, all in PredictX order.
with open('./result/' + n + '/predict.csv', mode='w', newline='') as f:
    predict_csv = writer(f)
    for line in PredictY:
        predict_csv.writerow(line)
    for index in PredictX:
        # writerow() expects a sequence of fields; a bare string would be
        # split into one column per character.
        predict_csv.writerow([news_list[index].Title])
    for index in PredictX:
        # Look the article up by its position (PredictX), not by the
        # prediction value, and wrap the scalar so it forms a one-field row.
        predict_csv.writerow([news_list[index].Bias])

# Names used by the report code below; imported here, next to their use,
# like the other cell-local imports in this file.
from platform import platform
from sys import version_info

from bs4 import BeautifulSoup


def to_html(soup: BeautifulSoup, class_: str, x: str):
    """Append ``x`` followed by a ``<br>`` tag to the div with class ``class_``.

    Defined before the report is built because it is called from the
    ``print_fn`` callbacks passed to ``summary()`` below.
    """
    target = soup.html.body.find('div', attrs={'class': class_})
    target.append(x)
    target.append(soup.new_tag('br'))


# Report skeleton; the empty divs are filled in by class name below.
basic = """
<html>
<body>

<h1>Machine Learning Result</h1>
<div class="datetime">Date and Time : </div>

<h2>Environment</h2>
<div class="sys_info">System Info : </div>
<div class="py_version">Python Version : </div>
<div class="keras_backend">Keras Backend : </div>

<h2>RNN Configuration</h2>
<div class="rnn_epoch">Epoch : </div>
<div class="rnn_batch">Batch Size : </div>
<div class="rnn_model"></div>

<h2>CNN Configuration</h2>
<div class="cnn_epoch">Epoch : </div>
<div class="cnn_batch">Batch Size : </div>
<div class="cnn_model"></div>

<h2>Plot</h2>
<img src="plot.png" alt="plt" height="400" width="600">

</body>
</html>
"""
soup = BeautifulSoup(basic, 'html.parser')
soup.html.body.find('div', attrs={'class': 'datetime'}).append(str(n))
soup.html.body.find('div', attrs={'class': 'sys_info'}).append(str(platform()))
soup.html.body.find('div', attrs={'class': 'py_version'}).append(str(version_info))
# NOTE(review): nothing populates the 'keras_backend' div, so the report
# shows an empty "Keras Backend : " line.

soup.html.body.find('div', attrs={'class': 'rnn_epoch'}).append(str(rnn_epoch))
soup.html.body.find('div', attrs={'class': 'rnn_batch'}).append(str(rnn_batch))
# Route the summary text into the report instead of stdout. Extra keyword
# arguments are accepted and ignored because some Keras releases may pass
# options such as line_break to print_fn - TODO confirm for the pinned version.
rnn_model.summary(print_fn=lambda x, **_: to_html(soup, 'rnn_model', x))

soup.html.body.find('div', attrs={'class': 'cnn_epoch'}).append(str(cnn_epoch))
soup.html.body.find('div', attrs={'class': 'cnn_batch'}).append(str(cnn_batch))
cnn_model.summary(print_fn=lambda x, **_: to_html(soup, 'cnn_model', x))

# prettify() returns the formatted markup rather than changing the soup in
# place, so its return value is what gets written.
with open('./result/' + n + '/result.html', mode='w') as f:
    f.write(soup.prettify())

