The goal of this notebook is to export the result tables produced in the previous notebooks to .docx format, so they can be included in the FYP report.

In [1]:
# Colab setup: attach Google Drive, then move into the project folder
import os

from google.colab import drive, userdata

drive.mount('/content/drive', force_remount=True)

# The project folder is looked up under the 'CURRENT_DIR' key of Colab's userdata;
# the relative paths used in the rest of the notebook resolve against it
os.chdir(userdata.get('CURRENT_DIR'))

Mounted at /content/drive


In [2]:
!pip install python-docx

Collecting python-docx
  Downloading python_docx-1.1.2-py3-none-any.whl.metadata (2.0 kB)
Downloading python_docx-1.1.2-py3-none-any.whl (244 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.3/244.3 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: python-docx
Successfully installed python-docx-1.1.2


In [3]:
import pandas as pd

In [4]:
# Create a function that takes in a DataFrame and writes the table to .docx
from docx import Document
from docx.shared import Pt
from docx.oxml.ns import qn
from docx.oxml import OxmlElement

def write_to_docx(df, filename):
    """Write a DataFrame to a new .docx file as a single bordered table.

    The first table row holds the column labels in bold. Each following row
    holds one DataFrame row, with every value converted through ``str``.
    The DataFrame index is not written; call ``reset_index()`` beforehand if
    it should appear in the table.

    Args:
        df: DataFrame to export. Column labels may be of any type, they are
            converted through ``str`` as well.
        filename: Destination of the document, passed to ``Document.save``.
    """
    doc = Document()

    # Start with the header row only; data rows are appended one by one below
    table = doc.add_table(rows=1, cols=len(df.columns))
    table.style = 'Table Grid'  # Adds visible borders to the table

    # Populate header row
    for cell, column_name in zip(table.rows[0].cells, df.columns):
        # str() so that non-string labels (e.g. integer column names) are accepted
        cell.text = str(column_name)
        for paragraph in cell.paragraphs:
            for run in paragraph.runs:
                run.bold = True  # Bold header text

    # Populate data rows.
    # itertuples yields each value with its own column's dtype, whereas iterrows
    # builds one Series per row and can upcast integers to floats
    # (an integer 100 would then be written as "100.0").
    for values in df.itertuples(index=False, name=None):
        row_cells = table.add_row().cells
        for cell, val in zip(row_cells, values):
            cell.text = str(val)

    # Save the document
    doc.save(filename)


# 1. Sentiment Eval Results

In [None]:
# Load the overall and the per-class sentiment evaluation metrics from CSV
overall_metrics, class_metrics = (
    pd.read_csv('sentiment_eval_metrics/overall_metrics.csv'),
    pd.read_csv('sentiment_eval_metrics/class_metrics.csv'),
)

In [None]:
# Keep 4 decimal places in both metric tables
overall_metrics, class_metrics = (
    frame.round(decimals=4) for frame in (overall_metrics, class_metrics)
)

In [None]:
# Split class_metrics into one table per class: every table keeps the "model"
# column plus the precision / recall / f1 / support columns of a single class
neg_metrics = class_metrics[[
    "model",
    "class_0_precision",
    "class_0_recall",
    "class_0_f1",
    "class_0_support",
]]
neu_metrics = class_metrics[[
    "model",
    "class_1_precision",
    "class_1_recall",
    "class_1_f1",
    "class_1_support",
]]
pos_metrics = class_metrics[[
    "model",
    "class_2_precision",
    "class_2_recall",
    "class_2_f1",
    "class_2_support",
]]

In [None]:
# Export every sentiment table to its own Word document
write_to_docx(df=overall_metrics, filename='sentiment_eval_metrics/overall_metrics.docx')
write_to_docx(df=neg_metrics, filename='sentiment_eval_metrics/neg_metrics.docx')
write_to_docx(df=neu_metrics, filename='sentiment_eval_metrics/neu_metrics.docx')
write_to_docx(df=pos_metrics, filename='sentiment_eval_metrics/pos_metrics.docx')

# 2. Stock Eval Results

## 2.1. R2 of models

In [5]:
# Load the R2 tables of the linear and the LSTM models from CSV
linear_r2, lstm_r2 = (
    pd.read_csv('stock_eval_metrics/linear_r2.csv'),
    pd.read_csv('stock_eval_metrics/lstm_r2.csv'),
)

In [6]:
# Keep 4 decimal places in both R2 tables
linear_r2, lstm_r2 = (
    frame.round(decimals=4) for frame in (linear_r2, lstm_r2)
)

In [7]:
# Export both R2 tables to Word documents
write_to_docx(df=linear_r2, filename='stock_eval_metrics/linear_r2.docx')
write_to_docx(df=lstm_r2, filename='stock_eval_metrics/lstm_r2.docx')

## 2.2. Demo: Linear Coefs & LSTM SHAP Values

In [48]:
# Load the linear-model coefficients and the LSTM SHAP values from CSV
linear_coefs, lstm_shap = (
    pd.read_csv('stock_eval_metrics/linear_coefs.csv'),
    pd.read_csv('stock_eval_metrics/shap_values.csv'),
)

In [49]:
# Keep 4 decimal places in both tables
linear_coefs, lstm_shap = (
    frame.round(decimals=4) for frame in (linear_coefs, lstm_shap)
)

In [50]:
# Keep only the rows whose 'method' is "market_features+ground_truth"
linear_coefs = linear_coefs.loc[
    linear_coefs['method'].eq("market_features+ground_truth")
]

In [51]:
# Drop the 'method' column, then index by 'target' and transpose so that
# every target becomes a column and every coefficient becomes a row
linear_coefs = (
    linear_coefs
    .drop(columns=['method'])
    .set_index('target')
    .transpose()
)

In [52]:
# Preview the transposed table: rows are coefficient names, columns are targets
linear_coefs.head()

target,Return,Volatility,Log_Volume
Return_coef,0.045,0.0198,0.124
Volatility_coef,-0.087,-0.0418,-0.21
Log_Volume_coef,0.0271,-0.0121,-0.007
Sentiment_score_coef,-0.0647,0.0228,0.025
Sentiment_volatility_coef,0.0314,0.171,0.0342


In [54]:
# Turn the coefficient-name index back into a regular column (named "index")
linear_coefs = linear_coefs.reset_index()

In [55]:
# Preview the table to check that the coefficient names are now in an "index" column
linear_coefs.head()

target,index,Return,Volatility,Log_Volume
0,Return_coef,0.045,0.0198,0.124
1,Volatility_coef,-0.087,-0.0418,-0.21
2,Log_Volume_coef,0.0271,-0.0121,-0.007
3,Sentiment_score_coef,-0.0647,0.0228,0.025
4,Sentiment_volatility_coef,0.0314,0.171,0.0342


In [56]:
# Give the former index column a descriptive header: "Input_Coef/Target"
linear_coefs = linear_coefs.rename(columns={'index': 'Input_Coef/Target'})

In [57]:
# Display the complete table as it will be exported
linear_coefs

target,Input_Coef/Target,Return,Volatility,Log_Volume
0,Return_coef,0.045,0.0198,0.124
1,Volatility_coef,-0.087,-0.0418,-0.21
2,Log_Volume_coef,0.0271,-0.0121,-0.007
3,Sentiment_score_coef,-0.0647,0.0228,0.025
4,Sentiment_volatility_coef,0.0314,0.171,0.0342
5,residual,-0.118,0.114,0.0644


In [58]:
# Export the coefficient table and the SHAP table to Word documents
write_to_docx(df=linear_coefs, filename='stock_eval_metrics/linear_coefs.docx')
write_to_docx(df=lstm_shap, filename='stock_eval_metrics/lstm_shap.docx')