#### Import Modules, Reload Packages

In [1]:
import pandas as pd
import pickle
import numpy as np
import warnings
warnings.filterwarnings("ignore")
from ordered_set import OrderedSet

import importlib
import utils.db_funcs as db
import utils.df_funcs
import utils.fmp_funcs
import utils.graph_db_funcs

# Re-execute the project helper modules so that the module objects held by
# this kernel reflect their current source.
for helper_module in (db, utils.df_funcs, utils.fmp_funcs, utils.graph_db_funcs):
    importlib.reload(helper_module)

# Bind the helper names again, this time from the freshly reloaded modules.
# NOTE(review): the wildcard imports hide where names used further down
# (create_indexes, load_data_in_batches, driver) come from - presumably they
# are defined in these modules; confirm and consider importing them explicitly.
from utils import db_funcs as db
from utils.df_funcs import *
from utils.fmp_funcs import *
from utils.graph_db_funcs import *

# Lift pandas' display limits on the number of rows and columns shown.
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)

neo4j+s://c4ccd126.databases.neo4j.io
neo4j+s://c4ccd126.databases.neo4j.io


#### Read Pickle File & Write STOCK INFO to Graph DB

In [2]:
# Lift pandas' limit on the displayed width of a column's contents.
pd.set_option('display.max_colwidth', None)

MAX_ROWS = 10000  # Only the first MAX_ROWS rows are written to the graph DB
BATCH_SIZE = 500  # Adjust based on performance

# SECURITY: pickle.load can execute arbitrary code while deserialising, so
# this file must come from a trusted source.
with open('../files/stock_info_final.pkl', 'rb') as file:
    stock_info_final = pickle.load(file)

# Cap the amount of data sent to the database.
stock_info_final = stock_info_final.head(MAX_ROWS)

# Create the indexes first, then load the rows BATCH_SIZE at a time.
# Both helpers are not defined in this notebook (presumably provided by the
# wildcard imports from utils); they report their progress through log output.
create_indexes()
load_data_in_batches(stock_info_final, BATCH_SIZE)

2025-10-28 10:14:35 [INFO]: Index created: CREATE INDEX IF NOT EXISTS FOR (s:Stock) ON (s.symbol);
2025-10-28 10:14:36 [INFO]: Index created: CREATE INDEX IF NOT EXISTS FOR (c:Country) ON (c.name);
2025-10-28 10:14:36 [INFO]: Index created: CREATE INDEX IF NOT EXISTS FOR (s:Sector) ON (s.name);
2025-10-28 10:14:36 [INFO]: Index created: CREATE INDEX IF NOT EXISTS FOR (i:Industry) ON (i.name);
2025-10-28 10:14:36 [INFO]: Index created: CREATE INDEX IF NOT EXISTS FOR (a:Attribute) ON (a.type, a.value);
2025-10-28 10:14:36 [INFO]: Index created: CREATE INDEX IF NOT EXISTS FOR (d:Description) ON (d.text);
2025-10-28 10:14:36 [INFO]: Index created: CREATE INDEX IF NOT EXISTS FOR (d:Description_Chunk) ON (d.text);
2025-10-28 10:14:36 [INFO]: All indexes created successfully.
2025-10-28 10:14:36 [INFO]: Loading 10000 rows into Neo4j in 20 batches...
2025-10-28 10:14:39 [INFO]: Batch 1/20 loaded.
2025-10-28 10:14:40 [INFO]: Batch 2/20 loaded.
2025-10-28 10:14:42 [INFO]: Batch 3/20 loaded.
2025

#### Embed the Description info in the Description Node

In [None]:

import os
#from langchain.vectorstores.neo4j_vector import Neo4jVector
from langchain_community.vectorstores import Neo4jVector
from langchain_openai import OpenAIEmbeddings


# Node properties handed to Neo4jVector as the text to embed.
description_text_properties = ["symbol", "name", "text"]

# Set up the "description_embeddings" vector index over Description nodes.
# Connection details are read from the NEO4J_* environment variables and the
# vectors are kept in each node's "embedding" property.
# NOTE(review): presumably this also computes embeddings for nodes that do not
# have one yet (OpenAI API calls) - confirm against the LangChain docs.
neo4j_vector_index = Neo4jVector.from_existing_graph(
    embedding=OpenAIEmbeddings(),
    url=os.getenv("NEO4J_URI"),
    username=os.getenv("NEO4J_USERNAME"),
    password=os.getenv("NEO4J_PASSWORD"),
    index_name="description_embeddings",
    node_label="Description",
    text_node_properties=description_text_properties,
    embedding_node_property="embedding",
)

# Sanity check: Description nodes that carry a text vs. all Description nodes.
count_queries = (
    "MATCH (n:Description) WHERE n.text IS NOT NULL RETURN count(n) AS node_count",
    "MATCH (n:Description) RETURN count(n) AS node_count",
)

# `driver` is not defined in this notebook; presumably it is provided by one of
# the wildcard imports from utils.
with driver.session() as session:
    # Submit both queries first, then print their records in submission order.
    query_results = [session.run(cypher) for cypher in count_queries]
    for query_result in query_results:
        for record in query_result:
            print(record)

#### Write Stock Data to PDF

In [None]:

import pandas as pd
from fpdf import FPDF

# Read the full stock table again so this cell does not depend on the
# truncated frame left behind by the graph-loading cell.
# SECURITY: pickle.load can execute arbitrary code while deserialising, so
# this file must come from a trusted source.
with open('../files/stock_info_final.pkl', 'rb') as file:
    stock_info_final = pickle.load(file)

# Columns exported to the PDF, in output order.
PDF_COLUMNS = [
    'symbol', 'name', 'country', 'country_full_name', 'currency', 'stock_exchange',
    'exchange_short_name', 'sector', 'industry', 'beta', 'data_points',
    'return_coeff', 'volatility_coeff', 'market_cap_euro', 'avg_trade_vol_euro',
    'return', 'volatility', 'market_capitalization', 'average_trading_volume',
]

pdf_prep = stock_info_final[PDF_COLUMNS]

# Independent copy that is handed to the PDF writer.
df = pdf_prep.copy()


def dataframe_to_pdf(df, filename, font_size=10, width_per_char=2, page_format=(1000, 1000)):
    """Render a DataFrame as one bordered table and save it as a PDF file.

    Every column is made as wide as its longest entry (header included),
    measured in characters and scaled by ``width_per_char``. All cells are
    centred and drawn with a border.

    Args:
        df: DataFrame to render; each value is written as ``str(value)``.
        filename: Path of the PDF file to write.
        font_size: Font size passed to ``FPDF.set_font``.
        width_per_char: Cell width allotted per character of the longest entry.
        page_format: Page size passed to the ``FPDF`` constructor.

    Returns:
        None. The PDF is written to ``filename`` and a confirmation is printed.
    """
    pdf = FPDF(orientation="L", format=page_format)
    pdf.add_page()
    # NOTE(review): with a built-in font, text outside Latin-1 may fail to
    # encode - confirm against the fpdf version in use if names can be non-Latin.
    pdf.set_font("Arial", size=font_size)

    # Column labels are not guaranteed to be strings, but the cells need text.
    headers = [str(column) for column in df.columns]

    # One width per column, derived from the longest header/value text.
    col_widths = []
    for position, header in enumerate(headers):
        # Positional access still yields a Series when labels are duplicated.
        column_values = df.iloc[:, position]
        if len(column_values):
            longest_value = int(column_values.astype(str).str.len().max())
        else:
            # No rows: only the header determines the width.
            longest_value = 0
        col_widths.append(max(len(header), longest_value) * width_per_char)

    row_height = pdf.font_size * 1.5  # Row height for table cells

    # Header row
    for width, header in zip(col_widths, headers):
        pdf.cell(width, row_height, header, border=1, align='C')
    pdf.ln(row_height)

    # Data rows
    for row in df.itertuples(index=False):
        for width, value in zip(col_widths, row):
            pdf.cell(width, row_height, str(value), border=1, align='C')
        pdf.ln(row_height)

    # Save PDF
    pdf.output(filename)
    print(f"PDF saved to {filename}")

# Convert DataFrame to PDF. "stock_data.pdf" is a relative path, so the file
# is written relative to the kernel's current working directory.
dataframe_to_pdf(df, "stock_data.pdf")

