<a href="https://colab.research.google.com/github/nonyeezeh/Research-Project-Code/blob/main/code_4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [2]:
import numpy as np
import pandas as pd
import networkx as nx
import plotly.graph_objects as go

# Research Question

##### In this research, how does the predictive accuracy of a neural network compare to that of a Bayesian network in predicting stock prices, particularly when trained on varying sample sizes of data generated by a Bayesian network?

# Expectations

1. With larger training samples, the neural network's performance is expected to improve due to having sufficient data for effective learning, while the Bayesian network may outperform the neural network on smaller samples.
2. The Bayesian network is anticipated to show more consistent performance across different sample sizes due to its probabilistic nature and reliance on prior knowledge.
3. The neural network might require more computational resources and time to train, especially with increasing sample sizes, compared to the Bayesian network.

# Data: 3 Nodes, 500 Samples

## Bayesian Network Data Generation

In [36]:
# Seed NumPy's global random generator so that the randomly chosen structure
# and the sampled rows are reproducible under Restart & Run All.
SEED = 42
np.random.seed(SEED)

# Define the number of samples (rows) to generate
num_samples = 500

# Define the possible values for each variable, keyed by node name.
# NOTE(review): IR / EI / SP presumably stand for interest rate, economic
# indicators and stock price -- confirm against the write-up.
values = {
    'IR': ['low', 'medium', 'high'],
    'EI': ['poor', 'average', 'good'],
    'SP': ['decrease', 'stable', 'increase']
}

# Functions to sample each variable with probabilities
def sample_IR():
    """Draw one IR state from a freshly sampled categorical distribution.

    Every call first draws a new probability vector from a Dirichlet with
    all-ones concentration, then picks one entry of ``values['IR']`` according
    to that vector.

    Returns:
        tuple: ``(state, probs)`` where ``state`` is the chosen entry and
        ``probs`` is the probability vector rounded to two decimals. The
        rounding is for reporting only; the unrounded vector drives the draw.
    """
    states = values['IR']
    weights = np.random.dirichlet(np.ones(len(states)))
    picked = np.random.choice(states, p=weights)
    return picked, [round(weight, 2) for weight in weights]

def sample_EI(ir=None):
    """Draw one EI state from a freshly sampled categorical distribution.

    Every call first draws a new probability vector from a Dirichlet with
    all-ones concentration, then picks one entry of ``values['EI']`` according
    to that vector.

    Args:
        ir: Previously drawn IR value. Accepted for the caller's convenience
            but not used by the current implementation.

    Returns:
        tuple: ``(state, probs)`` where ``state`` is the chosen entry and
        ``probs`` is the probability vector rounded to two decimals. The
        rounding is for reporting only; the unrounded vector drives the draw.
    """
    states = values['EI']
    weights = np.random.dirichlet(np.ones(len(states)))
    picked = np.random.choice(states, p=weights)
    return picked, [round(weight, 2) for weight in weights]

def sample_SP(ir=None, ei=None):
    """Draw one SP state from a freshly sampled categorical distribution.

    Every call first draws a new probability vector from a Dirichlet with
    all-ones concentration, then picks one entry of ``values['SP']`` according
    to that vector.

    Args:
        ir: Previously drawn IR value. Accepted but not used by the current
            implementation.
        ei: Previously drawn EI value. Accepted but not used by the current
            implementation.

    Returns:
        tuple: ``(state, probs)`` where ``state`` is the chosen entry and
        ``probs`` is the probability vector rounded to two decimals. The
        rounding is for reporting only; the unrounded vector drives the draw.
    """
    states = values['SP']
    weights = np.random.dirichlet(np.ones(len(states)))
    picked = np.random.choice(states, p=weights)
    return picked, [round(weight, 2) for weight in weights]

def sample_network_edges():
    """Randomly pick a directed, acyclic edge set over the nodes IR, EI and SP.

    * IR and EI are linked with probability 0.5, and when they are, the
      direction is chosen by a second coin flip. They are never linked in both
      directions at once, because that would form a cycle and the result would
      no longer be a valid Bayesian network structure.
    * ``IR -> SP`` and ``EI -> SP`` are each included with probability 0.5.
    * If neither edge into SP was drawn, one of the two is added at random so
      that SP always has at least one parent.

    Returns:
        list[tuple[str, str]]: ``(parent, child)`` pairs.
    """
    sampled = []

    # At most one direction between IR and EI keeps the graph acyclic.
    if np.random.rand() > 0.5:
        sampled.append(('IR', 'EI') if np.random.rand() > 0.5 else ('EI', 'IR'))
    if np.random.rand() > 0.5:
        sampled.append(('IR', 'SP'))
    if np.random.rand() > 0.5:
        sampled.append(('EI', 'SP'))

    # Ensure there's at least one edge to SP (either from IR or EI).
    # np.random.choice only accepts 1-D input, so a list of tuples cannot be
    # passed to it directly; draw an index instead.
    if not any(child == 'SP' for _, child in sampled):
        candidates = [('IR', 'SP'), ('EI', 'SP')]
        sampled.append(candidates[np.random.randint(len(candidates))])

    return sampled


# Randomly determine the structure
edges = sample_network_edges()

# Generate the data and capture probabilities
data = []
probabilities_data = []

# One pass per row. The samplers are always invoked in IR -> EI -> SP order so
# that each later sampler can be handed the values drawn before it.
for _ in range(num_samples):
    ir, ir_probs = sample_IR()
    ei, ei_probs = sample_EI(ir)
    sp, sp_probs = sample_SP(ir, ei)

    data.append([ir, ei, sp])

    # Flatten to [probs, chosen, probs, chosen, probs, chosen]; every
    # probability vector is stored as one comma-separated string.
    probability_row = []
    for probs, chosen in ((ir_probs, ir), (ei_probs, ei), (sp_probs, sp)):
        probability_row += [','.join(str(p) for p in probs), chosen]
    probabilities_data.append(probability_row)

# Main table: one column per variable, one row per sample
df = pd.DataFrame(data, columns=['IR', 'EI', 'SP'])
df.to_csv('bn_data_structure.csv', index=False)

# Companion table: the probability vector used for each draw next to the
# value that was chosen from it
probabilities_df = pd.DataFrame(
    probabilities_data,
    columns=[
        'IR_Probabilities', 'Chosen_IR',
        'EI_Probabilities', 'Chosen_EI',
        'SP_Probabilities', 'Chosen_SP',
    ],
)
probabilities_df.to_csv('bn_probabilities.csv', index=False)

# Preview the first 5 rows of each table
print("Generated data:", df.head(), sep="\n")
print("\nProbabilities and chosen values:", probabilities_df.head(), sep="\n")
print("\nMain data and probabilities saved successfully.")

Generated data:
       IR       EI        SP
0    high  average  increase
1  medium     good  increase
2  medium     poor  increase
3    high  average  increase
4    high     poor    stable

Probabilities and chosen values:
  IR_Probabilities Chosen_IR EI_Probabilities Chosen_EI SP_Probabilities  \
0   0.27,0.31,0.42      high   0.34,0.55,0.11   average    0.13,0.38,0.5   
1    0.16,0.45,0.4    medium   0.27,0.61,0.12      good    0.32,0.38,0.3   
2   0.02,0.33,0.65    medium   0.42,0.42,0.16      poor    0.1,0.31,0.59   
3   0.33,0.09,0.59      high   0.32,0.54,0.13   average    0.33,0.0,0.66   
4   0.56,0.29,0.15      high    0.15,0.35,0.5      poor    0.12,0.68,0.2   

  Chosen_SP  
0  increase  
1  increase  
2  increase  
3  increase  
4    stable  

Main data and probabilities saved successfully.


In [37]:
# Visualize the Bayesian Network structure using Plotly
G = nx.DiGraph()

# Register every variable explicitly: add_edges_from only creates nodes that
# appear in an edge, so a variable without any edge would otherwise be missing
# from the figure.
G.add_nodes_from(['IR', 'EI', 'SP'])
G.add_edges_from(edges)

# Extract node positions for Plotly (fixed seed keeps the layout stable
# between runs)
pos = nx.spring_layout(G, seed=42)
edge_x = []
edge_y = []
arrow_x = []
arrow_y = []
arrow_annotations = []

for source, target in G.edges():
    x0, y0 = (float(c) for c in pos[source])
    x1, y1 = (float(c) for c in pos[target])

    # None breaks the line so that each edge is drawn as its own segment
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])

    # Arrow head sits close to the target node (x1, y1) ...
    head_x = 0.85 * x1 + 0.15 * x0
    head_y = 0.85 * y1 + 0.15 * y0
    arrow_x.append(head_x)
    arrow_y.append(head_y)

    # ... and its short tail lies on the same edge, slightly nearer the
    # source, so the head points from parent to child whatever the layout.
    arrow_annotations.append(dict(
        x=head_x, y=head_y,
        ax=0.70 * x1 + 0.30 * x0, ay=0.70 * y1 + 0.30 * y0,
        xref='x', yref='y', axref='x', ayref='y',
        showarrow=True, arrowhead=2, arrowsize=2, arrowwidth=2,
        arrowcolor='black', text=''
    ))

edge_trace = go.Scatter(
    x=edge_x, y=edge_y,
    line=dict(width=2, color='gray'),
    hoverinfo='none',
    mode='lines')

node_text = list(G.nodes())
node_x = [float(pos[node][0]) for node in node_text]
node_y = [float(pos[node][1]) for node in node_text]

# Highlight the SP node with a different color
node_color = ['pink' if node == 'SP' else 'purple' for node in node_text]

node_trace = go.Scatter(
    x=node_x, y=node_y,
    mode='markers+text',
    text=node_text,
    textposition="top center",
    hoverinfo='text',
    marker=dict(size=50, color=node_color, line=dict(width=2)))

# Arrow heads are layout annotations rather than a marker trace: an annotation
# arrow is oriented along its own tail -> head vector, whereas a fixed
# 'triangle-up' marker always points upwards regardless of edge direction.
fig = go.Figure(data=[edge_trace, node_trace],
             layout=go.Layout(
                showlegend=False,
                hovermode='closest',
                margin=dict(b=20, l=20, r=20, t=50),  # Adjusted margins to fit the title
                xaxis=dict(showgrid=False, zeroline=False),
                yaxis=dict(showgrid=False, zeroline=False),
                plot_bgcolor='aliceblue',
                annotations=arrow_annotations)
                )

# Update layout to include a proper title
fig.update_layout(title_text="Bayesian Network Structure", title_x=0.5)

fig.show()