In [2]:
import pandas as pd
import numpy as np
import networkx as nx
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Load the data from the CSV file
data = pd.read_csv('/home/kunal/Desktop/reasearch/data/fullDataset.csv')

# Create a belief network using the data
belief_network = BayesianModel()
belief_network.add_nodes_from(data.columns)
belief_network.fit(data, estimator=MaximumLikelihoodEstimator)

# Prepare the data for the Random Forest classifier
le = LabelEncoder()
encoded_data = data.apply(le.fit_transform)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(encoded_data.drop('target', axis=1), encoded_data['target'], test_size=0.2, random_state=42)

# Train the Random Forest classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)

# Identify closely related entities
feature_importance = rf_classifier.feature_importances_
closely_related = np.argsort(feature_importance)[-2:]  # Get indices of the two most important features
closely_related_labels = encoded_data.columns[closely_related]

# Plot the graph and output the closely related entities
G = nx.Graph()
G.add_nodes_from(data.columns)

for feature in closely_related_labels:
    G.add_edge('target', feature)

pos = nx.spring_layout(G)
nx.draw(G, pos, with_labels=True, font_weight='bold', node_color='lightblue', node_size=3000, font_size=10)
plt.title("Belief Network with Closely Related Entities")
plt.show()

print("Closely related entities: ", closely_related_labels.tolist())




KeyError: "['target'] not found in axis"

In [1]:
import pandas as pd
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from collections import deque

# Read the graph data from CSV file
df = pd.read_csv('graph_data.csv')

# Create a NetworkX graph object
graph = nx.Graph()
for i, row in df.iterrows():
    graph.add_edge(row['node1'], row['node2'])

# Define the structure of the belief neural network
num_hidden_layers = 3
num_hidden_units = 64
num_classes = len(set(df['class_label']))
num_features = 5
model = nn.Sequential(
    nn.Linear(num_features, num_hidden_units),
    nn.ReLU(),
    *[nn.Sequential(nn.Linear(num_hidden_units, num_hidden_units), nn.ReLU()) for _ in range(num_hidden_layers)],
    nn.Linear(num_hidden_units, num_classes)
)

# Define the graph traversal algorithm
def bfs_traversal(graph, start_node):
    queue = deque([start_node])
    visited = set()
    while queue:
        node = queue.popleft()
        if node not in visited:
            visited.add(node)
            yield node
            queue.extend(graph.neighbors(node))

# Generate feature vectors for each entity
def extract_features(graph, node):
    features = []
    neighbors = list(graph.neighbors(node))
    num_neighbors = len(neighbors)
    features.append(num_neighbors)
    features.append(sum([graph.degree(n) for n in neighbors])/num_neighbors)
    features.append(np.var([graph.degree(n) for n in neighbors]))
    features.append(graph.degree(node))
    features.append(nx.clustering(graph, node))
    return features

feature_vectors = []
class_labels = []
for node in graph.nodes:
    features = extract_features(graph, node)
    feature_vectors.append(features)
    class_labels.append(df.loc[df['node'] == node, 'class_label'].iloc[0])

# Train a random forest classifier
clf = RandomForestClassifier(n_estimators=100)
clf.fit(feature_vectors, class_labels)

# Predict the class labels of all entities
predicted_labels = []
for node in graph.nodes:
    features = extract_features(graph, node)
    predicted_label = clf.predict([features])[0]
    predicted_labels.append(predicted_label)

# Plot the graph with closely related entities highlighted
pos = nx.spring_layout(graph)
node_colors = []
for node in graph.nodes:
    if predicted_labels[node] == clf.predict_proba([features])[0].argmax():
        node_colors.append('green')
    else:
        node_colors.append('red')
nx.draw_networkx(graph, pos, node_color=node_colors, with_labels=True)
plt.show()


KeyError: "None of [Index(['source', 'target'], dtype='object')] are in the [columns]"