# In [18]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import os
import pandas as pd



# Directory containing the policy CSV files
directory = 'C:/Users/Public/Documents/UNCC/2024 SPRING/ITCS 6124 - Illustrative Visualization/Assignments/Project_Communication/IV-Communication-Project/policy'

# List every entry in the directory
files = os.listdir(directory)

# Keep only the files whose names end with '_policy.csv'
policy_terms = [file for file in files if file.endswith('_policy.csv')]

# Column names applied to every CSV when it is read
common_columns = ['date', 'policy', 'Restrict/Close', 'Opening (State)', 'Deferring to County', 'Testing', 'Education', 'Health/Medical', 'Emergency Level', 'Transportation', 'Budget', 'Social Distancing', 'Other', 'Vaccine']

# Concatenate the 'policy' column of every file into a single string.
# Each file's text is collected in a list and joined once at the end, so the
# growing string is not re-copied on every iteration.
policy_chunks = []
for file in policy_terms:
    file_path = os.path.join(directory, file)
    # NOTE(review): with `names=` and no `header=0`, pandas treats the first
    # line of the file as data. If these CSVs carry a header row, that row's
    # 'policy' cell ends up inside policy_text - confirm against the files.
    df = pd.read_csv(file_path, names=common_columns)
    # str() guards against non-text cells (e.g. numbers), which would
    # otherwise make join() raise TypeError.
    policy_chunks.append(' '.join(map(str, df['policy'].dropna())) + ' ')
policy_text = ''.join(policy_chunks)


    


# Create a directed graph with one node per policy term.
# NOTE(review): `policy_terms` is the list of '*_policy.csv' file names, so
# the substring tests below look for file names inside the policy text and
# will likely never match - confirm which terms were actually intended.
G = nx.DiGraph()
G.add_nodes_from(policy_terms)

# Count, for every ordered pair of distinct terms, the number of '.'-delimited
# sentences in which both appear.
co_occurrence_matrix = pd.DataFrame(0, index=policy_terms, columns=policy_terms)
for sentence in policy_text.split('.'):
    # Find the terms present in this sentence once, instead of re-running the
    # substring test for every (term1, term2) combination.
    present_terms = [term for term in policy_terms if term in sentence]
    for term1 in present_terms:
        for term2 in present_terms:
            if term1 != term2:
                co_occurrence_matrix.loc[term1, term2] += 1

# Add a weighted edge for every ordered pair that co-occurs at least once
for term1 in policy_terms:
    for term2 in policy_terms:
        pair_count = co_occurrence_matrix.loc[term1, term2]
        if pair_count > 0:
            G.add_edge(term1, term2, weight=pair_count)

# Add edges between all pairs of distinct nodes to ensure full connectivity.
# Edges created above are re-added without attributes, so their 'weight'
# is left as it was; edges created here carry no weight.
for term1 in policy_terms:
    for term2 in policy_terms:
        if term1 != term2:
            G.add_edge(term1, term2)

# Calculate positions for the nodes (fixed seed for a repeatable layout)
pos = nx.spring_layout(G, seed=42)

# Collect node coordinates and hover labels in plain lists first; building
# them up front avoids re-assigning an ever-growing tuple on the trace for
# every node.
node_x = []
node_y = []
node_text = []
for node in G.nodes():
    x, y = pos[node]
    node_x.append(x)
    node_y.append(y)
    node_text.append(node)

# Collect edge coordinates. Each edge contributes (start, end, None); the
# None entry separates one segment from the next within the single trace.
edge_x = []
edge_y = []
for edge in G.edges():
    x0, y0 = pos[edge[0]]
    x1, y1 = pos[edge[1]]
    edge_x.extend((x0, x1, None))
    edge_y.extend((y0, y1, None))

# Create edge traces: thin grey lines with no hover text
edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    line=dict(width=0.5, color='#888'),
    hoverinfo='none',
    mode='lines')

# Create node traces: markers whose hover text is the node name
node_trace = go.Scatter(
    x=node_x,
    y=node_y,
    text=node_text,
    mode='markers',
    hoverinfo='text',
    marker=dict(
        showscale=False,
        colorscale='YlGnBu',
        size=10,
        colorbar=dict(
            thickness=15,
            # `colorbar.titleside` is deprecated and rejected by newer plotly
            # releases; the side is set through the nested title object.
            title=dict(text='Node Connections', side='right'),
            xanchor='left'
        ),
        line_width=2))

# Create network graph figure from the edge and node traces.
# The title font size is set through `title.font`; the flat `titlefont_size`
# shorthand is deprecated and rejected by newer plotly releases.
fig = go.Figure(
    data=[edge_trace, node_trace],
    layout=go.Layout(
        title=dict(
            text='Network Graph of Policy Terms Co-occurrence',
            font=dict(size=16),
        ),
        showlegend=False,
        hovermode='closest',
        margin=dict(b=20, l=5, r=5, t=40),
        # Empty placeholder annotation anchored near the bottom-left corner
        # of the plotting area (paper coordinates).
        annotations=[dict(
            text="",
            showarrow=False,
            xref="paper", yref="paper",
            x=0.005, y=-0.002)],
        # Hide grid, zero line and tick labels: axis values carry no meaning
        # for a force-directed layout.
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    ),
)

# Show the network graph
fig.show()
