In [5]:
# Install required libraries if not already present
!pip install azure-storage-blob pandas networkx matplotlib

from azure.storage.blob import BlobServiceClient
import pandas as pd
from datetime import datetime
import io
import networkx as nx
import matplotlib.pyplot as plt
import os

# Connect to Azure Blob Storage.
# The connection string contains the storage account key, so it must never be
# committed to source. It is read from the environment instead.
# NOTE(review): the key that was previously hard-coded here should be treated
# as compromised and rotated in the storage account.
connection_string = os.environ.get("AZURE_STORAGE_CONNECTION_STRING")
if not connection_string:
    raise ValueError(
        "AZURE_STORAGE_CONNECTION_STRING is not set; "
        "export the storage connection string before running this cell"
    )
blob_service_client = BlobServiceClient.from_connection_string(connection_string)
# All reads and writes below go through the "silver" container.
silver_client = blob_service_client.get_container_client("silver")

# Scan the silver container for merged_rosters_players_* blobs and keep the
# one whose trailing "<YYYYMMDD>_<HHMMSS>" timestamp is the most recent.
latest_blob = None
latest_time = None
print("Checking for merged_rosters_players_ files:")
for blob in silver_client.list_blobs(name_starts_with="merged_rosters_players_"):
    print(f"Found: {blob.name}")

    # A well-formed name splits into at least five "_"-separated pieces.
    name_parts = blob.name.split("_")
    if len(name_parts) < 5:
        print(f"Skipping {blob.name}: Not enough parts")
        continue

    # The last two pieces carry the date and the time (minus the extension).
    date_part = name_parts[-2]
    time_part = name_parts[-1].replace('.csv', '')
    try:
        timestamp = datetime.strptime(f"{date_part}_{time_part}", "%Y%m%d_%H%M%S")
    except ValueError as e:
        print(f"Skipping {blob.name}: Invalid timestamp ({e})")
        continue

    # Only a strictly newer timestamp replaces the current candidate.
    if latest_time is not None and timestamp <= latest_time:
        continue
    latest_time, latest_blob = timestamp, blob.name

if not latest_blob:
    raise ValueError("No merged_rosters_players_*.csv files found in silver")

# Download the selected blob, decode it as UTF-8 text and parse it as CSV.
print(f"Loading {latest_blob}")
blob_client = silver_client.get_blob_client(latest_blob)
raw_bytes = blob_client.download_blob().readall()
blob_data = raw_bytes.decode("utf-8")
csv_buffer = io.StringIO(blob_data)
df = pd.read_csv(csv_buffer)

# Optional: Limit rows for clarity (uncomment if needed)
# df = df.head(50)

# Create a directed graph with one node per distinct value of the 'name',
# 'teamName' and 'position' columns, each tagged with a 'layer' attribute.
G = nx.DiGraph()

# Walk the three columns in lockstep instead of using DataFrame.iterrows(),
# which materialises a full Series for every row just to read three fields.
for player_name, team_name, position in zip(df['name'], df['teamName'], df['position']):
    G.add_node(player_name, layer='name')
    G.add_node(team_name, layer='team')
    G.add_node(position, layer='position')

    G.add_edge(player_name, team_name)  # Name -> Team (right)
    G.add_edge(position, player_name)   # Position -> Name (left)

# Define positions with aligned spacing: names in the centre column (x=0),
# teams to the right (x=2) and positions to the left (x=-2).
pos = {}
name_nodes = [n for n, d in G.nodes(data=True) if d['layer'] == 'name']
team_nodes = {n for n, d in G.nodes(data=True) if d['layer'] == 'team'}
position_nodes = {n for n, d in G.nodes(data=True) if d['layer'] == 'position'}

# Calculate the figure height from the number of name nodes, capped at
# max_height. It is also clamped to min_height so that an empty graph does not
# produce a zero-height figure, which matplotlib rejects.
y_spacing = 2.0
max_height = 50
min_height = 2
height = max(min(len(name_nodes) * y_spacing, max_height), min_height)

# Assign positions for name nodes (center), stacked vertically in graph order
for i, node in enumerate(name_nodes):
    pos[node] = (0, i * y_spacing)

# Align team and position nodes to their connected name nodes. A team or
# position shared by several names ends up level with the last such name,
# because later assignments overwrite earlier ones.
for player_name in name_nodes:
    y_pos = pos[player_name][1]
    for neighbor in G.successors(player_name):
        if neighbor in team_nodes:
            pos[neighbor] = (2, y_pos)
    for predecessor in G.predecessors(player_name):
        if predecessor in position_nodes:
            pos[predecessor] = (-2, y_pos)

# Render the graph on a fixed-width canvas using the precomputed layout
draw_options = {
    'with_labels': True,
    'node_color': 'lightblue',
    'node_size': 1500,
    'font_size': 10,
    'font_weight': 'bold',
    'arrows': True,
    'arrowstyle': '->',
    'arrowsize': 20,
}
plt.figure(figsize=(20, height), dpi=80)
nx.draw(G, pos, **draw_options)
plt.title("Player Mapping: Position -> Name -> Team", pad=20)
plt.tight_layout()

# timezone is needed for an aware UTC timestamp; datetime.utcnow() is
# deprecated as of Python 3.12.
from datetime import timezone

# Save the graph locally first, then upload that file to the silver container.
# The try/finally ensures the temporary PNG is removed even if the upload or
# the display step raises.
local_filename = "player_mapping.png"
try:
    plt.savefig(local_filename, dpi=80, bbox_inches='tight')
    print(f"Graph saved locally as {local_filename}")

    # Upload to silver container under a UTC-timestamped name
    blob_name = f"player_mapping_{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}.png"
    blob_client = silver_client.get_blob_client(blob_name)
    with open(local_filename, "rb") as f:
        blob_client.upload_blob(f, overwrite=True)
    print(f"Graph uploaded to silver as {blob_name}")

    # Display the graph
    plt.show()
finally:
    # Clean up local file; it may not exist if savefig itself failed
    if os.path.exists(local_filename):
        os.remove(local_filename)
        print(f"Local file {local_filename} removed")

StatementMeta(b73298e2-f1b8-4875-a47a-e543f5595c3b, 4, 10, Submitted, Running, Running)