In [41]:
import colorsys

import numpy as np
import pandas as pd
from pyvis.network import Network
from sklearn.manifold import TSNE
from sklearn.metrics.pairwise import cosine_similarity

In [42]:
# Read the dataset from data.csv (path is relative to the working directory)
csv_path = 'data.csv'
data = pd.read_csv(csv_path)

# Extract game percentages
def extract_game_percentages(row):
    """Parse the three ``GamePlayedN%`` cells of a row into a ``{game: percent}`` dict.

    Each non-missing cell is expected to look like ``"<game>:<percent>"``.
    Missing cells (NaN / None) are skipped.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide the keys 'GamePlayed1%', 'GamePlayed2%' and 'GamePlayed3%'.

    Returns
    -------
    dict
        Game title -> percentage as ``float``; empty if all three cells are missing.

    Raises
    ------
    ValueError
        If a cell contains no colon or the part after the last colon is not a number.
    """
    games = {}
    for col in ['GamePlayed1%', 'GamePlayed2%', 'GamePlayed3%']:
        entry = row[col]
        if pd.notna(entry):
            # Split on the LAST colon only: titles such as "Call of Duty: Warzone"
            # contain colons themselves and would break a plain split(':').
            game, percent = entry.rsplit(':', 1)
            games[game] = float(percent)
    return games

# Row-wise pass: each row's parsed {game: percent} dict is stored in the 'Games' column
data['Games'] = data.apply(extract_game_percentages, axis='columns')


In [43]:
# Node size is proportional to TotalHoursWatched; the most-watched streamer gets size 50
hours_watched = data['TotalHoursWatched']
max_hours_watched = hours_watched.max()
data['NodeSize'] = hours_watched.div(max_hours_watched).mul(50)

# Determine dominant game for each streamer
def dominant_game(row):
    """Return the game with the highest percentage in ``row['Games']``.

    Returns ``None`` when the mapping is empty. On a tie, the game that
    appears first in the mapping wins.
    """
    shares = row['Games']
    if not shares:
        return None
    top_game, _ = max(shares.items(), key=lambda pair: pair[1])
    return top_game

# Row-wise pass: store each streamer's top game (None when the row has no games)
data['DominantGame'] = data.apply(dominant_game, axis='columns')

In [44]:
# Create a vector for each streamer based on all unique games
def similarity_vector(row, all_games):
    """Build a per-game weight vector for one row.

    Every game in ``all_games`` gets a slot, in the iteration order of
    ``all_games``, initialised to 0. Games listed in ``row['Games']`` are set
    to their percentage divided by 100.

    Returns a 1-D ``numpy`` array of the slot values.
    """
    weights = dict.fromkeys(all_games, 0)
    weights.update(
        (title, pct / 100.0) for title, pct in row['Games'].items()
    )
    return np.array(list(weights.values()))

# Identify all unique games
all_unique_games = set(
    game for row_games in data['Games'] for game in row_games.keys()
)

# Iteration order of a set of strings can differ between interpreter sessions
# (string hash randomization), which would shuffle the vector columns from run
# to run. Passing a sorted list pins one column per game in a stable order.
game_order = sorted(all_unique_games)

data['Vector'] = data.apply(similarity_vector, axis=1, all_games=game_order)


In [45]:
# Stack the per-streamer vectors into one (n_streamers, n_games) matrix
vectors = np.stack(data['Vector'])
similarity_matrix = cosine_similarity(vectors)

# Turn similarity into dissimilarity; clip the tiny negatives that rounding can produce
distance_matrix = np.clip(1 - similarity_matrix, 0, None)
# A precomputed distance matrix needs a zero diagonal. Rounding can leave small
# positive values there, and an all-zero vector (streamer with no games) has
# cosine similarity 0 with itself, which would give a self-distance of 1.
np.fill_diagonal(distance_matrix, 0)

# t-SNE requires perplexity < n_samples. Keep the library default (30) when
# there are enough streamers and shrink it for small datasets instead of failing.
n_streamers = distance_matrix.shape[0]
tsne_perplexity = min(30.0, n_streamers - 1)

# Apply t-SNE to position nodes based on the distance matrix
tsne = TSNE(
    n_components=2,
    metric="precomputed",
    random_state=42,
    init='random',  # the precomputed metric has no feature matrix to initialise from
    perplexity=tsne_perplexity,
)
positions = tsne.fit_transform(distance_matrix)

data['x'] = positions[:, 0]
data['y'] = positions[:, 1]


In [35]:
# Map dominant games to colors
unique_games = data['DominantGame'].dropna().unique()
base_palette = ['red', 'blue', 'green', 'orange', 'purple', 'pink', 'cyan', 'yellow', 'lime']

# zip() stops at the shorter input, so with only the fixed palette every game
# past the ninth would fall through to the 'gray' used for "no dominant game".
# Generate extra hex colours so each dominant game keeps its own colour; the
# first nine games still receive the named colours above.
extra_needed = max(0, len(unique_games) - len(base_palette))
extra_colors = []
for i in range(extra_needed):
    hue = ((i + 1) * 0.61803398875) % 1.0  # golden-ratio steps keep successive hues apart
    r, g, b = colorsys.hsv_to_rgb(hue, 0.6, 0.9)
    extra_colors.append('#%02x%02x%02x' % (round(r * 255), round(g * 255), round(b * 255)))

color_map = dict(zip(unique_games, base_palette + extra_colors))
# Streamers without a dominant game are drawn in gray
data['Color'] = data['DominantGame'].map(color_map).fillna('gray')

# Set up the pyvis canvas: undirected graph with white labels on a black background
net = Network(
    height="800px",
    width="100%",
    bgcolor="#000000",
    font_color="white",
    directed=False,
)

# One node per streamer, keyed and labelled by StreamerName
for _, streamer in data.iterrows():
    name = streamer['StreamerName']
    node_attrs = {
        'label': name,
        # t-SNE coordinates are multiplied by 100 before being passed as node positions
        'x': streamer['x'] * 100,
        'y': streamer['y'] * 100,
        'size': streamer['NodeSize'],
        'color': streamer['Color'],
    }
    net.add_node(name, **node_attrs)


In [36]:
# Custom physics options for vis.js, in the "var options = {...}" form that
# pyvis' Network.set_options() accepts (the object part must be valid JSON).
# NOTE(review): with physics enabled, vis.js may move nodes away from the x/y
# positions supplied when the nodes were added — confirm this is intended.
physics_options = """
var options = {
  "physics": {
    "enabled": true,
    "solver": "forceAtlas2Based",
    "forceAtlas2Based": {
      "gravitationalConstant": -50,
      "centralGravity": 0.01,
      "springLength": 100,
      "springConstant": 0.08
    },
    "minVelocity": 0.75
  }
}
"""

# Apply the options through the pyvis API. The previous approach searched the
# generated HTML for a literal '<script ...>var options = {};</script>' snippet;
# str.replace() is a silent no-op when the snippet is absent, so the custom
# physics never reached the page.
# NOTE(review): set_options appears to replace net.options with a plain dict,
# so a second call on the same `net` may fail — rebuild `net` before re-running
# this cell; confirm against the installed pyvis version.
net.set_options(physics_options)

# Generate the HTML output with the custom options embedded
custom_html_content = net.generate_html()

# Save the HTML file
output_filename = "streamer_network.html"
with open(output_filename, "w", encoding="utf-8") as f:
    f.write(custom_html_content)

print(f"Network saved to {output_filename}. Open this file in a browser to view the visualization.")

Network saved to streamer_network.html. Open this file in a browser to view the visualization.
