Load CSV data into a pandas DataFrame.

In [None]:
import pandas as pd

# Read the kernel table from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer='data.csv')

Get the top 20 kernel creators by count.

In [None]:
# value_counts() orders authors from most to fewest rows, so the first
# 20 entries are the 20 most frequent authors.
top_creators = df['KernelAuthor'].value_counts().iloc[:20]

Count the number of entries for each performance tier.

In [None]:
# Number of rows recorded for each distinct performance-tier value.
performance_tiers = (
    df['KernelAuthorPerformanceTier']
    .value_counts()
)

Count the number of kernels by medal type.

In [None]:
# Tally rows per medal value; rows whose medal is NaN are left out
# (dropna=True is the value_counts default, spelled out here).
kernel_medals = df['medal'].value_counts(dropna=True)

Count the number of kernels per programming language.

In [None]:
# Rows per language, most common language first (the value_counts
# defaults, written out explicitly).
kernels_by_language = df['languageName'].value_counts(
    sort=True,
    ascending=False,
)

Calculate the mean of the `isFork` column, treating NaN as 0.

In [None]:
# Mean of the isFork column after replacing missing entries with 0.
# NOTE(review): presumably isFork is a 0/1 flag, which would make this
# the share of kernels that are forks -- confirm against the data.
forks_analysis = df['isFork'].fillna(value=0).mean()

Get descriptive statistics for lines changed in forks.

In [None]:
# Summary statistics (as produced by Series.describe) for the
# ForkLinesChanged column.
lines_changed_forks = (
    df['ForkLinesChanged']
    .describe()
)

Calculate the mean usage of GPU, treating NaN as 0.

In [None]:
# Mean of the isGpuEnabled column, counting missing entries as 0.
gpu_usage = (
    df['isGpuEnabled']
    .fillna(0)
    .mean()
)

Visualize the relationship between upvotes and views.

In [None]:
# Scatter plot with upvotes on the x-axis and views on the y-axis;
# the plotting call's return value is kept for later tweaks.
upvotes_vs_views = df.plot(kind='scatter', x='upvotes', y='views')

Sort kernels by the total number of comments.

In [None]:
# Keep only the title and comment count, ordered by comment count
# (ascending, the sort_values default).
# The original target name contained spaces ("best_score vs
# total_comments"), which is a SyntaxError; it is now a valid identifier.
best_score_vs_total_comments = (
    df[['title', 'totalComments']]
    .sort_values(by='totalComments')
)

Create a directed graph for the kernels.

In [None]:
import networkx as nx
# Start from an empty directed graph (no initial nodes or edges).
G = nx.DiGraph(incoming_graph_data=None)

Collect the connected components of the graph into a list.

In [None]:
# G is a DiGraph, and nx.connected_components() is only implemented for
# undirected graphs (it raises NetworkXNotImplemented on directed input).
# Weakly connected components are the directed-graph equivalent: nodes
# are grouped as if edge direction were ignored. Each component is a set
# of nodes.
cleaned_components = list(nx.weakly_connected_components(G))

Visualize the fork graph.

In [None]:
# Render G with networkx's default layout on the current Matplotlib axes
# (pos=None and ax=None are the defaults, written out here).
nx.draw(G, pos=None, ax=None)

Calculate average subtree size and votes.

In [None]:
# Column-wise mean of the two selected columns (one value per column).
# NOTE(review): `forks` is not defined in the cells shown here --
# presumably a DataFrame built elsewhere in the notebook; confirm.
subtree_size_votes = forks[['subtreeSize', 'votes']].mean(axis=0)

Get the top 20 most forked kernels.

In [None]:
# Order rows by TotalForkedKernels, largest first, and keep the first 20.
# NOTE(review): `forks` is not defined in the cells shown here -- confirm
# it is created earlier in the notebook.
most_forked_kernels = (
    forks
    .sort_values(by='TotalForkedKernels', ascending=False)
    .iloc[:20]
)

Get the top 20 kernels with the most cumulative votes.

In [None]:
# Order rows by CumulatedVotes, largest first, and keep the first 20.
# NOTE(review): `forks` is not defined in the cells shown here -- confirm
# it is created earlier in the notebook.
most_cumulative_votes = (
    forks
    .sort_values(by='CumulatedVotes', ascending=False)
    .head(20)
)