# GitHub Stars Analysis for `daytonaio/daytona`
This Jupyter notebook analyzes GitHub star data for the `daytonaio/daytona` repository. It covers data collection, temporal analysis, user demographics, engagement metrics, and data visualization.


In [2]:
# Importing necessary libraries
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns
from datetime import datetime, timedelta
import networkx as nx

# Load environment variables
import os
from dotenv import load_dotenv

load_dotenv()

# API token for accessing GitHub API (read from the environment / .env file).
GITHUB_TOKEN = os.getenv('GITHUB_TOKEN')
# Only send an Authorization header when a token is actually configured:
# formatting a missing token would produce the literal value "token None",
# which is sent as (invalid) credentials instead of making an anonymous request.
HEADERS = {'Authorization': f'token {GITHUB_TOKEN}'} if GITHUB_TOKEN else {}

# Set up matplotlib
plt.style.use('ggplot')


ModuleNotFoundError: No module named 'requests'

## Data Collection
We will collect stargazer data using the GitHub API. To handle rate limits, we will implement batch processing and store data in a CSV file.


In [None]:
# Function to fetch stargazer data
def fetch_stargazer_data(owner, repo, session=None, headers=None, csv_path='stargazers.csv'):
    """Download the stargazers of ``owner/repo`` and return them as a DataFrame.

    Pages through the GitHub stargazers endpoint 100 entries at a time until an
    empty page or a non-200 response is received.

    Parameters
    ----------
    owner, repo : str
        Repository coordinates, e.g. ``'daytonaio'`` and ``'daytona'``.
    session : object with a ``get`` method, optional
        HTTP client to use (for example a ``requests.Session``). Defaults to
        the ``requests`` module.
    headers : dict, optional
        Base request headers. Defaults to the notebook-level ``HEADERS``.
    csv_path : str or None, optional
        Where to save the collected rows. Pass ``None`` to skip writing.

    Returns
    -------
    pandas.DataFrame
        One row per stargazer with the columns ``user``, ``starred_at``,
        ``user_type``, ``site_admin``, ``followers_url``, ``following_url``,
        ``repos_url`` and ``gists_url``. The columns are present even when no
        stargazer was fetched.

    Warns
    -----
    UserWarning
        If GitHub answers with a non-200 status, in which case the rows
        collected so far are returned and may be incomplete.
    """
    import warnings

    http = requests if session is None else session
    base_headers = HEADERS if headers is None else headers
    # The star+json media type makes GitHub wrap every entry as
    # {'starred_at': ..., 'user': {...}}; the default media type returns bare
    # user objects, which have neither key.
    request_headers = {**base_headers, 'Accept': 'application/vnd.github.star+json'}

    url = f"https://api.github.com/repos/{owner}/{repo}/stargazers"
    columns = [
        'user', 'starred_at', 'user_type', 'site_admin',
        'followers_url', 'following_url', 'repos_url', 'gists_url',
    ]
    stargazer_data = []
    page = 1

    while True:
        response = http.get(
            url,
            headers=request_headers,
            params={'per_page': 100, 'page': page},
            timeout=30,
        )

        # Check the status before touching the body: error responses are not
        # guaranteed to carry the expected JSON list.
        if response.status_code != 200:
            warnings.warn(
                f"Stopped at page {page}: GitHub returned HTTP {response.status_code}; "
                "stargazer data may be incomplete."
            )
            break

        data = response.json()
        if not data:
            break

        # Append user data to list
        for entry in data:
            user = entry['user']
            stargazer_data.append({
                'user': user['login'],
                'starred_at': entry['starred_at'],
                'user_type': user['type'],
                'site_admin': user['site_admin'],
                'followers_url': user['followers_url'],
                # These two URLs are URI templates; drop the optional suffix so
                # they can be requested directly.
                'following_url': user['following_url'].replace('{/other_user}', ''),
                'repos_url': user['repos_url'],
                'gists_url': user['gists_url'].replace('{/gist_id}', ''),
            })

        page += 1

    # Build the frame once; explicit columns keep the schema stable when empty.
    stargazers = pd.DataFrame(stargazer_data, columns=columns)

    # Save collected data to CSV
    if csv_path is not None:
        stargazers.to_csv(csv_path, index=False)
    return stargazers

# Collect the stargazers of the daytonaio/daytona repository into a DataFrame
stargazers_df = fetch_stargazer_data(owner='daytonaio', repo='daytona')


## Temporal Analysis
We analyze the star growth over time: daily, weekly, and monthly trends.


In [None]:
# Parse dates as timezone-aware UTC timestamps so every row shares one timezone
stargazers_df['starred_at'] = pd.to_datetime(stargazers_df['starred_at'], utc=True)

# Periods cannot carry a timezone. Dropping it explicitly (keeping the UTC
# wall-clock time) avoids the "will drop timezone information" warning that
# to_period emits on timezone-aware data.
starred_naive = stargazers_df['starred_at'].dt.tz_localize(None)

# Daily, Weekly, and Monthly analysis
stargazers_df['day'] = stargazers_df['starred_at'].dt.date
# .dt.start_time is the vectorized equivalent of applying `lambda r: r.start_time`
stargazers_df['week'] = starred_naive.dt.to_period('W').dt.start_time
stargazers_df['month'] = starred_naive.dt.to_period('M').dt.start_time

# Number of new stars in each day / week / month bucket
daily_stars = stargazers_df.groupby('day').size()
weekly_stars = stargazers_df.groupby('week').size()
monthly_stars = stargazers_df.groupby('month').size()


## User Demographics
Analyze user demographics based on 'Type' and 'Site Admin' status.


In [None]:
# Frequency tables for the `user_type` and `site_admin` columns
user_type_distribution, site_admin_distribution = (
    stargazers_df[column].value_counts()
    for column in ('user_type', 'site_admin')
)


## User Engagement Metrics
Analyze followers/following patterns for each stargazer using data fetched from the GitHub API.


In [None]:
def fetch_follow_data(url_list, session=None, headers=None):
    """Count the items listed at each GitHub list URL (followers / following).

    GitHub list endpoints are paginated, so taking ``len()`` of a single
    response only counts the first page. This walks every page (100 items per
    request) and sums the page sizes.

    Note: one request is made per 100 items, so accounts with very large
    follower lists are expensive to count.

    Parameters
    ----------
    url_list : iterable of str
        List-endpoint URLs, one per user.
    session : object with a ``get`` method, optional
        HTTP client to use (for example a ``requests.Session``). Defaults to
        the ``requests`` module.
    headers : dict, optional
        Request headers. Defaults to the notebook-level ``HEADERS``.

    Returns
    -------
    list
        One entry per URL, in order: the item count, or ``np.nan`` when any
        page request for that URL did not return HTTP 200.
    """
    http = requests if session is None else session
    request_headers = HEADERS if headers is None else headers
    per_page = 100

    counts = []
    for url in url_list:
        total = 0
        page = 1
        while True:
            response = http.get(
                url,
                headers=request_headers,
                params={'per_page': per_page, 'page': page},
                timeout=30,
            )
            if response.status_code != 200:
                # A partial sum would be misleading; report the count as unknown.
                total = np.nan
                break
            batch = response.json()
            total += len(batch)
            # A page that is not full is the last one.
            if len(batch) < per_page:
                break
            page += 1
        counts.append(total)
    return counts

# Look up how many accounts each stargazer is followed by / is following
follower_urls = stargazers_df['followers_url']
following_urls = stargazers_df['following_url']

stargazers_df['followers_count'] = fetch_follow_data(follower_urls)
stargazers_df['following_count'] = fetch_follow_data(following_urls)


## Data Visualization
Visualize star growth, user distributions, and interaction heatmaps.


In [None]:
# Line charts of the number of stars per day, week, and month
fig_daily = px.line(daily_stars, title='Daily Star Growth')
fig_weekly = px.line(weekly_stars, title='Weekly Star Growth')
fig_monthly = px.line(monthly_stars, title='Monthly Star Growth')
for growth_fig in (fig_daily, fig_weekly, fig_monthly):
    growth_fig.show()

# Pie chart of the `user_type` value counts
fig_user_type = px.pie(
    user_type_distribution,
    names=user_type_distribution.index,
    values=user_type_distribution.values,
    title='User Type Distribution',
)
fig_user_type.show()

# Heatmaps for time-of-day and day-of-week starring patterns
stargazers_df['hour'] = stargazers_df['starred_at'].dt.hour
stargazers_df['weekday'] = stargazers_df['starred_at'].dt.weekday

# Count stars per (weekday, hour) cell. Combinations with no stars would
# otherwise become NaN, which turns the table into floats and makes the
# integer annotation format ('d') fail; fill them with 0 and keep integers.
# Reindexing guarantees the full 7 x 24 grid even if a weekday or hour is absent.
hour_weekday_heatmap = (
    stargazers_df
    .pivot_table(index='weekday', columns='hour', values='user', aggfunc='count', fill_value=0)
    .reindex(index=range(7), columns=range(24), fill_value=0)
    .astype(int)
)
sns.heatmap(hour_weekday_heatmap, cmap='YlGnBu', annot=True, fmt='d')
plt.title('Starring Patterns by Time and Day')
plt.xlabel('Hour of the Day')
plt.ylabel('Day of the Week')
plt.show()


## Network Graph of User Connections
(Optional) Visualize a subset of the network graph of user connections based on followers.


In [None]:
# Undirected graph linking stargazers to the accounts that follow them
G = nx.Graph()

# Keep the drawing readable: only the first 30 stargazers are expanded
limited_df = stargazers_df.head(30)

for login, followers_url in zip(limited_df['user'], limited_df['followers_url']):
    G.add_node(login)
    payload = requests.get(followers_url, headers=HEADERS).json()

    # Only a JSON list is treated as follower data; anything else is skipped
    if not isinstance(payload, list):
        continue

    # add_edges_from also creates any follower node that does not exist yet
    G.add_edges_from((login, follower['login']) for follower in payload)

# Render the graph without labels, using small markers
nx.draw_networkx(G, with_labels=False, node_size=50)
plt.title("User Connections Graph")
plt.show()


## Cross-referencing User Activity
Analyze user activity patterns by cross-referencing with public repositories and gists.


In [None]:
def _count_listed_items(url, session=None, headers=None, per_page=100):
    """Return the total number of items behind one paginated GitHub list URL.

    Returns ``np.nan`` if any page request does not come back with HTTP 200.
    """
    http = requests if session is None else session
    request_headers = HEADERS if headers is None else headers
    # Some GitHub URLs are URI templates (e.g. ".../gists{/gist_id}"); only the
    # part before the template expression is a requestable list endpoint.
    endpoint = url.split('{', 1)[0]

    total = 0
    page = 1
    while True:
        response = http.get(
            endpoint,
            headers=request_headers,
            params={'per_page': per_page, 'page': page},
            timeout=30,
        )
        if response.status_code != 200:
            # A partial sum would be misleading; report the count as unknown.
            return np.nan
        batch = response.json()
        total += len(batch)
        # A page that is not full is the last one.
        if len(batch) < per_page:
            return total
        page += 1


def fetch_repo_data(url_list, session=None, headers=None):
    """Count the repositories listed at each URL in ``url_list``.

    Every page of each listing is read, so the count is not capped at the size
    of the first response page.

    Parameters
    ----------
    url_list : iterable of str
        Repository-list URLs, one per user.
    session : object with a ``get`` method, optional
        HTTP client to use. Defaults to the ``requests`` module.
    headers : dict, optional
        Request headers. Defaults to the notebook-level ``HEADERS``.

    Returns
    -------
    list
        One count per URL, in order; ``np.nan`` where a request failed.
    """
    return [_count_listed_items(url, session, headers) for url in url_list]


def fetch_gist_data(url_list, session=None, headers=None):
    """Count the gists listed at each URL in ``url_list``.

    URLs may still carry a URI-template suffix such as ``{/gist_id}``; it is
    stripped before the request is made.

    Parameters
    ----------
    url_list : iterable of str
        Gist-list URLs, one per user.
    session : object with a ``get`` method, optional
        HTTP client to use. Defaults to the ``requests`` module.
    headers : dict, optional
        Request headers. Defaults to the notebook-level ``HEADERS``.

    Returns
    -------
    list
        One count per URL, in order; ``np.nan`` where a request failed.
    """
    return [_count_listed_items(url, session, headers) for url in url_list]

# Add per-stargazer counts of public repositories and gists
repo_urls = stargazers_df['repos_url']
gist_urls = stargazers_df['gists_url']

stargazers_df['repos_count'] = fetch_repo_data(repo_urls)
stargazers_df['gists_count'] = fetch_gist_data(gist_urls)
