<img width="10%" alt="Naas" src="https://landen.imgix.net/jtci2pxwjczr/assets/5ice39g4.png?w=160"/>

# GitHub - Send contributors score by email with chat plugin CTA

**Tags:** #github #tasks #likes #naas_drivers #operations #snippet #dataframe

**Author:** [Jeremy Ravenel](https://www.linkedin.com/in/jeremyravenel/)

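**Description:** This notebook retrieves the activity of every contributor to one or more GitHub repositories, turns it into a weighted score, and emails the resulting leaderboard together with a link to a chat plugin.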

## Input

### Import libraries

In [None]:
import os
import json
import naas 
import naas_drivers
import plotly.graph_objects as go
import requests
from naas_drivers import emailbuilder
from datetime import datetime, date
import random
import time
from dateutil.parser import parse
import matplotlib.pyplot as plt
import pandas as pd

try:
    import tiktoken
except:
    !pip install tiktoken --user
    import tiktoken

### Setup variables

In [None]:
# Scenario switch: when True, get_or_load_data() reads
# ../inputs/demo_data-github_contributions.csv (if that file exists) instead of
# using the cached CSV or querying GitHub.
is_demo = False # NOTE(review): the previous note said "Default to True" but the value is False; confirm the intended default

# GitHub credentials and the API URLs of the repositories to score
github_username = "jravenel"
github_token = naas.secret.get("GITHUB_TOKEN")
# (username, token) pair; get_or_load_data() rebuilds its own copy from the two values above
github_auth = (github_username, github_token)
repo_urls = [
    "https://api.github.com/repos/jupyter-naas/awesome-notebooks",
]

# Email settings
EMAIL_TO = "jeremy@naas.ai"  # recipient of the summary email
EMAIL_FROM = None  # sender address. Only available for your naas email, otherwise the email is sent from notification@naas.ai
# The timestamp in the subject is computed once, when this cell runs
EMAIL_SUBJECT = (f"⚙️ Operations Engine Demo - Email Update, {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
# strftime pattern used to build the `today` string shown in the email
DATE_FORMAT = "%Y-%m-%d"


# OpenAI API key read from the naas secret store
# NOTE(review): not referenced by any other cell visible in this notebook
openai_api_key = naas.secret.get("OPENAI_API_KEY")


# AI chat plugin settings; the first four values are written into the plugin JSON
plugin_name = "⚙️ Operations Agent Demo"
plugin_model = "gpt-3.5-turbo-16k"
plugin_temperature = 0
plugin_max_tokens = 8192
# Threshold above which the token check prints a warning about the system prompt size
system_prompt_max_tokens = 2084

# Output directory and the file names generated inside it
output_dir = "../outputs/"
csv_file_name = "data.csv"
image_file_name = "image.png"
plugin_file_name = "plugin.json"

## Model

### Setup directories

In [None]:
# Create the output directory when it is missing. `exist_ok=True` makes this a
# single idempotent call instead of an existence check followed by a create.
os.makedirs(output_dir, exist_ok=True)

# Build the path of every generated file inside the output directory
csv_file_path = os.path.join(output_dir, csv_file_name)
image_file_path = os.path.join(output_dir, image_file_name)
plugin_file_path = os.path.join(output_dir, plugin_file_name)
print('📂 CSV file path:', csv_file_path)
print('📂 Image file path:', image_file_path)
print('📂 Plugin file path:', plugin_file_path)

### Get or load data

In [None]:
def _github_get_json(url, auth, headers=None, strict=False):
    """Send an authenticated GET request and return the decoded JSON body.

    Args:
        url: Full API URL to request.
        auth: ``(username, token)`` tuple forwarded to ``requests.get``.
        headers: Optional extra request headers.
        strict: When True, raise ``requests.HTTPError`` on a 4xx/5xx response
            instead of returning the error payload.

    Returns:
        The decoded JSON payload of the response.
    """
    # A timeout keeps a stalled connection from blocking the notebook forever
    response = requests.get(url, auth=auth, headers=headers, timeout=30)
    if strict:
        response.raise_for_status()
    return response.json()


def _github_search_count(url, auth, headers=None):
    """Return ``total_count`` from a search response, or 0 when the key is absent."""
    # Best effort on purpose: an error payload has no 'total_count' and counts as 0
    return _github_get_json(url, auth, headers=headers).get('total_count', 0)


def get_or_load_data(
    github_username,
    github_token,
    repo_urls,
    demo_mode=is_demo
):
    """Return contributor statistics from the demo file, the cached CSV or GitHub.

    Sources are tried in this order:

    1. If ``demo_mode`` is true and the demo CSV exists, it is read, copied to
       ``csv_file_path`` (without the index) and returned.
    2. If ``csv_file_path`` exists, it is read and returned.
    3. Otherwise GitHub is queried for every URL in ``repo_urls``.

    Args:
        github_username: GitHub login used for basic authentication.
        github_token: GitHub token used for basic authentication.
        repo_urls: Iterable of repository API URLs.
        demo_mode: Use the demo CSV when available. The default is the value
            ``is_demo`` had when this function was defined.

    Returns:
        A ``pandas.DataFrame``. When built from GitHub it is indexed by
        contributor login and has the columns ``contributions``,
        ``issues_created``, ``issues_closed``, ``issue_pr_comments``,
        ``commits`` and, for users with a location, ``country``. This function
        does not write that freshly queried frame to disk.

    Raises:
        requests.HTTPError: If a repository or contributors request fails.
    """
    # Demo mode: use the bundled file when present, otherwise fall through
    if demo_mode:
        file_path = "../inputs/demo_data-github_contributions.csv"
        if os.path.exists(file_path):
            df = pd.read_csv(file_path)
            df.to_csv(csv_file_path, index=False)
            return df

    # Cached results from a previous run
    if os.path.isfile(csv_file_path):
        return pd.read_csv(csv_file_path)

    # No cache: query GitHub
    github_auth = (github_username, github_token)
    contrib_dict = {}

    for repo_url in repo_urls:
        # Fail loudly here: without these two payloads nothing can be computed
        repo_data = _github_get_json(repo_url, github_auth, strict=True)
        contrib_data = _github_get_json(
            repo_data["contributors_url"], github_auth, strict=True
        )
        # NOTE(review): only the first page of contributors is read; confirm
        # whether pagination is needed for large repositories.

        for c in contrib_data:
            login = c["login"]
            first_seen = login not in contrib_dict

            # Initialize the dictionary for the user if not already done
            if first_seen:
                contrib_dict[login] = {
                    "contributions": 0,
                    "issues_created": 0,
                    "issues_closed": 0,
                    "issue_pr_comments": 0,
                    "commits": 0,
                }

            # Contributions are per repository, so they add up across repos
            contrib_dict[login]["contributions"] += c["contributions"]

            # The lookups below are keyed on the login only (the queries carry
            # no repo qualifier), so one round per user is enough.
            if not first_seen:
                continue

            stats = contrib_dict[login]

            # Issues created by the user
            stats["issues_created"] = _github_search_count(
                f"https://api.github.com/search/issues?q=is:issue+author:{login}",
                github_auth,
            )

            # Closed issues assigned to the user
            stats["issues_closed"] = _github_search_count(
                f"https://api.github.com/search/issues?q=is:issue+is:closed+assignee:{login}",
                github_auth,
            )

            # Pull requests the user commented on
            stats["issue_pr_comments"] = _github_search_count(
                f"https://api.github.com/search/issues?q=type:pr+commenter:{login}",
                github_auth,
            )

            # Commits authored by the user
            stats["commits"] = _github_search_count(
                f"https://api.github.com/search/commits?q=author:{login}",
                github_auth,
                headers={'Accept': 'application/vnd.github.cloak-preview'},
            )

            # Country: the last comma-separated part of the profile location
            user_data = _github_get_json(
                f"https://api.github.com/users/{login}", github_auth
            )
            location = user_data.get("location")
            if location:
                stats["country"] = location.split(",")[-1].strip()

    # One row per login, with the login as the index label
    return pd.DataFrame.from_dict(contrib_dict, orient='index')

# Load the contributor statistics (demo file, cached CSV or a fresh GitHub query)
df = get_or_load_data(github_username, github_token, repo_urls)
# Last expression of the cell: shows the dataframe as the cell output
df

### Apply weights

In [None]:
# Points awarded per unit of each activity type
weights = dict(
    contributions=0.5,
    issues_created=2,
    issues_closed=3,
    issue_pr_comments=1,
    commits=0.1,
)

# Add up the weighted columns one at a time, in the order of the mapping above
weighted_total = 0
for column, weight in weights.items():
    weighted_total = weighted_total + df[column] * weight

# Store the score as whole points in the "pts" column
df["pts"] = weighted_total.round().astype(int)

### Sort df by points

In [None]:
# Highest score first
df = df.sort_values("pts", ascending=False)
df

### Prepare data

In [None]:
# Make sure the contributor logins end up in a "contributor" column, whichever
# way the dataframe was obtained.
if "Unnamed: 0" in df.columns:
    # Loaded from a CSV that was saved with its index: pandas names that
    # unlabeled first column "Unnamed: 0".
    df = df.rename(columns={"Unnamed: 0": "contributor"})
elif "contributor" not in df.columns and df.index.inferred_type == "string":
    # Built straight from the GitHub query: the logins are the index labels,
    # so promote the index to a regular column.
    df = df.rename_axis("contributor").reset_index()
df

### Generate asset from CSV

In [None]:
# Write the scored dataframe to the CSV path; the index is not written
df.to_csv(csv_file_path, index=False)

# Register the CSV with naas (presumably publishes it as a shareable asset; confirm against the naas docs)
naas.asset.add(csv_file_path)

### Create Chart

In [None]:
import plotly.graph_objects as go

def create_horizontal_barchart(df,
                               label="contributor",
                               value="pts"):
    """Build a horizontal bar chart of ``value`` per ``label``.

    Args:
        df: Dataframe holding at least the ``label`` and ``value`` columns.
        label: Column used for the bar labels (y axis).
        value: Column used for the bar lengths (x axis) and the bar text.

    Returns:
        A ``plotly.graph_objects.Figure``. It is an empty figure when ``df``
        has no rows.
    """
    fig = go.Figure()

    # Nothing to plot: hand back the empty figure
    if len(df) == 0:
        return fig

    # Ascending on purpose: for horizontal bars Plotly places the first row at
    # the bottom, so the largest value ends up at the top of the chart.
    df = df.sort_values(by=value, ascending=True)

    fig.add_trace(
        go.Bar(
            y=df[label],            # categories
            x=df[value],            # bar lengths
            text=df[value],         # value printed next to each bar
            textposition="outside",
            marker=dict(color="#181a1c"),
            orientation="h",
        )
    )

    # Logo slot: no `source` is given, so this only reserves the position.
    # Pass `source=...` here to show an actual logo.
    fig.add_layout_image(
        dict(
            xref="paper",
            yref="paper",
            x=0.28,
            y=.035,
            sizex=0.15,
            sizey=0.15,
            xanchor="right",
            yanchor="bottom",
        )
    )

    fig.update_traces(showlegend=False)

    # Title markup with properly closed tags
    title = "<b><span style='font-size: 20px;'>GitHub Contribution Leaderboard</span></b>"
    fig.update_layout(
        title=title,
        title_font=dict(family="Arial", color="black"),
        paper_bgcolor="#ffffff",
        plot_bgcolor="#ffffff",
        width=1200,
        height=600,
    )

    fig.update_xaxes(showticklabels=True)

    return fig

# Build the leaderboard chart for the current dataframe; as the last expression
# of the cell, the returned figure is what the notebook shows
create_horizontal_barchart(df)


### Generate asset from chart

In [None]:
import plotly.io as pio

# Build the chart again so this cell does not depend on the previous cell's output
chart = create_horizontal_barchart(df)

# Export the figure to the image path (static export presumably needs an image engine such as kaleido; confirm in the runtime)
pio.write_image(chart, image_file_path)

# Register the image with naas; the returned value is later embedded in the
# system prompt and in the email body
graph_image = naas.asset.add(image_file_path)

### Set email parameters

In [None]:
# Current local date formatted with DATE_FORMAT; shown in the email heading and intro
today = datetime.now().strftime(DATE_FORMAT)
today

### Create NaasAI Chat plugin

In [None]:
# The table is embedded with DataFrame.to_string(): interpolating `df` directly
# would use the pandas repr, which replaces rows and columns of larger frames
# with "..." and would hide part of the data from the model.
system_prompt = f"""
Act as an Operations Efficiency Agent with access to comprehensive data sources, including detailed GitHub contributor activity. 
Your primary role is to analyze and optimize the list of contributors, ensuring you identify the most active and influential individuals to guide collaborative strategies. 
Leverage the data to decipher patterns, contributions, and engagement levels to strategize on operations and collaborations.
Your ultimate goal is to foster a high-performing and cohesive community, playing a pivotal role in the overall success of the project's development and growth.
- Start by introducing yourself with a maximum of 5 bullet points.
- Display the current GitHub analytics data as an image inside the markdown of the chat: {graph_image}.
Wait for the user's initial response, and then delve into a high-level analysis of the contributors' data.
Here is the specific GitHub analytics data you should emphasize: {df.to_string()}
"""

### Check token count 

In [None]:
def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Count the tokens `string` produces under the named tiktoken encoding."""
    return len(tiktoken.get_encoding(encoding_name).encode(string))

# Measure the prompt and report whether it stays within the configured budget
system_prompt_tokens = num_tokens_from_string(system_prompt, "cl100k_base")
within_budget = system_prompt_tokens <= system_prompt_max_tokens
message = (
    "✅ System prompt tokens count OK:"
    if within_budget
    else "⚠️ Be carefull, your system prompt looks too big. Tokens:"
)
print(message, system_prompt_tokens)

### Generate plugin from JSON

In [None]:
# Plugin definition. It is kept under its own name so that `plugin` only ever
# refers to the value returned by naas.asset.add below.
plugin_config = {
    "name": plugin_name,
    "model": plugin_model,
    "temperature": plugin_temperature,
    "max_tokens": plugin_max_tokens,
    "prompt": system_prompt,
}

# Save the definition as JSON and report where it was written
with open(plugin_file_path, "w", encoding="utf-8") as f:
    json.dump(plugin_config, f)
print("💾 Plugin successfully saved:", plugin_file_path)

# Register the JSON file with naas; the returned value is interpolated into
# the chat link of the email button
plugin = naas.asset.add(plugin_file_path, params={"inline": True})

### Create Top 10

In [None]:
def format_number(num):
    """Format `num` with no decimals and a space as the thousands separator."""
    grouped = f"{num:,.0f}"
    return grouped.replace(",", " ")

In [None]:
def get_top_contributors(df, top_n=10):
    """Return the `top_n` highest-scoring contributors as numbered HTML lines.

    Each line has the form ``"<rank>. <contributor> : <b><score> pts</b>"``,
    with the rank starting at 1 and the score rendered by ``format_number``.
    """
    ranked = df.sort_values(by='pts', ascending=False).head(top_n)
    return [
        f"{rank}. {row['contributor']} : <b>{format_number(row['pts'])} pts</b>"
        for rank, (_, row) in enumerate(ranked.iterrows(), start=1)
    ]

# Build the ranked lines and preview them, one per row
top_contributors_list = get_top_contributors(df)
print(*top_contributors_list, sep="\n")

## Output

### Create email content

In [None]:
def email_brief(today, top_contributors_list):
    """Assemble the HTML body of the operations email.

    Args:
        today: Date string shown in the heading and in the introduction.
        top_contributors_list: Lines rendered as the "Top 10" list.

    Returns:
        Whatever ``emailbuilder.generate`` returns for the assembled sections.
        The chart image and the chat link come from the notebook-level
        ``graph_image`` and ``plugin`` variables.
    """
    # Sections are created in display order; the dict below keeps that order
    intro = f"Hi there,<br><br>Here is your operations engine email as of {today}."
    overview_title = emailbuilder.text(
        "Overview", font_size="27px", text_align="center", bold=True
    )
    overview_text = emailbuilder.text(
        "Here are the total activity scores of your community by contributors since the start of the project:"
    )
    chart_image = emailbuilder.image(graph_image)
    top_title = emailbuilder.text(
        "Top 10", font_size="27px", text_align="center", bold=True
    )
    top_list = emailbuilder.list(top_contributors_list)
    chat_button = emailbuilder.button(
        link=f"https://naas.ai/chat/use?plugin_url={plugin}",
        text="Start Chatting With Agent",
        background_color="#181a1c",
    )
    footer = emailbuilder.footer_company(naas=True)

    sections = {
        "title": "⚙️ Operations Engine - Email Update",
        "heading": f"Date:{today}",
        "txt_intro": intro,
        "title_1": overview_title,
        "text_1": overview_text,
        "image_1": chart_image,
        "title_2": top_title,
        "text": top_list,
        "button_1": chat_button,
        "footer_cs": footer,
    }
    return emailbuilder.generate(display="iframe", **sections)


# Build the email body from today's date and the ranked contributor lines
email_content = email_brief(
    today,
    top_contributors_list,
)

## Output

### Send contributors score by email

In [None]:
# Send the generated HTML to EMAIL_TO with the configured subject and sender
# (EMAIL_FROM is None in the setup cell)
naas.notification.send(
    email_to=EMAIL_TO, subject=EMAIL_SUBJECT, html=email_content, email_from=EMAIL_FROM
)