In [1]:
import requests
import csv

# Personal access token to access GitHub API
personal_access_token = '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'

def fetch_issues(state): # Function to fetch issue data from GitHub
    issues = [] # List to hold issue data
    page = 1 # Starting page, needed because GitHub limits the amount of data retrieved per page
    while True: # Loop to fetch all issue data
        url = f"https://api.github.com/repos/npm/node-semver/issues?state={state}&page={page}" # GitHub API URL to fetch issue data
        headers = {
            "Accept": "application/vnd.github.v3+json", # Header to accept response in JSON format, using JSON because it's easier to process than responses in HTML or XML format
            "Authorization": f"token {personal_access_token}" # Header for authentication using personal access token
        }
        response = requests.get(url, headers=headers) # Send request to GitHub API
        if response.status_code != 200: # Check if response status code is 200 (OK)
            raise Exception(f"GitHub API error: {response.status_code}") # If not, raise exception
        data = response.json() # Get response data in JSON format
        if not data: # If data is empty, meaning no more data, then stop loop
            break   
        for item in data: # Loop to fetch issue data
            # Check if item is a pull request, if not, add to issues list
            if 'pull_request' not in item:
                issues.append(item) # Add issue data to issues list
        page += 1 # Increment page number to fetch next set of issue data
    return issues # Return issues list


# Fetch all issues
open_issues = fetch_issues('open') # Fetch all open issues
closed_issues = fetch_issues('closed') # Fetch all closed issues


# Save data to CSV
with open('node-semver.csv', mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["No", "Title", "Tags", "Status", "Link"])
    
    for issue in open_issues + closed_issues:
        no = issue['number'] # Get issue number
        title = issue['title'] # Get issue title
        tags = ";".join([label['name'] for label in issue['labels']]) # Get issue tags
        status = "Open" if issue['state'] == 'open' else "Closed" # Get issue status
        link = issue['html_url'] # Get issue link
        
        writer.writerow([no, title, tags, status, link]) # Write data to CSV

print("Data successfully saved to node-semver.csv")


Data successfully saved to node-semver.csv


In [2]:
#count how many Title in stdlib-list.csv
import pandas as pd
df_stdlib = pd.read_csv('node-semver.csv')
total_title = df_stdlib['Title'].count()
total_open = df_stdlib['Status'].value_counts()['Open']
total_closed = df_stdlib['Status'].value_counts()['Closed']
print(f"Total Title: {total_title}")
print(f"Total Open: {total_open}")
print(f"Total Closed: {total_closed}")


Total Title: 335
Total Open: 37
Total Closed: 298


In [3]:
from tabulate import tabulate
# Read the csv file into a DataFrame
df_warehouse = pd.read_csv('node-semver.csv')

# Sort the DataFrame based on the "No" column
df_warehouse_sorted = df_warehouse.sort_values(by='No')

# Convert the sorted DataFrame to a table using tabulate
table = tabulate(df_warehouse_sorted, headers='keys', tablefmt='psql', showindex=False)

print(table)



+------+-------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------+----------+-----------------------------------------------+
|   No | Title                                                                                                                                                 | Tags                                                 | Status   | Link                                          |
|------+-------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------+----------+-----------------------------------------------|
|    1 | Easily increment a given semver string                                                                                                                | nan           

> Source Code and Documentation

In [2]:
import csv
from tabulate import tabulate

# Define the paths to the CSV files
csv_file_paths = ['node-semver_commit.csv']

# Initialize a list to hold the processed data
data = []

csv.field_size_limit(130000000)

# Open and read the CSV files
for csv_file_path in csv_file_paths:
    with open(csv_file_path, newline='', encoding='utf-8') as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            link = row['Link']  # Extract the link

            # Process "Lines Added" and "Lines Removed" columns
            for action in ['Lines Added', 'Lines Removed']:
                if row[action]:
                    # Split the string by commas to handle multiple files
                    file_changes = row[action].split(", ")
                    for file_change in file_changes:
                        if ':' in file_change:
                            file_name, lines = file_change.rsplit(':', 1)  # Split by the last occurrence of ':' to handle ':' in file paths
                            lines = int(lines)  # Convert the number of lines to an integer
                            # Check if file_name is not 'None' or empty
                            if file_name and file_name != 'None':
                                file_type = 'Source Code' if file_name.endswith('.js') else 'Documentation'
                                # Check for duplicates based on file name and link
                                existing_entry = next((d for d in data if d[0] == file_name and d[2] == link), None)
                                if existing_entry:
                                    # Update the existing entry with lines added or removed
                                    if action == 'Lines Added':
                                        data[data.index(existing_entry)] = (file_name, file_type, link, lines, existing_entry[4])
                                    else:
                                        data[data.index(existing_entry)] = (file_name, file_type, link, existing_entry[3], lines)
                                else:
                                    # Add a new entry
                                    if action == 'Lines Added':
                                        data.append((file_name, file_type, link, lines, 0))
                                    else:
                                        data.append((file_name, file_type, link, 0, lines))

# Sort the data by file name
data.sort(key=lambda x: x[0])

# Prepare the table header
headers = ["No", "File Name", "Format", "Link", "Total Lines Added", "Total Lines Removed"]

# Prepare the table rows with data enumeration
rows = []
for i, (file_name, file_type, link, total_added, total_removed) in enumerate(data):
    rows.append((i+1, file_name, file_type, link, total_added, total_removed))



# Print the total of format Documentation and Source Code
total_documentation = sum(1 for row in data if row[1] == 'Documentation')
total_source_code = sum(1 for row in data if row[1] == 'Source Code')
print(f"Total Documentation: {total_documentation}")
print(f"Total Source Code: {total_source_code}")

# Print the tabulated table
print(tabulate(rows, headers=headers, tablefmt="grid"))



Total Documentation: 541
Total Source Code: 538
+------+--------------------------------+---------------+------------------------------------------------------------------------------------+---------------------+-----------------------+
|   No | File Name                      | Format        | Link                                                                               |   Total Lines Added |   Total Lines Removed |
|    1 | .commitlintrc.js               | Source Code   | https://github.com/npm/node-semver/commit/4907647d169948a53156502867ed679268063a9f |                  10 |                     0 |
+------+--------------------------------+---------------+------------------------------------------------------------------------------------+---------------------+-----------------------+
|    2 | .eslintrc.js                   | Source Code   | https://github.com/npm/node-semver/commit/4907647d169948a53156502867ed679268063a9f |                  14 |                     0 |
+------