In [12]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


In [13]:
# Define the column headers
column_headers = [
    'Score', 'Pass', 'Organisation', 'Repo_Name', 'Finos_Lifecycle_State', 'License',
    'Issue_Activity', 'Commit_Activity', 'OpenSSF_Status', 'Github_Archived',
    'Branch_Rules_Private', 'SemGrep', 'CVE_Scanning', 'Default_Branch_Name', 'Excess_Admins',
    'Main_Issue_Participants', 'Main_Committers', 'Length_of_Readme'
]

url = 'https://raw.githubusercontent.com/robmoffat/landscape-scanning/main/scan.csv'
df = pd.read_csv(url, header=None, names=column_headers)

# Convert relevant columns to numeric and handle errors
numeric_columns = ['Score', 'Issue_Activity', 'Commit_Activity', 'Length_of_Readme']
for column in numeric_columns:
    df[column] = pd.to_numeric(df[column], errors='coerce')

# Fill missing numeric values with 0
df[numeric_columns] = df[numeric_columns].fillna(0)


In [16]:
# Define ranges for issue and commit activity with inclusive bins
issue_bins = [0, 10, 20, 50, 100, float('inf')]
commit_bins = [0, 10, 20, 50, 100, float('inf')]

# Categorize projects into these ranges
df['Issue_Range'] = pd.cut(df['Issue_Activity'], bins=issue_bins, labels=['0-10', '11-20', '21-50', '51-100', '100+'], right=False)
df['Commit_Range'] = pd.cut(df['Commit_Activity'], bins=commit_bins, labels=['0-10', '11-20', '21-50', '51-100', '100+'], right=False)

# Count the number of projects in each range
issue_range_counts = df['Issue_Range'].value_counts().sort_index()
commit_range_counts = df['Commit_Range'].value_counts().sort_index()

# Create a summary DataFrame
summary_df = pd.DataFrame({
    'Issue Activity Range': issue_range_counts.index,
    'Number of Projects (Issue Activity)': issue_range_counts.values,
    'Commit Activity Range': commit_range_counts.index,
    'Number of Projects (Commit Activity)': commit_range_counts.values
})

# Add a total row
total_row = pd.DataFrame({
    'Issue Activity Range': ['Total'],
    'Number of Projects (Issue Activity)': [issue_range_counts.sum()],
    'Commit Activity Range': ['Total'],
    'Number of Projects (Commit Activity)': [commit_range_counts.sum()]
})

summary_df = pd.concat([summary_df, total_row], ignore_index=True)


In [17]:
from IPython.display import display, HTML

# Top 20 projects by issue activity
top_20_issues = df[['Repo_Name', 'Issue_Activity']].sort_values(by='Issue_Activity', ascending=False).head(20)

# Top 20 projects by commit activity
top_20_commits = df[['Repo_Name', 'Commit_Activity']].sort_values(by='Commit_Activity', ascending=False).head(20)

# Projects with issue activity below 10, sorted in descending order
bottom_issues = df[['Repo_Name', 'Issue_Activity']].loc[df['Issue_Activity'] < 10].sort_values(by='Issue_Activity', ascending=False)

# Projects with commit activity below 10, sorted in descending order
bottom_commits = df[['Repo_Name', 'Commit_Activity']].loc[df['Commit_Activity'] < 10].sort_values(by='Commit_Activity', ascending=False)

# Convert dataframes to HTML tables
top_20_issues_html = top_20_issues.to_html(index=False)
top_20_commits_html = top_20_commits.to_html(index=False)
bottom_issues_html = bottom_issues.to_html(index=False)
bottom_commits_html = bottom_commits.to_html(index=False)

# Displaying the tables side by side
html_content = f"""
<div style="display: flex; justify-content: space-around;">
    <div style="margin-right: 20px;">
        <h3>Top 20 Projects by Issue Activity</h3>
        {top_20_issues_html}
    </div>
    <div>
        <h3>Top 20 Projects by Commit Activity</h3>
        {top_20_commits_html}
    </div>
</div>
<br>
<div style="display: flex; justify-content: space-around;">
    <div style="margin-right: 20px;">
        <h3>Projects with Issue Activity Below 10</h3>
        {bottom_issues_html}
    </div>
    <div>
        <h3>Projects with Commit Activity Below 10</h3>
        {bottom_commits_html}
    </div>
</div>
"""

# Display the summary table
display(summary_df)
display(HTML(html_content))


Unnamed: 0,Issue Activity Range,Number of Projects (Issue Activity),Commit Activity Range,Number of Projects (Commit Activity)
0,0-10,173,0-10,157
1,11-20,5,11-20,6
2,21-50,3,21-50,11
3,51-100,9,51-100,11
4,100+,9,100+,14
5,Total,199,Total,199


Repo_Name,Issue_Activity
common-cloud-controls,291.0
common-domain-model,251.0
devops-automation,235.0
a11y-theme-builder,235.0
FDC3,206.0
git-proxy,167.0
open-source-readiness,125.0
technical-oversight-committee,109.0
DEI-SIG,109.0
traderX,95.0

Repo_Name,Commit_Activity
FDC3,100.0
legend-studio,100.0
common-domain-model,100.0
git-proxy,100.0
open-source-readiness,100.0
architecture-as-code,100.0
morphir-elm,100.0
legend-engine,100.0
SymphonyElectron,100.0
calendar,100.0

Repo_Name,Issue_Activity
finos-landscape,9.0
spring-bot,9.0
symphony-bdk-java,6.0
cla-bot,3.0
jupyterlab_templates,3.0
SymphonyMediaBridge,3.0
fdc3-dotnet,3.0
morphir-scala,2.0
plexus-interop,2.0
legend-engine,2.0

Repo_Name,Commit_Activity
tracdap,8.0
DEI-SIG,7.0
ipyregulartable,7.0
ai-readiness,7.0
clabot-config,6.0
pylegend,6.0
compliant-financial-infrastructure,5.0
ambassadors,5.0
symphony-api-spec,5.0
morphir-jvm,5.0
