In [None]:
import requests
import os
import logging
from collections import Counter

# Get licence data
- from Turing organisation repos
- using GitHub's GraphQL API

In [None]:
# Root logger: INFO and above, each record prefixed with timestamp and level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

In [None]:
# GitHub GraphQL endpoint and the organisation whose repositories are queried.
url = "https://api.github.com/graphql"
org_name = 'alan-turing-institute'
# API token read from the environment; None when GITHUB_TOKEN is unset.
token = os.getenv("GITHUB_TOKEN")

In [None]:
# GraphQL document that fetches one page (up to 100) of an organisation's
# repositories. `$org` is the organisation login and `$cursor` is the
# pagination cursor fed to `after`. Each node carries the repository name,
# its license name (the caller checks `licenseInfo` for falsiness) and the
# `isPrivate` flag; `pageInfo` supplies `hasNextPage` / `endCursor` for paging.
query = '''
query($org: String!, $cursor: String) {
  organization(login: $org) {
    repositories(first: 100, after: $cursor) {
      pageInfo {
        hasNextPage
        endCursor
      }
      nodes {
        name
        licenseInfo {
          name
        }
        isPrivate
      }
    }
  }
}
'''

In [None]:
def run_query(query, variables, timeout=30):
    """Send a GraphQL query to the configured endpoint and return the decoded JSON.

    Args:
        query: GraphQL query document as a string.
        variables: Dict of values for the variables declared in the query.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would hang the
            notebook on a stalled connection.

    Returns:
        The response body parsed from JSON.

    Raises:
        Exception: If the HTTP status is not 200, or if the API answers 200
            but reports GraphQL errors without returning any data.
    """
    headers = {'Authorization': f'Bearer {token}'}
    response = requests.post(
        url,
        json={'query': query, 'variables': variables},
        headers=headers,
        timeout=timeout,
    )
    if response.status_code != 200:
        raise Exception(f"Query failed with status code: {response.status_code}")

    payload = response.json()
    # GraphQL reports query-level failures inside a 200 response, so the
    # status code alone does not prove the query succeeded.
    errors = payload.get('errors')
    if errors:
        messages = '; '.join(
            str(err.get('message', err)) if isinstance(err, dict) else str(err)
            for err in errors
        )
        if payload.get('data') is None:
            raise Exception(f"GraphQL query returned errors: {messages}")
        # Partial data is still usable; surface the errors without failing.
        logging.warning(f"GraphQL query returned partial data with errors: {messages}")
    return payload



In [None]:
def get_all_licenses():
    """Collect the license name of every repository in the organisation.

    Pages through the organisation's repositories using ``run_query`` with the
    module-level ``query`` and ``org_name``, following the pagination cursor
    until the API reports that there is no further page.

    Returns:
        tuple: ``(licenses, inaccessible_repos)``. ``licenses`` holds one
        entry per repository whose license could be determined: the license
        name, or ``'No license'`` for a non-private repository without
        license info. ``inaccessible_repos`` holds the names of private
        repositories for which no license info was returned.

    Any error while fetching is logged and stops the paging loop; whatever was
    gathered up to that point is still returned, so the lists may be partial.
    """
    licenses = []
    inaccessible_repos = []
    variables = {'org': org_name, 'cursor': None}

    while True:
        try:
            result = run_query(query, variables)

            # A missing/null organization would otherwise surface as an
            # opaque "'NoneType' object is not subscriptable" error.
            organization = (result.get('data') or {}).get('organization')
            if organization is None:
                logging.error(
                    f"Organization '{org_name}' was not returned by the API: {result.get('errors')}"
                )
                break
            repos = organization['repositories']

            for repo in repos['nodes']:
                # Skip null entries instead of abandoning the whole page.
                if repo is None:
                    logging.warning("Skipping a repository entry returned as null by the API")
                    continue

                repo_name = repo['name']
                license_info = repo['licenseInfo']

                if license_info:
                    licenses.append(license_info['name'])
                    logging.info(f"Repository: {repo_name}, License: {license_info['name']}")
                elif repo['isPrivate']:
                    inaccessible_repos.append(repo_name)
                    logging.warning(f"No access to license info for private repository: {repo_name}")
                else:
                    licenses.append('No license')
                    logging.info(f"Repository: {repo_name}, License: No license")

            page_info = repos['pageInfo']
            if not page_info['hasNextPage']:
                break
            variables['cursor'] = page_info['endCursor']

        except Exception as e:
            # Best effort: keep what was collected, but log the traceback so
            # the cause of the failure is not lost.
            logging.error(f"Error occurred while fetching data: {str(e)}", exc_info=True)
            break

    return licenses, inaccessible_repos

In [None]:
# Fail early with a clear message; slicing a missing (None) token would
# otherwise raise an unrelated TypeError.
if not token:
    raise RuntimeError("GITHUB_TOKEN environment variable is not set")

# Only record that a token is present: notebook outputs are saved with the
# file, so echoing any part of the secret (or its length) would leak it.
logging.info("Starting script with a token read from GITHUB_TOKEN")
logging.info(f"Organization name: {org_name}")

all_licenses, inaccessible_repos = get_all_licenses()

In [None]:
# Only the last expression of a cell is displayed, so show both counts as one
# tuple: (repos with a known licence status, private repos without licence info).
len(all_licenses), len(inaccessible_repos)

# Plot licence counts

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Select seaborn's "deep" colour palette as the default colour cycle.
sns.set_palette(palette="deep")

In [None]:
# One row per repository whose licence status was collected.
df = pd.DataFrame(all_licenses, columns=['license'])

# A bare expression in the middle of a cell is not rendered, so the preview
# is shown explicitly; the per-licence counts remain the cell's output.
display(df.head())
df['license'].value_counts()

In [None]:
# Dark theme with a mid-grey canvas behind both the figure and the axes.
plt.style.use('dark_background')
background = '#404040'
label_grey = 'lightgrey'

fig, ax = plt.subplots(figsize=(12, 9))
fig.patch.set_facecolor(background)
ax.set_facecolor(background)

# Count repos per licence, add the 'Inaccessible' bucket, sort descending,
# then move 'Inaccessible' to the far right whatever its size.
licence_counts = df['license'].value_counts()
licence_counts['Inaccessible'] = len(inaccessible_repos)
licence_counts_sorted = licence_counts.sort_values(ascending=False)
inaccessible_count = licence_counts_sorted.pop('Inaccessible')
inaccessible_tail = pd.Series({'Inaccessible': inaccessible_count})
licence_counts_sorted = pd.concat([licence_counts_sorted, inaccessible_tail])

licence_counts_sorted.plot.bar(ax=ax, color='goldenrod')

ax.set_title('Licences in Turing repos', fontsize=20, fontweight='bold', pad=20, color='white')
ax.set_ylabel('# repos', fontsize=18, labelpad=10, color='white')
ax.set_xlabel('')

plt.xticks(rotation=45, ha='right', fontsize=14, color=label_grey)
plt.yticks(fontsize=14, color=label_grey)

# Write each bar's count just above the top of the bar.
for position, count in enumerate(licence_counts_sorted):
    ax.text(position, count + 0.5, str(count), ha='center', va='bottom', color='white', fontsize=14)

# Drop the top/right frame lines and soften the remaining two.
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)
for side in ('bottom', 'left'):
    ax.spines[side].set_color(label_grey)

for axis_name in ('x', 'y'):
    ax.tick_params(axis=axis_name, colors=label_grey)

plt.tight_layout()

# NOTE(review): transparent=True presumably removes the grey canvas from the
# saved PNG (it would still show inline) -- confirm that is intended.
plt.savefig('turing_licenses_plot.png', format='png', dpi=600, bbox_inches='tight', transparent=True)
