In [35]:
# Import des biblioth√®ques n√©cessaires
import requests
import pandas as pd
import json
from datetime import datetime, timedelta
import os
from dotenv import load_dotenv
import matplotlib.pyplot as plt
import numpy as np

load_dotenv()

True

In [36]:
# Configuration de l'API GitHub
GITHUB_API_URL = "https://api.github.com"

# Optionnel : Token d'authentification pour augmenter les limites de l'API
TOKEN = os.getenv("GITHUB_TOKEN")
headers = {"Authorization": f"token {TOKEN}"}

In [37]:
def get_repositories(query="", sort="stars", per_page=30):
    """
    R√©cup√®re les repositories depuis l'API GitHub
    
    Args:
        query (str): Terme de recherche (ex: "python", "machine learning")
        sort (str): Tri par 'stars', 'forks', 'updated'
        per_page (int): Nombre de r√©sultats par page (max 100)
    
    Returns:
        list: Liste des repositories
    """
    url = f"{GITHUB_API_URL}/search/repositories"
    params = {
        "q": query if query else "stars:>1000",  # Par d√©faut, projets avec plus de 1000 √©toiles
        "sort": sort,
        "order": "desc",
        "per_page": per_page
    }
    
    try:
        response = requests.get(url, headers=headers, params=params)
        response.raise_for_status()
        return response.json()["items"]
    except requests.exceptions.RequestException as e:
        print(f"Erreur lors de la requ√™te : {e}")
        return []

In [38]:
print("R√©cup√©ration des projets populaires...")
repositories = get_repositories(query="machine learning", per_page=20)

print(f"Nombre de projets r√©cup√©r√©s : {len(repositories)}")

R√©cup√©ration des projets populaires...
Nombre de projets r√©cup√©r√©s : 20


In [None]:
def get_all_repository_releases(owner, repo_name, per_page=100):
    """
    R√©cup√®re TOUTES les releases d'un repository (toutes pages)
    
    Args:
        owner (str): Propri√©taire du repository
        repo_name (str): Nom du repository
        per_page (int): Nombre de releases par page (max 100)
    
    Returns:
        list: Liste de TOUTES les releases, tri√©es de la plus r√©cente √† la plus ancienne
    """
    url = f"{GITHUB_API_URL}/repos/{owner}/{repo_name}/releases"
    all_releases = []
    page = 1
    
    print(f"R√©cup√©ration de toutes les releases pour {owner}/{repo_name}...")
    
    while True:
        params = {
            "per_page": per_page,
            "page": page
        }
        
        try:
            response = requests.get(url, headers=headers, params=params)
            response.raise_for_status()
            releases = response.json()
            
            if not releases:  # Plus de releases disponibles
                break
                
            all_releases.extend(releases)
            print(f"  Page {page}: {len(releases)} releases (Total: {len(all_releases)})")
            page += 1
            
        except requests.exceptions.RequestException as e:
            print(f"Erreur lors de la r√©cup√©ration des releases page {page}: {e}")
            break
    
    print(f"Total r√©cup√©r√©: {len(all_releases)} releases")
    if all_releases:
        # Afficher la p√©riode couverte
        newest = all_releases[0]['published_at'] or all_releases[0]['created_at']
        oldest = all_releases[-1]['published_at'] or all_releases[-1]['created_at']
        print(f"P√©riode: {oldest[:10]} ‚Üí {newest[:10]}")
        
        # Afficher quelques exemples
        print("\nPremi√®res releases:")
        for i, release in enumerate(all_releases[:5]):
            date = (release['published_at'] or release['created_at'])[:10]
            name = release['name'] or release['tag_name']
            print(f"  {i+1}. {date}: {name}")
        
        if len(all_releases) > 5:
            print("  ...")
    
    return all_releases


In [40]:
def list_all_releases_summary(owner, repo_name):
    """
    Affiche un r√©sum√© de toutes les releases d'un repository
    """
    releases = get_all_repository_releases(owner, repo_name)
    
    if not releases:
        print(f"Aucune release trouv√©e pour {owner}/{repo_name}")
        return []
    
    print(f"\nüìã TOUTES LES RELEASES DE {owner}/{repo_name}:")
    print("=" * 80)
    
    for i, release in enumerate(releases, 1):
        date = (release['published_at'] or release['created_at'])[:10]
        name = release['name'] or release['tag_name']
        tag = release['tag_name']
        
        # Indicateur si c'est une pre-release
        status = ""
        if release.get('prerelease'):
            status += " [PRE-RELEASE]"
        if release.get('draft'):
            status += " [DRAFT]"
        
        print(f"{i:3d}. {date} - {name} ({tag}){status}")
    
    return releases

In [50]:
repository = repositories[0]
owner = repository["owner"]["login"]
repo_name = repository["name"]

In [42]:
def select_releases_by_time_spacing(releases, num_releases=8):
    """
    S√©lectionne les releases avec l'espacement temporel maximum
    
    Args:
        releases (list): Liste de toutes les releases
        num_releases (int): Nombre de releases √† s√©lectionner
    
    Returns:
        list: Liste des releases avec espacement temporel maximal, tri√©es du plus ancien au plus r√©cent
    """
    if not releases:
        return []
    
    if len(releases) <= num_releases:
        # Pas assez de releases, retourner toutes tri√©es par date
        return sorted(releases, key=lambda x: x['published_at'] or x['created_at'])
    
    # Trier par date (plus ancien en premier)
    releases_sorted = sorted(releases, key=lambda x: x['published_at'] or x['created_at'])
    
    # Convertir les dates en objets datetime
    releases_with_dates = []
    for release in releases_sorted:
        date_str = release['published_at'] or release['created_at']
        date_obj = datetime.fromisoformat(date_str.replace('Z', '+00:00'))
        releases_with_dates.append((release, date_obj))
    
    # Algorithme de s√©lection pour maximiser l'espacement temporel
    selected = []
    
    # Toujours prendre la premi√®re release (la plus ancienne)
    selected.append(releases_with_dates[0])
    
    # Calculer la p√©riode totale
    total_period_days = (releases_with_dates[-1][1] - releases_with_dates[0][1]).days
    
    if num_releases == 1:
        return [selected[0][0]]
    
    # Calculer l'intervalle cible pour un espacement optimal
    target_interval_days = total_period_days / (num_releases - 1)
    
    print(f"P√©riode totale: {total_period_days} jours")
    print(f"Intervalle cible: {target_interval_days:.1f} jours")
    
    # S√©lectionner les releases suivantes en cherchant l'espacement optimal
    for i in range(1, num_releases - 1):
        target_date = selected[0][1] + timedelta(days=int(i * target_interval_days))
        
        # Trouver la release la plus proche de la date cible
        best_release = None
        min_diff = float('inf')
        
        for release, date_obj in releases_with_dates:
            # Ne pas reprendre une release d√©j√† s√©lectionn√©e
            if any(r[0]['tag_name'] == release['tag_name'] for r in selected):
                continue
                
            # Calculer la diff√©rence avec la date cible
            diff = abs((date_obj - target_date).days)
            
            if diff < min_diff:
                min_diff = diff
                best_release = (release, date_obj)
        
        if best_release:
            selected.append(best_release)
    
    # Toujours prendre la derni√®re release (la plus r√©cente)
    if not any(r[0]['tag_name'] == releases_with_dates[-1][0]['tag_name'] for r in selected):
        selected.append(releases_with_dates[-1])
    
    # Trier par date et extraire seulement les releases
    selected_sorted = sorted(selected, key=lambda x: x[1])
    result = [item[0] for item in selected_sorted]
    
    # Limiter au nombre demand√©
    if len(result) > num_releases:
        result = result[:num_releases]
    
    return result



In [43]:
def get_time_spaced_releases_summary(owner, repo_name, num_releases=8):
    """
    R√©cup√®re les releases avec espacement temporel maximal
    
    Args:
        owner (str): Propri√©taire du repository
        repo_name (str): Nom du repository
        num_releases (int): Nombre de releases √† s√©lectionner
    
    Returns:
        list: Liste des releases s√©lectionn√©es avec espacement temporel optimal
    """
    print(f"\nüéØ S√âLECTION DE {num_releases} RELEASES AVEC ESPACEMENT TEMPOREL MAXIMAL")
    print(f"Repository: {owner}/{repo_name}")
    print("=" * 80)
    
    # R√©cup√©rer toutes les releases
    all_releases = get_all_repository_releases(owner, repo_name)
    
    if not all_releases:
        print(f"Aucune release trouv√©e pour {owner}/{repo_name}")
        return []
    

     # Filtrer pour exclure les pre-releases et drafts
    stable_releases = [release for release in all_releases 
                      if not release.get('prerelease', False) and not release.get('draft', False)]
    
    print(f"Releases totales: {len(all_releases)}")
    print(f"Releases stables (sans pre-releases/drafts): {len(stable_releases)}")
    
    if not stable_releases:
        print("Aucune release stable trouv√©e")
        return []
    # S√©lectionner les releases avec espacement temporel
    spaced_releases = select_releases_by_time_spacing(stable_releases, num_releases)
    
    # Afficher les releases s√©lectionn√©es
    print(f"\nüìã RELEASES S√âLECTIONN√âES (du plus ancien au plus r√©cent):")
    print("-" * 80)
    
    for i, release in enumerate(spaced_releases, 1):
        date = (release['published_at'] or release['created_at'])[:10]
        name = release['name'] or release['tag_name']
        tag = release['tag_name']
        
        status = ""
        if release.get('prerelease'):
            status += " [PRE-RELEASE]"
        if release.get('draft'):
            status += " [DRAFT]"
        
        print(f"{i:2d}. {date} - {name} ({tag}){status}")
    
    # Calculer et afficher les intervalles temporels r√©els
    if len(spaced_releases) > 1:
        print(f"\nüìä INTERVALLES TEMPORELS R√âALIS√âS:")
        print("-" * 50)
        
        intervals = []
        total_days = 0
        
        for i in range(1, len(spaced_releases)):
            date1 = datetime.fromisoformat((spaced_releases[i-1]['published_at'] or spaced_releases[i-1]['created_at']).replace('Z', '+00:00'))
            date2 = datetime.fromisoformat((spaced_releases[i]['published_at'] or spaced_releases[i]['created_at']).replace('Z', '+00:00'))
            
            interval_days = (date2 - date1).days
            interval_months = interval_days / 30.44
            intervals.append(interval_days)
            total_days += interval_days
            
            print(f"  {date1.strftime('%Y-%m-%d')} ‚Üí {date2.strftime('%Y-%m-%d')}: {interval_days:4d} jours ({interval_months:5.1f} mois)")
        
        avg_interval = total_days / len(intervals)
        min_interval = min(intervals)
        max_interval = max(intervals)
        
        print(f"\nStatistiques des intervalles:")
        print(f"  Moyenne: {avg_interval:6.0f} jours ({avg_interval/30.44:5.1f} mois)")
        print(f"  Minimum: {min_interval:6d} jours ({min_interval/30.44:5.1f} mois)")
        print(f"  Maximum: {max_interval:6d} jours ({max_interval/30.44:5.1f} mois)")
        
        # P√©riode totale couverte
        first_date = datetime.fromisoformat((spaced_releases[0]['published_at'] or spaced_releases[0]['created_at']).replace('Z', '+00:00'))
        last_date = datetime.fromisoformat((spaced_releases[-1]['published_at'] or spaced_releases[-1]['created_at']).replace('Z', '+00:00'))
        total_period = (last_date - first_date).days
        
        print(f"\nP√©riode totale couverte: {total_period} jours ({total_period/365.25:.1f} ann√©es)")
    
    return spaced_releases

In [44]:
get_time_spaced_releases_summary(owner, repo_name, num_releases=8)


üéØ S√âLECTION DE 8 RELEASES AVEC ESPACEMENT TEMPOREL MAXIMAL
Repository: tensorflow/tensorflow
R√©cup√©ration de toutes les releases pour tensorflow/tensorflow...


  Page 1: 100 releases (Total: 100)
  Page 2: 100 releases (Total: 200)
  Page 3: 19 releases (Total: 219)
Total r√©cup√©r√©: 219 releases
P√©riode: 2016-02-16 ‚Üí 2025-08-13

Premi√®res releases:
  1. 2025-08-13: TensorFlow 2.19.1
  2. 2025-08-13: TensorFlow 2.20.0
  3. 2025-07-28: TensorFlow 2.20.0-rc0
  4. 2025-03-11: TensorFlow 2.18.1
  5. 2025-03-12: TensorFlow 2.19.0
  ...
Releases totales: 219
Releases stables (sans pre-releases/drafts): 108
P√©riode totale: 3465 jours
Intervalle cible: 495.0 jours

üìã RELEASES S√âLECTIONN√âES (du plus ancien au plus r√©cent):
--------------------------------------------------------------------------------
 1. 2016-02-16 - TensorFlow 0.6.0 (v0.6.0)
 2. 2017-06-30 - TensorFlow 1.2.1 (v1.2.1)
 3. 2018-11-05 - TensorFlow 1.12.0 (v1.12.0)
 4. 2020-01-26 - TensorFlow 1.15.2 (v1.15.2)
 5. 2021-08-10 - TensorFlow 2.5.1 (v2.5.1)
 6. 2022-11-18 - TensorFlow 2.11.0 (v2.11.0)
 7. 2024-03-08 - TensorFlow 2.15.1 (v2.15.1)
 8. 2025-08-13 - TensorFlow 2.20.0

[{'url': 'https://api.github.com/repos/tensorflow/tensorflow/releases/2603983',
  'assets_url': 'https://api.github.com/repos/tensorflow/tensorflow/releases/2603983/assets',
  'upload_url': 'https://uploads.github.com/repos/tensorflow/tensorflow/releases/2603983/assets{?name,label}',
  'html_url': 'https://github.com/tensorflow/tensorflow/releases/tag/v0.6.0',
  'id': 2603983,
  'author': {'login': 'martinwicke',
   'id': 577277,
   'node_id': 'MDQ6VXNlcjU3NzI3Nw==',
   'avatar_url': 'https://avatars.githubusercontent.com/u/577277?v=4',
   'gravatar_id': '',
   'url': 'https://api.github.com/users/martinwicke',
   'html_url': 'https://github.com/martinwicke',
   'followers_url': 'https://api.github.com/users/martinwicke/followers',
   'following_url': 'https://api.github.com/users/martinwicke/following{/other_user}',
   'gists_url': 'https://api.github.com/users/martinwicke/gists{/gist_id}',
   'starred_url': 'https://api.github.com/users/martinwicke/starred{/owner}{/repo}',
   'subscr

In [45]:
def get_repository_at_commit(owner, repo_name, commit_sha):
    """
    R√©cup√®re les informations d'un repository √† un commit sp√©cifique
    
    Args:
        owner (str): Propri√©taire du repository
        repo_name (str): Nom du repository
        commit_sha (str): SHA du commit
    
    Returns:
        dict: Informations du repository √† ce commit
    """
    url = f"{GITHUB_API_URL}/repos/{owner}/{repo_name}/git/trees/{commit_sha}"
    params = {"recursive": "1"}
    
    try:
        response = requests.get(url, headers=headers, params=params)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Erreur lors de la r√©cup√©ration du tree pour {commit_sha}: {e}")
        return {}


In [46]:
def analyze_languages_github_style(tree_data):
    """
    Analyse les langages en se rapprochant de la m√©thode GitHub
    """
    language_extensions = {
        '.py': 'Python', '.js': 'JavaScript', '.ts': 'TypeScript',
        '.java': 'Java', '.cpp': 'C++', '.c': 'C', '.cs': 'C#',
        '.php': 'PHP', '.rb': 'Ruby', '.go': 'Go', '.rs': 'Rust',
        '.kt': 'Kotlin', '.swift': 'Swift', '.scala': 'Scala',
        '.r': 'R', '.sql': 'SQL', '.html': 'HTML', '.css': 'CSS',
        '.scss': 'SCSS', '.vue': 'Vue', '.jsx': 'JSX', '.tsx': 'TSX'
    }
    
    # Dossiers √† ignorer (comme GitHub)
    ignored_paths = ['/vendor/', '/node_modules/', '/build/', '/dist/', 
                    '/.git/', '/docs/', '/documentation/', '/test/', '/tests/']
    
    language_data = {}
    
    if 'tree' in tree_data:
        for file_info in tree_data['tree']:
            if file_info['type'] == 'blob':
                path = file_info['path']
                
                # Ignorer certains dossiers
                if any(ignored in path.lower() for ignored in ignored_paths):
                    continue
                
                # Ignorer les fichiers README, LICENSE, etc.
                filename = path.split('/')[-1].lower()
                if filename.startswith(('readme', 'license', 'changelog')):
                    continue
                
                # Analyser l'extension
                for ext, lang in language_extensions.items():
                    if path.lower().endswith(ext):
                        if lang not in language_data:
                            language_data[lang] = {'files': 0, 'size': 0}
                        
                        language_data[lang]['files'] += 1
                        # Approximation de taille (GitHub utilise les octets r√©els)
                        language_data[lang]['size'] += file_info.get('size', 1000)
                        break
    
    return language_data

In [47]:
def analyze_release_languages_historical(owner, repo_name, release):
    """
    Analyse les langages utilis√©s dans une release sp√©cifique en utilisant l'historique Git
    
    Args:
        owner (str): Propri√©taire du repository
        repo_name (str): Nom du repository
        release (dict): Donn√©es de la release
    
    Returns:
        dict: Analyse des langages avec pourcentages bas√©e sur les octets de la release
    """
    tag = release['tag_name']
    date = (release['published_at'] or release['created_at'])[:10]
    
    print(f"\nüîç ANALYSE HISTORIQUE DES LANGAGES - {tag} ({date})")
    print("=" * 60)
    
    # R√©cup√©rer le commit SHA de la release
    commit_sha = release.get('target_commitish', 'main')
    if not commit_sha:
        # Essayer de r√©cup√©rer le SHA depuis l'API des tags
        url = f"{GITHUB_API_URL}/repos/{owner}/{repo_name}/git/refs/tags/{tag}"
        try:
            response = requests.get(url, headers=headers)
            response.raise_for_status()
            tag_data = response.json()
            commit_sha = tag_data['object']['sha']
        except requests.exceptions.RequestException as e:
            print(f"Erreur lors de la r√©cup√©ration du SHA pour {tag}: {e}")
            return {}
    
    # R√©cup√©rer l'arbre des fichiers pour ce commit
    tree_data = get_repository_at_commit(owner, repo_name, commit_sha)
    
    if not tree_data:
        print(f"Impossible de r√©cup√©rer l'arbre pour {tag}")
        return {}
    
    # Analyser les langages avec taille des fichiers
    language_data = analyze_languages_github_style(tree_data)

    if not language_data:
        print("Aucun langage d√©tect√© dans cette release")
        return {}

    # Extraire les donn√©es (fichiers et taille)
    language_stats = {}
    total_bytes = 0
    
    for lang, data in language_data.items():
        language_stats[lang] = {
            'files': data['files'],
            'bytes': data['size']
        }
        total_bytes += data['size']
    
    # Calculer les pourcentages bas√©s sur les octets
    languages_analysis = {}
    for lang, stats in language_stats.items():
        percentage = (stats['bytes'] / total_bytes) * 100 if total_bytes > 0 else 0
        languages_analysis[lang] = {
            'files': stats['files'],
            'bytes': stats['bytes'],
            'percentage': percentage
        }
    
    # Trier par pourcentage d√©croissant
    sorted_languages = sorted(languages_analysis.items(), 
                             key=lambda x: x[1]['percentage'], 
                             reverse=True)
    
    print("üìä R√âPARTITION DES LANGAGES (bas√©e sur les octets de code):")
    print("-" * 60)
    print(f"Total d'octets analys√©s: {total_bytes:,}")
    print(f"Total de fichiers analys√©s: {sum(lang['files'] for lang in languages_analysis.values())}")
    print("-" * 60)
    
    for lang, stats in sorted_languages:
        bar_length = int(stats['percentage'] / 2)  # Barre de 50 chars max
        bar = "‚ñà" * bar_length + "‚ñë" * (50 - bar_length)
        print(f"{lang:<12} {stats['percentage']:6.1f}% |{bar}| ({stats['bytes']:,} octets, {stats['files']} fichiers)")
    
    return languages_analysis


def analyze_all_releases_languages_historical(owner, repo_name, releases):
    """
    Analyse les langages pour toutes les releases s√©lectionn√©es avec donn√©es historiques
    
    Args:
        owner (str): Propri√©taire du repository
        repo_name (str): Nom du repository
        releases (list): Liste des releases √† analyser
    
    Returns:
        dict: Analyse compl√®te des langages par release avec donn√©es historiques
    """
    print(f"\nüåê ANALYSE HISTORIQUE DES LANGAGES POUR TOUTES LES RELEASES")
    print(f"Repository: {owner}/{repo_name}")
    print("=" * 80)
    
    all_analyses = {}
    
    for i, release in enumerate(releases, 1):
        tag = release['tag_name']
        date = (release['published_at'] or release['created_at'])[:10]
        name = release['name'] or tag
        
        print(f"\n[{i}/{len(releases)}] Release: {name} ({tag}) - {date}")
        
        # Analyser les langages historiques pour cette release
        languages_analysis = analyze_release_languages_historical(owner, repo_name, release)
        all_analyses[tag] = {
            'release_info': release,
            'languages': languages_analysis
        }
    
    return all_analyses
def get_languages_evolution_summary_historical(languages_analyses):
    """
    R√©sume l'√©volution des langages √† travers les releases (version historique bas√©e sur octets)
    
    Args:
        languages_analyses (dict): Analyses des langages par release
    
    Returns:
        dict: R√©sum√© de l'√©volution avec donn√©es historiques
    """
    print(f"\nüìà R√âSUM√â DE L'√âVOLUTION HISTORIQUE DES LANGAGES")
    print("=" * 80)
    
    # Collecter tous les langages uniques
    all_languages = set()
    for analysis in languages_analyses.values():
        if analysis['languages']:
            all_languages.update(analysis['languages'].keys())
    
    if not all_languages:
        print("Aucun langage d√©tect√© dans les releases analys√©es")
        return {}
    
    # Analyser la pr√©sence de chaque langage
    language_presence = {}
    for lang in all_languages:
        appearances = 0
        total_percentage = 0
        total_files = 0
        total_bytes = 0
        
        for analysis in languages_analyses.values():
            if analysis['languages'] and lang in analysis['languages']:
                appearances += 1
                total_percentage += analysis['languages'][lang]['percentage']
                total_files += analysis['languages'][lang]['files']
                total_bytes += analysis['languages'][lang]['bytes']
        
        language_presence[lang] = {
            'appearances': appearances,
            'avg_percentage': total_percentage / appearances if appearances > 0 else 0,
            'total_files': total_files,
            'total_bytes': total_bytes,
            'total_releases': len(languages_analyses)
        }
    
    # Trier par pourcentage moyen
    sorted_langs = sorted(language_presence.items(), 
                         key=lambda x: x[1]['avg_percentage'], 
                         reverse=True)
    
    print("üèÜ LANGAGES LES PLUS UTILIS√âS (moyenne sur toutes les releases - bas√© sur octets):")
    print("-" * 80)
    print(f"{'Langage':<12} {'Moy %':<8} {'Pr√©sence':<10} {'Total octets':<15} {'Total fichiers':<15}")
    print("-" * 80)
    
    for lang, stats in sorted_langs[:10]:  # Top 10
        presence_rate = (stats['appearances'] / stats['total_releases']) * 100
        print(f"{lang:<12} {stats['avg_percentage']:6.1f}% {presence_rate:7.0f}% {stats['total_bytes']:12,} {stats['total_files']:12d}")
    
    return language_presence

def get_complete_releases_analysis_historical(owner, repo_name, num_releases=8):
    """
    Analyse compl√®te des releases avec langages historiques
    
    Args:
        owner (str): Propri√©taire du repository
        repo_name (str): Nom du repository
        num_releases (int): Nombre de releases √† analyser
    
    Returns:
        tuple: (releases_s√©lectionn√©es, analyse_langages_historique)
    """
    # R√©cup√©rer les releases avec espacement temporel
    selected_releases = get_time_spaced_releases_summary(owner, repo_name, num_releases)
    
    if not selected_releases:
        return [], {}
    
    # Analyser les langages historiques
    languages_analysis = analyze_all_releases_languages_historical(owner, repo_name, selected_releases)
    
    # R√©sum√© de l'√©volution
    evolution_summary = get_languages_evolution_summary_historical(languages_analysis)
    
    return selected_releases, languages_analysis

In [48]:
# Remplacer votre cellule d'analyse par :
selected_releases, historical_languages_data = get_complete_releases_analysis_historical(owner, repo_name, num_releases=8)



üéØ S√âLECTION DE 8 RELEASES AVEC ESPACEMENT TEMPOREL MAXIMAL
Repository: tensorflow/tensorflow
R√©cup√©ration de toutes les releases pour tensorflow/tensorflow...


  Page 1: 100 releases (Total: 100)
  Page 2: 100 releases (Total: 200)
  Page 3: 19 releases (Total: 219)
Total r√©cup√©r√©: 219 releases
P√©riode: 2016-02-16 ‚Üí 2025-08-13

Premi√®res releases:
  1. 2025-08-13: TensorFlow 2.19.1
  2. 2025-08-13: TensorFlow 2.20.0
  3. 2025-07-28: TensorFlow 2.20.0-rc0
  4. 2025-03-11: TensorFlow 2.18.1
  5. 2025-03-12: TensorFlow 2.19.0
  ...
Releases totales: 219
Releases stables (sans pre-releases/drafts): 108
P√©riode totale: 3465 jours
Intervalle cible: 495.0 jours

üìã RELEASES S√âLECTIONN√âES (du plus ancien au plus r√©cent):
--------------------------------------------------------------------------------
 1. 2016-02-16 - TensorFlow 0.6.0 (v0.6.0)
 2. 2017-06-30 - TensorFlow 1.2.1 (v1.2.1)
 3. 2018-11-05 - TensorFlow 1.12.0 (v1.12.0)
 4. 2020-01-26 - TensorFlow 1.15.2 (v1.15.2)
 5. 2021-08-10 - TensorFlow 2.5.1 (v2.5.1)
 6. 2022-11-18 - TensorFlow 2.11.0 (v2.11.0)
 7. 2024-03-08 - TensorFlow 2.15.1 (v2.15.1)
 8. 2025-08-13 - TensorFlow 2.20.0

In [49]:
def create_languages_matrix(historical_languages_data):
    """
    Cr√©e une matrice DataFrame des langages par releases avec dates
    
    Args:
        historical_languages_data (dict): Donn√©es des langages par release
    
    Returns:
        pd.DataFrame: Matrice avec les dates en index et les langages en colonnes
    """
    # Pr√©parer les donn√©es pour le graphique
    release_data = []

    # Collecter toutes les releases dans l'ordre chronologique avec leurs dates
    for tag, data in historical_languages_data.items():
        release_info = data['release_info']
        date_str = release_info['published_at'] or release_info['created_at']
        # Extraire seulement la date (YYYY-MM-DD)
        date_only = date_str[:10]
        release_data.append((date_str, date_only, tag, data))

    # Trier par date
    release_data.sort(key=lambda x: x[0])
    
    # Extraire les informations dans l'ordre chronologique
    release_dates = [item[1] for item in release_data]  # Dates format√©es (YYYY-MM-DD)
    release_tags = [item[2] for item in release_data]   # Tags correspondants
    release_info_ordered = [item[3] for item in release_data]  # Donn√©es correspondantes

    print(f"Ordre chronologique des releases:")
    for date, tag in zip(release_dates, release_tags):
        print(f"  {date} ({tag})")

    # Collecter tous les langages uniques
    all_languages = set()
    for data in historical_languages_data.values():
        all_languages.update(data['languages'].keys())

    print(f"\nLangages d√©tect√©s: {sorted(all_languages)}")

    # Cr√©er la matrice avec pandas
    matrix_data = {}
    
    for lang in sorted(all_languages):
        percentages = []
        for data in release_info_ordered:
            if lang in data['languages']:
                percentages.append(data['languages'][lang]['percentage'])
            else:
                percentages.append(0.0)  # 0% si le langage n'est pas pr√©sent
        matrix_data[lang] = percentages

    # Cr√©er le DataFrame avec les dates comme index et les langages comme colonnes
    languages_matrix = pd.DataFrame(matrix_data, index=release_dates)
    languages_matrix.index.name = 'Date'
    
    return languages_matrix

def display_languages_matrix(languages_matrix, min_percentage=1.0):
    """
    Affiche la matrice des langages avec dates et formatage am√©lior√©
    
    Args:
        languages_matrix (pd.DataFrame): Matrice des langages avec dates
        min_percentage (float): Pourcentage minimum pour afficher un langage
    """
    print("\nüìä MATRICE DES LANGAGES PAR DATE DE RELEASE:")
    print("=" * 100)
    
    # Filtrer les langages avec un pourcentage significatif
    significant_langs = languages_matrix.columns[languages_matrix.max() >= min_percentage]
    filtered_matrix = languages_matrix[significant_langs]
    
    # Affichage format√©
    print(filtered_matrix.round(1))
    
    print(f"\nNote: Seuls les langages avec au moins {min_percentage}% dans au moins une release sont affich√©s.")
    print(f"Langages filtr√©s: {len(languages_matrix.columns) - len(significant_langs)}")
    
    return filtered_matrix

def export_languages_matrix(languages_matrix, filename=None):
    """
    Exporte la matrice avec dates vers un fichier CSV
    
    Args:
        languages_matrix (pd.DataFrame): Matrice des langages avec dates
        filename (str): Nom du fichier (optionnel)
    
    Returns:
        str: Chemin du fichier export√©
    """
    if filename is None:
        filename = f"languages_matrix_dates_{owner}_{repo_name}.csv"
    
    languages_matrix.to_csv(filename)
    print(f"Matrice avec dates export√©e vers: {filename}")
    return filename

# Utilisation des fonctions modifi√©es
languages_matrix = create_languages_matrix(historical_languages_data)

# Affichage de la matrice compl√®te avec dates
print("\nüîç MATRICE COMPL√àTE AVEC DATES:")
print(languages_matrix)

# Affichage filtr√© (langages significatifs seulement)
filtered_matrix = display_languages_matrix(languages_matrix, min_percentage=1.0)

# Export vers CSV
export_filename = export_languages_matrix(languages_matrix)

# Statistiques sur la matrice
print(f"\nüìà STATISTIQUES DE LA MATRICE AVEC DATES:")
print(f"P√©riode couverte: {languages_matrix.index[0]} ‚Üí {languages_matrix.index[-1]}")
print(f"Dimensions: {languages_matrix.shape[0]} releases √ó {languages_matrix.shape[1]} langages")
print(f"Langages avec pr√©sence > 0%: {(languages_matrix > 0).sum().sum()}")
print(f"Langages dominants (>10% dans au moins une release): {(languages_matrix > 10).any().sum()}")

Ordre chronologique des releases:
  2016-02-16 (v0.6.0)
  2017-06-30 (v1.2.1)
  2018-11-05 (v1.12.0)
  2020-01-26 (v1.15.2)
  2021-08-10 (v2.5.1)
  2022-11-18 (v2.11.0)
  2024-03-08 (v2.15.1)
  2025-08-13 (v2.20.0)

Langages d√©tect√©s: ['C', 'C#', 'C++', 'CSS', 'Go', 'HTML', 'Java', 'JavaScript', 'Python', 'Swift', 'TypeScript']

üîç MATRICE COMPL√àTE AVEC DATES:
                   C        C#       C++       CSS        Go       HTML  \
Date                                                                      
2016-02-16  0.159377  0.025055  0.398702  0.005092  4.012620  14.144968   
2017-06-30  0.152685  0.000000  0.000000  0.003324  3.630310   4.901877   
2018-11-05  0.102359  0.019011  0.000000  0.000000  2.821746  10.533962   
2020-01-26  0.299022  0.015457  4.666739  0.000000  3.015164   8.449092   
2021-08-10  0.299235  0.027101  0.003239  0.000000  4.064260   9.495694   
2022-11-18  0.176277  0.027969  0.000000  0.000000  4.342703   9.726632   
2024-03-08  0.182173  0.026892  

In [None]:
def get_matrix_repository(owner, repo_name, per_page=100, num_releases=8):
    """
    R√©cup√®re la matrice des langages pour un repository donn√©
    
    Args:
        owner (str): Propri√©taire du repository
        repo_name (str): Nom du repository
        per_page (int): Nombre de releases par page (max 100)
    
    Returns:
        pd.DataFrame: Matrice des langages avec dates
    """
    # R√©cup√©rer toutes les releases
    all_releases = get_time_spaced_releases_summary(owner, repo_name, num_releases=8)
    if not all_releases:
        print(f"Aucune release trouv√©e pour {owner}/{repo_name}")
        return pd.DataFrame()
    
    
    # Analyser les langages historiques
    languages_analysis = analyze_all_releases_languages_historical(owner, repo_name, all_releases)
    
    # Cr√©er la matrice avec dates
    languages_matrix = create_languages_matrix(languages_analysis)
    
    return languages_matrix
    