In [None]:
import requests
import pandas as pd
import time
from datetime import datetime

# GraphQL document sent by get_pr_counts. The field/alias names must match the
# keys the function reads from the response: `pullRequests` (used as the open
# count), `closedPullRequests` and `mergedPullRequests`.
# NOTE(review): the original code referenced a global `query` that is not
# defined anywhere in this notebook; the state filters below are inferred from
# how the three counts are used -- confirm against the query originally run.
PR_COUNTS_QUERY = """
query($owner: String!, $name: String!) {
  repository(owner: $owner, name: $name) {
    pullRequests(states: OPEN) { totalCount }
    closedPullRequests: pullRequests(states: CLOSED) { totalCount }
    mergedPullRequests: pullRequests(states: MERGED) { totalCount }
  }
}
"""


def get_pr_counts(repo_owner, repo_name, headers):
    """Fetch pull-request counts for one repository via the GitHub GraphQL API.

    Args:
        repo_owner: Owner part of ``owner/name``.
        repo_name: Name part of ``owner/name``.
        headers: HTTP headers (including authorization) sent with the request.

    Returns:
        dict with ``open_prs``, ``closed_prs`` (closed + merged) and
        ``total_prs``. All three are 0 when the request fails, the status is
        not 200, or the response carries no repository data.
    """
    def zero_counts():
        # Fresh dict per call so callers can never share/mutate one instance.
        return {'open_prs': 0, 'closed_prs': 0, 'total_prs': 0}

    variables = {"owner": repo_owner, "name": repo_name}

    try:
        response = requests.post(
            'https://api.github.com/graphql',
            headers=headers,
            json={'query': PR_COUNTS_QUERY, 'variables': variables},
            timeout=30,  # do not let one stalled request hang the whole run
        )
        if response.status_code != 200:
            return zero_counts()

        # GraphQL errors come back as HTTP 200 with "data": null, so both
        # levels are guarded against None explicitly.
        payload = response.json() or {}
        repository = (payload.get('data') or {}).get('repository') or {}
        if not repository:
            return zero_counts()

        open_prs = repository['pullRequests']['totalCount']
        closed_prs = repository['closedPullRequests']['totalCount']
        merged_prs = repository['mergedPullRequests']['totalCount']
        return {
            'open_prs': open_prs,
            'closed_prs': closed_prs + merged_prs,
            'total_prs': open_prs + closed_prs + merged_prs
        }
    except Exception as e:
        print(f"Error fetching PRs for {repo_owner}/{repo_name}: {e}")
        return zero_counts()

def _seconds_until_reset(response_headers, default_wait):
    """Return how many seconds to sleep before retrying a rate-limited call.

    A numeric ``Retry-After`` header wins when present; otherwise the wait is
    derived from the ``X-RateLimit-Reset`` epoch timestamp. ``default_wait`` is
    used when neither header is usable. One extra second is always added.
    """
    retry_after = response_headers.get('Retry-After')
    if retry_after is not None:
        try:
            return max(float(retry_after), 0) + 1
        except (TypeError, ValueError):
            pass  # not a plain number of seconds; fall back to the reset header

    reset_header = response_headers.get('X-RateLimit-Reset')
    if reset_header is None:
        return default_wait + 1
    return max(int(reset_header) - time.time(), 0) + 1


def get_github_repos(language, headers, max_pages=10, per_page=100, max_retries=5):
    """Collect the most-starred repositories for a language via GitHub search.

    Args:
        language: Value for the ``language:`` search qualifier.
        headers: HTTP headers (including authorization) sent with each request.
        max_pages: Maximum number of result pages to request.
        per_page: Results requested per page.
        max_retries: How many consecutive rate-limited (403/429) responses are
            tolerated for one page before giving up. The original code retried
            forever, which never terminates when the 403 is not a rate limit.

    Returns:
        list of repository dicts (the ``items`` of each search response), in
        the order returned by the API. Pages fetched before an error or the
        retry limit are still returned.
    """
    url = "https://api.github.com/search/repositories"
    all_repos = []
    page = 1
    retries = 0

    while page <= max_pages:
        params = {
            'q': f'language:{language}',
            'sort': 'stars',
            'order': 'desc',
            'per_page': per_page,
            'page': page
        }

        response = requests.get(url, headers=headers, params=params, timeout=30)

        if response.status_code in (403, 429):
            retries += 1
            if retries > max_retries:
                print(f"Giving up on {language} repositories (page {page}) "
                      f"after {max_retries} rate-limit retries")
                break
            sleep_time = _seconds_until_reset(response.headers, default_wait=60)
            print(f"\nGitHub rate limit reached! Sleeping for {sleep_time:.0f} seconds before retrying...")
            time.sleep(sleep_time)
            continue  # retry the same page
        retries = 0

        if response.status_code != 200:
            print(f"Error fetching {language} repositories (page {page}): {response.status_code}")
            break

        items = response.json().get('items', [])
        if not items:
            break

        all_repos.extend(items)

        # Pause proactively when the quota is nearly used up, so the next page
        # does not come back rate-limited. A missing header is treated as 0.
        remaining_calls = int(response.headers.get('X-RateLimit-Remaining', 0))
        if remaining_calls < 2:
            sleep_time = _seconds_until_reset(response.headers, default_wait=0)
            print(f"\nRate limit reached. Sleeping for {sleep_time:.0f} seconds...")
            time.sleep(sleep_time)

        print(f"Fetched page {page} for {language} ({len(items)} repositories)")
        page += 1

    return all_repos

def get_repo_details(repo, headers):
    """Flatten one search-result repository into a record for the DataFrame.

    Looks up the repository's pull-request counts with ``get_pr_counts``,
    sleeps one second, and returns a dict with the keys ``repo_name``,
    ``stars``, ``issues``, ``open_prs``, ``closed_prs``, ``total_prs``,
    ``language``, ``is_fork``, ``url`` and ``size_mb`` (``repo['size']``
    divided by 1024, rounded to 2 decimals).
    """
    repo_owner, short_name = repo['full_name'].split('/')
    prs = get_pr_counts(repo_owner, short_name, headers)
    # One-second pause after every PR lookup.
    time.sleep(1)

    record = {
        'repo_name': repo['full_name'],
        'stars': repo['stargazers_count'],
        'issues': repo['open_issues_count'],
    }
    for pr_key in ('open_prs', 'closed_prs', 'total_prs'):
        record[pr_key] = prs[pr_key]
    record['language'] = repo['language']
    record['is_fork'] = repo['fork']
    record['url'] = repo['html_url']
    record['size_mb'] = round(repo['size'] / 1024, 2)
    return record

def main():
    """Collect top-starred repositories per language and save them to a CSV.

    For each language in the hard-coded list, repositories are fetched with
    ``get_github_repos`` and enriched with ``get_repo_details``. The combined
    records are written to ``github_repos_<timestamp>.csv`` in the working
    directory and summary statistics are printed.

    The GitHub token is read from the ``GITHUB_TOKEN`` environment variable so
    the secret does not have to be pasted into the notebook; the original
    placeholder string remains the fallback.

    Returns:
        None. When no repositories were collected, nothing is written and the
        function returns early (the summary code would otherwise fail on the
        missing ``language`` column).
    """
    import os  # local import: this cell's import list does not include os

    github_token = os.environ.get("GITHUB_TOKEN", "your_actual_token")  #put your github token here
    headers = {
        'Authorization': f'bearer {github_token}',
        'Accept': 'application/vnd.github.v3+json'
    }

    languages = ['Java', 'JavaScript', 'Python', 'PHP', 'C', 'C++', 'Ruby']

    all_repos_data = []

    for language in languages:
        print(f"\nFetching {language} repositories...")
        repos = get_github_repos(language, headers)
        print(f"Fetching PR data for {len(repos)} {language} repositories...")
        all_repos_data.extend(get_repo_details(repo, headers) for repo in repos)
        print(f"Total {language} repositories collected: {len(repos)}")

    df = pd.DataFrame(all_repos_data)
    if df.empty:
        # An empty frame has no columns, so df['language'] below would raise.
        print("\nNo repositories were collected; nothing to save.")
        return

    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    filename = f'github_repos_{timestamp}.csv'

    df.to_csv(filename, index=False)
    print(f"\nData saved to {filename}")
    print(f"Total repositories collected: {len(df)}")
    print("\nRepositories per language:")
    print(df['language'].value_counts())

    print("\nAverage stats per language:")
    stats = df.groupby('language').agg({
        'stars': 'mean',
        'issues': 'mean',
        'open_prs': 'mean',
        'closed_prs': 'mean',
        'total_prs': 'mean'
    }).round(2)
    print(stats)

    print("\nTop 5 repositories by stars:")
    columns = ['repo_name', 'language', 'stars', 'open_prs', 'closed_prs', 'total_prs']
    print(df.nlargest(5, 'stars')[columns])

# Entry point: runs the collection pipeline when this code is executed as a
# script or notebook cell (where __name__ is "__main__"), not when imported.
if __name__ == "__main__":
    main()


Fetching Java repositories...

GitHub rate limit reached! Sleeping for 16 seconds before retrying...
Fetched page 1 for Java (100 repositories)
Fetched page 2 for Java (100 repositories)
Fetched page 3 for Java (100 repositories)
Fetched page 4 for Java (100 repositories)
Fetched page 5 for Java (100 repositories)
Fetched page 6 for Java (100 repositories)
Fetched page 7 for Java (100 repositories)
Fetched page 8 for Java (100 repositories)
Fetched page 9 for Java (100 repositories)
Fetched page 10 for Java (100 repositories)
Fetching PR data for 1000 Java repositories...
Total Java repositories collected: 1000

Fetching JavaScript repositories...
Fetched page 1 for JavaScript (100 repositories)
Fetched page 2 for JavaScript (100 repositories)
Fetched page 3 for JavaScript (100 repositories)
Fetched page 4 for JavaScript (100 repositories)
Fetched page 5 for JavaScript (100 repositories)
Fetched page 6 for JavaScript (100 repositories)
Fetched page 7 for JavaScript (100 repositories)


In [None]:
import pandas as pd
from datetime import datetime

def filter_repositories(input_file):
    """Keep only repositories that satisfy every selection criterion.

    Criteria: at least 100 stars, at least 100 total PRs, at least 100 open
    issues, size of at least 10 MB, and not a fork. The impact of each
    criterion is printed, the surviving rows are written to
    ``github_repos_filtered_<timestamp>.csv`` in the working directory, and
    summary statistics are printed.

    Args:
        input_file: Path of the CSV to read. It must provide the columns
            ``stars``, ``total_prs``, ``issues``, ``size_mb``, ``is_fork``,
            ``language`` and ``repo_name``.

    Returns:
        DataFrame with the rows that pass all criteria. For an input with no
        rows the empty frame is returned as-is and no file is written.
    """
    df = pd.read_csv(input_file)
    total = len(df)

    print(f"Total repositories before filtering: {total}")

    if total == 0:
        # Nothing to filter; also avoids dividing by zero in the percentages.
        print("\nNo repositories to filter.")
        return df

    filters = {
        'stars >= 100': df['stars'] >= 100,
        'total PRs >= 100': df['total_prs'] >= 100,
        'issues >= 100': df['issues'] >= 100,
        'size >= 10MB': df['size_mb'] >= 10,
        'not forked': ~df['is_fork'].astype(bool)
    }

    print("\nImpact of individual filters:")
    for name, mask in filters.items():
        count = int(mask.sum())
        percentage = (count / total) * 100
        print(f"{name}: {count} repositories ({percentage:.2f}%)")

    # Combine all masks first and index once. Applying them one at a time to an
    # already-shrunken frame makes pandas reindex each mask and emit a
    # "Boolean Series key will be reindexed" warning per filter.
    passes_all = pd.Series(True, index=df.index)
    for mask in filters.values():
        passes_all &= mask
    df = df[passes_all]

    print(f"\nRepositories meeting all criteria: {len(df)}")
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    output_file = f'github_repos_filtered_{timestamp}.csv'
    df.to_csv(output_file, index=False)
    print(f"\nFiltered results saved to: {output_file}")

    print("\nQualifying repositories per language:")
    print(df['language'].value_counts())

    print("\nAverage stats for qualifying repositories per language:")
    stats = df.groupby('language').agg({
        'stars': ['count', 'mean'],
        'issues': 'mean',
        'total_prs': 'mean',
        'size_mb': 'mean'
    }).round(2)
    print(stats)

    print("\nTop 5 qualifying repositories by stars:")
    columns = ['repo_name', 'language', 'stars', 'issues', 'total_prs', 'size_mb']
    print(df.nlargest(5, 'stars')[columns])

    return df

# Entry point for this cell: filter the CSV written by the collection step.
# NOTE(review): the path is an absolute "/content/..." location with a fixed
# timestamp -- presumably a Colab run; update it to the file actually produced.
if __name__ == "__main__":
    input_file = "/content/github_repos_20250212_234546.csv"
    filtered_df = filter_repositories(input_file)

Total repositories before filtering: 7000

Impact of individual filters:
stars >= 100: 7000 repositories (100.00%)
total PRs >= 100: 4566 repositories (65.23%)
issues >= 100: 2957 repositories (42.24%)
size >= 10MB: 3612 repositories (51.60%)
not forked: 7000 repositories (100.00%)

Repositories meeting all criteria: 1779

Filtered results saved to: github_repos_filtered_20250213_001321.csv

Qualifying repositories per language:
language
C++           412
Python        336
JavaScript    297
Java          292
C             242
PHP           136
Ruby           64
Name: count, dtype: int64

Average stats for qualifying repositories per language:
           stars            issues total_prs size_mb
           count      mean    mean      mean    mean
language                                            
C            242   9296.49  469.59   2675.98  192.75
C++          412  12448.05  725.60   4265.84  473.41
Java         292  11855.92  583.13   3979.33  268.81
JavaScript   297  23299.06  552

  df = df[filter_condition]
  df = df[filter_condition]
  df = df[filter_condition]


In [None]:
import pandas as pd
import requests
import time
from datetime import datetime

def check_travis_ci(repo_name, headers):
    """Report whether a repository has a ``.travis.yml`` at its root.

    The check asks the GitHub contents API for ``.travis.yml`` and treats HTTP
    200 as "present".

    Args:
        repo_name: Repository in ``owner/name`` form.
        headers: HTTP headers (including authorization) sent with the request.

    Returns:
        True when the API answers 200, otherwise False. A status other than
        200/404 (for example a rate-limit 403 or a server error) also yields
        False, but a warning is printed because in that case the answer is
        really unknown rather than "no Travis".
    """
    owner, repo = repo_name.split('/')
    url = f"https://api.github.com/repos/{owner}/{repo}/contents/.travis.yml"

    try:
        # Timeout so one stalled connection cannot hang a long batch run.
        response = requests.get(url, headers=headers, timeout=30)
    except Exception as e:
        print(f"Error checking Travis CI for {repo_name}: {e}")
        return False

    status = response.status_code
    if status not in (200, 404):
        print(f"Warning: unexpected status {status} while checking Travis CI "
              f"for {repo_name}; recording it as not using Travis")
    return status == 200

def analyze_travis_usage(input_file, github_token):
    """Flag every repository in a CSV with whether it has a ``.travis.yml``.

    Each repository is checked with ``check_travis_ci`` (one-second pause
    between checks). The result is stored in a new ``uses_travis`` column, the
    frame is written to ``github_repos_travis_<timestamp>.csv`` in the working
    directory, and usage statistics are printed.

    Args:
        input_file: CSV with at least the columns ``repo_name``, ``language``,
            ``stars`` and ``total_prs``.
        github_token: Token placed in the ``Authorization`` header.

    Returns:
        The DataFrame with the added ``uses_travis`` column. For an input with
        no rows the empty frame (with the column added) is returned and no
        file is written.
    """
    df = pd.read_csv(input_file)
    total_repos = len(df)
    print(f"Analyzing {total_repos} repositories for Travis CI usage...")

    if total_repos == 0:
        # Nothing to check; also avoids dividing by zero in the percentage.
        print("No repositories to analyze.")
        df['uses_travis'] = pd.Series(dtype=bool)
        return df

    headers = {
        'Authorization': f'token {github_token}',
        'Accept': 'application/vnd.github.v3+json'
    }

    travis_usage = []
    # enumerate() keeps the progress counter right whatever the frame's index is.
    for position, repo_name in enumerate(df['repo_name'], start=1):
        print(f"Checking {repo_name} ({position}/{total_repos})")
        travis_usage.append(check_travis_ci(repo_name, headers))
        time.sleep(1)
    df['uses_travis'] = travis_usage

    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    output_file = f'github_repos_travis_{timestamp}.csv'

    df.to_csv(output_file, index=False)

    travis_count = sum(travis_usage)
    usage_percentage = (travis_count / total_repos) * 100

    print("\nTravis CI Usage Statistics:")
    print(f"Total repositories analyzed: {total_repos}")
    print(f"Repositories using Travis CI: {travis_count}")
    print(f"Usage percentage: {usage_percentage:.2f}%")

    print("\nTravis CI usage by language:")
    language_stats = df.groupby('language').agg({
        'uses_travis': ['count', 'sum']
    })
    language_stats['percentage'] = (language_stats['uses_travis']['sum'] /
                                  language_stats['uses_travis']['count'] * 100).round(2)
    print(language_stats)

    print("\nTop 5 repositories with Travis CI by stars:")
    top_travis_repos = df[df['uses_travis']].nlargest(5, 'stars')
    print(top_travis_repos[['repo_name', 'language', 'stars', 'total_prs']])

    print(f"\nResults saved to: {output_file}")

    return df

# Entry point for this cell: annotate the filtered repositories with Travis usage.
# NOTE(review): the input path is an absolute "/content/..." location with a
# fixed timestamp -- presumably a Colab run; update it to the file actually produced.
if __name__ == "__main__":
    import os  # local import: this cell's import list does not include os

    input_file = "/content/github_repos_filtered_20250213_001321.csv"
    # Prefer the GITHUB_TOKEN environment variable so the secret is not stored
    # in the notebook; the original placeholder remains the fallback.
    github_token = os.environ.get("GITHUB_TOKEN", "your_actual_token")

    df_with_travis = analyze_travis_usage(input_file, github_token)

Analyzing 1779 repositories for Travis CI usage...
Checking iluwatar/java-design-patterns (1/1779)
Checking spring-projects/spring-boot (2/1779)
Checking elastic/elasticsearch (3/1779)
Checking kdn251/interviews (4/1779)
Checking spring-projects/spring-framework (5/1779)
Checking NationalSecurityAgency/ghidra (6/1779)
Checking google/guava (7/1779)
Checking Stirling-Tools/Stirling-PDF (8/1779)
Checking skylot/jadx (9/1779)
Checking dbeaver/dbeaver (10/1779)
Checking apache/dubbo (11/1779)
Checking PhilJay/MPAndroidChart (12/1779)
Checking alibaba/arthas (13/1779)
Checking bumptech/glide (14/1779)
Checking halo-dev/halo (15/1779)
Checking netty/netty (16/1779)
Checking Blankj/AndroidUtilCode (17/1779)
Checking alibaba/easyexcel (18/1779)
Checking TeamNewPipe/NewPipe (19/1779)
Checking SeleniumHQ/selenium (20/1779)
Checking alibaba/nacos (21/1779)
Checking apache/kafka (22/1779)
Checking apolloconfig/apollo (23/1779)
Checking alibaba/canal (24/1779)
Checking xuxueli/xxl-job (25/1779)
Che

In [None]:
import pandas as pd

def filter_travis_repositories(input_file):
    """Extract the repositories flagged as using Travis CI and save them.

    Args:
        input_file: CSV containing a ``uses_travis`` column.

    Returns:
        DataFrame with only the rows whose ``uses_travis`` equals True. The
        same rows are written to ``travis_repositories_<timestamp>.csv`` in
        the working directory.
    """
    # This cell only imports pandas; without a local import the function works
    # only if an earlier cell happened to import `datetime` into the kernel.
    from datetime import datetime

    df = pd.read_csv(input_file)
    # .copy() returns an independent frame, so callers can modify it without
    # pandas chained-assignment warnings.
    travis_repos = df[df['uses_travis'].eq(True)].copy()
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    output_file = f'travis_repositories_{timestamp}.csv'
    travis_repos.to_csv(output_file, index=False)
    print(f"Filtered repositories with Travis CI saved to: {output_file}")
    return travis_repos

# Entry point for this cell: keep only the repositories flagged as using Travis CI.
# NOTE(review): the path is an absolute "/content/..." location with a fixed
# timestamp -- presumably a Colab run; update it to the file actually produced.
if __name__ == "__main__":
    input_file = "/content/github_repos_travis_20250213_004917.csv"
    travis_repos_df = filter_travis_repositories(input_file)

Filtered repositories with Travis CI saved to: travis_repositories_20250213_005251.csv


In [None]:
import os
import requests
import pandas as pd
from git import Repo
from datetime import datetime
import time

# Travis CI API token. Read from the TRAVIS_TOKEN environment variable when it
# is set so the secret does not have to be stored in the notebook; the original
# placeholder string remains the fallback.
TRAVIS_TOKEN = os.environ.get("TRAVIS_TOKEN", "your_actual_token")   #put travisCI token here

# Headers sent with every Travis API request.
HEADERS = {
    "Travis-API-Version": "3",
    "Authorization": f"token {TRAVIS_TOKEN}"
}

# URL template for a repository's builds, limited to one build per response.
# The slash between owner and repo is written URL-encoded (%2F).
TRAVIS_API_URL = "https://api.travis-ci.com/repo/{owner}%2F{repo}/builds?limit=1"

# Directory that receives the cloned repositories; created on cell execution.
CLONE_DIR = "cloned_repos"
os.makedirs(CLONE_DIR, exist_ok=True)

def get_first_travis_commit(repo_path):
    """Return the commit time of the oldest commit that touched ``.travis.yml``.

    Args:
        repo_path: Path of a local git clone.

    Returns:
        The committed date of the last commit yielded by
        ``iter_commits(paths=".travis.yml")``, formatted in UTC as
        ``YYYY-MM-DD HH:MM:SS``. None when no commit touches the file or when
        reading the repository fails (the error is printed).
    """
    # Local import: the cell imports only `datetime` from the datetime module.
    from datetime import timezone

    try:
        repo = Repo(repo_path)
        commits = list(repo.iter_commits(paths=".travis.yml"))
        if commits:
            # The last element is used as the first commit of the file.
            # fromtimestamp(..., tz=utc) replaces the deprecated
            # datetime.utcfromtimestamp and yields the same UTC wall-clock text.
            first_commit_time = datetime.fromtimestamp(commits[-1].committed_date, tz=timezone.utc)
            return first_commit_time.strftime('%Y-%m-%d %H:%M:%S')
    except Exception as e:
        print(f"Error retrieving first .travis.yml commit for {repo_path}: {e}")
    return None

def get_last_build_timestamp(owner, repo):
    """Ask the Travis CI API for a repository's build and return its finish time.

    Args:
        owner: Repository owner.
        repo: Repository name.

    Returns:
        On HTTP 200, the ``finished_at`` value of the first build in the
        response (may be None), or ``"No builds found"`` when the list is
        empty. Otherwise a marker string: ``"Access forbidden"`` (403),
        ``"Not Found"`` (404), or ``"Error: <detail>"`` for any other status
        or for a failed request.
    """
    url = TRAVIS_API_URL.format(owner=owner, repo=repo)
    try:
        # Timeout plus explicit error handling: a single network failure used
        # to propagate and abort the whole batch before results were saved.
        response = requests.get(url, headers=HEADERS, timeout=30)
    except requests.RequestException as e:
        print(f"Error contacting Travis CI for {owner}/{repo}: {e}")
        return f"Error: {e}"

    print(f"Checking Travis CI for {owner}/{repo} - Status Code: {response.status_code}")
    if response.status_code == 200:
        builds = response.json().get("builds", [])
        if builds:
            return builds[0].get("finished_at")
        return "No builds found"

    status_markers = {403: "Access forbidden", 404: "Not Found"}
    return status_markers.get(response.status_code, f"Error: {response.status_code}")

def process_repositories(csv_file, output_file, start=0, end=123, delay_seconds=5):
    """Add Travis start/end dates and duration to a slice of a repository CSV.

    For every row in ``[start:end)`` the repository is cloned (unless a clone
    already exists), the first ``.travis.yml`` commit is looked up with
    ``get_first_travis_commit`` and the build finish time with
    ``get_last_build_timestamp``. The columns ``travis_start_date``,
    ``travis_end_date`` and ``travis_duration_days`` are filled in and the
    slice is written to ``output_file``.

    Args:
        csv_file: Input CSV; must contain a ``url`` column.
        output_file: Path of the CSV to write.
        start: First row position (inclusive) to process.
        end: Row position (exclusive) at which to stop.
        delay_seconds: Pause after each processed repository.

    Notes:
        * Clones live under ``CLONE_DIR/<owner>/<repo>``. Keying the directory
          by repository name alone made two same-named repositories from
          different owners share one clone, so the second one got the first
          one's start date. Clones made under the old layout are not reused.
        * The CSV is written in a ``finally`` block, so rows processed before
          an interruption or an unexpected error are still saved.
    """
    # .copy(): the new columns go into an independent frame, not an iloc slice.
    df = pd.read_csv(csv_file).iloc[start:end].copy()
    df["travis_start_date"] = None
    df["travis_end_date"] = None
    df["travis_duration_days"] = None

    # Values of get_last_build_timestamp that are status markers, not timestamps.
    non_timestamp_results = ("No builds found", "Not Found", "Access forbidden")

    try:
        for index, row in df.iterrows():
            repo_url = row["url"]
            owner, repo = repo_url.rstrip("/").split("/")[-2:]
            print(f"Processing {owner}/{repo}...")

            repo_path = os.path.join(CLONE_DIR, owner, repo)

            if not os.path.exists(repo_path):
                print(f"Cloning {repo_url}...")
                try:
                    os.makedirs(os.path.dirname(repo_path), exist_ok=True)
                    Repo.clone_from(repo_url, repo_path)
                except Exception as e:
                    print(f"Failed to clone {repo_url}: {e}")
                    continue

            start_date = get_first_travis_commit(repo_path)
            end_date = get_last_build_timestamp(owner, repo)

            df.at[index, "travis_start_date"] = start_date
            df.at[index, "travis_end_date"] = end_date

            duration_days = None
            if start_date and end_date and end_date not in non_timestamp_results:
                try:
                    start_dt = datetime.strptime(start_date, "%Y-%m-%d %H:%M:%S")
                    end_dt = datetime.strptime(end_date, "%Y-%m-%dT%H:%M:%SZ")
                    duration_days = (end_dt - start_dt).days
                except ValueError:
                    # e.g. an "Error: ..." marker, which is not a timestamp
                    duration_days = None

            df.at[index, "travis_duration_days"] = duration_days

            print(f"Processed {repo}: Start={start_date}, End={end_date}, Duration={duration_days}")

            time.sleep(delay_seconds)
    finally:
        df.to_csv(output_file, index=False)
        print(f"Results saved to {output_file}")

# Run the Travis date extraction for row positions 0 to 122 of the Travis
# repository list and write them to the first partial result file.
# NOTE(review): absolute "/content/..." input path -- presumably a Colab run;
# adjust to the file actually produced by the previous step.
input_csv = "/content/travis_repositories_20250213_005251.csv"
output_csv = "repositories_with_travis_data_1.csv"

process_repositories(input_csv, output_csv, start=0, end=123)

Processing alibaba/nacos...
Cloning https://github.com/alibaba/nacos...
Checking Travis CI for alibaba/nacos - Status Code: 200
Processed nacos: Start=2018-07-20 16:27:23, End=2021-06-17T12:23:37Z, Duration=1062
Processing alibaba/canal...
Cloning https://github.com/alibaba/canal...
Checking Travis CI for alibaba/canal - Status Code: 200
Processed canal: Start=2019-03-21 08:16:31, End=2021-06-17T09:44:23Z, Duration=819
Processing alibaba/druid...
Cloning https://github.com/alibaba/druid...
Checking Travis CI for alibaba/druid - Status Code: 404
Processed druid: Start=2016-04-22 09:49:14, End=Not Found, Duration=None
Processing alibaba/fastjson...
Cloning https://github.com/alibaba/fastjson...
Checking Travis CI for alibaba/fastjson - Status Code: 404
Processed fastjson: Start=2016-04-16 06:01:01, End=Not Found, Duration=None
Processing apache/incubator-seata...
Cloning https://github.com/apache/incubator-seata...
Checking Travis CI for apache/incubator-seata - Status Code: 404
Processe

In [None]:
import os
import requests
import pandas as pd
from git import Repo
from datetime import datetime
import time

# Travis CI API token. Read from the TRAVIS_TOKEN environment variable when it
# is set so the secret does not have to be stored in the notebook; the original
# placeholder string remains the fallback.
TRAVIS_TOKEN = os.environ.get("TRAVIS_TOKEN", "your_actual_token")

# Headers sent with every Travis API request.
HEADERS = {
    "Travis-API-Version": "3",
    "Authorization": f"token {TRAVIS_TOKEN}"
}

# URL template for a repository's builds, limited to one build per response.
# The slash between owner and repo is written URL-encoded (%2F).
TRAVIS_API_URL = "https://api.travis-ci.com/repo/{owner}%2F{repo}/builds?limit=1"

# Directory that receives the cloned repositories; created on cell execution.
CLONE_DIR = "cloned_repos"
os.makedirs(CLONE_DIR, exist_ok=True)

def get_first_travis_commit(repo_path):
    """Return the commit time of the oldest commit that touched ``.travis.yml``.

    Args:
        repo_path: Path of a local git clone.

    Returns:
        The committed date of the last commit yielded by
        ``iter_commits(paths=".travis.yml")``, formatted in UTC as
        ``YYYY-MM-DD HH:MM:SS``. None when no commit touches the file or when
        reading the repository fails (the error is printed).
    """
    # Local import: the cell imports only `datetime` from the datetime module.
    from datetime import timezone

    try:
        repo = Repo(repo_path)
        commits = list(repo.iter_commits(paths=".travis.yml"))
        if commits:
            # The last element is used as the first commit of the file.
            # fromtimestamp(..., tz=utc) replaces the deprecated
            # datetime.utcfromtimestamp and yields the same UTC wall-clock text.
            first_commit_time = datetime.fromtimestamp(commits[-1].committed_date, tz=timezone.utc)
            return first_commit_time.strftime('%Y-%m-%d %H:%M:%S')
    except Exception as e:
        print(f"Error retrieving first .travis.yml commit for {repo_path}: {e}")
    return None

def get_last_build_timestamp(owner, repo):
    """Ask the Travis CI API for a repository's build and return its finish time.

    Args:
        owner: Repository owner.
        repo: Repository name.

    Returns:
        On HTTP 200, the ``finished_at`` value of the first build in the
        response (may be None), or ``"No builds found"`` when the list is
        empty. Otherwise a marker string: ``"Access forbidden"`` (403),
        ``"Not Found"`` (404), or ``"Error: <detail>"`` for any other status
        or for a failed request.
    """
    url = TRAVIS_API_URL.format(owner=owner, repo=repo)
    try:
        # Timeout plus explicit error handling: a single network failure used
        # to propagate and abort the whole batch before results were saved.
        response = requests.get(url, headers=HEADERS, timeout=30)
    except requests.RequestException as e:
        print(f"Error contacting Travis CI for {owner}/{repo}: {e}")
        return f"Error: {e}"

    print(f"Checking Travis CI for {owner}/{repo} - Status Code: {response.status_code}")
    if response.status_code == 200:
        builds = response.json().get("builds", [])
        if builds:
            return builds[0].get("finished_at")
        return "No builds found"

    status_markers = {403: "Access forbidden", 404: "Not Found"}
    return status_markers.get(response.status_code, f"Error: {response.status_code}")

def process_repositories(csv_file, output_file, start=0, end=None, delay_seconds=5):
    """Add Travis start/end dates and duration to a slice of a repository CSV.

    For every row in ``[start:end)`` the repository is cloned (unless a clone
    already exists), the first ``.travis.yml`` commit is looked up with
    ``get_first_travis_commit`` and the build finish time with
    ``get_last_build_timestamp``. The columns ``travis_start_date``,
    ``travis_end_date`` and ``travis_duration_days`` are filled in and the
    slice is written to ``output_file``.

    Args:
        csv_file: Input CSV; must contain a ``url`` column.
        output_file: Path of the CSV to write.
        start: First row position (inclusive) to process.
        end: Row position (exclusive) at which to stop; None means "to the
            end of the file".
        delay_seconds: Pause after each processed repository.

    Notes:
        * ``start`` is honoured even when ``end`` is None. The previous
          ``if end:`` guard skipped the slicing entirely in that case, so
          ``start`` was silently ignored.
        * Clones live under ``CLONE_DIR/<owner>/<repo>``. Keying the directory
          by repository name alone made two same-named repositories from
          different owners share one clone, so the second one got the first
          one's start date. Clones made under the old layout are not reused.
        * The CSV is written in a ``finally`` block, so rows processed before
          an interruption or an unexpected error are still saved.
    """
    # iloc accepts end=None; .copy() gives an independent frame, not a slice.
    df = pd.read_csv(csv_file).iloc[start:end].copy()
    df["travis_start_date"] = None
    df["travis_end_date"] = None
    df["travis_duration_days"] = None

    # Values of get_last_build_timestamp that are status markers, not timestamps.
    non_timestamp_results = ("No builds found", "Not Found", "Access forbidden")

    try:
        for index, row in df.iterrows():
            repo_url = row["url"]
            owner, repo = repo_url.rstrip("/").split("/")[-2:]
            print(f"Processing {owner}/{repo}...")
            repo_path = os.path.join(CLONE_DIR, owner, repo)

            if not os.path.exists(repo_path):
                print(f"Cloning {repo_url}...")
                try:
                    os.makedirs(os.path.dirname(repo_path), exist_ok=True)
                    Repo.clone_from(repo_url, repo_path)
                except Exception as e:
                    print(f"Failed to clone {repo_url}: {e}")
                    continue
            start_date = get_first_travis_commit(repo_path)
            end_date = get_last_build_timestamp(owner, repo)

            df.at[index, "travis_start_date"] = start_date
            df.at[index, "travis_end_date"] = end_date

            duration_days = None
            if start_date and end_date and end_date not in non_timestamp_results:
                try:
                    start_dt = datetime.strptime(start_date, "%Y-%m-%d %H:%M:%S")
                    end_dt = datetime.strptime(end_date, "%Y-%m-%dT%H:%M:%SZ")
                    duration_days = (end_dt - start_dt).days
                except ValueError:
                    # e.g. an "Error: ..." marker, which is not a timestamp
                    duration_days = None

            df.at[index, "travis_duration_days"] = duration_days

            print(f"Processed {repo}: Start={start_date}, End={end_date}, Duration={duration_days}")
            time.sleep(delay_seconds)
    finally:
        df.to_csv(output_file, index=False)
        print(f"Results saved to {output_file}")

# Run the Travis date extraction for row positions 123 to 182 of the Travis
# repository list and write them to the second partial result file.
# NOTE(review): absolute "/content/..." input path -- presumably a Colab run;
# adjust to the file actually produced by the previous step.
input_csv = "/content/travis_repositories_20250213_005251.csv"
output_csv = "repositories_with_travis_data_2.csv"

process_repositories(input_csv, output_csv, start=123, end=183)

Processing clips/pattern...
Cloning https://github.com/clips/pattern...
Checking Travis CI for clips/pattern - Status Code: 404
Processed pattern: Start=2017-06-04 17:27:08, End=Not Found, Duration=None
Processing n1nj4sec/pupy...
Cloning https://github.com/n1nj4sec/pupy...
Checking Travis CI for n1nj4sec/pupy - Status Code: 404
Processed pupy: Start=2017-03-11 07:16:43, End=Not Found, Duration=None
Processing AirtestProject/Airtest...
Cloning https://github.com/AirtestProject/Airtest...
Checking Travis CI for AirtestProject/Airtest - Status Code: 404
Processed Airtest: Start=2018-03-29 16:02:01, End=Not Found, Duration=None
Processing vaexio/vaex...
Cloning https://github.com/vaexio/vaex...
Checking Travis CI for vaexio/vaex - Status Code: 404
Processed vaex: Start=2015-10-20 18:57:02, End=Not Found, Duration=None
Processing google/trax...
Cloning https://github.com/google/trax...
Checking Travis CI for google/trax - Status Code: 200
Processed trax: Start=2020-04-25 06:03:27, End=No b

In [None]:
import os
import requests
import pandas as pd
from git import Repo
from datetime import datetime
import time

# Travis CI API token. Read from the TRAVIS_TOKEN environment variable when it
# is set so the secret does not have to be stored in the notebook; the original
# placeholder string remains the fallback.
TRAVIS_TOKEN = os.environ.get("TRAVIS_TOKEN", "your_actual_token")

# Headers sent with every Travis API request.
HEADERS = {
    "Travis-API-Version": "3",
    "Authorization": f"token {TRAVIS_TOKEN}"
}

# URL template for a repository's builds, limited to one build per response.
# The slash between owner and repo is written URL-encoded (%2F).
TRAVIS_API_URL = "https://api.travis-ci.com/repo/{owner}%2F{repo}/builds?limit=1"

# Directory that receives the cloned repositories; created on cell execution.
CLONE_DIR = "cloned_repos"
os.makedirs(CLONE_DIR, exist_ok=True)

def get_first_travis_commit(repo_path):
    """Return the commit time of the oldest commit that touched ``.travis.yml``.

    Args:
        repo_path: Path of a local git clone.

    Returns:
        The committed date of the last commit yielded by
        ``iter_commits(paths=".travis.yml")``, formatted in UTC as
        ``YYYY-MM-DD HH:MM:SS``. None when no commit touches the file or when
        reading the repository fails (the error is printed).
    """
    # Local import: the cell imports only `datetime` from the datetime module.
    from datetime import timezone

    try:
        repo = Repo(repo_path)
        commits = list(repo.iter_commits(paths=".travis.yml"))
        if commits:
            # The last element is used as the first commit of the file.
            # fromtimestamp(..., tz=utc) replaces the deprecated
            # datetime.utcfromtimestamp and yields the same UTC wall-clock text.
            first_commit_time = datetime.fromtimestamp(commits[-1].committed_date, tz=timezone.utc)
            return first_commit_time.strftime('%Y-%m-%d %H:%M:%S')
    except Exception as e:
        print(f"Error retrieving first .travis.yml commit for {repo_path}: {e}")
    return None

def get_last_build_timestamp(owner, repo):
    """Ask the Travis CI API for a repository's build and return its finish time.

    Args:
        owner: Repository owner.
        repo: Repository name.

    Returns:
        On HTTP 200, the ``finished_at`` value of the first build in the
        response (may be None), or ``"No builds found"`` when the list is
        empty. Otherwise a marker string: ``"Access forbidden"`` (403),
        ``"Not Found"`` (404), or ``"Error: <detail>"`` for any other status
        or for a failed request.
    """
    url = TRAVIS_API_URL.format(owner=owner, repo=repo)
    try:
        # Timeout plus explicit error handling: a single network failure used
        # to propagate and abort the whole batch before results were saved.
        response = requests.get(url, headers=HEADERS, timeout=30)
    except requests.RequestException as e:
        print(f"Error contacting Travis CI for {owner}/{repo}: {e}")
        return f"Error: {e}"

    print(f"Checking Travis CI for {owner}/{repo} - Status Code: {response.status_code}")

    if response.status_code == 200:
        builds = response.json().get("builds", [])
        if builds:
            return builds[0].get("finished_at")
        return "No builds found"

    status_markers = {403: "Access forbidden", 404: "Not Found"}
    return status_markers.get(response.status_code, f"Error: {response.status_code}")

def process_repositories(csv_file, output_file, start=0, end=None, delay_seconds=5):
    """Add Travis start/end dates and duration to a slice of a repository CSV.

    For every row in ``[start:end)`` the repository is cloned (unless a clone
    already exists), the first ``.travis.yml`` commit is looked up with
    ``get_first_travis_commit`` and the build finish time with
    ``get_last_build_timestamp``. The columns ``travis_start_date``,
    ``travis_end_date`` and ``travis_duration_days`` are filled in and the
    slice is written to ``output_file``.

    Args:
        csv_file: Input CSV; must contain a ``url`` column.
        output_file: Path of the CSV to write.
        start: First row position (inclusive) to process.
        end: Row position (exclusive) at which to stop; None means "to the
            end of the file".
        delay_seconds: Pause after each processed repository.

    Notes:
        * ``start`` is honoured even when ``end`` is None. The previous
          ``if end:`` guard skipped the slicing entirely in that case, so
          ``start`` was silently ignored.
        * Clones live under ``CLONE_DIR/<owner>/<repo>``. Keying the directory
          by repository name alone made two same-named repositories from
          different owners share one clone, so the second one got the first
          one's start date. Clones made under the old layout are not reused.
        * The CSV is written in a ``finally`` block, so rows processed before
          an interruption or an unexpected error are still saved.
    """
    # iloc accepts end=None; .copy() gives an independent frame, not a slice.
    df = pd.read_csv(csv_file).iloc[start:end].copy()
    df["travis_start_date"] = None
    df["travis_end_date"] = None
    df["travis_duration_days"] = None

    # Values of get_last_build_timestamp that are status markers, not timestamps.
    non_timestamp_results = ("No builds found", "Not Found", "Access forbidden")

    try:
        for index, row in df.iterrows():
            repo_url = row["url"]
            owner, repo = repo_url.rstrip("/").split("/")[-2:]
            print(f"Processing {owner}/{repo}...")
            repo_path = os.path.join(CLONE_DIR, owner, repo)

            if not os.path.exists(repo_path):
                print(f"Cloning {repo_url}...")
                try:
                    os.makedirs(os.path.dirname(repo_path), exist_ok=True)
                    Repo.clone_from(repo_url, repo_path)
                except Exception as e:
                    print(f"Failed to clone {repo_url}: {e}")
                    continue

            start_date = get_first_travis_commit(repo_path)
            end_date = get_last_build_timestamp(owner, repo)

            df.at[index, "travis_start_date"] = start_date
            df.at[index, "travis_end_date"] = end_date

            duration_days = None
            if start_date and end_date and end_date not in non_timestamp_results:
                try:
                    start_dt = datetime.strptime(start_date, "%Y-%m-%d %H:%M:%S")
                    end_dt = datetime.strptime(end_date, "%Y-%m-%dT%H:%M:%SZ")
                    duration_days = (end_dt - start_dt).days
                except ValueError:
                    # e.g. an "Error: ..." marker, which is not a timestamp
                    duration_days = None

            df.at[index, "travis_duration_days"] = duration_days
            print(f"Processed {repo}: Start={start_date}, End={end_date}, Duration={duration_days}")
            time.sleep(delay_seconds)
    finally:
        df.to_csv(output_file, index=False)
        print(f"Results saved to {output_file}")

# Run the Travis date extraction for row positions 200 to 245 of the Travis
# repository list and write them to the third partial result file.
# NOTE(review): the previous run in this notebook stopped at position 183, so
# positions 183-199 are not covered by the calls visible here -- confirm.
# NOTE(review): absolute "/content/..." input path -- presumably a Colab run.
input_csv = "/content/travis_repositories_20250213_005251.csv"
output_csv = "repositories_with_travis_data_3.csv"

process_repositories(input_csv, output_csv, start=200, end=246)

Processing openframeworks/openFrameworks...
Cloning https://github.com/openframeworks/openFrameworks...
Checking Travis CI for openframeworks/openFrameworks - Status Code: 200
Processed openFrameworks: Start=2015-01-12 19:51:26, End=No builds found, Duration=None
Processing zeromq/libzmq...
Cloning https://github.com/zeromq/libzmq...
Checking Travis CI for zeromq/libzmq - Status Code: 200
Processed libzmq: Start=2013-05-21 07:32:13, End=2021-08-26T17:53:56Z, Duration=3019
Processing opencv/opencv_contrib...
Cloning https://github.com/opencv/opencv_contrib...
Checking Travis CI for opencv/opencv_contrib - Status Code: 404
Processed opencv_contrib: Start=2014-03-20 19:07:29, End=Not Found, Duration=None
Processing Studio3T/robomongo...
Cloning https://github.com/Studio3T/robomongo...
Checking Travis CI for Studio3T/robomongo - Status Code: 404
Processed robomongo: Start=2019-01-02 07:21:30, End=Not Found, Duration=None
Processing mamedev/mame...
Cloning https://github.com/mamedev/mame...

KeyboardInterrupt: 

In [None]:
import pandas as pd

# Partial result files, in the order they are stacked.
# NOTE(review): these names differ from the output names used by the
# process_repositories calls in this notebook ("..._1.csv", "..._2.csv",
# "..._3.csv") -- presumably the files were renamed by hand; confirm.
file_1, file_2, file_3, file_4, file_5 = (
    "repositories_with_travis_data_1-123.csv",
    "repositories_with_travis_data_123-140.csv",
    "repositories_with_travis_data_140-150.csv",
    "repositories_with_travis_data_150-183.csv",
    "repositories_with_travis_data_183-246.csv",
)

# Load every part (read in the same order as listed above).
df1, df2, df3, df4, df5 = (
    pd.read_csv(part_path)
    for part_path in (file_1, file_2, file_3, file_4, file_5)
)

# Stack the parts under a fresh 0..n-1 index and persist the result.
combined_df = pd.concat([df1, df2, df3, df4, df5], ignore_index=True)
combined_df.to_csv("combined_repositories_with_travis_data.csv", index=False)

print("Combined CSV saved as 'combined_repositories_with_travis_data.csv'")

Combined CSV saved as 'combined_repositories_with_travis_data.csv'


In [None]:
import pandas as pd

# Keep only the repositories whose Travis duration is strictly positive.
# Rows with a missing duration compare as False and are dropped as well.
csv_path = "/content/combined_repositories_with_travis_data.csv"
df = pd.read_csv(csv_path)

filtered_df = df.loc[df["travis_duration_days"].gt(0)]

filtered_df.to_csv("filtered_file.csv", index=False)

print("Filtered data saved successfully!")

Filtered data saved successfully!


In [None]:
import pandas as pd

# Same filter as the previous cell (strictly positive Travis duration), followed
# by the number of surviving rows, printed twice via len() and .shape[0].
csv_path = "/content/combined_repositories_with_travis_data.csv"
df = pd.read_csv(csv_path)
filtered_df = df.loc[df["travis_duration_days"].gt(0)]

filtered_df.to_csv("filtered_file.csv", index=False)

print("Filtered data saved successfully!")
print("Total rows:", len(filtered_df))
print("Total rows:", filtered_df.shape[0])

Filtered data saved successfully!
Total rows: 70
Total rows: 70
