In [2]:
# NOTE: original import order is kept on purpose - the wildcard imports below
# make name shadowing order-sensitive, so only `import os` is added.
import os
from collections import defaultdict
from GrabReleaseCommits import *
from StarHistory import get_star_data, format_dates
from GetRepoFromDataset import *
from CodeCovReport import get_codecov_all_builds, detect_coverage_tool_usage, get_coverall_all_builds
import numpy as np
from datetime import datetime
import pandas as pd
from Refined_Data_Set import *

In [None]:
# Build the list of ranked GitHub repositories for which a coverage tool is detected.
#
# The Codecov API token is a secret: read it from the environment instead of
# embedding it in the notebook (where it would be committed and shared).
token = os.environ.get("CODECOV_API_TOKEN")
if not token:
    raise RuntimeError("Set the CODECOV_API_TOKEN environment variable before running this cell.")

github_repos = filter_github_repos('../data/github-ranking-2024-02-15.csv')
refinedRepo = list()
for repo in github_repos:
    # Each row is unpacked positionally: owner, repository name, language.
    username, repo_name, language = repo[0], repo[1], repo[2]
    repo_info = detect_coverage_tool_usage("github", username, repo_name, token, language)
    # A None result is skipped; everything else is kept for the refined dataset.
    if repo_info is not None:
        print(repo_info)
        refinedRepo.append(repo_info)

print(refinedRepo)
print(len(refinedRepo))

In [2]:
# Use the refined dataset that has already been saved, so the rest of the
# notebook works from `Refined_DataSet` without rebuilding it.
# NOTE(review): `final_data_set` is not defined in this notebook - presumably it
# comes from the `Refined_Data_Set` wildcard import; confirm.
Refined_DataSet = final_data_set()

In [3]:
def append_data_to_csv(data, csv_filename):
    """Append rows to ``csv_filename``, creating the file when it is missing or empty.

    Parameters
    ----------
    data : list of rows
        Each row supplies, in order: Username, Repository, Percentage, Hash,
        Timestamp, Language, Star_List.
    csv_filename : str or path-like
        CSV file to extend. If it exists, its content is loaded, the new rows
        are concatenated after it and the whole file is rewritten.

    Notes
    -----
    Any ``Star_List`` value that is not a ``list`` is replaced by the
    two-element placeholder ``['', '']`` before writing.
    """
    columns = ['Username', 'Repository', 'Percentage', 'Hash', 'Timestamp', 'Language', 'Star_List']

    # Build and normalise the new rows once, whichever branch is taken below.
    df_new = pd.DataFrame(data, columns=columns)
    df_new['Star_List'] = df_new['Star_List'].apply(
        lambda stars: stars if isinstance(stars, list) else ['', '']
    )

    try:
        existing_data = pd.read_csv(csv_filename)
    except FileNotFoundError:
        print("CSV file not found. Creating a new CSV file...")
        existing_data = None
    except pd.errors.EmptyDataError:
        # The file exists but has no header/content (e.g. a zero-byte file);
        # treat it like a missing file instead of crashing.
        print("CSV file is empty. Writing the header and the new data...")
        existing_data = None

    if existing_data is None:
        df_new.to_csv(csv_filename, index=False)
        print(f"New CSV file '{csv_filename}' created with the new data.")
        return

    # Append the new data to the existing data and rewrite the file.
    combined_data = pd.concat([existing_data, df_new], ignore_index=True)
    combined_data.to_csv(csv_filename, index=False)
    print(f"New data appended to '{csv_filename}' successfully.")

In [4]:
def uniform_distribution(coverage_list, sample_size=50):
    """Down-sample ``coverage_list`` to at most ``sample_size`` evenly spaced items.

    Parameters
    ----------
    coverage_list : sequence
        Items to sample from; must support ``len`` and integer indexing.
    sample_size : int, optional
        Maximum number of items to keep (default 50, the value previously
        hard-coded).

    Returns
    -------
    list or the original sequence
        ``coverage_list`` itself (same object) when it already has
        ``sample_size`` items or fewer; otherwise a new list of ``sample_size``
        items taken at evenly spaced positions, always including the first and
        last item.
    """
    if len(coverage_list) <= sample_size:
        return coverage_list

    # Evenly spaced positions from the first to the last index; dtype=int
    # truncates the fractional positions.
    uniform_indices = np.linspace(0, len(coverage_list) - 1, sample_size, dtype=int)
    return [coverage_list[i] for i in uniform_indices]

In [None]:
# Collect the coverage history and matching star counts for every repository in
# the refined dataset, then append a down-sampled history to the output CSV.
#
# The Codecov API token is a secret: read it from the environment instead of
# embedding it in the notebook.
codecov_API_token = os.environ.get("CODECOV_API_TOKEN")
if not codecov_API_token:
    raise RuntimeError("Set the CODECOV_API_TOKEN environment variable before running this cell.")

OUTPUT_CSV = 'Feb20output.csv'


def _attach_stars_and_save(report, username, repo_name, csv_filename):
    """Attach star data to each build row of ``report`` and append the result to the CSV.

    ``report`` is a list of mutable build rows whose element at index 4 is the
    build date. Rows are extended in place with their star entry when that
    entry's second element is not None. Only rows that contain a list element
    are kept, the result is down-sampled with ``uniform_distribution`` and
    written with ``append_data_to_csv``.
    """
    reformat_dates_list = [format_dates(build[4]) for build in report]
    star_history = get_star_data(username, repo_name, reformat_dates_list)
    if star_history is None:  # if error occurs in star history then skip the repo
        print("Star history has a problem")
        return

    # zip() instead of index access: a star list shorter than the report no
    # longer raises IndexError; rows left without a star entry are dropped by
    # the filter below.
    # NOTE(review): assumes get_star_data returns one entry per requested date,
    # in the same order - confirm.
    for build, stars in zip(report, star_history):
        if stars[1] is not None:
            build.append(stars)

    filtered_data = [item for item in report if any(isinstance(elem, list) for elem in item)]
    append_data_to_csv(uniform_distribution(filtered_data), csv_filename)


for repo in Refined_DataSet:
    username, repo_name, codecov_used, coverall_used, language = repo[1], repo[2], repo[3], repo[4], repo[5]
    if codecov_used:
        print(f"CodeCov used {username}/{repo_name}")
        print(f"https://api.codecov.io/api/v2/github/{username}/repos/{repo_name}/commits/?page=1")
        codecov_report = get_codecov_all_builds('github', username, repo_name, codecov_API_token, language)
        if codecov_report is None:
            print("CodeCov report has a problem")
            continue
        _attach_stars_and_save(codecov_report, username, repo_name, OUTPUT_CSV)
    elif coverall_used:
        print(f"Coverall used {username}/{repo_name}")
        print(f"https://coveralls.io/github/{username}/{repo_name}.json?page=1")
        coverall = get_coverall_all_builds('github', username, repo_name, language)
        # Mirror the Codecov guard: iterating a None report would raise TypeError.
        if coverall is None:
            print("Coverall report has a problem")
            continue
        _attach_stars_and_save(coverall, username, repo_name, OUTPUT_CSV)
    else:
        print("No coverage tool used")

In [3]:
# Load the collected results and print every distinct value of the
# 'Username' column, one per line.
df = pd.read_csv('../data/test_output.csv')

# Get all unique usernames (previously stored under the misleading name
# `unique_timestamps`).
unique_usernames = df['Username'].unique()

for owner in unique_usernames:
    print(owner)

facebook
vuejs
trekhleb
flutter
Significant-Gravitas
avelino
nodejs
mui
circe
Hammerspoon
php
sorich87
apache
mozilla-mobile
valyala
airbnb
JuliaDynamics
TabbyML
GopeedLab
alexjoverm
cesanta
date-fns
diem
mockk
JuliaPy
slackapi
SciML
tqdm
pluskid
certbot
rmanguinho
dapr
ash-project
JohnCoates
zio
koreader
QuantumBFS
containers
jedisct1
best-flutter
JuliaCollections
starship
cube-js
grafana
wireapp
tikv
skylot
BetterErrors
JuliaStats
alibaba
milvus-io
allegro
systemd
JetBrains
bitwarden
jump-dev
gorilla
tsenart
MakieOrg
JuliaArrays
bumptech
slackhq
iSoron
axios
PostgREST
jogboms
YiiGuxing
keepassxreboot
HeroTransitions
finagle
swc-project
JuliaWeb
nightscout
tlienart
activeadmin
fossas
railsadminteam
LightTable
typeorm
HangfireIO
isar
tekartik
wez
QuantEcon
JuliaReinforcementLearning
xi-editor
pnpm
httpie
malmaud
esp8266
mitmproxy
pola-rs
srs
cabol
toptal
pry
kubernetes
react-boilerplate
abpframework
tgstation
brettwooldridge
firecracker-microvm
phoenixframework
FluxML
JuliaCN
