In [2]:
import os
from collections import defaultdict
from GrabReleaseCommits import *
from StarHistory import get_star_data, format_dates
from GetRepoFromDataset import *
from CodeCovReport import get_codecov_all_builds, detect_coverage_tool_usage, get_coverall_all_builds
import numpy as np
from datetime import datetime
import pandas as pd
from Refined_Data_Set import *

In [None]:
# Scan the ranked GitHub repositories and keep those for which
# detect_coverage_tool_usage() returns a result.
#
# The Codecov API token is read from the environment instead of being embedded
# in the notebook: a committed token is readable by anyone with access to the
# file. The token that used to be hard-coded here should be revoked.
token = os.environ.get("CODECOV_API_TOKEN")
if not token:
    raise RuntimeError("Set the CODECOV_API_TOKEN environment variable before running this cell.")

github_repos = filter_github_repos('../data/github-ranking-2024-02-15.csv')
refinedRepo = list()
for repo in github_repos:
    # Each entry is read positionally: owner, repository name, language.
    username, repo_name, language = repo[0], repo[1], repo[2]
    repo_info = detect_coverage_tool_usage("github", username, repo_name, token, language)
    # None means the helper returned nothing for this repository; skip it.
    if repo_info is not None:
        print(repo_info)
        refinedRepo.append(repo_info)

print(refinedRepo)
print(len(refinedRepo))

In [2]:
# Use the refined data set that was saved earlier.
# NOTE(review): final_data_set() arrives through a star import (presumably
# from Refined_Data_Set) -- confirm where it is defined and what it loads.
# Later cells read each entry positionally (repo[1] .. repo[5]).
Refined_DataSet = final_data_set()

In [3]:
def append_data_to_csv(data, csv_filename):
    """Append build rows to a CSV file, creating the file if necessary.

    Parameters
    ----------
    data : list of row sequences
        Each row supplies, in order: Username, Repository, Percentage, Hash,
        Timestamp, Language, Star_List.
    csv_filename : str
        Path of the CSV file to extend (or create).

    Notes
    -----
    Any ``Star_List`` value that is not a list is replaced with the
    placeholder ``['', '']`` before writing.

    When the file already exists, it is read in full, the new rows are
    concatenated after the existing ones, and the file is rewritten.
    A missing file, or an existing file with no content at all, results in a
    fresh file containing only the new rows.
    """
    columns = ['Username', 'Repository', 'Percentage', 'Hash', 'Timestamp', 'Language', 'Star_List']

    # Build and normalise the new rows once; both the "append" and the
    # "create" paths write exactly this frame.
    df_new = pd.DataFrame(data, columns=columns)
    df_new['Star_List'] = df_new['Star_List'].apply(lambda x: x if isinstance(x, list) else ['', ''])

    try:
        existing_data = pd.read_csv(csv_filename)
    except FileNotFoundError:
        print("CSV file not found. Creating a new CSV file...")
        existing_data = None
    except pd.errors.EmptyDataError:
        # The file exists but has no header or rows (e.g. a zero-byte file left
        # by an interrupted run); there is nothing to append to.
        existing_data = None

    if existing_data is None:
        df_new.to_csv(csv_filename, index=False)
        print(f"New CSV file '{csv_filename}' created with the new data.")
        return

    combined_data = pd.concat([existing_data, df_new], ignore_index=True)
    combined_data.to_csv(csv_filename, index=False)
    print(f"New data appended to '{csv_filename}' successfully.")

In [4]:
def uniform_distribution(coverage_list, max_samples=50):
    """Down-sample a sequence to at most ``max_samples`` evenly spaced items.

    Parameters
    ----------
    coverage_list : sequence
        Items to thin out; must support ``len()`` and integer indexing.
    max_samples : int, optional
        Upper bound on the number of items returned (default 50).

    Returns
    -------
    sequence
        ``coverage_list`` itself (not a copy) when it already holds
        ``max_samples`` items or fewer; otherwise a new list of
        ``max_samples`` items in their original order.

    Raises
    ------
    ValueError
        If ``max_samples`` is negative.
    """
    if max_samples < 0:
        raise ValueError("max_samples must be non-negative")
    if len(coverage_list) <= max_samples:
        return coverage_list

    # Evenly spaced integer positions from the first to the last element.
    # The spacing is greater than one here, so truncating to int never
    # produces the same index twice.
    uniform_indices = np.linspace(0, len(coverage_list) - 1, max_samples, dtype=int)
    return [coverage_list[i] for i in uniform_indices]

In [None]:
# The Codecov API token is read from the environment instead of being embedded
# in the notebook: a committed token is readable by anyone with access to the
# file. The token that used to be hard-coded here should be revoked.
codecov_API_token = os.environ.get("CODECOV_API_TOKEN")
if not codecov_API_token:
    raise RuntimeError("Set the CODECOV_API_TOKEN environment variable before running this cell.")


def _attach_star_history(username, repo_name, builds):
    """Append star data to each build row and return the rows that received it.

    ``builds`` is a list of mutable rows whose element 4 is the build
    timestamp. Rows are modified in place. Returns None when get_star_data()
    returns None, otherwise the rows that contain at least one list element
    (i.e. those that had a star entry appended).
    """
    build_dates = [format_dates(build[4]) for build in builds]
    star_history = get_star_data(username, repo_name, build_dates)
    if star_history is None:
        return None
    # zip() pairs each build with its star entry and stops at the shorter
    # sequence, so a short star history cannot raise IndexError; builds left
    # without an entry are dropped by the filter below.
    for build, stars in zip(builds, star_history):
        if stars[1] is not None:
            build.append(stars)
    return [build for build in builds if any(isinstance(elem, list) for elem in build)]


for repo in Refined_DataSet:
    username, repo_name, codecov_used, coverall_used, language = repo[1], repo[2], repo[3], repo[4], repo[5]

    # Fetch the build list from whichever coverage service the repo uses;
    # Codecov takes precedence when both flags are set.
    if codecov_used:
        print(f"CodeCov used {username}/{repo_name}")
        print(f"https://api.codecov.io/api/v2/github/{username}/repos/{repo_name}/commits/?page=1")
        builds = get_codecov_all_builds('github', username, repo_name, codecov_API_token, language)
        tool_label = "CodeCov"
    elif coverall_used:
        print(f"Coverall used {username}/{repo_name}")
        print(f"https://coveralls.io/github/{username}/{repo_name}.json?page=1")
        builds = get_coverall_all_builds('github', username, repo_name, language)
        tool_label = "Coverall"
    else:
        print("No coverage tool used")
        continue

    # Both services are checked the same way: a None report means the fetch
    # failed, so the repository is skipped instead of crashing on iteration.
    if builds is None:
        print(f"{tool_label} report has a problem")
        continue

    rows_with_stars = _attach_star_history(username, repo_name, builds)
    if rows_with_stars is None:  # star history lookup failed; skip the repo
        print("Star history has a problem")
        continue

    uniform_final = uniform_distribution(rows_with_stars)
    append_data_to_csv(uniform_final, 'Feb20output.csv')

In [10]:
#df = pd.read_csv('Feb20output1.csv')
df = pd.read_csv('../data/test_output.csv')

# Distinct values of the Timestamp column, sorted ascending.
unique_timestamps = np.sort(df['Timestamp'].unique())

# The first seven characters of each timestamp string ("YYYY-MM" in the
# output recorded below this cell).
month = [stamp[:7] for stamp in unique_timestamps]

for stamp in unique_timestamps:
    print(stamp)

2013-08-29T21:09:22Z
2013-09-04T14:26:01Z
2013-09-09T16:22:01Z
2013-09-16T16:59:04Z
2013-09-25T00:12:41Z
2013-10-11T17:15:15Z
2013-10-15T14:36:38Z
2013-11-06T20:37:31Z
2013-11-11T20:13:44Z
2013-12-02T04:05:05Z
2013-12-06T17:19:06Z
2014-01-13T20:15:25Z
2014-01-31T17:21:00Z
2014-02-02T15:00:51Z
2014-02-04T00:01:30Z
2014-02-10T05:56:25Z
2014-02-27T19:59:03Z
2014-03-03T21:08:46Z
2014-03-07T03:53:59Z
2014-03-19T10:31:12Z
2014-03-20T15:24:13Z
2014-03-31T18:31:19Z
2014-05-14T17:21:26Z
2014-06-16T09:21:53Z
2014-09-26T10:26:29Z
2014-11-03T19:03:49Z
2014-11-07T21:14:47Z
2014-11-12T16:07:32Z
2014-11-17T15:09:06Z
2014-11-21T17:09:37Z
2014-11-28T20:28:10Z
2014-11-30T23:01:11Z
2014-12-02T20:37:27Z
2014-12-03T21:20:23Z
2014-12-05T22:20:28Z
2014-12-06T06:13:41Z
2014-12-09T11:50:01Z
2014-12-09T18:02:53Z
2014-12-10T17:20:03Z
2014-12-16T04:46:06Z
2014-12-16T10:17:02Z
2014-12-16T18:05:43Z
2014-12-17T15:35:06Z
2014-12-19T15:48:03Z
2014-12-20T08:27:09Z
2014-12-23T15:53:00Z
2014-12-26T21:27:56Z
2014-12-27T15