## Get List of Simulation Repositories

In [59]:
import matplotlib.pyplot as plt
import requests
import csv

def fetch_repository_information(query):
    url = "https://api.github.com/search/repositories"
    params = {
        "q": query,
        "per_page": 100,
        "sort": "stars",
        "order": "desc"
    }
    headers = {
        "Accept": "application/vnd.github.v3+json"
    }
    response = requests.get(url, params=params, headers=headers)

    repo_info = []

    if response.status_code == 200:
        last_page = 20
        for page in range(1,last_page):
            if response.status_code != 200:
                print("Disconnected at page ", page)
                break

            data = response.json()['items']

            repo_info.append(data)
            if page < last_page:
                print(response.links)
                if  not 'next' in response.links:
                    print("No Next Page after page ", page)
                    break
                next_url = response.links['next']['url']
                response = requests.get(next_url)
            else:
                break

        # print(repo_info)
        return repo_info
    else:
        print(f"Failed to fetch repository information. Status code: {response.status_code}")
        return None

def write_to_csv(repository_list, csv_filename):
    if not repository_list:
        print("No repository data to write.")
        return

    fieldnames = [
        "ID", "Name","URL", "IsFork", "Commits", "Branches", "Releases", "Forks", "MainLanguage",
        "DefaultBranch", "License", "Homepage", "Watchers", "Stargazers", "Contributors",
        "Size", "CreatedAt", "PushedAt", "UpdatedAt", "TotalIssues", "OpenIssues",
        "TotalPullRequests", "OpenPullRequests",   "HasWiki", "IsArchived", "IsDisabled", "Topics"
    ]

    with open(csv_filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        for page in repository_list:
            for repo in page:
                writer.writerow({
                    "ID": repo["id"],
                    "Name": repo["name"],
                    "URL": repo["html_url"],
                    "IsFork": repo["fork"],
                    "Commits": repo["commits_url"],
                    "Branches": repo["branches_url"],
                    "Releases": repo["releases_url"],
                    "Forks": repo["forks_count"],
                    "MainLanguage": repo["language"],
                    "DefaultBranch": repo["default_branch"],
                    "License": repo["license"]["name"] if repo["license"] else None,
                    "Homepage": repo["homepage"],
                    "Watchers": repo["watchers_count"],
                    "Stargazers": repo["stargazers_count"],
                    "Contributors":  len(repo["contributors_url"]) if "contributors_url" in repo else 0,
                    "CreatedAt": repo["created_at"],
                    "PushedAt": repo["pushed_at"],
                    "UpdatedAt": repo["updated_at"],
                    "TotalIssues": repo["open_issues_count"],
                    "OpenIssues": repo["open_issues_count"],
                    "TotalPullRequests": repo["pulls_url"],
                    "OpenPullRequests": repo["pulls_url"],
                    "HasWiki": repo["has_wiki"],
                    "IsArchived": repo["archived"],
                    "IsDisabled": repo["disabled"],
                    "Topics": repo["topics"] if "topics" in repo else None
                })

search_query = "language:java topic:simulation"

# Fetching repository information
repositories = fetch_repository_information(search_query)

# Writing repository information to a CSV file
csv_filename = "data/java-sims.csv"
write_to_csv(repositories, csv_filename)

print(f"Repository information has been written to '{csv_filename}'.")

{'next': {'url': 'https://api.github.com/search/repositories?q=language%3Ajava+topic%3Asimulation&per_page=100&sort=stars&order=desc&page=2', 'rel': 'next'}, 'last': {'url': 'https://api.github.com/search/repositories?q=language%3Ajava+topic%3Asimulation&per_page=100&sort=stars&order=desc&page=10', 'rel': 'last'}}
{'prev': {'url': 'https://api.github.com/search/repositories?q=language%3Ajava+topic%3Asimulation&per_page=100&sort=stars&order=desc&page=1', 'rel': 'prev'}, 'next': {'url': 'https://api.github.com/search/repositories?q=language%3Ajava+topic%3Asimulation&per_page=100&sort=stars&order=desc&page=3', 'rel': 'next'}, 'last': {'url': 'https://api.github.com/search/repositories?q=language%3Ajava+topic%3Asimulation&per_page=100&sort=stars&order=desc&page=10', 'rel': 'last'}, 'first': {'url': 'https://api.github.com/search/repositories?q=language%3Ajava+topic%3Asimulation&per_page=100&sort=stars&order=desc&page=1', 'rel': 'first'}}
{'prev': {'url': 'https://api.github.com/search/repo

## Get List of Traditional Repositories

In [64]:
import matplotlib.pyplot as plt
import requests
import csv

def fetch_repository_information(query):
    url = "https://api.github.com/search/repositories"
    params = {
        "q": query,
        "per_page": 100,
        "sort": "stars",
        "order": "desc"
    }
    headers = {
        "Accept": "application/vnd.github.v3+json"
    }
    response = requests.get(url, params=params, headers=headers)

    repo_info = []

    if response.status_code == 200:
        last_page = 20
        for page in range(1,last_page):
            if response.status_code != 200:
                print("Disconnected at page ", page)
                break

            data = response.json()['items']

            repo_info.append(data)
            if page < last_page:
                print(response.links)
                if  not 'next' in response.links:
                    print("No Next Page after page ", page)
                    break
                next_url = response.links['next']['url']
                response = requests.get(next_url)
            else:
                break

        # print(repo_info)
        return repo_info
    else:
        print(f"Failed to fetch repository information. Status code: {response.status_code}")
        return None
    
def write_to_csv(repository_list, csv_name):
    if not repository_list:
        print("No repository data to write.")
        return

    fieldnames = [
        "ID", "Name", "URL", "IsFork", "Commits", "Branches", "Releases", "Forks", "MainLanguage",
        "DefaultBranch", "License", "Homepage", "Watchers", "Stargazers", "Contributors",
        "Size", "CreatedAt", "PushedAt", "UpdatedAt", "TotalIssues", "OpenIssues",
        "TotalPullRequests", "OpenPullRequests",   "HasWiki", "IsArchived", "IsDisabled", "Topics"
    ]

    with open(csv_name, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        for page in repository_list:
            for repo in page:
                writer.writerow({
                    "ID": repo["id"],
                    "Name": repo["name"],
                    "URL": repo["html_url"],
                    "IsFork": repo["fork"],
                    "Commits": repo["commits_url"],
                    "Branches": repo["branches_url"],
                    "Releases": repo["releases_url"],
                    "Forks": repo["forks_count"],
                    "MainLanguage": repo["language"],
                    "DefaultBranch": repo["default_branch"],
                    "License": repo["license"]["name"] if repo["license"] else None,
                    "Homepage": repo["homepage"],
                    "Watchers": repo["watchers_count"],
                    "Stargazers": repo["stargazers_count"],
                    "Contributors": len(repo["contributors_url"]) if "contributors_url" in repo else 0,
                    "Size": repo["size"],
                    "CreatedAt": repo["created_at"],
                    "PushedAt": repo["pushed_at"],
                    "UpdatedAt": repo["updated_at"],
                    "TotalIssues": repo["open_issues_count"],
                    "OpenIssues": repo["open_issues_count"],
                    "TotalPullRequests": repo["pulls_url"],
                    "OpenPullRequests": repo["pulls_url"],
                    "HasWiki": repo["has_wiki"],
                    "IsArchived": repo["archived"],
                    "IsDisabled": repo["disabled"],
                    "Topics": repo["topics"] if "topics" in repo else None
                })

all_repositories = []

search_query = "language:java topic:web"
repositories = fetch_repository_information(search_query)
all_repositories.extend(repositories)

search_query = "language:java topic:android"
repositories = fetch_repository_information(search_query)
all_repositories.extend(repositories)

search_query = "language:java topic:desktop"
repositories = fetch_repository_information(search_query)
all_repositories.extend(repositories)


{'next': {'url': 'https://api.github.com/search/repositories?q=language%3Ajava+topic%3Aweb&per_page=100&sort=stars&order=desc&page=2', 'rel': 'next'}, 'last': {'url': 'https://api.github.com/search/repositories?q=language%3Ajava+topic%3Aweb&per_page=100&sort=stars&order=desc&page=10', 'rel': 'last'}}
{'prev': {'url': 'https://api.github.com/search/repositories?q=language%3Ajava+topic%3Aweb&per_page=100&sort=stars&order=desc&page=1', 'rel': 'prev'}, 'next': {'url': 'https://api.github.com/search/repositories?q=language%3Ajava+topic%3Aweb&per_page=100&sort=stars&order=desc&page=3', 'rel': 'next'}, 'last': {'url': 'https://api.github.com/search/repositories?q=language%3Ajava+topic%3Aweb&per_page=100&sort=stars&order=desc&page=10', 'rel': 'last'}, 'first': {'url': 'https://api.github.com/search/repositories?q=language%3Ajava+topic%3Aweb&per_page=100&sort=stars&order=desc&page=1', 'rel': 'first'}}
{'prev': {'url': 'https://api.github.com/search/repositories?q=language%3Ajava+topic%3Aweb&per

TypeError: 'NoneType' object is not iterable

In [65]:
all_repositories

[[{'id': 4710920,
   'node_id': 'MDEwOlJlcG9zaXRvcnk0NzEwOTIw',
   'name': 'dubbo',
   'full_name': 'apache/dubbo',
   'private': False,
   'owner': {'login': 'apache',
    'id': 47359,
    'node_id': 'MDEyOk9yZ2FuaXphdGlvbjQ3MzU5',
    'avatar_url': 'https://avatars.githubusercontent.com/u/47359?v=4',
    'gravatar_id': '',
    'url': 'https://api.github.com/users/apache',
    'html_url': 'https://github.com/apache',
    'followers_url': 'https://api.github.com/users/apache/followers',
    'following_url': 'https://api.github.com/users/apache/following{/other_user}',
    'gists_url': 'https://api.github.com/users/apache/gists{/gist_id}',
    'starred_url': 'https://api.github.com/users/apache/starred{/owner}{/repo}',
    'subscriptions_url': 'https://api.github.com/users/apache/subscriptions',
    'organizations_url': 'https://api.github.com/users/apache/orgs',
    'repos_url': 'https://api.github.com/users/apache/repos',
    'events_url': 'https://api.github.com/users/apache/events{/

In [66]:


# Writing repository information to a CSV file
csv_filename = "data/java-repos.csv"
write_to_csv(all_repositories, csv_filename)

print(f"Repository information has been written to '{csv_filename}'.")

Repository information has been written to 'data/java-repos.csv'.


## Filter Lists to get similar average star count for both lists

In [1]:
import pandas as pd
sim_df = pd.read_csv("data/java-sims.csv")
sim_df = sim_df[sim_df['Stargazers'] > 0]
sim_df = sim_df[sim_df['TotalIssues'] > 0]
sim_df = sim_df[sim_df['IsArchived'] == False]
sim_df = sim_df[sim_df['IsDisabled'] == False]
sim_df = sim_df[sim_df['IsFork'] == False]
sim_df = sim_df.dropna(subset=['License'])
sim_df = sim_df[sim_df['License'] != "Other"]
sim_df['UpdatedAt'] = pd.to_datetime(sim_df['UpdatedAt'])
sim_df = sim_df[sim_df['UpdatedAt'].dt.year >= 2023]
sim_df

Unnamed: 0,ID,Name,URL,IsFork,Commits,Branches,Releases,Forks,MainLanguage,DefaultBranch,...,PushedAt,UpdatedAt,TotalIssues,OpenIssues,TotalPullRequests,OpenPullRequests,HasWiki,IsArchived,IsDisabled,Topics
0,61399845,synthea,https://github.com/synthetichealth/synthea,False,https://api.github.com/repos/synthetichealth/s...,https://api.github.com/repos/synthetichealth/s...,https://api.github.com/repos/synthetichealth/s...,605,Java,master,...,2024-05-17T19:42:30Z,2024-05-17 15:03:04+00:00,179,179,https://api.github.com/repos/synthetichealth/s...,https://api.github.com/repos/synthetichealth/s...,True,False,False,"['fhir', 'health-data', 'simulation', 'synthea..."
2,8832601,finmath-lib,https://github.com/finmath/finmath-lib,False,https://api.github.com/repos/finmath/finmath-l...,https://api.github.com/repos/finmath/finmath-l...,https://api.github.com/repos/finmath/finmath-l...,166,Java,master,...,2023-11-27T22:45:58Z,2024-05-12 14:41:33+00:00,12,12,https://api.github.com/repos/finmath/finmath-l...,https://api.github.com/repos/finmath/finmath-l...,True,False,False,"['finmath-lib', 'java', 'mathematical-modellin..."
5,32472801,cloudsimplus,https://github.com/cloudsimplus/cloudsimplus,False,https://api.github.com/repos/cloudsimplus/clou...,https://api.github.com/repos/cloudsimplus/clou...,https://api.github.com/repos/cloudsimplus/clou...,192,Java,master,...,2024-05-17T16:34:18Z,2024-05-18 19:00:24+00:00,17,17,https://api.github.com/repos/cloudsimplus/clou...,https://api.github.com/repos/cloudsimplus/clou...,False,False,False,"['auto-scaling', 'cloud-computing', 'cloud-inf..."
7,56945187,Evolving-Protozoa,https://github.com/DylanCope/Evolving-Protozoa,False,https://api.github.com/repos/DylanCope/Evolvin...,https://api.github.com/repos/DylanCope/Evolvin...,https://api.github.com/repos/DylanCope/Evolvin...,21,Java,master,...,2023-06-14T22:52:20Z,2024-05-15 18:43:39+00:00,12,12,https://api.github.com/repos/DylanCope/Evolvin...,https://api.github.com/repos/DylanCope/Evolvin...,True,False,False,"['evolutionary-algorithms', 'simulation']"
8,163447483,PureEdgeSim,https://github.com/CharafeddineMechalikh/PureE...,False,https://api.github.com/repos/CharafeddineMecha...,https://api.github.com/repos/CharafeddineMecha...,https://api.github.com/repos/CharafeddineMecha...,73,Java,master,...,2024-04-04T07:52:44Z,2024-05-13 13:34:04+00:00,7,7,https://api.github.com/repos/CharafeddineMecha...,https://api.github.com/repos/CharafeddineMecha...,True,False,False,"['algorithm', 'cloud-computing', 'cloudsim', '..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
220,259992626,tumor-growth-simulation,https://github.com/Jeffresh/tumor-growth-simul...,False,https://api.github.com/repos/Jeffresh/tumor-gr...,https://api.github.com/repos/Jeffresh/tumor-gr...,https://api.github.com/repos/Jeffresh/tumor-gr...,0,Java,master,...,2020-05-15T09:42:08Z,2024-03-07 19:52:52+00:00,1,1,https://api.github.com/repos/Jeffresh/tumor-gr...,https://api.github.com/repos/Jeffresh/tumor-gr...,True,False,False,"['cellular-automata', 'gui', 'java', 'java-swi..."
228,736035086,BoomChess-Android,https://github.com/mklemmingen/BoomChess-Android,False,https://api.github.com/repos/mklemmingen/BoomC...,https://api.github.com/repos/mklemmingen/BoomC...,https://api.github.com/repos/mklemmingen/BoomC...,0,Java,main,...,2024-02-09T00:24:38Z,2024-01-22 19:05:09+00:00,1,1,https://api.github.com/repos/mklemmingen/BoomC...,https://api.github.com/repos/mklemmingen/BoomC...,True,False,False,"['android', 'bot', 'chess', 'game', 'libgdx', ..."
406,96018333,pedroEngine,https://github.com/pedroth/pedroEngine,False,https://api.github.com/repos/pedroth/pedroEngi...,https://api.github.com/repos/pedroth/pedroEngi...,https://api.github.com/repos/pedroth/pedroEngi...,0,Java,master,...,2023-12-08T18:33:11Z,2023-11-21 22:36:17+00:00,1,1,https://api.github.com/repos/pedroth/pedroEngi...,https://api.github.com/repos/pedroth/pedroEngi...,True,False,False,"['algorithms', 'graph', 'simulation', 'spectra..."
422,223274113,supermarket-simulator,https://github.com/davidarny/supermarket-simul...,False,https://api.github.com/repos/davidarny/superma...,https://api.github.com/repos/davidarny/superma...,https://api.github.com/repos/davidarny/superma...,0,Java,master,...,2023-03-02T19:56:39Z,2023-03-08 20:46:20+00:00,1,1,https://api.github.com/repos/davidarny/superma...,https://api.github.com/repos/davidarny/superma...,True,False,False,['simulation']


In [2]:
import pandas as pd
df = pd.read_csv("data/java-repos.csv")
df = df[df['Stargazers'] > 0]
df = df[df['TotalIssues'] > 0]
df = df[df['IsArchived'] == False]
df = df[df['IsDisabled'] == False]
df = df[df['IsFork'] == False]
df = df.dropna(subset=['License'])
df = df[df['License'] != "Other"]
df['UpdatedAt'] = pd.to_datetime(df['UpdatedAt'])
df = df[df['UpdatedAt'].dt.year >= 2023]
df

Unnamed: 0,ID,Name,URL,IsFork,Commits,Branches,Releases,Forks,MainLanguage,DefaultBranch,...,PushedAt,UpdatedAt,TotalIssues,OpenIssues,TotalPullRequests,OpenPullRequests,HasWiki,IsArchived,IsDisabled,Topics
0,4710920,dubbo,https://github.com/apache/dubbo,False,https://api.github.com/repos/apache/dubbo/comm...,https://api.github.com/repos/apache/dubbo/bran...,https://api.github.com/repos/apache/dubbo/rele...,26278,Java,3.2,...,2024-05-19T09:37:22Z,2024-05-19 12:34:31+00:00,822,822,https://api.github.com/repos/apache/dubbo/pull...,https://api.github.com/repos/apache/dubbo/pull...,True,False,False,"['distributed-systems', 'dubbo', 'framework', ..."
1,110211147,spring-boot-demo,https://github.com/xkcoding/spring-boot-demo,False,https://api.github.com/repos/xkcoding/spring-b...,https://api.github.com/repos/xkcoding/spring-b...,https://api.github.com/repos/xkcoding/spring-b...,10680,Java,master,...,2024-04-08T06:40:37Z,2024-05-19 14:27:48+00:00,129,129,https://api.github.com/repos/xkcoding/spring-b...,https://api.github.com/repos/xkcoding/spring-b...,True,False,False,"['demo', 'in-action', 'java', 'spring', 'sprin..."
2,190964861,tech-interview-for-developer,https://github.com/gyoogle/tech-interview-for-...,False,https://api.github.com/repos/gyoogle/tech-inte...,https://api.github.com/repos/gyoogle/tech-inte...,https://api.github.com/repos/gyoogle/tech-inte...,3259,Java,master,...,2024-05-05T08:27:58Z,2024-05-19 14:05:16+00:00,15,15,https://api.github.com/repos/gyoogle/tech-inte...,https://api.github.com/repos/gyoogle/tech-inte...,True,False,False,"['algorithm', 'computer-science', 'cs', 'data-..."
4,691911176,spring-reading,https://github.com/xuchengsheng/spring-reading,False,https://api.github.com/repos/xuchengsheng/spri...,https://api.github.com/repos/xuchengsheng/spri...,https://api.github.com/repos/xuchengsheng/spri...,675,Java,master,...,2024-05-17T03:30:03Z,2024-05-19 13:54:36+00:00,26,26,https://api.github.com/repos/xuchengsheng/spri...,https://api.github.com/repos/xuchengsheng/spri...,True,False,False,"['annotations', 'awareinterface', 'backenddeve..."
5,32761091,FXGL,https://github.com/AlmasB/FXGL,False,https://api.github.com/repos/AlmasB/FXGL/commi...,https://api.github.com/repos/AlmasB/FXGL/branc...,https://api.github.com/repos/AlmasB/FXGL/relea...,537,Java,dev,...,2024-03-26T20:51:34Z,2024-05-18 20:51:49+00:00,131,131,https://api.github.com/repos/AlmasB/FXGL/pulls...,https://api.github.com/repos/AlmasB/FXGL/pulls...,True,False,False,"['2d', '2d-framework', '2d-game-engine', '2d-g..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
483,55153134,Jerrydog,https://github.com/sylvainhalle/Jerrydog,False,https://api.github.com/repos/sylvainhalle/Jerr...,https://api.github.com/repos/sylvainhalle/Jerr...,https://api.github.com/repos/sylvainhalle/Jerr...,2,Java,master,...,2023-06-18T18:51:05Z,2023-07-24 13:38:44+00:00,1,1,https://api.github.com/repos/sylvainhalle/Jerr...,https://api.github.com/repos/sylvainhalle/Jerr...,True,False,False,"['httpd', 'lightweight', 'server', 'tomcat', '..."
493,460858099,log_generator,https://github.com/donnemse/log_generator,False,https://api.github.com/repos/donnemse/log_gene...,https://api.github.com/repos/donnemse/log_gene...,https://api.github.com/repos/donnemse/log_gene...,0,Java,main,...,2023-09-15T06:17:18Z,2024-02-09 08:53:16+00:00,3,3,https://api.github.com/repos/donnemse/log_gene...,https://api.github.com/repos/donnemse/log_gene...,True,False,False,"['fw', 'generator', 'log', 'security', 'waf', ..."
520,446182315,webrest-starter,https://github.com/ratulSharker/webrest-starter,False,https://api.github.com/repos/ratulSharker/webr...,https://api.github.com/repos/ratulSharker/webr...,https://api.github.com/repos/ratulSharker/webr...,0,Java,staging,...,2024-05-07T08:21:33Z,2023-04-06 08:07:57+00:00,11,11,https://api.github.com/repos/ratulSharker/webr...,https://api.github.com/repos/ratulSharker/webr...,True,False,False,"['bootstrap', 'rest', 'rest-api', 'spring', 's..."
594,74473888,webinloop,https://github.com/sprylab/webinloop,False,https://api.github.com/repos/sprylab/webinloop...,https://api.github.com/repos/sprylab/webinloop...,https://api.github.com/repos/sprylab/webinloop...,0,Java,master,...,2021-08-02T16:59:19Z,2023-02-14 02:54:35+00:00,3,3,https://api.github.com/repos/sprylab/webinloop...,https://api.github.com/repos/sprylab/webinloop...,True,False,False,"['ant-task', 'cli', 'integration-testing', 'ma..."


In [3]:
merged = df.merge(sim_df, how='left', indicator=True)
df = merged[merged['_merge'] == 'left_only'].drop(columns='_merge')
df

Unnamed: 0,ID,Name,URL,IsFork,Commits,Branches,Releases,Forks,MainLanguage,DefaultBranch,...,PushedAt,UpdatedAt,TotalIssues,OpenIssues,TotalPullRequests,OpenPullRequests,HasWiki,IsArchived,IsDisabled,Topics
0,4710920,dubbo,https://github.com/apache/dubbo,False,https://api.github.com/repos/apache/dubbo/comm...,https://api.github.com/repos/apache/dubbo/bran...,https://api.github.com/repos/apache/dubbo/rele...,26278,Java,3.2,...,2024-05-19T09:37:22Z,2024-05-19 12:34:31+00:00,822,822,https://api.github.com/repos/apache/dubbo/pull...,https://api.github.com/repos/apache/dubbo/pull...,True,False,False,"['distributed-systems', 'dubbo', 'framework', ..."
1,110211147,spring-boot-demo,https://github.com/xkcoding/spring-boot-demo,False,https://api.github.com/repos/xkcoding/spring-b...,https://api.github.com/repos/xkcoding/spring-b...,https://api.github.com/repos/xkcoding/spring-b...,10680,Java,master,...,2024-04-08T06:40:37Z,2024-05-19 14:27:48+00:00,129,129,https://api.github.com/repos/xkcoding/spring-b...,https://api.github.com/repos/xkcoding/spring-b...,True,False,False,"['demo', 'in-action', 'java', 'spring', 'sprin..."
2,190964861,tech-interview-for-developer,https://github.com/gyoogle/tech-interview-for-...,False,https://api.github.com/repos/gyoogle/tech-inte...,https://api.github.com/repos/gyoogle/tech-inte...,https://api.github.com/repos/gyoogle/tech-inte...,3259,Java,master,...,2024-05-05T08:27:58Z,2024-05-19 14:05:16+00:00,15,15,https://api.github.com/repos/gyoogle/tech-inte...,https://api.github.com/repos/gyoogle/tech-inte...,True,False,False,"['algorithm', 'computer-science', 'cs', 'data-..."
3,691911176,spring-reading,https://github.com/xuchengsheng/spring-reading,False,https://api.github.com/repos/xuchengsheng/spri...,https://api.github.com/repos/xuchengsheng/spri...,https://api.github.com/repos/xuchengsheng/spri...,675,Java,master,...,2024-05-17T03:30:03Z,2024-05-19 13:54:36+00:00,26,26,https://api.github.com/repos/xuchengsheng/spri...,https://api.github.com/repos/xuchengsheng/spri...,True,False,False,"['annotations', 'awareinterface', 'backenddeve..."
4,32761091,FXGL,https://github.com/AlmasB/FXGL,False,https://api.github.com/repos/AlmasB/FXGL/commi...,https://api.github.com/repos/AlmasB/FXGL/branc...,https://api.github.com/repos/AlmasB/FXGL/relea...,537,Java,dev,...,2024-03-26T20:51:34Z,2024-05-18 20:51:49+00:00,131,131,https://api.github.com/repos/AlmasB/FXGL/pulls...,https://api.github.com/repos/AlmasB/FXGL/pulls...,True,False,False,"['2d', '2d-framework', '2d-game-engine', '2d-g..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
118,55153134,Jerrydog,https://github.com/sylvainhalle/Jerrydog,False,https://api.github.com/repos/sylvainhalle/Jerr...,https://api.github.com/repos/sylvainhalle/Jerr...,https://api.github.com/repos/sylvainhalle/Jerr...,2,Java,master,...,2023-06-18T18:51:05Z,2023-07-24 13:38:44+00:00,1,1,https://api.github.com/repos/sylvainhalle/Jerr...,https://api.github.com/repos/sylvainhalle/Jerr...,True,False,False,"['httpd', 'lightweight', 'server', 'tomcat', '..."
119,460858099,log_generator,https://github.com/donnemse/log_generator,False,https://api.github.com/repos/donnemse/log_gene...,https://api.github.com/repos/donnemse/log_gene...,https://api.github.com/repos/donnemse/log_gene...,0,Java,main,...,2023-09-15T06:17:18Z,2024-02-09 08:53:16+00:00,3,3,https://api.github.com/repos/donnemse/log_gene...,https://api.github.com/repos/donnemse/log_gene...,True,False,False,"['fw', 'generator', 'log', 'security', 'waf', ..."
120,446182315,webrest-starter,https://github.com/ratulSharker/webrest-starter,False,https://api.github.com/repos/ratulSharker/webr...,https://api.github.com/repos/ratulSharker/webr...,https://api.github.com/repos/ratulSharker/webr...,0,Java,staging,...,2024-05-07T08:21:33Z,2023-04-06 08:07:57+00:00,11,11,https://api.github.com/repos/ratulSharker/webr...,https://api.github.com/repos/ratulSharker/webr...,True,False,False,"['bootstrap', 'rest', 'rest-api', 'spring', 's..."
121,74473888,webinloop,https://github.com/sprylab/webinloop,False,https://api.github.com/repos/sprylab/webinloop...,https://api.github.com/repos/sprylab/webinloop...,https://api.github.com/repos/sprylab/webinloop...,0,Java,master,...,2021-08-02T16:59:19Z,2023-02-14 02:54:35+00:00,3,3,https://api.github.com/repos/sprylab/webinloop...,https://api.github.com/repos/sprylab/webinloop...,True,False,False,"['ant-task', 'cli', 'integration-testing', 'ma..."


In [4]:
sim_star_mean = sim_df['Stargazers'].mean()
sim_con_mean = sim_df['Contributors'].mean()
sim_issue_mean = sim_df['TotalIssues'].mean()

trad_star_mean = df['Stargazers'].mean()
trad_con_mean = df['Contributors'].mean()
trad_issue_mean = df['TotalIssues'].mean()

trad_star_min = df['Stargazers'].min()
trad_con_min = df['Contributors'].min()
trad_issue_min = df['TotalIssues'].min()


print(sim_star_mean, sim_con_mean, sim_issue_mean)
print(trad_star_mean, trad_con_mean, trad_issue_mean)

73.23880597014926 65.94029850746269 18.432835820895523
959.9186991869918 63.99186991869919 40.51219512195122


In [6]:
# df = df[df['TotalIssues'] >= 10]
# df

In [7]:
sim_star_mean = sim_df['Stargazers'].mean()
sim_con_mean = sim_df['Contributors'].mean()
sim_issue_mean = sim_df['TotalIssues'].mean()

trad_star_mean = df['Stargazers'].mean()
trad_con_mean = df['Contributors'].mean()
trad_issue_mean = df['TotalIssues'].mean()

trad_star_min = df['Stargazers'].min()
trad_con_min = df['Contributors'].min()
trad_issue_min = df['TotalIssues'].min()

print(sim_star_mean, sim_con_mean, sim_issue_mean)
print(trad_star_mean, trad_con_mean, trad_issue_mean)

73.23880597014926 65.94029850746269 18.432835820895523
959.9186991869918 63.99186991869919 40.51219512195122


In [8]:
import math 

while sim_star_mean < 2 * math.floor(trad_star_min):
    max_index = sim_df['Stargazers'].idxmin()
    sim_df = sim_df.drop(max_index)

    sim_star_mean = sim_df['Stargazers'].mean()
    print("Simulation mean stars", sim_star_mean, "Traditional minimum stars", trad_star_min)
sim_df

Unnamed: 0,ID,Name,URL,IsFork,Commits,Branches,Releases,Forks,MainLanguage,DefaultBranch,...,PushedAt,UpdatedAt,TotalIssues,OpenIssues,TotalPullRequests,OpenPullRequests,HasWiki,IsArchived,IsDisabled,Topics
0,61399845,synthea,https://github.com/synthetichealth/synthea,False,https://api.github.com/repos/synthetichealth/s...,https://api.github.com/repos/synthetichealth/s...,https://api.github.com/repos/synthetichealth/s...,605,Java,master,...,2024-05-17T19:42:30Z,2024-05-17 15:03:04+00:00,179,179,https://api.github.com/repos/synthetichealth/s...,https://api.github.com/repos/synthetichealth/s...,True,False,False,"['fhir', 'health-data', 'simulation', 'synthea..."
2,8832601,finmath-lib,https://github.com/finmath/finmath-lib,False,https://api.github.com/repos/finmath/finmath-l...,https://api.github.com/repos/finmath/finmath-l...,https://api.github.com/repos/finmath/finmath-l...,166,Java,master,...,2023-11-27T22:45:58Z,2024-05-12 14:41:33+00:00,12,12,https://api.github.com/repos/finmath/finmath-l...,https://api.github.com/repos/finmath/finmath-l...,True,False,False,"['finmath-lib', 'java', 'mathematical-modellin..."
5,32472801,cloudsimplus,https://github.com/cloudsimplus/cloudsimplus,False,https://api.github.com/repos/cloudsimplus/clou...,https://api.github.com/repos/cloudsimplus/clou...,https://api.github.com/repos/cloudsimplus/clou...,192,Java,master,...,2024-05-17T16:34:18Z,2024-05-18 19:00:24+00:00,17,17,https://api.github.com/repos/cloudsimplus/clou...,https://api.github.com/repos/cloudsimplus/clou...,False,False,False,"['auto-scaling', 'cloud-computing', 'cloud-inf..."
7,56945187,Evolving-Protozoa,https://github.com/DylanCope/Evolving-Protozoa,False,https://api.github.com/repos/DylanCope/Evolvin...,https://api.github.com/repos/DylanCope/Evolvin...,https://api.github.com/repos/DylanCope/Evolvin...,21,Java,master,...,2023-06-14T22:52:20Z,2024-05-15 18:43:39+00:00,12,12,https://api.github.com/repos/DylanCope/Evolvin...,https://api.github.com/repos/DylanCope/Evolvin...,True,False,False,"['evolutionary-algorithms', 'simulation']"
8,163447483,PureEdgeSim,https://github.com/CharafeddineMechalikh/PureE...,False,https://api.github.com/repos/CharafeddineMecha...,https://api.github.com/repos/CharafeddineMecha...,https://api.github.com/repos/CharafeddineMecha...,73,Java,master,...,2024-04-04T07:52:44Z,2024-05-13 13:34:04+00:00,7,7,https://api.github.com/repos/CharafeddineMecha...,https://api.github.com/repos/CharafeddineMecha...,True,False,False,"['algorithm', 'cloud-computing', 'cloudsim', '..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
220,259992626,tumor-growth-simulation,https://github.com/Jeffresh/tumor-growth-simul...,False,https://api.github.com/repos/Jeffresh/tumor-gr...,https://api.github.com/repos/Jeffresh/tumor-gr...,https://api.github.com/repos/Jeffresh/tumor-gr...,0,Java,master,...,2020-05-15T09:42:08Z,2024-03-07 19:52:52+00:00,1,1,https://api.github.com/repos/Jeffresh/tumor-gr...,https://api.github.com/repos/Jeffresh/tumor-gr...,True,False,False,"['cellular-automata', 'gui', 'java', 'java-swi..."
228,736035086,BoomChess-Android,https://github.com/mklemmingen/BoomChess-Android,False,https://api.github.com/repos/mklemmingen/BoomC...,https://api.github.com/repos/mklemmingen/BoomC...,https://api.github.com/repos/mklemmingen/BoomC...,0,Java,main,...,2024-02-09T00:24:38Z,2024-01-22 19:05:09+00:00,1,1,https://api.github.com/repos/mklemmingen/BoomC...,https://api.github.com/repos/mklemmingen/BoomC...,True,False,False,"['android', 'bot', 'chess', 'game', 'libgdx', ..."
406,96018333,pedroEngine,https://github.com/pedroth/pedroEngine,False,https://api.github.com/repos/pedroth/pedroEngi...,https://api.github.com/repos/pedroth/pedroEngi...,https://api.github.com/repos/pedroth/pedroEngi...,0,Java,master,...,2023-12-08T18:33:11Z,2023-11-21 22:36:17+00:00,1,1,https://api.github.com/repos/pedroth/pedroEngi...,https://api.github.com/repos/pedroth/pedroEngi...,True,False,False,"['algorithms', 'graph', 'simulation', 'spectra..."
422,223274113,supermarket-simulator,https://github.com/davidarny/supermarket-simul...,False,https://api.github.com/repos/davidarny/superma...,https://api.github.com/repos/davidarny/superma...,https://api.github.com/repos/davidarny/superma...,0,Java,master,...,2023-03-02T19:56:39Z,2023-03-08 20:46:20+00:00,1,1,https://api.github.com/repos/davidarny/superma...,https://api.github.com/repos/davidarny/superma...,True,False,False,['simulation']


In [9]:
while sim_star_mean <  math.floor(trad_star_mean//2):
    max_index = df['Stargazers'].idxmax()
    df = df.drop(max_index)

    trad_star_mean = df['Stargazers'].mean()
    print(sim_star_mean, trad_star_mean)

df

73.23880597014926 639.0655737704918
73.23880597014926 378.3884297520661
73.23880597014926 268.15
73.23880597014926 224.60504201680672
73.23880597014926 191.0
73.23880597014926 168.6153846153846
73.23880597014926 149.56896551724137
73.23880597014926 136.61739130434782


Unnamed: 0,ID,Name,URL,IsFork,Commits,Branches,Releases,Forks,MainLanguage,DefaultBranch,...,PushedAt,UpdatedAt,TotalIssues,OpenIssues,TotalPullRequests,OpenPullRequests,HasWiki,IsArchived,IsDisabled,Topics
8,28589035,Resty,https://github.com/Dreampie/Resty,False,https://api.github.com/repos/Dreampie/Resty/co...,https://api.github.com/repos/Dreampie/Resty/br...,https://api.github.com/repos/Dreampie/Resty/re...,440,Java,master,...,2021-11-05T02:19:42Z,2024-04-15 05:45:59+00:00,5,5,https://api.github.com/repos/Dreampie/Resty/pu...,https://api.github.com/repos/Dreampie/Resty/pu...,True,False,False,"['activerecord', 'httpclient', 'java', 'restfu..."
9,26628954,vertx-web,https://github.com/vert-x3/vertx-web,False,https://api.github.com/repos/vert-x3/vertx-web...,https://api.github.com/repos/vert-x3/vertx-web...,https://api.github.com/repos/vert-x3/vertx-web...,520,Java,master,...,2024-05-19T01:02:38Z,2024-05-19 04:20:30+00:00,141,141,https://api.github.com/repos/vert-x3/vertx-web...,https://api.github.com/repos/vert-x3/vertx-web...,False,False,False,"['client', 'reactive', 'server', 'sockjs', 've..."
10,40998919,problem-spring-web,https://github.com/zalando/problem-spring-web,False,https://api.github.com/repos/zalando/problem-s...,https://api.github.com/repos/zalando/problem-s...,https://api.github.com/repos/zalando/problem-s...,126,Java,main,...,2024-04-22T04:51:46Z,2024-05-06 11:35:27+00:00,45,45,https://api.github.com/repos/zalando/problem-s...,https://api.github.com/repos/zalando/problem-s...,False,False,False,"['error', 'exception', 'java', 'json', 'micros..."
11,46464011,hasor,https://github.com/ClouGence/hasor,False,https://api.github.com/repos/ClouGence/hasor/c...,https://api.github.com/repos/ClouGence/hasor/b...,https://api.github.com/repos/ClouGence/hasor/r...,273,Java,master,...,2022-12-14T20:50:39Z,2024-05-11 08:18:40+00:00,40,40,https://api.github.com/repos/ClouGence/hasor/p...,https://api.github.com/repos/ClouGence/hasor/p...,False,False,False,"['aop', 'dataql', 'db', 'hasor', 'ioc', 'java'..."
12,267619299,activej,https://github.com/activej/activej,False,https://api.github.com/repos/activej/activej/c...,https://api.github.com/repos/activej/activej/b...,https://api.github.com/repos/activej/activej/r...,72,Java,master,...,2024-05-11T19:46:21Z,2024-05-19 08:05:55+00:00,31,31,https://api.github.com/repos/activej/activej/p...,https://api.github.com/repos/activej/activej/p...,False,False,False,"['async', 'code-generation', 'dependency-injec..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
118,55153134,Jerrydog,https://github.com/sylvainhalle/Jerrydog,False,https://api.github.com/repos/sylvainhalle/Jerr...,https://api.github.com/repos/sylvainhalle/Jerr...,https://api.github.com/repos/sylvainhalle/Jerr...,2,Java,master,...,2023-06-18T18:51:05Z,2023-07-24 13:38:44+00:00,1,1,https://api.github.com/repos/sylvainhalle/Jerr...,https://api.github.com/repos/sylvainhalle/Jerr...,True,False,False,"['httpd', 'lightweight', 'server', 'tomcat', '..."
119,460858099,log_generator,https://github.com/donnemse/log_generator,False,https://api.github.com/repos/donnemse/log_gene...,https://api.github.com/repos/donnemse/log_gene...,https://api.github.com/repos/donnemse/log_gene...,0,Java,main,...,2023-09-15T06:17:18Z,2024-02-09 08:53:16+00:00,3,3,https://api.github.com/repos/donnemse/log_gene...,https://api.github.com/repos/donnemse/log_gene...,True,False,False,"['fw', 'generator', 'log', 'security', 'waf', ..."
120,446182315,webrest-starter,https://github.com/ratulSharker/webrest-starter,False,https://api.github.com/repos/ratulSharker/webr...,https://api.github.com/repos/ratulSharker/webr...,https://api.github.com/repos/ratulSharker/webr...,0,Java,staging,...,2024-05-07T08:21:33Z,2023-04-06 08:07:57+00:00,11,11,https://api.github.com/repos/ratulSharker/webr...,https://api.github.com/repos/ratulSharker/webr...,True,False,False,"['bootstrap', 'rest', 'rest-api', 'spring', 's..."
121,74473888,webinloop,https://github.com/sprylab/webinloop,False,https://api.github.com/repos/sprylab/webinloop...,https://api.github.com/repos/sprylab/webinloop...,https://api.github.com/repos/sprylab/webinloop...,0,Java,master,...,2021-08-02T16:59:19Z,2023-02-14 02:54:35+00:00,3,3,https://api.github.com/repos/sprylab/webinloop...,https://api.github.com/repos/sprylab/webinloop...,True,False,False,"['ant-task', 'cli', 'integration-testing', 'ma..."


In [10]:
sim_star_mean = sim_df['Stargazers'].mean()
sim_con_mean = sim_df['Contributors'].mean()
sim_issue_mean = sim_df['TotalIssues'].mean()

trad_star_mean = df['Stargazers'].mean()
trad_con_mean = df['Contributors'].mean()
trad_issue_mean = df['TotalIssues'].mean()

trad_star_min = df['Stargazers'].min()
trad_con_min = df['Contributors'].min()
trad_issue_min = df['TotalIssues'].min()


print(sim_star_mean, sim_con_mean, sim_issue_mean)
print(trad_star_mean, trad_con_mean, trad_issue_mean)

73.23880597014926 65.94029850746269 18.432835820895523
136.61739130434782 63.86086956521739 28.356521739130436


In [11]:
import math

sim_df = sim_df[sim_df['TotalIssues'] >= 10]
sim_df

Unnamed: 0,ID,Name,URL,IsFork,Commits,Branches,Releases,Forks,MainLanguage,DefaultBranch,...,PushedAt,UpdatedAt,TotalIssues,OpenIssues,TotalPullRequests,OpenPullRequests,HasWiki,IsArchived,IsDisabled,Topics
0,61399845,synthea,https://github.com/synthetichealth/synthea,False,https://api.github.com/repos/synthetichealth/s...,https://api.github.com/repos/synthetichealth/s...,https://api.github.com/repos/synthetichealth/s...,605,Java,master,...,2024-05-17T19:42:30Z,2024-05-17 15:03:04+00:00,179,179,https://api.github.com/repos/synthetichealth/s...,https://api.github.com/repos/synthetichealth/s...,True,False,False,"['fhir', 'health-data', 'simulation', 'synthea..."
2,8832601,finmath-lib,https://github.com/finmath/finmath-lib,False,https://api.github.com/repos/finmath/finmath-l...,https://api.github.com/repos/finmath/finmath-l...,https://api.github.com/repos/finmath/finmath-l...,166,Java,master,...,2023-11-27T22:45:58Z,2024-05-12 14:41:33+00:00,12,12,https://api.github.com/repos/finmath/finmath-l...,https://api.github.com/repos/finmath/finmath-l...,True,False,False,"['finmath-lib', 'java', 'mathematical-modellin..."
5,32472801,cloudsimplus,https://github.com/cloudsimplus/cloudsimplus,False,https://api.github.com/repos/cloudsimplus/clou...,https://api.github.com/repos/cloudsimplus/clou...,https://api.github.com/repos/cloudsimplus/clou...,192,Java,master,...,2024-05-17T16:34:18Z,2024-05-18 19:00:24+00:00,17,17,https://api.github.com/repos/cloudsimplus/clou...,https://api.github.com/repos/cloudsimplus/clou...,False,False,False,"['auto-scaling', 'cloud-computing', 'cloud-inf..."
7,56945187,Evolving-Protozoa,https://github.com/DylanCope/Evolving-Protozoa,False,https://api.github.com/repos/DylanCope/Evolvin...,https://api.github.com/repos/DylanCope/Evolvin...,https://api.github.com/repos/DylanCope/Evolvin...,21,Java,master,...,2023-06-14T22:52:20Z,2024-05-15 18:43:39+00:00,12,12,https://api.github.com/repos/DylanCope/Evolvin...,https://api.github.com/repos/DylanCope/Evolvin...,True,False,False,"['evolutionary-algorithms', 'simulation']"
9,41836729,Robot-Overlord-App,https://github.com/MarginallyClever/Robot-Over...,False,https://api.github.com/repos/MarginallyClever/...,https://api.github.com/repos/MarginallyClever/...,https://api.github.com/repos/MarginallyClever/...,48,Java,master,...,2024-05-16T01:32:02Z,2024-04-22 03:13:43+00:00,12,12,https://api.github.com/repos/MarginallyClever/...,https://api.github.com/repos/MarginallyClever/...,True,False,False,"['control', 'java', 'robot', 'robotics', 'simu..."
12,41550991,mars-sim,https://github.com/mars-sim/mars-sim,False,https://api.github.com/repos/mars-sim/mars-sim...,https://api.github.com/repos/mars-sim/mars-sim...,https://api.github.com/repos/mars-sim/mars-sim...,34,Java,master,...,2024-05-19T06:30:24Z,2024-05-19 06:30:27+00:00,164,164,https://api.github.com/repos/mars-sim/mars-sim...,https://api.github.com/repos/mars-sim/mars-sim...,True,False,False,"['agent-based-modeling', 'agent-based-simulati..."
17,19718418,varsim,https://github.com/bioinform/varsim,False,https://api.github.com/repos/bioinform/varsim/...,https://api.github.com/repos/bioinform/varsim/...,https://api.github.com/repos/bioinform/varsim/...,31,Java,master,...,2023-04-07T01:32:48Z,2024-03-31 14:14:50+00:00,41,41,https://api.github.com/repos/bioinform/varsim/...,https://api.github.com/repos/bioinform/varsim/...,False,False,False,"['genomics', 'high-throughput-sequencing', 'si..."
23,431279043,aerie,https://github.com/NASA-AMMOS/aerie,False,https://api.github.com/repos/NASA-AMMOS/aerie/...,https://api.github.com/repos/NASA-AMMOS/aerie/...,https://api.github.com/repos/NASA-AMMOS/aerie/...,17,Java,develop,...,2024-05-17T21:57:12Z,2024-05-17 21:04:46+00:00,174,174,https://api.github.com/repos/NASA-AMMOS/aerie/...,https://api.github.com/repos/NASA-AMMOS/aerie/...,False,False,False,"['aerospace', 'discrete-event-simulation', 'ja..."
28,51246530,workcraft,https://github.com/workcraft/workcraft,False,https://api.github.com/repos/workcraft/workcra...,https://api.github.com/repos/workcraft/workcra...,https://api.github.com/repos/workcraft/workcra...,142,Java,master,...,2024-05-17T22:10:41Z,2024-05-17 11:01:26+00:00,60,60,https://api.github.com/repos/workcraft/workcra...,https://api.github.com/repos/workcraft/workcra...,False,False,False,"['async', 'cad', 'circuit', 'eda', 'formal-spe..."
29,112573334,react-native-ble-peripheral,https://github.com/himelbrand/react-native-ble...,False,https://api.github.com/repos/himelbrand/react-...,https://api.github.com/repos/himelbrand/react-...,https://api.github.com/repos/himelbrand/react-...,45,Java,master,...,2024-04-29T13:40:23Z,2024-05-15 12:53:31+00:00,11,11,https://api.github.com/repos/himelbrand/react-...,https://api.github.com/repos/himelbrand/react-...,True,False,False,"['ble', 'iot', 'react-native', 'simulation']"


In [24]:
df.to_csv('data/trad_df.csv')
sim_df.to_csv('data/sim_df.csv')

## Download Traditional Repository

In [25]:
import csv
import os
import subprocess
import shutil
import pandas as pd

def clone_repository(repo_url, destination):
    if not os.path.exists(destination):
        try:
            subprocess.run(["git", "clone", repo_url, destination], timeout=240, check=True)
            print(f"Repository cloned successfully: {destination}")
        except subprocess.TimeoutExpired:
            print("Time out reached for download.")
            if os.path.exists(destination):
                shutil.rmtree(destination)
                print(destination, "Successfully cleaned up")
        except subprocess.CalledProcessError as e:
            print(f"Failed to clone repository: {repo_url}")
            print(e)



def download_repositories_from_csv(csv_filename, destination_folder):
    if not os.path.exists(destination_folder):
        os.makedirs(destination_folder)

    df = pd.read_csv(csv_filename)
    df = df[df['MainLanguage'] == 'Java']
    for index, row in df.iterrows():
        repo_url = row['URL']
        repo_name = row['Name']
        destination = os.path.join(destination_folder, repo_name)

        if os.path.exists(destination):
            print(destination, " Already Exists")
            continue

        clone_repository(repo_url, destination)

# Folder where repositories will be cloned
output_folder = os.path.join('data', 'repos', 'traditional')

if not os.path.exists(output_folder):
    os.makedirs(output_folder)

input_csv = "data/trad_df.csv"

download_repositories_from_csv(input_csv, output_folder)

Repository cloned successfully: data\repos\traditional\vertx-web
Repository cloned successfully: data\repos\traditional\problem-spring-web
Repository cloned successfully: data\repos\traditional\hasor
Repository cloned successfully: data\repos\traditional\activej
Repository cloned successfully: data\repos\traditional\pippo
Repository cloned successfully: data\repos\traditional\wicket
Repository cloned successfully: data\repos\traditional\xxl-crawler
Repository cloned successfully: data\repos\traditional\LGame
Repository cloned successfully: data\repos\traditional\opentest
Repository cloned successfully: data\repos\traditional\capacitor-firebase
Repository cloned successfully: data\repos\traditional\SORMAS-Project
Repository cloned successfully: data\repos\traditional\alchemy
Repository cloned successfully: data\repos\traditional\riptide
Repository cloned successfully: data\repos\traditional\qaf
Repository cloned successfully: data\repos\traditional\metl
Repository cloned successfully: d

## Download Simulation Repository

In [26]:
import csv
import os
import subprocess
import shutil
import pandas as pd

def clone_repository(repo_url, destination):
    if not os.path.exists(destination):
        try:
            subprocess.run(["git", "clone", repo_url, destination], timeout=240, check=True)
            print(f"Repository cloned successfully: {destination}")
        except subprocess.TimeoutExpired:
            print("Time out reached for download.")
            if os.path.exists(destination):
                shutil.rmtree(destination)
                print(destination, "Successfully cleaned up")
        except subprocess.CalledProcessError as e:
            print(f"Failed to clone repository: {repo_url}")
            print(e)



def download_repositories_from_csv(csv_filename, destination_folder):
    if not os.path.exists(destination_folder):
        os.makedirs(destination_folder)

    df = pd.read_csv(csv_filename)
    df = df[df['MainLanguage'] == 'Java']
    for index, row in df.iterrows():
        repo_url = row['URL']
        repo_name = row['Name']
        destination = os.path.join(destination_folder, repo_name)

        if os.path.exists(destination):
            print(destination, " Already Exists")
            continue


        clone_repository(repo_url, destination)

# Folder where repositories will be cloned
output_folder = os.path.join('data', 'repos', 'simulation')
input_csv = "data/sim_df.csv"

download_repositories_from_csv(input_csv, output_folder)

Repository cloned successfully: data\repos\simulation\synthea
Repository cloned successfully: data\repos\simulation\finmath-lib
Repository cloned successfully: data\repos\simulation\cloudsimplus
Repository cloned successfully: data\repos\simulation\Evolving-Protozoa
Repository cloned successfully: data\repos\simulation\Robot-Overlord-App
Repository cloned successfully: data\repos\simulation\mars-sim
Repository cloned successfully: data\repos\simulation\varsim
Repository cloned successfully: data\repos\simulation\aerie
Repository cloned successfully: data\repos\simulation\workcraft
Repository cloned successfully: data\repos\simulation\react-native-ble-peripheral
Repository cloned successfully: data\repos\simulation\cas
Repository cloned successfully: data\repos\simulation\ShapeOfThingsThatWere
Repository cloned successfully: data\repos\simulation\eqasim-java
Repository cloned successfully: data\repos\simulation\MATSim-UAM
Repository cloned successfully: data\repos\simulation\PowerSystem