In [None]:
import glob
import shutil
import os

# Collect every .json file found anywhere below `source_dir` into the single
# flat directory `destination_dir`.
source_dir = 'results'
destination_dir = 'data/results/microservices/'

# Create the destination directory if it doesn't exist
os.makedirs(destination_dir, exist_ok=True)

# Walk the whole source tree (sub-directories included)
for root, dirs, files in os.walk(source_dir):
    for file in files:
        if not file.endswith('.json'):
            continue
        file_path = os.path.join(root, file)
        target_path = os.path.join(destination_dir, file)
        # shutil.move() into an existing directory raises shutil.Error when a
        # file with the same name is already there. Because the tree is being
        # flattened, two sub-directories can hold equally named files; skip
        # those instead of aborting half-way through the move.
        if os.path.exists(target_path):
            print(f"Skipping {file_path}: {target_path} already exists")
            continue
        print(file_path)
        # Move the JSON file to the destination directory
        shutil.move(file_path, destination_dir)

In [None]:
import os

# Define the directory to check. It is spelled out here so that this cell does
# not depend on a variable that only exists after another cell has been run.
directory_to_check = 'data/results/microservices/'

# Define the file size limit (in bytes)
file_size_limit = 100 * 1024 * 1024  # 100 MB

# Report every file below the directory whose size exceeds the limit
for root, dirs, files in os.walk(directory_to_check):
    for file in files:
        file_path = os.path.join(root, file)
        file_size = os.path.getsize(file_path)
        if file_size > file_size_limit:
            print(f"File {file_path} is too large: {file_size / (1024 * 1024):.2f} MB")

In [None]:
import json

# Load the project list; each entry is accessed below via its 'name' and
# 'html_url' keys.
with open("data/microservice_projects/projects.json", "r", encoding="utf-8") as source:
    projects = json.load(source)

# Write one analysis.py invocation per project, echoing the project name as
# progress output.
with open("slurm/microservice_projects.txt", "w", encoding="utf-8") as dest:
    for entry in projects:
        project_name = entry['name']
        print(project_name)
        project_url = entry['html_url']
        dest.write(f"python3 /tmp/ssimon/config-space/experiments/analysis.py --url={project_url} --name={project_name}\n")

In [None]:
import pandas as pd

df = pd.read_csv("data/popularity_projects_annotated.csv")

# Keep the first 100 rows whose 'relevance' column equals "relevant"
is_relevant = df['relevance'] == "relevant"
df = df.loc[is_relevant].head(100)

# Write one analysis.py invocation per remaining row
with open("slurm/popularity_projects.txt", "w", encoding="utf-8") as dest:
    dest.writelines(
        f"python3 /tmp/ssimon/config-space/experiments/analysis.py --url={record['html_url']} --name={record['name']}\n"
        for _, record in df.iterrows()
    )

In [None]:
import pandas as pd

df = pd.read_csv("data/ml_products.csv")

# Write one analysis.py invocation per link of every row. The command name is
# the part of the 'Name' column after the last "/".
with open("slurm/ml_products.txt", "w", encoding="utf-8") as dest:
    for index, row in df.iterrows():
        name = row['Name'].split("/")[-1]
        # A 'Link(s)' cell may hold several whitespace-separated URLs.
        # split() without an argument splits on any whitespace run and never
        # yields empty strings, so doubled spaces, tabs or newlines between
        # links no longer produce an empty or multi-line --url value.
        for url in row['Link(s)'].split():
            dest.write(f"python3 /tmp/ssimon/config-space/experiments/analysis.py --url={url} --name={name}\n")

In [None]:
import pandas as pd

df = pd.read_csv("data/NICHE.csv")

# Write one analysis.py invocation for every row flagged "Y" in the
# "Engineered ML Project" column.
with open("slurm/niche.txt", "w", encoding="utf-8") as dest:
    for _, row in df.iterrows():
        # 'GitHub Repo' is split on "/" and the last part becomes the command
        # name; the full value is appended to the GitHub base URL.
        name = row['GitHub Repo'].split("/")[-1]
        full_name = row['GitHub Repo']
        url = f"https://github.com/{full_name}"

        if row["Engineered ML Project"] != "Y":
            continue

        dest.write(f"python3 /tmp/ssimon/config-space/experiments/analysis.py --url={url} --name={name}\n")

In [None]:
import pandas as pd

df = pd.read_csv("data/top_projects.csv")

# Write one analysis.py invocation per row of top_projects.csv.
# The output goes to its own file: the previous target "slurm/niche.txt" is the
# file produced from NICHE.csv and would have been overwritten.
with open("slurm/top_projects.txt", "w", encoding="utf-8") as dest:
    for index, row in df.iterrows():
        # Take URL and name from the current row instead of whatever `url` /
        # `name` happen to be left in the kernel from another cell.
        # NOTE(review): assumes top_projects.csv has 'html_url' and 'name'
        # columns like the other project CSVs in this notebook - confirm.
        dest.write(f"python3 /tmp/ssimon/config-space/experiments/analysis.py --url={row['html_url']} --name={row['name']}\n")

In [None]:
import pandas as pd

df = pd.read_csv("data/low_projects.csv")

# Keep the first 100 rows whose 'relevant' column equals "y"
df_relevant = df[df['relevant'] == "y"][:100]

# Write one analysis.py invocation per remaining row
with open("slurm/low_projects.txt", "w", encoding="utf-8") as dest:
    for index, row in df_relevant.iterrows():
        # Keys use single quotes: reusing the enclosing double quote inside an
        # f-string expression is a SyntaxError before Python 3.12.
        dest.write(f"python3 /tmp/ssimon/config-space/experiments/analysis.py --url={row['html_url']} --name={row['name']}\n")

In [None]:
import os
from paramiko import SSHClient, AutoAddPolicy
from scp import SCPClient
from dotenv import load_dotenv

# Credentials are read from the environment (TESLA_USER / TESLA_PWD) after
# load_dotenv() has been called.
load_dotenv()

remote_dir = '/home/ssimon/GitHub/config-space/slurm/microservices'
# NOTE(review): absolute, machine-specific path - consider making it configurable.
local_dir = '/Users/sebastiansimon/GitHub/config-space/data/microservice_projects'

# Make sure the download target exists before any file is copied into it
os.makedirs(local_dir, exist_ok=True)

ssh = SSHClient()
# NOTE(review): AutoAddPolicy accepts unknown host keys without verification;
# loading known host keys instead would protect against a spoofed server.
ssh.set_missing_host_key_policy(AutoAddPolicy())

try:
    ssh.connect('tesla.informatik.uni-leipzig.de', username=os.getenv("TESLA_USER"), password=os.getenv("TESLA_PWD"))

    # Get list of .json files from the server
    stdin, stdout, stderr = ssh.exec_command(f"find {remote_dir} -name '*.json'")
    json_files = stdout.read().decode().splitlines()

    # Surface problems reported by `find` instead of silently copying nothing
    find_errors = stderr.read().decode().strip()
    if find_errors:
        print(f"Remote find reported errors:\n{find_errors}")

    with SCPClient(ssh.get_transport()) as scp:
        for remote_file in json_files:
            # Files are stored flat under local_dir, keyed by their base name
            filename = os.path.basename(remote_file)
            local_file = os.path.join(local_dir, filename)
            if os.path.exists(local_file):
                print(f"File {local_file} already exists, skipping.")
                continue
            # Announce the copy only for files that are actually transferred
            print(f"Copying {remote_file}")
            scp.get(remote_file, local_file)
finally:
    # Always release the SSH connection, even if listing or copying failed
    ssh.close()

In [1]:
import os
import shutil

# Report which entries of `projects_dir` are directories that contain no file
# ending in '.json' directly inside them.
projects_dir = 'slurm/projects'

folders = os.listdir(projects_dir)
print("Number of projects analyzed:", len(folders))

folders_without_json = []
for entry in os.listdir(projects_dir):
    entry_path = os.path.join(projects_dir, entry)
    if not os.path.isdir(entry_path):
        continue
    json_names = [item for item in os.listdir(entry_path) if item.endswith('.json')]
    if not json_names:
        # To delete such a folder with all its contents, shutil.rmtree(entry_path)
        # could be called here; it is intentionally left disabled.
        folders_without_json.append(entry)

print(f"{len(folders_without_json)} folders without JSON files:")
for folder_name in folders_without_json:
    print(folder_name)

Number of projects analyzed: 1005
161 folders without JSON files:
rclone
turborepo
DefinitelyTyped
platform
zulip
echarts
aspnetcore
payload
wekan
refine
photoprism
taro
winston
lossless-cut
tidb
chatwoot
InvokeAI
jitsi-meet
appwrite
infisical
langflow
strapi
ccxt
obs-studio
fonts
vector
compiler-explorer
fluentui
subql
Signal-Android
bit
dash
ollama
Telegram
masscan
ansible
homepage
kratos
zed
gitea
posthog
keycloak
appsmith
Babylon.js
ragflow
renovate
shardingsphere
minikube
openapi-generator
storybook
crawlee
apollo-client
heroui
mattermost
handsontable
jenkins
AFFiNE
generator-jhipster
EasySpider
superset
linux
netdata
dokploy
gatsby
ghidra
backstage
argo-cd
cilium
medusa
CopilotKit
TypeScript
electron
searxng
ToolJet
qdrant
BaseRecyclerViewAdapterHelper
redux
AutoGPT
teleport
union
jdk
label-studio
angular
atuin
amis
AppFlowy
qmk_firmware
lapce
hyperswitch
intellij-community
open-webui
moby
insomnia
astro
QtScrcpy
dataease
RxSwift
sniffnet
lighthouse
selenium
graphql-engine
Mindus