In [3]:
import enigma_ai.cost.resources as res
import enigma_ai.cost.performance as perf

# --- Compute cost -----------------------------------------------------------
# Describe each GPU model individually, then group them into one vendor spec.
# NOTE(review): 6912 and 5120 match the published CUDA-core counts of the
# A100 and V100 rather than their tensor-core counts, and the V100 is a Volta
# part while the group below is labelled "Ampere" -- confirm what the library
# expects before relying on the resulting FLOPs figure.
a100 = res.GPUTensorCoreSpec(
    name="A100",
    clock_rate_ghz=1.41,
    num_tensor_cores=6912,
)
v100 = res.GPUTensorCoreSpec(
    name="V100",
    clock_rate_ghz=1.53,
    num_tensor_cores=5120,
)
gpus = [a100, v100]
gpu_specs = res.GPUSpec(name="NVIDIA", architecture="Ampere", gpus=gpus)

# Hardware available to the experiment: a list of GPU groups (one here).
hardware = res.HardwareSpec(gpus=[gpu_specs])

# Experiment description: model size, dataset size, hardware, numeric
# precision, and training duration in hours.
spec = res.ExperimentSpec(
    model_params=1e9,
    dataset=1e12,
    hardware=hardware,
    precision="fp32",
    hours_trained=1.0,
)

# Compute the cost estimate for this experiment and show it.
compute = res.calculate_compute_cost(spec)
print(compute)



############################################
# Performance

# Inputs to the scaling estimate.
model_size = 1e9      # 1 billion parameters
dataset_size = 1e12   # 1 trillion samples

# Which quantity is being scaled. The options are Model Size, Dataset Size,
# or LoRA Parameters.
scaling_factor = "Model Size"

# Estimate fine-tuning performance for the chosen scaling factor; the result
# is indexed by key below, and "L" holds the value reported as perplexity.
scaling_params = perf.estimate_finetuning_performance(
    scaling_factor, model_size=model_size, dataset_size=dataset_size
)

print(f'Expected Perplexity: {scaling_params["L"]}')


{'Required FLOPs': 6000000000.0, 'Actual FLOPs': 1125.08928}
Expected Perplexity: 0.7488900210846068


In [None]:
from enigma_ai.cost import performance

# Estimate fine-tuning performance when scaling by dataset size.
# The sizes are passed by keyword, matching the call in the first cell, so
# the two arguments cannot be silently swapped.
scaling_factor = 'Dataset Size'
model_size = 10**6    # 1 million parameters
dataset_size = 10**6  # 1 million samples

# Left as the cell's last expression so the notebook displays the result.
performance.estimate_finetuning_performance(
    scaling_factor,
    model_size=model_size,
    dataset_size=dataset_size,
)

In [None]:
import os

from enigma_ai.data import scrape

# GitHub API token. Read it from the GITHUB_TOKEN environment variable so a
# real credential never has to be written into (or committed with) the
# notebook; the placeholder is used only when the variable is not set.
github_token = os.environ.get('GITHUB_TOKEN', 'your_github_api_token')

# Search query and parameters
search_term = 'pentest'
max_results = 100
filename = 'fetched_repos.csv'

# Fetch repositories matching the query (only those with at least 100 stars).
# The result is kept in `repos_df`; the CSV name is passed along as well.
repos_df = scrape.fetch_repos(github_token, max_results, filename, search_term, min_stars=100)


In [None]:
from enigma_ai.data import process
import pandas as pd

# Reload the repository listing saved by the scraping step and keep only the
# first row, so a single repository is processed.
filename = 'fetched_repos.csv'
repos_df = pd.read_csv(filename).head(1)

# Extract code files from the selected repositories.
# `github_token` is not defined in this cell: it must still be in the kernel
# from the scraping cell above.
# NOTE(review): the same CSV path that was just read is passed here -- confirm
# whether it is used as an output path and would overwrite the listing.
repos_with_code = process.extract_code_from_repos(repos_df, filename, github_token)

# Show the first 1000 characters of the first repository's README.md, looked
# up under the 'Markdown' key of that repository's 'code' entry.
readme_text = repos_with_code['code'].values[0]['Markdown']['README.md']
print(readme_text[:1000])


In [None]:
from enigma_ai.finetuning import train