In [None]:
import wandb
import pandas as pd
import netrc
import matplotlib.pyplot as plt

In [None]:
# Log this session in to Weights & Biases before any API calls are made.
wandb.login()

In [None]:
import os
import json
import glob

def load_all_json_files(pattern='./experiments/fasttext/fasttext_wmt14_results_V*_h*.json'):
    """Load every result file matching ``pattern`` into one nested dictionary.

    File names are expected to look like ``<prefix>_V{V value}_h{h tilde value}.json``.
    The V and h-tilde values are taken from the file's base name (not from the
    directory part of the path), so directories containing ``_V`` or ``_h`` do
    not corrupt the keys.

    Args:
        pattern: Glob pattern selecting the JSON files to load. Defaults to the
            fastText WMT14 result files under ``./experiments/fasttext``.

    Returns:
        dict: ``{V_value: {h_tilde_value: parsed_json}}`` with both keys kept
        as the strings found in the file name. Empty when nothing matches.
    """
    combined_data = {}

    # glob returns matches in arbitrary order; sort so the resulting dict
    # (and anything serialized from it) is reproducible between runs.
    for fname in sorted(glob.glob(pattern)):
        # Work on the extension-less base name: "..._V0.01_h0.5.json" -> "..._V0.01_h0.5"
        stem = os.path.splitext(os.path.basename(fname))[0]
        _, _, after_v = stem.partition('_V')
        V_value, _, h_tilde_value = after_v.partition('_h')

        with open(fname, 'r') as f:
            data = json.load(f)

        # Group by V first, then by h tilde.
        combined_data.setdefault(V_value, {})[h_tilde_value] = data

    return combined_data

def save_combined_json(combined_data, output_file):
    """Serialize ``combined_data`` as JSON (4-space indent) into ``output_file``.

    The target file is opened in text write mode, so existing content is replaced.
    """
    with open(output_file, 'w') as handle:
        handle.write(json.dumps(combined_data, indent=4))

# Merge every per-experiment result file into one dictionary and write it to
# a single JSON file in the current working directory.
output_file = 'combined_fasttext_wmt14_results.json'
combined_data = load_all_json_files()
save_combined_json(combined_data, output_file)

print(f"All JSON files have been combined and saved to {output_file}.")

In [None]:
def download_data_from_wandb(entity: str, project: str, keywords: list):
    """Download the logged history of every run whose name contains a keyword.

    Args:
        entity: W&B entity (user or team) that owns the project.
        project: W&B project name.
        keywords: Substrings to look for in run names. A single string is
            accepted and treated as a one-element list. Non-string entries
            are ignored.

    Returns:
        pandas.DataFrame: All history rows of the matching runs stacked
        together, with two extra columns, ``name`` and ``id``, identifying the
        run each row came from. Each matching run is included exactly once,
        even when its name contains several of the keywords. The per-run row
        index is kept (not reset). Empty DataFrame when no run matches.
    """
    # Prefer the key stored in ~/.netrc. When there is no netrc file or no
    # entry for the W&B host, pass None so wandb falls back to its own
    # credential lookup instead of failing with an opaque TypeError here.
    try:
        credentials = netrc.netrc().authenticators("api.wandb.ai")
    except FileNotFoundError:
        credentials = None
    api_key = credentials[2] if credentials else None

    api = wandb.Api(api_key=api_key, timeout=1800)

    if isinstance(keywords, str):
        keywords = [keywords]
    wanted = [keyword for keyword in keywords if isinstance(keyword, str)]

    frames = []
    for run in api.runs(path=f"{entity}/{project}"):
        # any() stops at the first hit, so a run matching several keywords is
        # downloaded once rather than once per keyword (which duplicated rows).
        if not any(keyword in run.name for keyword in wanted):
            continue

        run_scan = api.run(f"{entity}/{project}/{run.id}")
        rows_pd = pd.DataFrame(run_scan.scan_history())
        rows_pd["name"] = run.name
        rows_pd["id"] = run.id
        frames.append(rows_pd)

    # Concatenate once at the end; pd.concat([]) raises, so handle "no match".
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)

In [None]:
def total_power(df, run_names):
    """Plot the ``power/total`` series against ``_step`` for each named run.

    Args:
        df: DataFrame with at least the columns ``name``, ``_step`` and
            ``power/total``.
        run_names: Iterable of run names; one line is drawn per name, using
            the rows of ``df`` whose ``name`` equals it.

    Rows where ``_step`` or ``power/total`` is missing are skipped so that a
    sparsely logged metric is still drawn as a connected line (matplotlib
    breaks a line at every NaN).

    Note: do not rebind the module-level name ``total_power`` (for example to
    a list); doing so hides this function and later calls raise ``TypeError``.
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    for run_name in run_names:
        run_df = df[df['name'] == run_name].dropna(subset=['_step', 'power/total'])
        ax.plot(run_df['_step'], run_df['power/total'], label=run_name)

    ax.set_xlabel('Step')
    ax.set_ylabel('Power (Total)')
    ax.set_title('Power Consumption (Total) for Different Runs')
    ax.legend(title='Runs')
    ax.grid(True)

    plt.show()

In [None]:
def power_run(df, run_names, total_power, average_power, num_samples):
    """Aggregate ``power/total`` per run and show the results as two bar charts.

    For every name in ``run_names`` the ``power/total`` column of the matching
    rows is summed and divided by 1000; that value divided by ``num_samples``
    is the per-sample figure.

    Args:
        df: DataFrame with at least the columns ``name`` and ``power/total``.
        run_names: Run names to aggregate, in plotting order.
        total_power: List that receives one aggregated value per run
            (appended in place).
        average_power: List that receives one per-sample value per run
            (appended in place).
        num_samples: Divisor used for the per-sample value.

    Returns:
        list: The ``average_power`` list that was passed in.

    NOTE(review): the axis labels say "Watts" while the plotted value is a sum
    of ``power/total`` divided by 1000 - confirm the intended unit.
    """
    # Values computed by this call only. Plotting these (rather than the
    # caller's lists) keeps bar heights aligned with run_names even when the
    # lists passed in already contain entries.
    run_totals = []
    run_averages = []
    for run_name in run_names:
        run_df = df[df['name'] == run_name]
        total_power_used = run_df['power/total'].sum() / 1000
        run_totals.append(total_power_used)
        run_averages.append(total_power_used / num_samples)

    total_power.extend(run_totals)
    average_power.extend(run_averages)

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))

    ax1.bar(run_names, run_totals)
    ax1.set_xlabel('Run Name')
    ax1.set_ylabel('Total Power Used (Watts)')
    ax1.set_title('Total Power Used for Each Run')
    # Rotate via tick_params: set_xticklabels without fixed tick positions is
    # discouraged by matplotlib and can emit a FixedFormatter warning.
    ax1.tick_params(axis='x', labelrotation=45)

    ax2.bar(run_names, run_averages)
    ax2.set_xlabel('Run Name')
    ax2.set_ylabel('Average Power Used per Sample (Watts)')
    ax2.set_title('Average Power Used per Sample for Each Run')
    ax2.tick_params(axis='x', labelrotation=45)

    plt.tight_layout()
    plt.show()

    return average_power

In [None]:
# W&B project to read from, and the run-name substrings to select:
# any run whose name contains one of these keywords is downloaded.
entity = "ryzhangofficial"
project = "classifier"
keywords = ["13b", "7b", "tiny"]

df = download_data_from_wandb(entity, project, keywords)

In [None]:
# Structural overview of the downloaded run history.
print("-------------------------------------------------------")
print(df.shape)
print("-------------------------------------------------------")
print(df.columns)
print("-------------------------------------------------------")
print(df.dtypes)
print("-------------------------------------------------------")
print(df.head())
print("-------------------------------------------------------")
# DataFrame.info() writes its report itself and returns None, so it is called
# directly; wrapping it in print() only added a stray "None" line.
df.info()
print("-------------------------------------------------------")
print(df.describe())

In [None]:
# List the distinct run names and run ids present in the downloaded data.
print(df['name'].unique()) 
print(df['id'].unique())

In [None]:
# Exact run names (matched with == against the `name` column) to compare.
run_names = ['wmt14-tiny', 'wmt14-7b', 'wmt14-13b']

In [None]:
# The accumulator lists get their own names: binding a list to the name
# `total_power` would replace the plotting function `total_power`, and any
# later `total_power(df, run_names)` call would raise TypeError.
total_power_values = []
average_power_values = []
num_samples = 3000

power_run(df, run_names, total_power_values, average_power_values, num_samples)

In [None]:
# Exact run names (matched with == against the `name` column) to compare.
run_names = ['cnndailymail-tiny', 'cnndailymail-7b', 'cnndailymail-13b']

In [None]:
# The accumulator lists get their own names: binding a list to the name
# `total_power` would replace the plotting function `total_power`, and any
# later `total_power(df, run_names)` call would raise TypeError.
total_power_values = []
average_power_values = []
num_samples = 3000

power_run(df, run_names, total_power_values, average_power_values, num_samples)

In [None]:
# Download every run of the project whose name contains "fasttext"
# into a separate DataFrame.
entity = "ryzhangofficial"
project = "classifier"
keywords = ["fasttext"]

df_fasttext = download_data_from_wandb(entity, project, keywords)

In [None]:
# Structural overview of the downloaded fastText run history.
print("-------------------------------------------------------")
print(df_fasttext.shape)
print("-------------------------------------------------------")
print(df_fasttext.columns)
print("-------------------------------------------------------")
print(df_fasttext.dtypes)
print("-------------------------------------------------------")
print(df_fasttext.head())
print("-------------------------------------------------------")
# DataFrame.info() writes its report itself and returns None, so it is called
# directly; wrapping it in print() only added a stray "None" line.
df_fasttext.info()
print("-------------------------------------------------------")
print(df_fasttext.describe())

In [None]:
# List the distinct run names and run ids present in the fastText data.
print(df_fasttext['name'].unique()) 
print(df_fasttext['id'].unique())

In [None]:
# Exact fastText run names (matched with == against the `name` column) to compare.
run_names = ['V100-fasttext-wmt14-h25', 'V100-fasttext-wmt14-h3',
             'V100-fasttext-wmt14-h35', 'V1000-fasttext-wmt14-h25',
             'V10000-fasttext-wmt14-h1', 'V100000-fasttext-wmt14-h1',
             'V1000000-fasttext-wmt14-h1', 'V10000000-fasttext-wmt14-h1']

In [None]:
# Plot `power/total` per step for the selected fastText runs.
# NOTE(review): this needs `total_power` to still be the plotting function;
# it fails with TypeError if the name has been rebound to a list by another cell.
total_power(df_fasttext, run_names)

In [None]:
# The accumulator lists get their own names: binding a list to the name
# `total_power` would replace the plotting function `total_power`, and any
# later `total_power(df, run_names)` call would raise TypeError.
total_power_values = []
average_power_values = []
num_samples = 3000

power_run(df_fasttext, run_names, total_power_values, average_power_values, num_samples)

In [None]:
# Display the `chosen_model_accuracy` column of the fastText history.
df_fasttext['chosen_model_accuracy']

In [None]:
import pickle
import pandas as pd

# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only load this file if it comes from a trusted source.
with open('data/input_output_train', 'rb') as file:
    combined_outputs = pickle.load(file)

# Wrap the unpickled object in a DataFrame and preview its first rows.
df_combined_outputs = pd.DataFrame(combined_outputs)
df_combined_outputs.head()

In [None]:
import os
import json

# Print the mean `chosen_model_accuracy` of every JSON result file in the directory.
directory = 'experiments/fasttext'

# os.listdir() returns entries in arbitrary order; sort so the printed
# report is in the same order on every run.
for filename in sorted(os.listdir(directory)):
    if not filename.endswith('.json'):
        continue

    filepath = os.path.join(directory, filename)

    with open(filepath, 'r') as f:
        data = json.load(f)

    # Each entry of the file is expected to carry a 'chosen_model_accuracy' value.
    total = 0
    num_results = len(data)

    for result in data:
        total += result['chosen_model_accuracy']

    # An empty file reports 0 instead of dividing by zero.
    avg_accuracy = total / num_results if num_results > 0 else 0
    print(f"{filename}: {avg_accuracy}")

In [None]:
import os
import json
import numpy as np
import matplotlib.pyplot as plt

# Bar chart of the mean `chosen_model_accuracy` per result file, restricted to
# JSON files whose name contains "h5".
# NOTE(review): "h5" is a plain substring test, so it also matches names such
# as "..._h50..." - confirm that this is the intended selection.
directory = 'experiments/fasttext'
file_averages = []
file_names = []

# os.listdir() returns entries in arbitrary order; sort so the bars appear in
# the same order on every run.
for filename in sorted(os.listdir(directory)):
    if filename.endswith('.json') and "h5" in filename:
        filepath = os.path.join(directory, filename)
        with open(filepath, 'r') as f:
            data = json.load(f)
        # The file is closed before the (pure in-memory) averaging starts.
        accuracies = [result['chosen_model_accuracy'] for result in data]
        avg_accuracy = np.mean(accuracies)
        file_averages.append(avg_accuracy)
        file_names.append(filename)

plt.figure(figsize=(10, 6))
plt.bar(file_names, file_averages, color='b')
plt.title('Average Accuracy per File')
plt.xlabel('Filename')
plt.ylabel('Average Accuracy')
plt.xticks(rotation=90)
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
# Download the three WMT14 runs by name substring.
entity = "ryzhangofficial"
project = "classifier"
# Each keyword must be its own list element: without the comma between them,
# Python joins adjacent string literals into "wmt14-7bwmt14-tiny", which
# matches no run, so the 7b and tiny runs were silently left out.
keywords = ["wmt14-13b", "wmt14-7b", "wmt14-tiny"]

df = download_data_from_wandb(entity, project, keywords)

In [None]:
# Exact run names (matched with == against the `name` column) to compare.
run_names = ['wmt14-13b', 'wmt14-7b', 'wmt14-tiny']

In [None]:
# Plot `power/total` per step for the selected runs.
# NOTE(review): this needs `total_power` to still be the plotting function;
# it fails with TypeError if the name has been rebound to a list by another cell.
total_power(df, run_names)

In [None]:
# The accumulator lists get their own names: binding a list to the name
# `total_power` would replace the plotting function `total_power`, and any
# later `total_power(df, run_names)` call would raise TypeError.
total_power_values = []
average_power_values = []
num_samples = 3000

avg_power = power_run(df, run_names, total_power_values, average_power_values, num_samples)
print(avg_power)

In [None]:
# Here the run names double as the download keywords: any run whose name
# contains one of them is fetched, and the same list is reused afterwards
# for exact-name selection.
entity = "ryzhangofficial"
project = "classifier"
run_names = ["cnndailymail-tiny", "cnndailymail-7b", "cnndailymail-13b"]

df = download_data_from_wandb(entity, project, run_names)

In [None]:
# Plot `power/total` per step for the selected runs.
# NOTE(review): this needs `total_power` to still be the plotting function;
# it fails with TypeError if the name has been rebound to a list by another cell.
total_power(df, run_names)

In [None]:
# The accumulator lists get their own names: binding a list to the name
# `total_power` would replace the plotting function `total_power`, and any
# later `total_power(df, run_names)` call would raise TypeError.
total_power_values = []
average_power_values = []
num_samples = 3000

avg_power = power_run(df, run_names, total_power_values, average_power_values, num_samples)
print(avg_power)

In [None]:
# Substring filters: every run whose name contains "0.5" or ".3125" is downloaded.
# NOTE(review): these are short substrings, so they may select more runs than
# the two analysed afterwards - check df['name'].unique() if that matters.
entity = "ryzhangofficial"
project = "classifier"
keywords = ["0.5", ".3125"]

df = download_data_from_wandb(entity, project, keywords)

In [None]:
# The accumulator lists get their own names: binding a list to the name
# `total_power` would replace the plotting function `total_power`, and any
# later `total_power(df, run_names)` call would raise TypeError.
total_power_values = []
average_power_values = []
run_names = ["fasttext_wmt14_results_V0.01_h0.5"]
num_samples = 3000

avg_power = power_run(df, run_names, total_power_values, average_power_values, num_samples)
print(avg_power)

In [None]:
# The accumulator lists get their own names: binding a list to the name
# `total_power` would replace the plotting function `total_power`, and any
# later `total_power(df, run_names)` call would raise TypeError.
total_power_values = []
average_power_values = []
run_names = ["fasttext_cnn_dailymail_results_V0.01_h0.3125"]
num_samples = 3000

avg_power = power_run(df, run_names, total_power_values, average_power_values, num_samples)
print(avg_power)