In [6]:
import os
import csv
import glob

def read_column_from_csv(file_path, column_index):
    """Return one column of a csv file as a list of strings.

    Every non-blank row contributes ``row[column_index]``; no row is treated
    as a header, so a header line (if the file has one) is included too.

    Args:
        file_path: Path of the csv file to read.
        column_index: Zero-based index of the column to extract.

    Returns:
        list[str]: The selected field of each non-blank row, in file order.

    Raises:
        IndexError: If a non-blank row has fewer than ``column_index + 1`` fields.
    """
    # newline='' lets the csv module do its own newline handling, as its
    # documentation recommends (matters for quoted fields with line breaks).
    with open(file_path, 'r', newline='') as csvfile:
        reader = csv.reader(csvfile)
        # csv.reader yields [] for a blank line; indexing it would raise
        # IndexError, so such rows are skipped.
        return [row[column_index] for row in reader if row]

def write_row_to_csv(file_path, row_data):
    """Append one row to a csv file, creating the file if it does not exist.

    Args:
        file_path: Path of the csv file to append to.
        row_data: Iterable of field values forming the row.
    """
    # newline='' stops the text layer from translating the writer's own
    # '\r\n' terminator, which would otherwise produce '\r\r\n' (blank rows)
    # on platforms whose line separator is '\r\n'.
    with open(file_path, 'a', newline='') as csvfile:
        csv.writer(csvfile).writerow(row_data)

directory_path = './progress_gpt' # Change this to the path of the directory containing the csv files
column_index = 2 # Change this to the index of the column you want to extract (0 for the first column)
aggregate_file = 'aggregate_gpt.csv'

# write_row_to_csv appends, so remove any previous aggregate file to avoid
# stacking rows from an earlier run on top of this one.
if os.path.exists(aggregate_file):
    os.remove(aggregate_file)

# glob.glob returns matches in arbitrary order; sorting the paths makes the
# row order of the aggregate file reproducible from run to run.
for file_path in sorted(glob.glob(os.path.join(directory_path, '*.csv'))):
    # Read the specified column from the current csv file
    column_data = read_column_from_csv(file_path, column_index)

    # Write the column data as a row in the aggregate csv
    write_row_to_csv(aggregate_file, column_data)



In [10]:
import csv
import os

def normalize_row(row):
    """Min-max scale a sequence of numbers into the range [0, 1].

    Each value is mapped to ``(value - min) / (max - min)``.

    Args:
        row: Sequence of numbers (it is traversed more than once).

    Returns:
        list: The scaled values in the original order. An empty input gives
        an empty list. A row whose values are all equal has no spread to
        scale by, so every entry is returned as 0.0.
    """
    if len(row) == 0:
        # min()/max() raise ValueError on an empty sequence.
        return []

    row_min = min(row)
    span = max(row) - row_min
    if span == 0:
        # Constant row: dividing by the zero span would raise ZeroDivisionError.
        return [0.0] * len(row)

    return [(value - row_min) / span for value in row]

def read_and_normalize_csv(input_filename, output_filename):
    """Normalize every row of a csv file and write the result to another csv.

    Each non-blank input row is converted to floats, passed through
    ``normalize_row`` and written as one output row. The output file is
    overwritten.

    Args:
        input_filename: Path of the csv file to read.
        output_filename: Path of the csv file to write.

    Raises:
        ValueError: If a field cannot be converted to float.
    """
    # newline='' on both handles leaves newline handling to the csv module.
    with open(input_filename, 'r', newline='') as input_file, \
            open(output_filename, 'w', newline='') as output_file:
        csv_reader = csv.reader(input_file)
        csv_writer = csv.writer(output_file)

        for row in csv_reader:
            if not row:
                # csv.reader yields [] for blank lines; there is nothing to
                # normalize, and min()/max() on an empty row would raise.
                continue
            numerical_row = list(map(float, row))
            csv_writer.writerow(normalize_row(numerical_row))

input_filename = "aggregate_human.csv"
output_filename = "normalized_human.csv"

# Run the normalization only when the source file is present; otherwise
# report the missing file.
if os.path.exists(input_filename):
    read_and_normalize_csv(input_filename, output_filename)
    print(f"Normalized data saved to {output_filename}")
else:
    print(f"{input_filename} does not exist. Please provide a valid CSV file.")

Normalized data saved to normalized_human.csv


In [12]:
import csv
import os

def column_averages(data):
    """Return the per-column mean of a list of numeric rows.

    The number of columns is taken from the first row, and every column sum
    is divided by the total number of rows.

    Args:
        data: Sequence of rows, each a sequence of numbers.

    Returns:
        list: One mean per column of the first row; an empty list when
        ``data`` is empty.

    Raises:
        IndexError: If a later row is longer than the first row.

    Note:
        A row shorter than the first adds nothing to the columns it lacks,
        yet the divisor is still ``len(data)``.
    """
    if len(data) == 0:
        # No rows means no columns to average; data[0] would raise IndexError.
        return []

    num_rows = len(data)
    column_sums = [0] * len(data[0])

    for row in data:
        for i, value in enumerate(row):
            column_sums[i] += value

    return [column_sum / num_rows for column_sum in column_sums]

def read_and_calculate_averages(input_filename, output_filename):
    """Write the column averages of a numeric csv file as a single csv row.

    All rows of ``input_filename`` are parsed as floats, averaged column-wise
    by ``column_averages``, and the averages are written as the only row of
    ``output_filename`` (which is overwritten).
    """
    with open(input_filename, 'r') as source:
        data = [[float(cell) for cell in record] for record in csv.reader(source)]

    # Compute before opening the output so a failure here leaves it untouched.
    averages = column_averages(data)

    with open(output_filename, 'w', newline='') as sink:
        csv.writer(sink).writerow(averages)

input_filename = "normalized_gpt.csv"
output_filename = "avgs_norm_gpt.csv"

# Compute the averages only when the source file is present; otherwise
# report the missing file.
if os.path.exists(input_filename):
    read_and_calculate_averages(input_filename, output_filename)
    print(f"Column averages saved to {output_filename}")
else:
    print(f"{input_filename} does not exist. Please provide a valid CSV file.")

Column averages saved to avgs_norm_gpt.csv


In [12]:
import csv
import numpy as np
import os

def read_csv_to_numpy_array(input_filename):
    """Load the first row of a csv file as a 1-D numpy array of floats.

    Only the first row is read; any further rows are ignored.

    Args:
        input_filename: Path of the csv file to read.

    Returns:
        numpy.ndarray: The first row's fields converted to float. An empty
        file yields an empty array, the same result as a blank first line.

    Raises:
        ValueError: If a field of the first row cannot be converted to float.
    """
    # newline='' leaves newline handling to the csv module.
    with open(input_filename, 'r', newline='') as input_file:
        # Default of [] keeps an empty file from leaking StopIteration.
        first_row = next(csv.reader(input_file), [])
    return np.array(list(map(float, first_row)))

input_filename = "avgs_norm_gpt.csv"

# Load and show the array only when the source file is present; otherwise
# report the missing file.
if os.path.exists(input_filename):
    numpy_array = read_csv_to_numpy_array(input_filename)
    print("Numpy array:", numpy_array)
    print("Shape of the numpy array:", numpy_array.shape)
else:
    print(f"{input_filename} does not exist. Please provide a valid CSV file.")

Numpy array: [0.25320591 0.30373804 0.31127672 0.3158001  0.29989693 0.28989786
 0.28395617 0.28995295 0.30085289 0.29915524 0.30837586 0.30157995
 0.3007419  0.29693495 0.3033967  0.30924945 0.32418713 0.32194088
 0.31919812 0.32368586 0.31255106 0.3148458  0.3250725  0.33717601
 0.33523201 0.34334021 0.33594499 0.34059239 0.34400703 0.35182389
 0.37096982 0.38178048 0.36767078 0.3807188  0.39468364 0.39396182
 0.39736933 0.40857354 0.41652653 0.42232859 0.43118962 0.4370648
 0.45470283 0.43630826 0.44312319 0.45103001 0.46135792 0.45682421
 0.45247723 0.45421484 0.45629535 0.46577271 0.47525175 0.49843026
 0.4907559  0.50757657 0.52407931 0.53870861 0.55158865 0.56308937
 0.56654046 0.56895232 0.57392707 0.5694719  0.55422243 0.54848402
 0.54398693 0.53320391 0.53578749 0.5321571  0.53090859 0.53147156
 0.53631513 0.53736988 0.57605774 0.57543452 0.57658974 0.59446216
 0.59433887 0.60998145 0.61438871 0.61310531 0.60971451 0.62830591
 0.63039026 0.63210961 0.63272552 0.62779431 0.629

In [14]:
import csv
import numpy as np
import os

def read_csv_to_numpy_array(input_filename):
    """Load every row of a csv file into a numpy array of floats.

    Each csv row becomes one row of the array; every field is converted with
    ``float``.
    """
    with open(input_filename, 'r') as handle:
        rows = [[float(cell) for cell in record] for record in csv.reader(handle)]

    return np.array(rows)

def column_variances(numpy_array):
    """Return the variance of each column, i.e. ``np.var`` along axis 0."""
    per_column = np.var(numpy_array, axis=0)
    return per_column

def save_variances_to_csv(output_filename, variances):
    """Write ``variances`` as the single row of a csv file, overwriting it."""
    with open(output_filename, 'w', newline='') as sink:
        csv.writer(sink).writerow(variances)

input_filename = "normalized_human.csv"
output_filename = "var_norm_human.csv"

# Compute and save the variances only when the source file is present;
# otherwise report the missing file.
if os.path.exists(input_filename):
    numpy_array = read_csv_to_numpy_array(input_filename)
    variances = column_variances(numpy_array)
    save_variances_to_csv(output_filename, variances)
    print(f"Column variances saved to {output_filename}")
else:
    print(f"{input_filename} does not exist. Please provide a valid CSV file.")

Column variances saved to var_norm_human.csv


In [16]:
import os
import pandas as pd

def aggregate_last_value(folder_path, output_path="rollout_results_human.csv"):
    """Collect the final first-column value of every csv in a folder.

    For each ``*.csv`` file in ``folder_path`` (visited in sorted name order)
    the file is read with ``pd.read_csv`` and the value in the last row of
    the first column is taken. The collected values are written to
    ``output_path``, one per line, without index or header.

    Args:
        folder_path: Directory containing the csv files.
        output_path: Destination csv file; defaults to the previously
            hard-coded "rollout_results_human.csv".

    Raises:
        IndexError: If a csv file has no data rows (``df.iloc[-1, 0]``).
    """
    # os.listdir returns names in arbitrary order; sorting makes the order of
    # the output rows reproducible.
    csv_names = sorted(
        name for name in os.listdir(folder_path) if name.endswith('.csv')
    )

    aggregate_data = []
    for file_name in csv_names:
        df = pd.read_csv(os.path.join(folder_path, file_name))

        # Get the value in the last row of the first column
        aggregate_data.append(df.iloc[-1, 0])

    # Convert list to DataFrame and save as csv file
    pd.DataFrame(aggregate_data).to_csv(output_path, index=False, header=False)

# Aggregate the last first-column value of every csv in the results folder.
aggregate_last_value(folder_path='./bn_results_human')