In [1]:
import os, re, json
import pandas as pd
import numpy as np

In [2]:
def load_config(config_file='config.json'):
    """Load the notebook's settings from a JSON file.

    Args:
        config_file: Path to the JSON configuration file. Defaults to
            'config.json' (a relative path, so it is looked up in the
            current working directory).

    Returns:
        The parsed JSON document.

    Raises:
        FileNotFoundError: If config_file does not exist.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON is UTF-8 by specification; pin the encoding so parsing does not
    # depend on the platform's locale default.
    with open(config_file, 'r', encoding='utf-8') as f:
        return json.load(f)

In [3]:
# Read the settings once (default file: 'config.json'); later cells look up
# their input and output locations in `config`.
config = load_config()
# NOTE(review): presumably the root folder for model artifacts; it is not
# referenced again in the cells visible here — confirm it is still needed.
models_base_location = config['models_base_location']

In [4]:
# Template submission: later cells left-join model predictions onto it by 'id'
# and keep its 'target' wherever a model has no prediction.
# os.path.join keeps the path valid whether or not the configured location ends
# with a path separator (plain '+' concatenation glued folder and file name).
df_sample_sub = pd.read_csv(
    os.path.join(config['sample_submission_location'], 'SampleSubmission.csv')
)

In [5]:
def process_blend_outputs(base_path, id_length=23, id_suffix='_X_no_damage'):
    """Average the per-file numeric outputs in a folder into one prediction per id.

    Every ``.txt`` file directly inside ``base_path`` is expected to contain a
    single number. Files are grouped by the first ``id_length`` characters of
    their name; each group's readable values are averaged and rounded to an
    integer, and ``id_suffix`` is appended to the group key to form the id.

    Args:
        base_path: Folder holding the ``.txt`` outputs.
        id_length: Number of leading filename characters that identify a group.
        id_suffix: Text appended to each group key to build the ``id`` column.

    Returns:
        DataFrame with columns ``id`` and ``pred`` (integer), one row per
        group, ordered by id. Groups whose files were all unreadable or
        non-numeric get ``pred == 0``. A folder without any ``.txt`` file
        yields an empty frame with the same two columns.

    Raises:
        FileNotFoundError: If ``base_path`` does not exist.
    """
    if not os.path.exists(base_path):
        raise FileNotFoundError(f"Base path {base_path} does not exist")

    # Sorted so the processing order does not depend on the filesystem.
    txt_files = sorted(name for name in os.listdir(base_path) if name.endswith('.txt'))

    records = []
    for file in txt_files:
        try:
            with open(os.path.join(base_path, file), 'r') as f:
                value = float(f.read().strip())
        except (ValueError, OSError):
            # Unreadable or non-numeric file: keep the id, treat the value as missing.
            value = np.nan
        records.append({'id': file[:id_length], 'value': value})

    # Explicit columns keep the frame well-formed when no files were found;
    # a column-less DataFrame would make groupby('id') raise KeyError.
    df = pd.DataFrame(records, columns=['id', 'value'])
    df['value'] = df['value'].astype(float)

    # mean() skips NaN, so partially readable groups average what is available;
    # all-NaN groups produce NaN and fall back to 0.
    pred = df.groupby('id')['value'].mean().round(0).fillna(0).astype(int)

    result_df = pred.rename('pred').reset_index()
    result_df['id'] = result_df['id'] + id_suffix
    return result_df

In [6]:
# Make sure the folder for the per-model submission CSVs exists before any cell
# writes into it. exist_ok=True makes this safe to re-run and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(config['intermediate_submission_location'], exist_ok=True)

In [7]:
try:
    averaged_df = process_blend_outputs('llama11b_outputs')
    print("\nAveraged DataFrame:")
    print(averaged_df)

    # Left join keeps every row of the sample submission; ids the model did
    # not score end up with pred == NaN.
    merged_df = df_sample_sub.merge(averaged_df[['id', 'pred']],
                                    on='id',
                                    how='left')

    # Use the model's prediction where there is one, otherwise keep the sample
    # submission's target (vectorized; no row-wise apply needed).
    merged_df['target'] = merged_df['pred'].fillna(merged_df['target']).astype(int)

    # os.path.join works whether or not the configured folder ends with a
    # path separator.
    submission_name = os.path.join(config['intermediate_submission_location'],
                                   'llama11b_submission.csv')
    merged_df[['id', 'target']].to_csv(submission_name, index=False)

except FileNotFoundError as e:
    # E.g. the model's output folder is missing: report it and carry on.
    print(f"Llama11b error: {e}")


Averaged DataFrame:
                                    id  pred
0  malawi-cyclone_00000000_X_no_damage    51
1  malawi-cyclone_00000001_X_no_damage    45
2  malawi-cyclone_00000002_X_no_damage    48
3  malawi-cyclone_00000009_X_no_damage    32


In [8]:
try:
    averaged_df = process_blend_outputs('qwen72_outputs')
    print("\nAveraged DataFrame:")
    print(averaged_df)

    # Left join keeps every row of the sample submission; ids the model did
    # not score end up with pred == NaN.
    merged_df = df_sample_sub.merge(averaged_df[['id', 'pred']],
                                    on='id',
                                    how='left')

    # Use the model's prediction where there is one, otherwise keep the sample
    # submission's target (vectorized; no row-wise apply needed).
    merged_df['target'] = merged_df['pred'].fillna(merged_df['target']).astype(int)

    # os.path.join works whether or not the configured folder ends with a
    # path separator.
    submission_name = os.path.join(config['intermediate_submission_location'],
                                   'qwen72_submission.csv')
    merged_df[['id', 'target']].to_csv(submission_name, index=False)

except FileNotFoundError as e:
    # E.g. the model's output folder is missing: report it and carry on.
    print(f"Qwen72 error: {e}")


Averaged DataFrame:
                                    id  pred
0  malawi-cyclone_00000000_X_no_damage    20
1  malawi-cyclone_00000001_X_no_damage    15
2  malawi-cyclone_00000002_X_no_damage    14
3  malawi-cyclone_00000009_X_no_damage    15


In [9]:
try:
    averaged_df = process_blend_outputs('pixtral_outputs')
    print("\nAveraged DataFrame:")
    print(averaged_df)

    # Left join keeps every row of the sample submission; ids the model did
    # not score end up with pred == NaN.
    merged_df = df_sample_sub.merge(averaged_df[['id', 'pred']],
                                    on='id',
                                    how='left')

    # Per the original note, this model's outputs are left/right parts that
    # should be added, and the sum is approximated as twice the group average.
    # Only the model's own predictions are doubled: rows without a prediction
    # keep the sample submission's target unchanged (previously the fallback
    # target was doubled as well).
    # NOTE(review): process_blend_outputs rounds the average before it is
    # doubled here, so an odd left+right total comes out off by one, and a
    # group with a missing part is over-counted; an exact sum would need a
    # change in that function.
    doubled_pred = 2 * merged_df['pred']
    merged_df['target'] = doubled_pred.fillna(merged_df['target']).astype(int)

    # os.path.join works whether or not the configured folder ends with a
    # path separator.
    submission_name = os.path.join(config['intermediate_submission_location'],
                                   'pixtral_submission.csv')
    merged_df[['id', 'target']].to_csv(submission_name, index=False)

except FileNotFoundError as e:
    # E.g. the model's output folder is missing: report it and carry on.
    print(f"Pixtral error: {e}")


Averaged DataFrame:
                                    id  pred
0  malawi-cyclone_00000000_X_no_damage    12
1  malawi-cyclone_00000001_X_no_damage     8
2  malawi-cyclone_00000002_X_no_damage     8
3  malawi-cyclone_00000009_X_no_damage     8


In [26]:
rt_detr_dir = 'rt_detr_outputs'
results = []

# One record per JSON file in rt_detr_dir: which image set the file name points
# to and how many entries its 'annotations' list holds.
for f in [f for f in os.listdir(rt_detr_dir) if f.endswith('.json')]:
    try:
        with open(os.path.join(rt_detr_dir, f), 'rb') as file:
            j = json.load(file)

        annotations = j['annotations']
        detections = len(annotations)

        results.append({
            'set': 'post' if 'post_disaster' in f else 'pre',
            'file': f,
            'id': f[:23]+'_X_no_damage',  # same id scheme as the sample submission
            'detections': detections
        })
    except Exception as e:
        # Best effort: report the unreadable/malformed file and keep going.
        print(f"Error reading {f}: {str(e)}")
        continue

# Explicit columns keep the frame usable even when no file could be read
# (a column-less frame would raise KeyError on 'detections').
results_df = pd.DataFrame(results, columns=['set', 'file', 'id', 'detections'])
results_df['detections'] = results_df['detections'].astype(int)

# Per id: the largest detection count across its files and the spread between
# the largest and smallest count.
per_id = results_df.groupby('id')['detections'].agg(['max', 'min'])

# Build the output columns by name. Flattening the MultiIndex with
# get_level_values(1) after reset_index() left the id column with an empty
# header, both on screen and in the CSV.
final_df = pd.DataFrame({
    'max_detections': per_id['max'],
    'diff_detections': per_id['max'] - per_id['min'],
}).reset_index()
print(final_df.head())

# os.path.join works whether or not the configured folder ends with a separator.
final_df.to_csv(
    os.path.join(config['intermediate_submission_location'], 'huggingface_space_rolled.csv'),
    index=False,
)

                                        max_detections  diff_detections
0  malawi-cyclone_00000000_X_no_damage              21                2
1  malawi-cyclone_00000001_X_no_damage              26                1
2  malawi-cyclone_00000002_X_no_damage              13                2
3  malawi-cyclone_00000009_X_no_damage              23                2
