In [None]:
import json
import os
from pathlib import Path
import re

In [None]:
def load_json_files(directory):
    """Load every ``*.json`` file in ``directory`` into one flat list.

    Files are read in sorted filename order so the result is reproducible
    (``os.listdir`` returns entries in arbitrary order). Each file is decoded
    as UTF-8.

    Args:
        directory: Path (``str`` or ``os.PathLike``) of the folder to scan.
            Only direct children whose name ends with ``.json`` are read.

    Returns:
        A list with the combined contents of all files. A file whose top
        level is a JSON array contributes each of its elements; any other
        top-level value (e.g. a single JSON object) is added as one item.

    Raises:
        FileNotFoundError: If ``directory`` does not exist.
        json.JSONDecodeError: If a ``.json`` file is not valid JSON.
    """
    data = []
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith('.json'):
            continue
        with open(os.path.join(directory, filename), 'r', encoding='utf-8') as file:
            content = json.load(file)
        if isinstance(content, list):
            data.extend(content)
        else:
            # extend() on a dict would silently add its keys as "samples"
            data.append(content)
    return data

def parse_response(response_str):
    """Extract the JSON object embedded in a response string.

    The span from the first ``{`` to the last ``}`` is taken (greedy match,
    newlines included) and decoded as JSON.

    Args:
        response_str: Text that may contain a JSON object, possibly
            surrounded by other text and possibly spanning several lines.

    Returns:
        A dict mapping each key of the decoded object to ``str(value)``,
        with ``None`` values replaced by ``'0'``. Returns ``None`` when
        ``response_str`` is not a string, contains no ``{...}`` span, or the
        span is not valid JSON.
    """
    if not isinstance(response_str, str):
        return None

    # DOTALL lets '.' cross line breaks so pretty-printed JSON is matched too
    match = re.search(r'\{.*\}', response_str, re.DOTALL)
    if match is None:
        return None

    try:
        response_dict = json.loads(match.group())
    except json.JSONDecodeError:
        return None

    # Convert all values to strings (missing values become '0')
    return {k: str(v) if v is not None else '0' for k, v in response_dict.items()}

def _is_nonzero(value):
    """Return True when ``value`` represents a number other than zero."""
    try:
        # float() accepts every string int() does, plus decimals like '1.5'
        return float(value) != 0
    except (TypeError, ValueError):
        # parse_response renders JSON `true` as the string 'True'; any other
        # non-numeric text (or None) is treated as zero instead of crashing.
        return str(value).strip().lower() == 'true'


def filter_samples(data):
    """Split samples by whether their response holds any non-zero value.

    Each sample's ``'response'`` entry is parsed with ``parse_response``. A
    sample is kept when parsing succeeds and at least one value of the parsed
    dict is non-zero; otherwise it is removed.

    Args:
        data: Iterable of samples, each indexable by the key ``'response'``.

    Returns:
        A tuple ``(filtered_data, removed_samples)`` of two lists that
        together contain every input sample, in input order.

    Raises:
        KeyError: If a sample has no ``'response'`` key.
    """
    filtered_data = []
    removed_samples = []

    for sample in data:
        response = parse_response(sample['response'])
        if response and any(_is_nonzero(value) for value in response.values()):
            filtered_data.append(sample)
        else:
            removed_samples.append(sample)

    return filtered_data, removed_samples

In [None]:
# Load all JSON files from the data directory
data_directory = Path('data')
all_samples = load_json_files(data_directory)

# Filter samples: keep those whose parsed response has at least one non-zero
# value. The following cells read both `filtered_samples` and
# `removed_samples`, so this call must run for them to work on a fresh kernel.
filtered_samples, removed_samples = filter_samples(all_samples)

In [None]:
# Report the sample counts before and after filtering, one per line
print(
    f"Total samples before filtering: {len(all_samples)}",
    f"Total samples after filtering: {len(filtered_samples)}",
    f"Number of samples removed: {len(removed_samples)}",
    sep="\n",
)

In [None]:
# Save filtered samples to a new JSON file.
# NOTE(review): the file lands inside `data_directory`, the same folder every
# `*.json` file is loaded from, so re-running the notebook would read this
# output back in as input — consider a separate output folder.
output_file = data_directory / 'filtered_samples.json'
with open(output_file, 'w') as f:
    # Dump the filtered subset (not the full `all_samples` list) so the file
    # content matches its name and the message printed below.
    json.dump(filtered_samples, f, indent=2)

print(f"\nFiltered samples saved to {output_file}")