In [20]:
import json
import os
from glob import glob
import pandas as pd
from tqdm import tqdm  # For progress bars
from fuzzywuzzy import fuzz
from itertools import combinations
import numpy as np
from collections import defaultdict

# Define path to your JSON files
data_path = "C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json/*.json"

# Function to explore a single JSON file in detail
def explore_json_structure(file_path):
    with open(file_path, 'r') as f:
        data = json.load(f)
    
    print("Top-level keys:", data.keys())
    print("\nMeta structure:", data.get('meta', {}))
    print("\nInfo structure:", json.dumps(data.get('info', {}), indent=2)[:500] + "...")
    
    # Explore innings structure if available
    if 'innings' in data:
        print(f"\nNumber of innings: {len(data['innings'])}")
        if data['innings']:
            print(f"First innings team: {data['innings'][0].get('team')}")
            print(f"First innings overs: {len(data['innings'][0].get('overs', []))}")
            
            # Sample one over
            if data['innings'][0].get('overs'):
                print("\nSample delivery structure:")
                print(json.dumps(data['innings'][0]['overs'][0], indent=2))

# Get sample file
sample_files = glob(data_path)[:5]  # Just get first 5 files
if sample_files:
    explore_json_structure(sample_files[0])

Top-level keys: dict_keys(['meta', 'info', 'innings'])

Meta structure: {'data_version': '1.0.0', 'created': '2016-11-09', 'revision': 2}

Info structure: {
  "balls_per_over": 6,
  "city": "Perth",
  "dates": [
    "2016-11-03",
    "2016-11-04",
    "2016-11-05",
    "2016-11-06",
    "2016-11-07"
  ],
  "event": {
    "match_number": 1,
    "name": "South Africa in Australia Test Series"
  },
  "gender": "male",
  "match_type": "Test",
  "match_type_number": 2230,
  "officials": {
    "match_referees": [
      "AJ Pycroft"
    ],
    "reserve_umpires": [
      "SJ Nogajski"
    ],
    "tv_umpires": [
      "RA Kettleborough"
    ],
    "umpires...

Number of innings: 4
First innings team: South Africa
First innings overs: 64

Sample delivery structure:
{
  "over": 0,
  "deliveries": [
    {
      "batter": "SC Cook",
      "bowler": "MA Starc",
      "non_striker": "D Elgar",
      "runs": {
        "batter": 0,
        "extras": 0,
        "total": 0
      }
    },
    {
      "bat

In [13]:
def get_deep_schema_profile(file_paths):
    """Analyze schema consistency across all JSON files at multiple levels"""
    # Track variations
    top_level_schema = Counter()
    info_level_schema = Counter()
    innings_level_schema = Counter()
    over_level_schema = Counter()
    delivery_level_schema = Counter()
    
    # Track match types
    match_types = Counter()
    
    # Error tracking
    errors = []
    
    # Process files
    total_files = len(file_paths)
    print(f"Analyzing {total_files} JSON files...")
    
    for i, file_path in enumerate(file_paths):
        # Print progress for large datasets
        if i % 1000 == 0 and i > 0:
            print(f"Processed {i}/{total_files} files...")
            
        try:
            with open(file_path, 'r') as f:
                data = json.load(f)
            
            # Track top-level keys
            top_level_schema[tuple(sorted(data.keys()))] += 1
            
            # Track info level keys if present
            if 'info' in data:
                info_level_schema[tuple(sorted(data['info'].keys()))] += 1
                
                # Track match type
                if 'match_type' in data['info']:
                    match_types[data['info']['match_type']] += 1
            
            # Track innings structure if present
            if 'innings' in data and data['innings']:
                for innings in data['innings']:
                    innings_level_schema[tuple(sorted(innings.keys()))] += 1
                    
                    # Track over structure
                    if 'overs' in innings and innings['overs']:
                        for over in innings['overs']:
                            over_level_schema[tuple(sorted(over.keys()))] += 1
                            
                            # Track delivery structure
                            if 'deliveries' in over and over['deliveries']:
                                for delivery in over['deliveries']:
                                    delivery_level_schema[tuple(sorted(delivery.keys()))] += 1
                
        except Exception as e:
            errors.append((file_path, str(e)))
    
    return {
        'total_files': total_files,
        'error_count': len(errors),
        'errors': errors[:10],  # First 10 errors only
        'top_level_variations': dict(top_level_schema),
        'info_level_variations': dict(info_level_schema),
        'innings_level_variations': dict(innings_level_schema),
        'over_level_variations': dict(over_level_schema),
        'delivery_level_variations': dict(delivery_level_schema),
        'match_types': dict(match_types)
    }

# Get all JSON files
all_files = glob(data_path)
schema_profile = get_deep_schema_profile(all_files)

# Print summary results
print(f"\nSchema analysis complete for {schema_profile['total_files']} files:")
print(f"Files with errors: {schema_profile['error_count']}")

print(f"\nTop-level schema variations: {len(schema_profile['top_level_variations'])}")
for schema, count in schema_profile['top_level_variations'].items():
    print(f"  {schema}: {count} files")

print(f"\nInfo-level schema variations: {len(schema_profile['info_level_variations'])}")
if len(schema_profile['info_level_variations']) <= 5:
    for schema, count in schema_profile['info_level_variations'].items():
        print(f"  {schema}: {count} files")
else:
    print(f"  Top 5 variations:")
    for schema, count in sorted(schema_profile['info_level_variations'].items(), 
                               key=lambda x: x[1], reverse=True)[:5]:
        print(f"  {schema}: {count} files")

print(f"\nMatch types found:")
for match_type, count in schema_profile['match_types'].items():
    print(f"  {match_type}: {count} files")

print(f"\nInnings-level schema variations: {len(schema_profile['innings_level_variations'])}")
print(f"Over-level schema variations: {len(schema_profile['over_level_variations'])}")
print(f"Delivery-level schema variations: {len(schema_profile['delivery_level_variations'])}")

# Optional: Save detailed results to file for later analysis
import pickle
with open('schema_analysis_results.pkl', 'wb') as f:
    pickle.dump(schema_profile, f)

# Create a more detailed analysis of the delivery structure since it's the most important
if schema_profile['delivery_level_variations']:
    print("\nAnalyzing delivery structure variations in more detail...")
    
    # Count the frequency of each field in deliveries
    delivery_fields = Counter()
    for schema, count in schema_profile['delivery_level_variations'].items():
        for field in schema:
            delivery_fields[field] += count
            
    print("\nDelivery fields frequency:")
    for field, count in delivery_fields.most_common():
        percentage = (count / schema_profile['total_files']) * 100
        print(f"  {field}: {count} files ({percentage:.1f}%)")
    
    # Identify required vs optional fields
    required_threshold = 0.95  # Fields present in 95% of files are considered required
    required_fields = []
    optional_fields = []
    
    for field, count in delivery_fields.items():
        percentage = count / schema_profile['total_files']
        if percentage >= required_threshold:
            required_fields.append(field)
        else:
            optional_fields.append((field, percentage))
    
    print(f"\nRequired delivery fields (present in ≥95% of files):")
    for field in required_fields:
        print(f"  {field}")
        
    print(f"\nOptional delivery fields (with presence percentage):")
    for field, percentage in sorted(optional_fields, key=lambda x: x[1], reverse=True):
        print(f"  {field}: {percentage*100:.1f}%")

Analyzing 18646 JSON files...
Processed 1000/18646 files...
Processed 2000/18646 files...
Processed 3000/18646 files...
Processed 4000/18646 files...
Processed 5000/18646 files...
Processed 6000/18646 files...
Processed 7000/18646 files...
Processed 8000/18646 files...
Processed 9000/18646 files...
Processed 10000/18646 files...
Processed 11000/18646 files...
Processed 12000/18646 files...
Processed 13000/18646 files...
Processed 14000/18646 files...
Processed 15000/18646 files...
Processed 16000/18646 files...
Processed 17000/18646 files...
Processed 18000/18646 files...

Schema analysis complete for 18646 files:
Files with errors: 0

Top-level schema variations: 1
  ('info', 'innings', 'meta'): 18646 files

Info-level schema variations: 48
  Top 5 variations:
  ('balls_per_over', 'city', 'dates', 'event', 'gender', 'match_type', 'officials', 'outcome', 'overs', 'player_of_match', 'players', 'registry', 'season', 'team_type', 'teams', 'toss', 'venue'): 5803 files
  ('balls_per_over', 

In [16]:

all_files = glob(data_path)

print(f"Total match files found: {len(all_files)}")

def extract_match_metadata(file_path):
    """Extract comprehensive metadata from a match file"""
    try:
        with open(file_path, 'r') as f:
            data = json.load(f)
        
        # Get match ID from filename
        match_id = os.path.basename(file_path).split('.')[0]
        
        # Extract meta information
        meta = data.get('meta', {})
        info = data.get('info', {})
        
        # Extract basic metadata
        metadata = {
            'match_id': match_id,
            'data_version': meta.get('data_version'),
            'created_date': meta.get('created'),
            'revision': meta.get('revision'),
            
            # Match information
            'match_type': info.get('match_type'),
            'match_type_number': info.get('match_type_number'),
            'gender': info.get('gender'),
            'teams': '|'.join(info.get('teams', [])),
            'team_type': info.get('team_type'),
            'overs': info.get('overs'),
            'balls_per_over': info.get('balls_per_over'),
            
            # Date and location
            'date': info.get('dates', [None])[0] if info.get('dates') else None,
            'city': info.get('city'),
            'venue': info.get('venue'),
            'season': info.get('season'),
            
            # Event information
            'event_name': info.get('event', {}).get('name') if isinstance(info.get('event'), dict) else info.get('event'),
            'event_group': info.get('event', {}).get('group') if isinstance(info.get('event'), dict) else None,
            
            # Officials
            'umpires': '|'.join(info.get('officials', {}).get('umpires', [])) if 'officials' in info else None,
            'referees': '|'.join(info.get('officials', {}).get('referees', [])) if 'officials' in info and 'referees' in info['officials'] else None,
            
            # Toss information
            'toss_winner': info.get('toss', {}).get('winner') if 'toss' in info else None,
            'toss_decision': info.get('toss', {}).get('decision') if 'toss' in info else None,
            
            # Outcome information
            'outcome_winner': info.get('outcome', {}).get('winner') if 'outcome' in info and 'winner' in info['outcome'] else None,
            'outcome_result': info.get('outcome', {}).get('result') if 'outcome' in info and 'result' in info['outcome'] else None,
            'outcome_method': info.get('outcome', {}).get('method') if 'outcome' in info and 'method' in info['outcome'] else None,
            'outcome_by_runs': info.get('outcome', {}).get('by', {}).get('runs') if 'outcome' in info and 'by' in info['outcome'] and 'runs' in info['outcome']['by'] else None,
            'outcome_by_wickets': info.get('outcome', {}).get('by', {}).get('wickets') if 'outcome' in info and 'by' in info['outcome'] and 'wickets' in info['outcome']['by'] else None,
            
            # Player information
            'player_of_match': '|'.join(info.get('player_of_match', [])),
            'team1_players': len(info.get('players', {}).get(info.get('teams', [''])[0], [])) if info.get('teams') and info.get('players') else 0,
            'team2_players': len(info.get('players', {}).get(info.get('teams', ['', ''])[1], [])) if len(info.get('teams', [])) > 1 and info.get('players') else 0,
            'registry_count': len(info.get('registry', {}).get('people', {})) if 'registry' in info else 0,
            
            # Data completeness
            'missing_fields': '|'.join(info.get('missing', [])),
            'has_innings_data': 'innings' in data and len(data['innings']) > 0,
            'innings_count': len(data.get('innings', [])),
            'total_overs_recorded': sum(len(innings.get('overs', [])) for innings in data.get('innings', [])),
            'total_deliveries': sum(sum(len(over.get('deliveries', [])) for over in innings.get('overs', [])) 
                                   for innings in data.get('innings', []))
        }
                
        return metadata
    
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return {'match_id': os.path.basename(file_path).split('.')[0], 'error': str(e)}

# Process files in batches to manage memory usage
def process_files_in_batches(file_paths, batch_size=1000):
    """Process files in batches to manage memory usage"""
    num_batches = (len(file_paths) + batch_size - 1) // batch_size
    all_metadata = []
    
    for batch_idx in range(num_batches):
        start_idx = batch_idx * batch_size
        end_idx = min((batch_idx + 1) * batch_size, len(file_paths))
        batch_files = file_paths[start_idx:end_idx]
        
        print(f"Processing batch {batch_idx+1}/{num_batches} (files {start_idx+1}-{end_idx})")
        
        batch_metadata = []
        for file_path in tqdm(batch_files, desc=f"Batch {batch_idx+1}"):
            metadata = extract_match_metadata(file_path)
            batch_metadata.append(metadata)
        
        all_metadata.extend(batch_metadata)
        
        # Optional: Save intermediate results
        if batch_idx % 5 == 0:
            temp_df = pd.DataFrame(all_metadata)
            temp_df.to_csv(f'cricket_metadata_partial_{end_idx}.csv', index=False)
            print(f"Saved intermediate results up to file {end_idx}")
    
    return pd.DataFrame(all_metadata)

# Process all files in batches
match_metadata_df = process_files_in_batches(all_files, batch_size=1000)
print(f"Created metadata for {len(match_metadata_df)} matches")

# Save complete results
match_metadata_df.to_csv('cricket_match_metadata_complete.csv', index=False)

# Display basic statistics about the metadata
print("\nBasic statistics about the metadata:")
print(match_metadata_df.describe(include='all').transpose()[['count', 'unique', 'top', 'freq', 'mean', 'std', 'min', 'max']])

# Check for NULL values and completeness
null_counts = match_metadata_df.isnull().sum()
null_percentages = null_counts / len(match_metadata_df) * 100
null_analysis = pd.DataFrame({'null_count': null_counts, 'null_percentage': null_percentages})
print("\nNULL value analysis:")
print(null_analysis[null_analysis['null_count'] > 0].sort_values('null_count', ascending=False))

# Check for empty matches (no innings data)
empty_matches = match_metadata_df[match_metadata_df['has_innings_data'] == False]
print(f"\nMatches without innings data: {len(empty_matches)}")

# Analyze completeness by match type
match_type_analysis = match_metadata_df.groupby('match_type').agg({
    'match_id': 'count',
    'total_deliveries': 'sum',
    'innings_count': 'mean',
    'city': lambda x: x.isnull().mean() * 100,
    'venue': lambda x: x.isnull().mean() * 100,
    'outcome_winner': lambda x: x.notnull().mean() * 100,
    'outcome_result': lambda x: x.notnull().mean() * 100,
    'player_of_match': lambda x: (x != '').mean() * 100,
    'missing_fields': lambda x: (x != '').mean() * 100
}).reset_index()

match_type_analysis.columns = ['match_type', 'count', 'total_deliveries', 'avg_innings', 
                              'pct_null_city', 'pct_null_venue', 'pct_with_winner', 
                              'pct_with_result', 'pct_with_pom', 'pct_with_missing_fields']

print("\nQuality analysis by match type:")
print(match_type_analysis)

# Analyze date distribution
match_metadata_df['date'] = pd.to_datetime(match_metadata_df['date'], errors='coerce')
date_distribution = match_metadata_df.groupby(match_metadata_df['date'].dt.year).size()
print("\nMatches by year:")
print(date_distribution)

# Analyze venue consistency
venue_corrections = {}
top_venues = match_metadata_df['venue'].value_counts().head(20)
print("\nTop 20 venues:")
print(top_venues)

# Check for similar venue names (potential inconsistencies)
def find_similar_venues(venues, threshold=90):
    similar_pairs = []
    venue_list = list(venues)
    
    # Process in smaller chunks to avoid memory issues
    chunk_size = 50
    for i in range(0, len(venue_list), chunk_size):
        chunk = venue_list[i:i+chunk_size]
        for venue1, venue2 in combinations(chunk, 2):
            similarity = fuzz.ratio(str(venue1).lower(), str(venue2).lower())
            if similarity >= threshold:
                similar_pairs.append((venue1, venue2, similarity))
        
        # Also compare with venues from other chunks
        if i > 0:
            for venue1 in chunk:
                for j in range(0, i, chunk_size):
                    other_chunk = venue_list[j:j+chunk_size]
                    for venue2 in other_chunk:
                        similarity = fuzz.ratio(str(venue1).lower(), str(venue2).lower())
                        if similarity >= threshold:
                            similar_pairs.append((venue1, venue2, similarity))
    
    return pd.DataFrame(similar_pairs, columns=['venue1', 'venue2', 'similarity']).sort_values('similarity', ascending=False)

# Get all unique venues
all_venues = match_metadata_df['venue'].dropna().unique()
print(f"Total unique venues: {len(all_venues)}")

# Find similar venues in chunks to manage memory
similar_venues = find_similar_venues(all_venues[:200])  # Start with top 200 venues
print("\nPotentially similar venue names:")
print(similar_venues.head(20))

# Save similar venues for later reference
similar_venues.to_csv('similar_venues.csv', index=False)

# Additional analysis: Player registry completeness
player_registry_counts = match_metadata_df['registry_count']
print("\nPlayer registry statistics:")
print(f"Average players per match: {player_registry_counts.mean():.2f}")
print(f"Minimum players: {player_registry_counts.min()}")
print(f"Maximum players: {player_registry_counts.max()}")

# Team name variations
team_variations = match_metadata_df['teams'].value_counts().head(20)
print("\nTop 20 team combinations:")
print(team_variations)

# Identify matches with potential data quality issues
quality_issues = match_metadata_df[
    (match_metadata_df['total_deliveries'] < 10) |  # Too few deliveries
    (match_metadata_df['team1_players'] < 11) |     # Too few players
    (match_metadata_df['team2_players'] < 11) |     # Too few players
    (match_metadata_df['registry_count'] < 22)      # Incomplete registry
]

print(f"\nMatches with potential quality issues: {len(quality_issues)}")
if len(quality_issues) > 0:
    print(quality_issues[['match_id', 'match_type', 'total_deliveries', 'team1_players', 'team2_players', 'registry_count']].head(10))

# Save results of quality issues for further investigation
quality_issues.to_csv('matches_with_quality_issues.csv', index=False)

Total match files found: 18646
Processing batch 1/19 (files 1-1000)


Batch 1:  33%|██████████████████████▌                                              | 327/1000 [00:00<00:01, 634.55it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1033707.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1041523.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1041525.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1043953.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1043961.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/

Batch 1:  52%|███████████████████████████████████▍                                | 521/1000 [00:00<00:00, 1040.07it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1062580.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1068432.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1068441.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1068481.json: sequence item 1: expected str instance, dict found


Batch 1:  97%|██████████████████████████████████████████████████████████████████▋  | 967/1000 [00:01<00:00, 801.49it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1073401.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1073402.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1073403.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1073404.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1073405.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/

Batch 1: 100%|████████████████████████████████████████████████████████████████████| 1000/1000 [00:01<00:00, 760.84it/s]


Saved intermediate results up to file 1000
Processing batch 2/19 (files 1001-2000)


Batch 2:  60%|████████████████████████████████████████▉                           | 602/1000 [00:00<00:00, 1201.16it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1122904.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1122905.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1122906.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1123204.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1126717.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/

Batch 2: 100%|████████████████████████████████████████████████████████████████████| 1000/1000 [00:01<00:00, 854.30it/s]


Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1131232.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1131233.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1131777.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1131778.json: sequence item 0: expected str instance, dict found
Processing batch 3/19 (files 2001-3000)


Batch 3:  12%|████████                                                            | 119/1000 [00:00<00:00, 1189.98it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1138193.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1138200.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1138201.json: sequence item 0: expected str instance, dict found


Batch 3:  24%|████████████████▏                                                   | 238/1000 [00:00<00:00, 1126.24it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1144436.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1144437.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1144438.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1144439.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1144982.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/

Batch 3:  78%|████████████████████████████████████████████████████▉               | 778/1000 [00:00<00:00, 1270.21it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1153855.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1153856.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1153857.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1155304.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1156212.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/

Batch 3: 100%|███████████████████████████████████████████████████████████████████| 1000/1000 [00:00<00:00, 1029.44it/s]


Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1161228.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1161229.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1161230.json: sequence item 0: expected str instance, dict found
Processing batch 4/19 (files 3001-4000)


Batch 4:  46%|███████████████████████████████                                     | 457/1000 [00:00<00:00, 1171.02it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1167075.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1167078.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1167080.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1167085.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1168016.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/

Batch 4:  79%|█████████████████████████████████████████████████████▊              | 792/1000 [00:00<00:00, 1386.40it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1176432.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1176433.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1176434.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1177015.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1177016.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/

Batch 4: 100%|███████████████████████████████████████████████████████████████████| 1000/1000 [00:00<00:00, 1069.31it/s]


Processing batch 5/19 (files 4001-5000)


Batch 5:  29%|███████████████████▋                                                | 290/1000 [00:00<00:00, 1500.65it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1198472.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1198474.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1200180.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1200181.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1200182.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/

Batch 5:  50%|█████████████████████████████████▋                                  | 495/1000 [00:00<00:00, 1744.10it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1205824.json: sequence item 1: expected str instance, dict found


Batch 5:  67%|█████████████████████████████████████████████▌                      | 670/1000 [00:00<00:00, 1592.00it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1208344.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1208345.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1208346.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1223944.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1223945.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/

Batch 5: 100%|███████████████████████████████████████████████████████████████████| 1000/1000 [00:00<00:00, 1269.28it/s]


Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1229326.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1229329.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1229330.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1229331.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1229332.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/

Batch 6:  42%|█████████████████████████████                                         | 416/1000 [00:04<00:06, 95.57it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1244843.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1244844.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1244845.json: sequence item 0: expected str instance, dict found


Batch 6:  46%|████████████████████████████████▎                                     | 461/1000 [00:05<00:05, 94.35it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1249233.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1249234.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1249241.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1249242.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1249243.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/

Batch 6:  69%|████████████████████████████████████████████████▍                     | 692/1000 [00:07<00:03, 94.66it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1252264.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1252265.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1252266.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1252267.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1252268.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/

Batch 6:  71%|█████████████████████████████████████████████████▎                   | 714/1000 [00:07<00:02, 100.18it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1252285.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1252286.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1252287.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1252288.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1252289.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/

Batch 6:  82%|████████████████████████████████████████████████████████▏            | 815/1000 [00:08<00:01, 133.03it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1253267.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1253268.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1253270.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1253271.json: sequence item 0: expected str instance, dict found


Batch 6: 100%|█████████████████████████████████████████████████████████████████████| 1000/1000 [00:10<00:00, 99.56it/s]


Saved intermediate results up to file 6000
Processing batch 7/19 (files 6001-7000)


Batch 7: 100%|█████████████████████████████████████████████████████████████████████| 1000/1000 [00:10<00:00, 97.37it/s]


Processing batch 8/19 (files 7001-8000)


Batch 8:  96%|██████████████████████████████████████████████████████████████████▎  | 961/1000 [00:09<00:00, 100.71it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1321304.json: sequence item 0: expected str instance, dict found


Batch 8: 100%|████████████████████████████████████████████████████████████████████| 1000/1000 [00:09<00:00, 105.23it/s]


Processing batch 9/19 (files 8001-9000)


Batch 9:  73%|██████████████████████████████████████████████████▉                   | 727/1000 [00:07<00:02, 98.10it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1343837.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1343839.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1343840.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1343842.json: sequence item 1: expected str instance, dict found


Batch 9: 100%|█████████████████████████████████████████████████████████████████████| 1000/1000 [00:10<00:00, 94.61it/s]


Processing batch 10/19 (files 9001-10000)


Batch 10: 100%|███████████████████████████████████████████████████████████████████| 1000/1000 [00:09<00:00, 103.48it/s]


Processing batch 11/19 (files 10001-11000)


Batch 11: 100%|████████████████████████████████████████████████████████████████████| 1000/1000 [00:10<00:00, 96.93it/s]


Saved intermediate results up to file 11000
Processing batch 12/19 (files 11001-12000)


Batch 12:  12%|████████▏                                                           | 121/1000 [00:01<00:07, 118.56it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1411265.json: sequence item 0: expected str instance, dict found


Batch 12: 100%|███████████████████████████████████████████████████████████████████| 1000/1000 [00:08<00:00, 111.97it/s]


Processing batch 13/19 (files 12001-13000)


Batch 13:  28%|███████████████████▍                                                | 285/1000 [00:02<00:06, 108.42it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1442656.json: sequence item 0: expected str instance, dict found


Batch 13:  31%|████████████████████▉                                               | 307/1000 [00:02<00:06, 105.35it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1442683.json: sequence item 0: expected str instance, dict found


Batch 13: 100%|████████████████████████████████████████████████████████████████████| 1000/1000 [00:10<00:00, 99.90it/s]


Processing batch 14/19 (files 13001-14000)


Batch 14:  42%|████████████████████████████▉                                        | 420/1000 [00:04<00:05, 99.24it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\216271.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\216669.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\217481.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\217647.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\217648.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radbo

Batch 14:  44%|██████████████████████████████▍                                      | 442/1000 [00:04<00:05, 97.54it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\223335.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\224044.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\224227.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\225171.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\225254.json: sequence item 0: expected str instance, dict found


Batch 14:  47%|████████████████████████████████▍                                    | 471/1000 [00:04<00:06, 84.24it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\225959.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\226352.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\226354.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\226356.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\226359.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radbo

Batch 14:  49%|█████████████████████████████████▉                                   | 491/1000 [00:04<00:05, 90.73it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\226390.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\226392.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\236358.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\236963.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\237222.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radbo

Batch 14:  51%|███████████████████████████████████▎                                 | 511/1000 [00:05<00:05, 85.21it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\238173.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\238174.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\238175.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\238177.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\238179.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radbo

Batch 14:  52%|████████████████████████████████████                                 | 522/1000 [00:05<00:05, 91.66it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\238199.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\238200.json: sequence item 0: expected str instance, dict found


Batch 14:  55%|█████████████████████████████████████▉                               | 550/1000 [00:05<00:05, 76.94it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\239911.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\239913.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\239917.json: sequence item 0: expected str instance, dict found


Batch 14:  57%|███████████████████████████████████████▎                             | 569/1000 [00:05<00:05, 82.55it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\247466.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\247468.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\247470.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\247474.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\247483.json: sequence item 0: expected str instance, dict found


Batch 14:  60%|█████████████████████████████████████████▎                           | 599/1000 [00:06<00:04, 87.80it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\247494.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\247496.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\247500.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\247503.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\247506.json: sequence item 0: expected str instance, dict found


Batch 14:  61%|█████████████████████████████████████████▉                           | 608/1000 [00:06<00:04, 86.19it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\249212.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\249214.json: sequence item 0: expected str instance, dict found


Batch 14:  64%|███████████████████████████████████████████▉                         | 637/1000 [00:06<00:04, 87.53it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\249232.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\249234.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\249740.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\249742.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\249744.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radbo

Batch 14:  66%|█████████████████████████████████████████████▍                       | 658/1000 [00:06<00:03, 88.81it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\249749.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\249750.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\249758.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\249759.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\250671.json: sequence item 0: expected str instance, dict found


Batch 14:  68%|██████████████████████████████████████████████▋                      | 677/1000 [00:07<00:03, 89.57it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\250672.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\251491.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\255961.json: sequence item 0: expected str instance, dict found


Batch 14:  72%|█████████████████████████████████████████████████▍                   | 716/1000 [00:07<00:03, 85.39it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\258475.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\258477.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\259389.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\259390.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\259793.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radbo

Batch 14:  74%|██████████████████████████████████████████████████▉                  | 739/1000 [00:07<00:02, 96.09it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\275789.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\276226.json: sequence item 0: expected str instance, dict found


Batch 14:  79%|██████████████████████████████████████████████████████▌              | 790/1000 [00:08<00:02, 84.65it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\291359.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\291369.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\293071.json: sequence item 0: expected str instance, dict found


Batch 14:  82%|████████████████████████████████████████████████████████▍            | 818/1000 [00:08<00:02, 79.69it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\293482.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\295785.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\295788.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\296687.json: sequence item 1: expected str instance, dict found


Batch 14:  84%|██████████████████████████████████████████████████████████▏          | 843/1000 [00:09<00:02, 77.44it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\297797.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\298797.json: sequence item 0: expected str instance, dict found


Batch 14:  87%|████████████████████████████████████████████████████████████         | 870/1000 [00:09<00:01, 78.60it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\298809.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\299009.json: sequence item 0: expected str instance, dict found


Batch 14:  90%|██████████████████████████████████████████████████████████████       | 899/1000 [00:09<00:01, 79.11it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\312295.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\312296.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\313992.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\313994.json: sequence item 1: expected str instance, dict found


Batch 14:  93%|████████████████████████████████████████████████████████████████▍    | 933/1000 [00:10<00:00, 80.37it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\323951.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\325569.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\325572.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\325579.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\325580.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radbo

Batch 14: 100%|████████████████████████████████████████████████████████████████████| 1000/1000 [00:10<00:00, 91.72it/s]


Processing batch 15/19 (files 14001-15000)


Batch 15:   1%|▋                                                                      | 9/1000 [00:00<00:11, 88.53it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\341302.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\341306.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\343760.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\343761.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\343762.json: sequence item 0: expected str instance, dict found


Batch 15:   2%|█▎                                                                    | 18/1000 [00:00<00:11, 89.05it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\343763.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\345468.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\345470.json: sequence item 0: expected str instance, dict found


Batch 15:  14%|██████████                                                           | 145/1000 [00:01<00:08, 99.03it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\366623.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\377314.json: sequence item 0: expected str instance, dict found


Batch 15:  17%|███████████▍                                                         | 166/1000 [00:01<00:09, 88.06it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\378755.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\378759.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\383275.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\385025.json: sequence item 1: expected str instance, dict found


Batch 15:  18%|████████████▋                                                        | 184/1000 [00:02<00:10, 75.01it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\387568.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\390227.json: sequence item 0: expected str instance, dict found


Batch 15:  49%|█████████████████████████████████▌                                   | 486/1000 [00:05<00:05, 87.25it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\430508.json: sequence item 0: expected str instance, dict found


Batch 15:  62%|██████████████████████████████████████████▊                          | 621/1000 [00:06<00:03, 94.82it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\460039.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\460042.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\460045.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\460048.json: sequence item 0: expected str instance, dict found


Batch 15:  66%|█████████████████████████████████████████████▌                       | 660/1000 [00:07<00:04, 80.06it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\464720.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\464723.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\467078.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\467080.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\467084.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radbo

Batch 15:  68%|██████████████████████████████████████████████▊                      | 679/1000 [00:07<00:03, 80.80it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\473029.json: sequence item 0: expected str instance, dict found


Batch 15:  73%|██████████████████████████████████████████████████▏                  | 727/1000 [00:07<00:03, 86.39it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\483126.json: sequence item 0: expected str instance, dict found


Batch 15:  83%|████████████████████████████████████████████████████████▏           | 827/1000 [00:08<00:01, 103.59it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\504779.json: sequence item 1: expected str instance, dict found


Batch 15:  85%|██████████████████████████████████████████████████████████▌          | 848/1000 [00:09<00:01, 82.39it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\518071.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\518072.json: sequence item 0: expected str instance, dict found


Batch 15:  92%|███████████████████████████████████████████████████████████████▍     | 920/1000 [00:10<00:00, 94.31it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\523805.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\523806.json: sequence item 0: expected str instance, dict found


Batch 15:  95%|█████████████████████████████████████████████████████████████████▊   | 953/1000 [00:10<00:00, 90.90it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\527680.json: sequence item 0: expected str instance, dict found


Batch 15: 100%|████████████████████████████████████████████████████████████████████| 1000/1000 [00:10<00:00, 91.94it/s]


Processing batch 16/19 (files 15001-16000)


Batch 16:  11%|███████▍                                                             | 107/1000 [00:01<00:10, 82.00it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\542852.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\542853.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\542854.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\542856.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\543881.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radbo

Batch 16:  22%|███████████████▏                                                    | 223/1000 [00:02<00:07, 100.72it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\555887.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\555888.json: sequence item 1: expected str instance, dict found


Batch 16:  40%|███████████████████████████▍                                        | 403/1000 [00:04<00:05, 116.24it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\576402.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\576403.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\576405.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\576406.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\576407.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radbo

Batch 16:  42%|████████████████████████████▏                                       | 415/1000 [00:04<00:05, 108.82it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\579743.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\579751.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\579753.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\579754.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\579755.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radbo

Batch 16:  46%|███████████████████████████████▋                                     | 460/1000 [00:04<00:05, 93.41it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\592263.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\592264.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\592266.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\592274.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\592275.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radbo

Batch 16:  49%|█████████████████████████████████▉                                   | 492/1000 [00:05<00:05, 99.21it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\594914.json: sequence item 0: expected str instance, dict found


Batch 16:  63%|███████████████████████████████████████████▍                         | 630/1000 [00:06<00:03, 99.54it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\603239.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\603240.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\603241.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\627006.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\627007.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radbo

Batch 16:  65%|████████████████████████████████████████████▉                        | 652/1000 [00:06<00:03, 98.71it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\628092.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\628093.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\628095.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\628096.json: sequence item 0: expected str instance, dict found


Batch 16:  77%|█████████████████████████████████████████████████████▏               | 771/1000 [00:08<00:02, 94.80it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\644943.json: sequence item 1: expected str instance, dict found


Batch 16: 100%|████████████████████████████████████████████████████████████████████| 1000/1000 [00:10<00:00, 96.94it/s]


Saved intermediate results up to file 16000
Processing batch 17/19 (files 16001-17000)


Batch 17:  15%|█████████▉                                                          | 147/1000 [00:01<00:07, 111.85it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\660825.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\660827.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\661887.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\661889.json: sequence item 0: expected str instance, dict found


Batch 17:  21%|██████████████▏                                                     | 209/1000 [00:02<00:07, 103.64it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\664299.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\664303.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\666043.json: sequence item 0: expected str instance, dict found


Batch 17:  25%|█████████████████▎                                                   | 250/1000 [00:02<00:08, 93.01it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\668511.json: sequence item 0: expected str instance, dict found


Batch 17:  58%|███████████████████████████████████████▌                            | 581/1000 [00:06<00:03, 109.88it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\693275.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\693277.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\693279.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\693281.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\693283.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radbo

Batch 17:  60%|█████████████████████████████████████████                           | 604/1000 [00:06<00:03, 104.49it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\693315.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\693317.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\693319.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\693321.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\693323.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radbo

Batch 17:  63%|██████████████████████████████████████████▊                         | 629/1000 [00:06<00:03, 112.37it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\693353.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\693355.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\693357.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\693361.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\693367.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radbo

Batch 17:  65%|████████████████████████████████████████████▍                       | 653/1000 [00:06<00:03, 107.56it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\693415.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\693417.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\693419.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\693421.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\693423.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radbo

Batch 17:  69%|██████████████████████████████████████████████▋                     | 687/1000 [00:07<00:03, 101.03it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\722389.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\722391.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\722665.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\722667.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\722673.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radbo

Batch 17:  82%|████████████████████████████████████████████████████████▊            | 824/1000 [00:08<00:01, 88.58it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\736499.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\736501.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\736505.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\736507.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\736511.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radbo

Batch 17:  89%|█████████████████████████████████████████████████████████████▍       | 891/1000 [00:09<00:01, 90.47it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\746087.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\746089.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\746091.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\746093.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\746095.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radbo

Batch 17:  91%|██████████████████████████████████████████████████████████████▊      | 910/1000 [00:09<00:01, 80.61it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\749543.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\749545.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\749547.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\749551.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\749553.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radbo

Batch 17:  96%|██████████████████████████████████████████████████████████████████▏  | 960/1000 [00:10<00:00, 89.67it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\754787.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\754789.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\754813.json: sequence item 0: expected str instance, dict found


Batch 17: 100%|████████████████████████████████████████████████████████████████████| 1000/1000 [00:10<00:00, 95.10it/s]


Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\757505.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\757507.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\757509.json: sequence item 0: expected str instance, dict found
Processing batch 18/19 (files 17001-18000)


Batch 18:   9%|█████▉                                                               | 86/1000 [00:00<00:08, 104.84it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\785959.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\785961.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\785963.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\785965.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\785967.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radbo

Batch 18:  39%|██████████████████████████▋                                          | 387/1000 [00:04<00:06, 99.71it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\804713.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\804715.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\804723.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\804725.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\804729.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radbo

Batch 18:  40%|███████████████████████████▍                                         | 398/1000 [00:04<00:06, 96.97it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\804757.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\804759.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\804761.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\804763.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\804765.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radbo

Batch 18:  42%|████████████████████████████▊                                        | 418/1000 [00:04<00:06, 92.59it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\804797.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\804799.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\804801.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\804803.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\804805.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radbo

Batch 18:  44%|██████████████████████████████                                      | 442/1000 [00:05<00:05, 102.19it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\804841.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\804843.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\804845.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\804849.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\804851.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radbo

Batch 18:  48%|████████████████████████████████▊                                    | 475/1000 [00:05<00:05, 99.78it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\812775.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\812777.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\812779.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\812781.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\812783.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radbo

Batch 18:  50%|█████████████████████████████████▊                                  | 497/1000 [00:05<00:04, 103.46it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\828245.json: sequence item 0: expected str instance, dict found


Batch 18:  65%|████████████████████████████████████████████▍                       | 654/1000 [00:07<00:03, 108.83it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\870891.json: sequence item 0: expected str instance, dict found


Batch 18:  74%|██████████████████████████████████████████████████▎                 | 739/1000 [00:07<00:02, 113.03it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\895797.json: sequence item 0: expected str instance, dict found


Batch 18:  88%|████████████████████████████████████████████████████████████▍        | 875/1000 [00:09<00:01, 91.59it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\907375.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\907391.json: sequence item 0: expected str instance, dict found


Batch 18:  94%|████████████████████████████████████████████████████████████████▉    | 941/1000 [00:09<00:00, 96.03it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\916647.json: sequence item 1: expected str instance, dict found


Batch 18:  99%|████████████████████████████████████████████████████████████████████ | 987/1000 [00:10<00:00, 79.74it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\923327.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\930567.json: sequence item 1: expected str instance, dict found


Batch 18: 100%|████████████████████████████████████████████████████████████████████| 1000/1000 [00:10<00:00, 93.62it/s]


Processing batch 19/19 (files 18001-18646)


Batch 19:   7%|████▊                                                                 | 44/646 [00:00<00:05, 111.46it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\935813.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\935817.json: sequence item 1: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\935821.json: sequence item 0: expected str instance, dict found


Batch 19:  10%|███████▎                                                               | 67/646 [00:00<00:05, 99.17it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\946545.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\946547.json: sequence item 0: expected str instance, dict found
Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\946549.json: sequence item 0: expected str instance, dict found


Batch 19:  79%|██████████████████████████████████████████████████████▌              | 511/646 [00:05<00:01, 113.61it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\962017.json: sequence item 1: expected str instance, dict found


Batch 19:  99%|█████████████████████████████████████████████████████████████████████▏| 638/646 [00:06<00:00, 98.26it/s]

Error processing C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\wi_201706.json: sequence item 0: expected str instance, dict found


Batch 19: 100%|██████████████████████████████████████████████████████████████████████| 646/646 [00:06<00:00, 99.05it/s]


Created metadata for 18646 matches

Basic statistics about the metadata:
                        count unique  \
match_id                18646  18646   
data_version            17956      2   
created_date            17956   2219   
revision              17956.0    NaN   
match_type              17956      6   
match_type_number      7559.0    NaN   
gender                  17956      2   
teams                   17956   3546   
team_type               17956      2   
overs                 15332.0    NaN   
balls_per_over        17956.0    NaN   
date                    17956   5289   
city                    16361    324   
venue                   17956    798   
season                  17956     60   
event_name              17873    920   
event_group              5026     31   
umpires                 17188   6542   
referees                  0.0    NaN   
toss_winner             17956    355   
toss_decision           17956      2   
outcome_winner          16599    348   
outcome

In [19]:

# Function to check types in a sample JSON file
def check_data_types(file_path):
    """Examine data types in a sample JSON file without transforming anything"""
    with open(file_path, 'r') as f:
        data = json.load(f)
    
    type_analysis = {
        'info_fields': {},
        'delivery_fields': {}
    }
    
    # Check info section data types
    info = data.get('info', {})
    for key, value in info.items():
        type_analysis['info_fields'][key] = type(value).__name__
    
    # Check delivery data types (from first over of first innings)
    if 'innings' in data and data['innings'] and 'overs' in data['innings'][0]:
        overs = data['innings'][0]['overs']
        if overs and 'deliveries' in overs[0]:
            delivery = overs[0]['deliveries'][0]
            for key, value in delivery.items():
                type_analysis['delivery_fields'][key] = type(value).__name__
                # Also check nested fields like 'runs'
                if isinstance(value, dict):
                    for subkey, subvalue in value.items():
                        type_analysis['delivery_fields'][f"{key}.{subkey}"] = type(subvalue).__name__
    
    return type_analysis

# Analyze a few sample files
sample_files = glob(data_path)[:5]  # Just analyze a few files

type_results = {}
for file in sample_files:
    type_results[file] = check_data_types(file)

# Summarize the results
print("Data type analysis (no transformations performed):")
for file, analysis in type_results.items():
    print(f"\nFile: {file}")
    print("Info field types:")
    for field, field_type in analysis['info_fields'].items():
        print(f"  {field}: {field_type}")
    
    print("\nDelivery field types:")
    for field, field_type in analysis['delivery_fields'].items():
        print(f"  {field}: {field_type}")

Data type analysis (no transformations performed):

File: C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1000851.json
Info field types:
  balls_per_over: int
  city: str
  dates: list
  event: dict
  gender: str
  match_type: str
  match_type_number: int
  officials: dict
  outcome: dict
  player_of_match: list
  players: dict
  registry: dict
  season: str
  team_type: str
  teams: list
  toss: dict
  venue: str

Delivery field types:
  batter: str
  bowler: str
  non_striker: str
  runs: dict
  runs.batter: int
  runs.extras: int
  runs.total: int

File: C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json\1000853.json
Info field types:
  balls_per_over: int
  city: str
  dates: list
  event: dict
  gender: str
  match_type: str
  match_type_number: int
  officials: dict
  outcome: dict
  player_of_match: list
  players: dict
  registry: dict
  season: str
  team_ty

In [22]:
data_dir = data_path = "C:/Users/lohit/Desktop/Radboud University/Data Engineering (NWI-IMC073-2024)/Data & Codes/extracted_data_json"

# Stats collection
runs_per_delivery = []
extras_per_delivery = []
total_runs_per_delivery = []
invalid_runs = []

# Process each JSON file
file_count = 0
delivery_count = 0

# Process each file
for filename in os.listdir(data_dir):
    if filename.endswith('.json'):
        file_path = os.path.join(data_dir, filename)
        file_count += 1
        
        # Progress tracking (optional)
        if file_count % 1000 == 0:
            print(f"Processed {file_count} files...")
        
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                match_data = json.load(f)
            
            # Process each innings
            if 'innings' in match_data:
                for innings in match_data['innings']:
                    if 'overs' in innings:
                        for over in innings['overs']:
                            if 'deliveries' in over:
                                for delivery in over['deliveries']:
                                    delivery_count += 1
                                    
                                    # Extract runs data
                                    if 'runs' in delivery:
                                        runs = delivery['runs']
                                        
                                        # Collect statistics
                                        batter_runs = runs.get('batter', 0)
                                        extras = runs.get('extras', 0)
                                        total = runs.get('total', 0)
                                        
                                        runs_per_delivery.append(batter_runs)
                                        extras_per_delivery.append(extras)
                                        total_runs_per_delivery.append(total)
                                        
                                        # Check for potential outliers or invalid data
                                        if total > 7:  # Allowing for 6 + no ball
                                            invalid_runs.append({
                                                'file': filename,
                                                'batter': delivery.get('batter', 'unknown'),
                                                'bowler': delivery.get('bowler', 'unknown'),
                                                'batter_runs': batter_runs,
                                                'extras': extras,
                                                'total': total
                                            })
        except Exception as e:
            print(f"Error processing {filename}: {str(e)}")

# Convert to pandas DataFrames for analysis
runs_df = pd.DataFrame({
    'batter_runs': runs_per_delivery,
    'extras': extras_per_delivery,
    'total_runs': total_runs_per_delivery
})

# Generate summary statistics
summary_stats = runs_df.describe()
print("\nSummary Statistics:")
print(summary_stats)

print(f"\nTotal files processed: {file_count}")
print(f"Total deliveries analyzed: {delivery_count}")
print(f"Number of potentially invalid run values (>7): {len(invalid_runs)}")

# Show sample of invalid runs if any
if invalid_runs:
    print("\nSample of potentially invalid runs:")
    for i, item in enumerate(invalid_runs[:10]):
        print(item)
        if i >= 9:
            break

# Distribution analysis
print("\nRun Distribution (% of deliveries):")
for run_value in range(8):
    percentage = (runs_df['total_runs'] == run_value).mean() * 100
    print(f"{run_value} runs: {percentage:.2f}%")

Processed 1000 files...
Processed 2000 files...
Processed 3000 files...
Processed 4000 files...
Processed 5000 files...
Processed 6000 files...
Processed 7000 files...
Processed 8000 files...
Processed 9000 files...
Processed 10000 files...
Processed 11000 files...
Processed 12000 files...
Processed 13000 files...
Processed 14000 files...
Processed 15000 files...
Processed 16000 files...
Processed 17000 files...
Processed 18000 files...

Summary Statistics:
        batter_runs        extras    total_runs
count  9.663876e+06  9.663876e+06  9.663876e+06
mean   7.501123e-01  4.700816e-02  7.971204e-01
std    1.305196e+00  3.052374e-01  1.317440e+00
min    0.000000e+00  0.000000e+00  0.000000e+00
25%    0.000000e+00  0.000000e+00  0.000000e+00
50%    0.000000e+00  0.000000e+00  0.000000e+00
75%    1.000000e+00  0.000000e+00  1.000000e+00
max    8.000000e+00  1.200000e+01  1.300000e+01

Total files processed: 18646
Total deliveries analyzed: 9663876
Number of potentially invalid run values 

In [23]:
# Stats collection
deliveries_per_over = defaultdict(list)
outlier_overs = []

# Process each JSON file
file_count = 0
over_count = 0

# Process each file
for filename in os.listdir(data_dir):
    if filename.endswith('.json'):
        file_path = os.path.join(data_dir, filename)
        file_count += 1
        
        # Progress tracking (optional)
        if file_count % 1000 == 0:
            print(f"Processed {file_count} files...")
        
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                match_data = json.load(f)
            
            # Get match type (T20, ODI, Test, etc.)
            match_type = match_data.get('info', {}).get('match_type', 'unknown')
            balls_per_over = match_data.get('info', {}).get('balls_per_over', 6)
            
            # Process each innings
            if 'innings' in match_data:
                for innings in match_data['innings']:
                    if 'overs' in innings:
                        for over in innings['overs']:
                            over_count += 1
                            over_num = over.get('over', -1)
                            
                            # Count deliveries in this over
                            if 'deliveries' in over:
                                num_deliveries = len(over['deliveries'])
                                deliveries_per_over[match_type].append(num_deliveries)
                                
                                # Check for potential outliers
                                if num_deliveries > balls_per_over + 2:  # Allowing for extras
                                    outlier_overs.append({
                                        'file': filename,
                                        'match_type': match_type,
                                        'over_num': over_num,
                                        'deliveries': num_deliveries,
                                        'expected': balls_per_over
                                    })
        except Exception as e:
            print(f"Error processing {filename}: {str(e)}")

# Generate summary statistics for each match type
stats_by_match_type = {}
for match_type, deliveries in deliveries_per_over.items():
    stats_by_match_type[match_type] = {
        'count': len(deliveries),
        'mean': np.mean(deliveries),
        'std': np.std(deliveries),
        'min': min(deliveries),
        'max': max(deliveries),
    }

print("\nSummary Statistics by Match Type:")
for match_type, stats in stats_by_match_type.items():
    print(f"\n{match_type} matches:")
    print(f"  Number of overs: {stats['count']}")
    print(f"  Average deliveries per over: {stats['mean']:.2f}")
    print(f"  Min deliveries: {stats['min']}")
    print(f"  Max deliveries: {stats['max']}")

print(f"\nTotal files processed: {file_count}")
print(f"Total overs analyzed: {over_count}")
print(f"Number of potentially outlier overs: {len(outlier_overs)}")

# Show sample of outlier overs if any
if outlier_overs:
    print("\nSample of potentially outlier overs:")
    for i, item in enumerate(outlier_overs[:10]):
        print(item)
        if i >= 9:
            break

Processed 1000 files...
Processed 2000 files...
Processed 3000 files...
Processed 4000 files...
Processed 5000 files...
Processed 6000 files...
Processed 7000 files...
Processed 8000 files...
Processed 9000 files...
Processed 10000 files...
Processed 11000 files...
Processed 12000 files...
Processed 13000 files...
Processed 14000 files...
Processed 15000 files...
Processed 16000 files...
Processed 17000 files...
Processed 18000 files...

Summary Statistics by Match Type:

Test matches:
  Number of overs: 277020
  Average deliveries per over: 6.03
  Min deliveries: 1
  Max deliveries: 10

ODI matches:
  Number of overs: 253469
  Average deliveries per over: 6.13
  Min deliveries: 1
  Max deliveries: 14

T20 matches:
  Number of overs: 418800
  Average deliveries per over: 6.17
  Min deliveries: 1
  Max deliveries: 19

ODM matches:
  Number of overs: 123062
  Average deliveries per over: 6.14
  Min deliveries: 1
  Max deliveries: 12

MDM matches:
  Number of overs: 503520
  Average deliv