# In [None]:
import html
import os
import re
from urllib.parse import urljoin

import requests

# Google Drive file IDs to fetch. Each key is the integer substituted into the
# output filename (artin_reps_d<key>_v2.csv); each value is the Drive file ID
# sent as the `id` query parameter of the download request.
file_keys = {
    2: '1FQsx0Sais2XVWbwfcX7jsPaCSmsKl_8X',
    3: '1oNLSPLbP3IGzXFYySC1eTqIsShRPdRUW',
    4: '1eI4b_A5R2WgXtq-RjPhAr0HrdlPbvXNW',
    5: '1w2OMa_OSpoAKaTGv5YFWuiSWnyN3Friy',
    6: '1rOaNaWJdVQHstJg2aqXJEbPRJr40ouXg',
    7: '1EsTfs9bVggp_QKFLP4R25vZFEDGQrI-2',
    8: '1WBe7ECFK9dagqCVyKVGYt5uQuEVgYfVm',
    9: '1BHwT767bD60JawzN_rPKLLBZXzuxBRYs',
}

# (connect, read) timeouts in seconds for every HTTP request. The read value
# bounds the wait between received bytes, not the total transfer time, so
# large files still download while a stalled connection is abandoned.
_REQUEST_TIMEOUT = (10, 60)

_DRIVE_DOWNLOAD_URL = 'https://drive.google.com/uc'


def _is_html(response):
  """Return True when the response's Content-Type header declares HTML."""
  return 'text/html' in response.headers.get('Content-Type', '').lower()


def _confirmation_request(page, page_url, default_url, params):
  """Build the follow-up request described by a virus-scan warning page.

  Args:
    page: Decoded HTML body of the response.
    page_url: Final URL the HTML was served from; base for a relative action.
    default_url: URL to retry against when the form has no action attribute.
    params: Query parameters of the original request.

  Returns:
    A ``(url, params)`` tuple for the confirming request, or None when the
    page is not a recognisable confirmation form.
  """
  if 'virus' not in page.lower() or 'confirm' not in page:
    return None

  confirm_match = re.search(r'name="confirm" value="([^"]+)"', page)
  uuid_match = re.search(r'name="uuid" value="([^"]+)"', page)
  if not (confirm_match and uuid_match):
    return None

  # Attribute values are HTML-escaped in the page source (e.g. "&amp;"), so
  # decode them before sending them back.
  retry_params = dict(
    params,
    confirm=html.unescape(confirm_match.group(1)),
    uuid=html.unescape(uuid_match.group(1)),
  )

  retry_url = default_url
  action_match = re.search(r'action="([^"]+)"', page)
  if action_match:
    # urljoin leaves an absolute action untouched and resolves a relative one
    # against the page it came from.
    retry_url = urljoin(page_url, html.unescape(action_match.group(1)))
  return retry_url, retry_params


def _save_stream(response, output_path):
  """Stream the response body to output_path without leaving partial files."""
  partial_path = output_path + '.part'
  try:
    with open(partial_path, 'wb') as f:
      for chunk in response.iter_content(chunk_size=32768):
        if chunk:
          f.write(chunk)
    # Only a fully received file replaces whatever was at output_path.
    os.replace(partial_path, output_path)
  finally:
    # Still present only if the transfer or the rename failed.
    if os.path.exists(partial_path):
      os.remove(partial_path)


def _fetch_file(session, file_id, output_path):
  """Download one Drive file, confirming the virus-scan prompt if shown."""
  params = {'export': 'download', 'id': file_id}
  response = session.get(
    _DRIVE_DOWNLOAD_URL, params=params, stream=True, timeout=_REQUEST_TIMEOUT)
  try:
    # The warning page is served as HTML (usually with a 200 status) in place
    # of the file itself.
    if _is_html(response):
      retry = _confirmation_request(
        response.text, response.url, _DRIVE_DOWNLOAD_URL, params)
      if retry is not None:
        response.close()
        retry_url, retry_params = retry
        response = session.get(
          retry_url, params=retry_params, stream=True,
          timeout=_REQUEST_TIMEOUT)

    response.raise_for_status()

    # Every output of this function is a CSV, so HTML at this point is an
    # error or interstitial page; refuse to save it as data.
    if _is_html(response):
      raise RuntimeError(
        'received an HTML page instead of file data '
        '(the file may be private, missing, or over its download quota)')

    _save_stream(response, output_path)
  finally:
    response.close()


def download_drive_files(file_dict, output_dir='.', verbose=False):
  """Download files from Google Drive, handling the large-file virus-scan page.

  Each entry is saved as ``artin_reps_d{key}_v2.csv`` inside ``output_dir``.
  A failure on one file is reported and the remaining files are still
  attempted; a failed download leaves no file (and does not overwrite an
  existing one).

  Args:
    file_dict: Mapping of filename key to Google Drive file ID.
    output_dir: Directory to write into; created if it does not exist.
    verbose: When True, show a tqdm progress bar (if tqdm is installed) and
      suppress the per-file success messages.

  Raises:
    OSError: If ``output_dir`` cannot be created.
  """
  # An empty output_dir means the current directory for os.path.join below.
  os.makedirs(output_dir or '.', exist_ok=True)

  items = file_dict.items()
  report_error = print
  if verbose:
    try:
      from tqdm import tqdm
    except ImportError:
      pass  # tqdm is optional; fall back to plain iteration.
    else:
      items = tqdm(items, total=len(file_dict), unit='file', desc='Downloading')
      # tqdm.write prints without corrupting the active progress bar.
      report_error = tqdm.write

  # One session for all files so cookies set by the warning page persist to
  # the confirming request; the context manager releases its connections.
  with requests.Session() as session:
    for key, file_id in items:
      output_path = os.path.join(output_dir, f'artin_reps_d{key}_v2.csv')
      try:
        _fetch_file(session, file_id, output_path)
      except Exception as e:
        # Deliberately broad: this is the per-file best-effort boundary, so
        # one bad file must not abort the rest of the batch.
        report_error(f"Failed to download item {key}: {e}")
      else:
        if not verbose:
          print(f"Successfully downloaded: {output_path}")

# Script entry point: fetch every file listed in file_keys into the current
# directory (the default output_dir); verbose=True requests a tqdm progress
# bar when tqdm is importable.
if __name__ == "__main__":
  download_drive_files(file_keys, verbose=True)