This notebook downloads the required databases to the databases directory to run the rest of the analysis. This also unzips the files and deletes the original zip file

In [1]:
import os
import subprocess
import tarfile

# Set the target folder path (change this to your desired directory)
target_folder = "databases/ecoli"
os.makedirs(target_folder, exist_ok=True)

# List of file URLs to download using wget.
# Update the second URL as appropriate for UP000002311.
file_urls = [
    "https://ftp.ebi.ac.uk/pub/databases/alphafold/v4/UP000000625_83333_ECOLI_v4.tar" 
]

for url in file_urls:
    print(f"Downloading {url} into {target_folder} ...")
    # Download the file using wget
    subprocess.run(["wget", "-P", target_folder, url], check=True)
    print("Download complete.")

    # Determine the local file path from the URL
    file_name = url.split("/")[-1]
    local_file_path = os.path.join(target_folder, file_name)
    
    # Extract the tar file
    print(f"Extracting {local_file_path} ...")
    try:
        with tarfile.open(local_file_path, "r:*") as tar:
            tar.extractall(path=target_folder)
        print("Extraction complete.")
    except Exception as e:
        print(f"Error extracting {local_file_path}: {e}")
        continue

    # Remove the original tar file
    try:
        os.remove(local_file_path)
        print(f"Removed archive {local_file_path}.\n")
    except Exception as e:
        print(f"Error removing {local_file_path}: {e}")

Downloading https://ftp.ebi.ac.uk/pub/databases/alphafold/v4/UP000000625_83333_ECOLI_v4.tar into databases/ecoli ...


--2025-04-09 15:27:34--  https://ftp.ebi.ac.uk/pub/databases/alphafold/v4/UP000000625_83333_ECOLI_v4.tar
Resolving ftp.ebi.ac.uk (ftp.ebi.ac.uk)... 193.62.193.165
Connecting to ftp.ebi.ac.uk (ftp.ebi.ac.uk)|193.62.193.165|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 480047104 (458M) [application/x-tar]
Saving to: ‘databases/ecoli/UP000000625_83333_ECOLI_v4.tar’

     0K .......... .......... .......... .......... ..........  0%  193K 40m27s
    50K .......... .......... .......... .......... ..........  0%  386K 30m20s
   100K .......... .......... .......... .......... ..........  0%  387K 26m57s
   150K .......... .......... .......... .......... ..........  0%  186M 20m13s
   200K .......... .......... .......... .......... ..........  0%  240M 16m11s
   250K .......... .......... .......... .......... ..........  0%  387K 16m51s
   300K .......... .......... .......... .......... ..........  0%  164M 14m27s
   350K .......... .......... .......... .....

Download complete.
Extracting databases/ecoli/UP000000625_83333_ECOLI_v4.tar ...
Extraction complete.
Removed archive databases/ecoli/UP000000625_83333_ECOLI_v4.tar.



In [2]:
import os
import subprocess
import tarfile

# Set the target folder path (change this to your desired directory)
target_folder = "databases/scerevisiae"
os.makedirs(target_folder, exist_ok=True)

# List of file URLs to download using wget.
# Update the second URL as appropriate for UP000002311.
file_urls = [
    "https://ftp.ebi.ac.uk/pub/databases/alphafold/latest/UP000002311_559292_YEAST_v4.tar" 
]

for url in file_urls:
    print(f"Downloading {url} into {target_folder} ...")
    # Download the file using wget
    subprocess.run(["wget", "-P", target_folder, url], check=True)
    print("Download complete.")

    # Determine the local file path from the URL
    file_name = url.split("/")[-1]
    local_file_path = os.path.join(target_folder, file_name)
    
    # Extract the tar file
    print(f"Extracting {local_file_path} ...")
    try:
        with tarfile.open(local_file_path, "r:*") as tar:
            tar.extractall(path=target_folder)
        print("Extraction complete.")
    except Exception as e:
        print(f"Error extracting {local_file_path}: {e}")
        continue

    # Remove the original tar file
    try:
        os.remove(local_file_path)
        print(f"Removed archive {local_file_path}.\n")
    except Exception as e:
        print(f"Error removing {local_file_path}: {e}")

Downloading https://ftp.ebi.ac.uk/pub/databases/alphafold/latest/UP000002311_559292_YEAST_v4.tar into databases/scerevisiae ...


--2025-04-09 15:28:18--  https://ftp.ebi.ac.uk/pub/databases/alphafold/latest/UP000002311_559292_YEAST_v4.tar
Resolving ftp.ebi.ac.uk (ftp.ebi.ac.uk)... 193.62.193.165
Connecting to ftp.ebi.ac.uk (ftp.ebi.ac.uk)|193.62.193.165|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1025904640 (978M) [application/x-tar]
Saving to: ‘databases/scerevisiae/UP000002311_559292_YEAST_v4.tar’

     0K .......... .......... .......... .......... ..........  0%  288K 57m57s
    50K .......... .......... .......... .......... ..........  0%  386K 50m35s
   100K .......... .......... .......... .......... ..........  0%  589K 43m10s
   150K .......... .......... .......... .......... ..........  0% 1.10M 36m4s
   200K .......... .......... .......... .......... ..........  0%  589K 34m31s
   250K .......... .......... .......... .......... ..........  0%  289M 28m46s
   300K .......... .......... .......... .......... ..........  0%  405M 24m40s
   350K .......... .......... ....

Download complete.
Extracting databases/scerevisiae/UP000002311_559292_YEAST_v4.tar ...
Extraction complete.
Removed archive databases/scerevisiae/UP000002311_559292_YEAST_v4.tar.

