## Beispiel SQLite3

In [2]:
import sqlite3

# Connect to the database
with sqlite3.connect('example.db') as conn:
    # Create a database cursor
    cursor = conn.cursor()
    # Create the table if it doesn't exist
    cursor.execute("CREATE TABLE IF NOT EXISTS sequences (id INTEGER PRIMARY KEY, name TEXT, sequence TEXT)")
    # Insert data into the table
    cursor.execute("INSERT INTO sequences (name, sequence) VALUES (?, ?)", ("BRCA1", "ATCGGCTA"))
    cursor.execute("INSERT INTO sequences (name, sequence) VALUES (?, ?)", ("TRAF6", "ATCGGCTAGGG"))
    #cursor.execute("DELETE FROM sequences")
    # Commit changes (optional in `with` as it commits on close, but explicit commit is good practice)
    conn.commit()

with sqlite3.connect('example.db') as conn:
    cursor = conn.cursor()
    cursor.execute("SELECT * FROM sequences")
    rows = cursor.fetchall()
    for row in rows:
        print(row)

(1, 'BRCA1', 'ATCGGCTA')
(2, 'TRAF6', 'ATCGGCTAGGG')
(3, 'BRCA1', 'ATCGGCTA')
(4, 'TRAF6', 'ATCGGCTAGGG')


## Beispiel Entrez

In [6]:
from Bio import Entrez, SeqIO

Entrez.email = "your_email@example.com"

# Suche nach IDs
handle = Entrez.esearch(db="nucleotide", term="BRCA1[Gene] AND Homo sapiens[Organism]", retmax=5)
results = Entrez.read(handle)
print("Gefundene IDs:", results['IdList'])
handle.close()

# Abruf der Sequenzen für jede ID
for seq_id in results['IdList']:
    handle = Entrez.efetch(db="nucleotide", id=seq_id, rettype="gb", retmode="text")
    seq_record = SeqIO.read(handle, "genbank")
    handle.close()
    print(f"ID: {seq_record.id}, Beschreibung: {seq_record.description}, Sequenz: {seq_record.seq[:50]}")

handle = Entrez.esummary(db="nucleotide", id=results['IdList'][0])
summary = Entrez.read(handle)
print("\n Zusammenfassung:",summary)

handle = Entrez.elink(dbfrom="nucleotide", db="protein", id=results['IdList'][0])
links = Entrez.read(handle)
print("\n Verknüpfte Proteine:",links)

protein_id = links[0]["LinkSetDb"][0]["Link"][0]["Id"]
print("\nAbrufen der ersten Protein-ID:", protein_id)

# Abrufen der Protein-Daten
handle = Entrez.efetch(db="protein", id=protein_id, rettype="gb", retmode="text")
protein_data = handle.read()
handle.close()
print("\nProtein-Daten:\n", protein_data[:500])  # Nur die ersten 500 Zeichen anzeigen

Gefundene IDs: ['262359905', '2814447092', '2814447090', '2814447088', '2811272175']
ID: NG_005905.2, Beschreibung: Homo sapiens BRCA1 DNA repair associated (BRCA1), RefSeqGene (LRG_292) on chromosome 17, Sequenz: TGTGTGTATGAAGTTAACTTCAAAGCAAGCTTCCTGTGCTGAGGGGGTGG
ID: PQ399722.1, Beschreibung: Homo sapiens isolate TWH-4347-0-1 Shortening of BRCA1 (BRCA1) gene, complete cds, Sequenz: ATGGATTTATCTGCTCTTCGCGTTGAAGAAGTACAAAATGTCATTAATGC
ID: PQ399721.1, Beschreibung: Homo sapiens isolate TWH-4530-0-1 Shortening of BRCA1 (BRCA1) gene, complete cds, Sequenz: ATGGATTTATCTGCTCTTCGCGTTGAAGAAGTACAAAATGTCATTAATGC
ID: PQ399720.1, Beschreibung: Homo sapiens isolate TWH-4363-0-1 Shortening of BRCA1 (BRCA1) gene, complete cds, Sequenz: ATGGATTTATCTGCTCTTCGCGTTGAAGAAGTACAAAATGTCATTAATGC
ID: PQ178459.1, Beschreibung: Homo sapiens isolate OV-0685-0-1 breast and ovarian cancer susceptibility protein 1 (BRCA1) mRNA, partial sequence, Sequenz: ATGGATTTATCTGCTCTTCGCGTTGAAGAAGTACAAAATGTCATTAATGC

 Zusammenfas