This cell builds the SQLite database that backs the search dropdown menu in the manual tab.

In [1]:
import sqlite3
import requests
import zipfile
import io
import re
import json

def extract_wavelength(filename):
    """Return the wavelength token embedded in *filename*, or ``None``.

    The token is the run of digits and dots found between ``Raman__`` and
    the following ``__``. It is returned as a string, not converted to a
    number.
    """
    found = re.search(r'Raman__([\d.]+)__', filename)
    return found.group(1) if found else None

def extract_file_number(filename):
    """Return the trailing numeric id of a ``.txt`` *filename*, or ``None``.

    The id is the run of digits that sits between the last ``__`` and the
    ``.txt`` suffix. It is returned as a string.
    """
    tail = re.search(r'__(\d+)\.txt$', filename)
    if tail is None:
        return None
    return tail.group(1)

def extract_elements(chemical_formula):
    """Return the element-like symbols of *chemical_formula* joined by ``', '``.

    A symbol is an uppercase letter followed by any number of lowercase
    letters. Symbols are listed in order of appearance and repeats are kept;
    an input without any symbol yields an empty string.
    """
    symbols = (hit.group(0) for hit in re.finditer(r'[A-Z][a-z]*', chemical_formula))
    return ', '.join(symbols)

# Open (or create) the SQLite file that holds one row per downloaded spectrum.
conn = sqlite3.connect('RRUFFRaman_databaseSEARCH.db')
cursor = conn.cursor()

# x_data / y_data each receive a JSON-encoded list of numbers per file, so the
# columns are declared TEXT rather than REAL.
# NOTE: because of IF NOT EXISTS, a database file created earlier keeps
# whatever column declarations it already has.
create_table_query = '''
    CREATE TABLE IF NOT EXISTS database_tableSEARCH (
        id INTEGER PRIMARY KEY,
        filename TEXT,
        x_data TEXT,
        y_data TEXT
    )
'''
cursor.execute(create_table_query)
conn.commit()

# Zip archives to download; every archive lives under the same base URL.
_RAMAN_ZIP_BASE = "https://rruff.info/zipped_data_files/raman/"
_RAMAN_ZIP_NAMES = (
    "excellent_oriented",
    "excellent_unoriented",
    "fair_oriented",
    "fair_unoriented",
    "ignore_unoriented",
    "poor_oriented",
    "poor_unoriented",
    "unrated_oriented",
    "unrated_unoriented",
)
urls = [f"{_RAMAN_ZIP_BASE}{archive_name}.zip" for archive_name in _RAMAN_ZIP_NAMES]

# (connect, read) timeouts in seconds so a stalled server cannot hang the run.
_DOWNLOAD_TIMEOUT = (10, 60)


def _parse_spectrum(text):
    """Parse ``x, y`` rows from *text* into two parallel lists of floats.

    Blank lines and lines starting with ``##`` (metadata) are ignored. A row
    that does not consist of exactly two comma-separated numbers is reported
    on stdout and skipped.

    Returns:
        A ``(xs, ys)`` tuple of lists; both are empty when no row parsed.
    """
    xs, ys = [], []
    for raw_line in text.splitlines():
        row = raw_line.strip()
        if not row or row.startswith("##"):
            continue  # Skip metadata and empty lines
        try:
            # Split on the bare comma so "x,y" is accepted as well as "x, y";
            # float() ignores the surrounding whitespace.
            x_value, y_value = map(float, row.split(','))
        except ValueError:
            print(f"Skipping line due to ValueError: {raw_line}")
            continue
        xs.append(x_value)
        ys.append(y_value)
    return xs, ys


# Download every archive, store one row per readable data file, and commit
# after each archive. The connection is closed even if processing fails.
try:
    for url in urls:
        try:
            response = requests.get(url, timeout=_DOWNLOAD_TIMEOUT)
        except requests.RequestException as exc:
            # A network failure on one archive should not abort the others.
            print(f"Failed to download data from {url}: {exc}")
            continue

        if response.status_code != 200:
            print(f"Failed to download data from {url}")
            continue

        try:
            archive = zipfile.ZipFile(io.BytesIO(response.content))
        except zipfile.BadZipFile:
            print(f"Failed to open archive from {url}: not a valid zip file")
            continue

        with archive:
            for filename in archive.namelist():
                try:
                    content = archive.read(filename).decode('utf-8')  # Try to decode as UTF-8
                except UnicodeDecodeError:
                    # Binary members (e.g. PDFs) are not spectra; skip them.
                    print(f"Skipping file {filename} due to UnicodeDecodeError.")
                    continue

                x_values, y_values = _parse_spectrum(content)
                if x_values:
                    # Each axis is stored as a single JSON-encoded list.
                    cursor.execute(
                        "INSERT INTO database_tableSEARCH (filename, x_data, y_data) VALUES (?, ?, ?)",
                        (filename, json.dumps(x_values), json.dumps(y_values)),
                    )

        conn.commit()
        print(f"Data from {url} processed successfully.")
finally:
    conn.close()


Data from https://rruff.info/zipped_data_files/raman/excellent_oriented.zip processed successfully.
Skipping line due to ValueError: 800, -
Data from https://rruff.info/zipped_data_files/raman/excellent_unoriented.zip processed successfully.
Data from https://rruff.info/zipped_data_files/raman/fair_oriented.zip processed successfully.
Data from https://rruff.info/zipped_data_files/raman/fair_unoriented.zip processed successfully.
Data from https://rruff.info/zipped_data_files/raman/ignore_unoriented.zip processed successfully.
Data from https://rruff.info/zipped_data_files/raman/poor_oriented.zip processed successfully.
Data from https://rruff.info/zipped_data_files/raman/poor_unoriented.zip processed successfully.
Skipping file Minium__P001241__Raman__Reference_PDF__40850.pdf due to UnicodeDecodeError.
Skipping file Huntite__P000080__Raman__Reference_PDF__40848.pdf due to UnicodeDecodeError.
Skipping file Heklaite__P001140__Raman__Reference_PDF__40849.pdf due to UnicodeDecodeError.
Sk