In [19]:
#response_excellentoriented = requests.get(excellentoriented_url)
#response_excellentunoriented = requests.get(excellentunoriented_url)
#response_fairoriented = requests.get(fairoriented_url)
#response_fairunoriented = requests.get(fairunoriented_url)

import sqlite3
import requests
import zipfile
import io
import re
import json

def extract_wavelength(filename):
    """Return the wavelength field embedded in a spectrum filename.

    The field is the run of digits and dots sitting between ``Raman__`` and
    the next ``__`` separator. It is returned as a string (not converted to
    a number); ``None`` is returned when the filename has no such segment.
    """
    found = re.search(r'Raman__([\d.]+)__', filename)
    return found.group(1) if found else None

def extract_file_number(filename):
    """Return the trailing numeric id of a spectrum filename.

    The id is the run of digits between the final ``__`` separator and the
    ``.txt`` extension at the end of the name. It is returned as a string;
    ``None`` is returned when the filename does not end that way.
    """
    found = re.search(r'__(\d+)\.txt$', filename)
    return found.group(1) if found else None

def extract_elements(chemical_formula):
    """Return the distinct element symbols found in a chemical formula.

    A symbol is taken to be an uppercase letter followed by any lowercase
    letters; the tokens are not validated against the periodic table.

    Args:
        chemical_formula: Formula text, e.g. ``"Mg_3_Si_4_O_10_(OH)_2_"``.

    Returns:
        The symbols joined by ``", "`` in order of first appearance, each
        listed once. An empty string when no symbol is found.
    """
    element_symbols = re.findall(r'[A-Z][a-z]*', chemical_formula)
    # dict.fromkeys drops repeats (e.g. the second "O" in "...O_10_(OH)_2_")
    # while keeping the order in which symbols first appear.
    return ', '.join(dict.fromkeys(element_symbols))

# Number of highest-intensity points stored for each spectrum file.
TOP_POINT_COUNT = 10
# Seconds to wait for the server before a download attempt is abandoned.
DOWNLOAD_TIMEOUT = 60

create_table_query = '''
    CREATE TABLE IF NOT EXISTS database_table (
        id INTEGER PRIMARY KEY,
        filename TEXT,
        mineral_name TEXT,
        rruff_id TEXT,
        wavelength TEXT,
        orientation TEXT,
        file_number TEXT,
        elements TEXT,
        x_data REAL,
        y_data REAL
    )
'''

insert_row_query = (
    "INSERT INTO database_table (filename, mineral_name, rruff_id, wavelength, "
    "orientation, file_number, elements, x_data, y_data) "
    "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)"
)


def _ideal_chemistry_elements(lines):
    """Return the elements of the first ``##IDEAL CHEMISTRY=`` line, or ""."""
    for line in lines:
        if line.startswith("##IDEAL CHEMISTRY="):
            # partition keeps the whole formula even if it contains '='.
            return extract_elements(line.partition('=')[2])
    return ""


def _largest_points(lines, count=TOP_POINT_COUNT):
    """Return up to ``count`` ``(y, x)`` pairs with the largest y, largest first.

    Lines starting with ``##`` and blank lines are skipped; every other line
    must hold exactly two comma-separated numbers, otherwise ``ValueError``
    is raised.
    """
    points = []
    for line in lines:
        if line.startswith("##") or not line.strip():
            continue
        # Split on a bare comma: float() ignores surrounding whitespace, so
        # both "x, y" and "x,y" are accepted.
        x_text, y_text = line.split(',')
        points.append((float(y_text), float(x_text)))
    # Sort all points rather than maintaining a running list, so small values
    # seen early are still kept when a file has fewer than ``count`` peaks.
    return sorted(points, reverse=True)[:count]


def _insert_archive(cursor, archive_bytes):
    """Insert the largest points of every spectrum file in a zip archive.

    Args:
        cursor: Open sqlite3 cursor on a database containing database_table.
        archive_bytes: Raw bytes of the downloaded zip archive.
    """
    with io.BytesIO(archive_bytes) as zip_stream, \
            zipfile.ZipFile(zip_stream) as zip_ref:
        for filename in zip_ref.namelist():
            name_parts = filename.split('__')
            # Entries with fewer than three "__"-separated fields (directory
            # entries, stray files) cannot supply the metadata read below.
            if len(name_parts) < 3:
                continue
            mineral_name = name_parts[0]
            rruff_id = name_parts[1]
            # The orientation is the third field counted from the end.
            orientation = name_parts[-3]
            wavelength = extract_wavelength(filename)
            file_number = extract_file_number(filename)

            with zip_ref.open(filename) as file:
                # errors='replace' keeps one stray non-UTF-8 byte from
                # aborting the whole import.
                content = file.read().decode('utf-8', errors='replace')
            lines = content.split('\n')
            elements = _ideal_chemistry_elements(lines)

            rows = [
                (filename, mineral_name, rruff_id, wavelength, orientation,
                 file_number, elements, x_value, y_value)
                for y_value, x_value in _largest_points(lines)
            ]
            cursor.executemany(insert_row_query, rows)


conn = sqlite3.connect('RRUFFRaman_database.db')
try:
    cursor = conn.cursor()
    cursor.execute(create_table_query)
    conn.commit()

    excellentoriented_url = "https://rruff.info/zipped_data_files/raman/excellent_oriented.zip"
    excellentunoriented_url = "https://rruff.info/zipped_data_files/raman/excellent_unoriented.zip"
    fairoriented_url = "https://rruff.info/zipped_data_files/raman/fair_oriented.zip"
    fairunoriented_url = "https://rruff.info/zipped_data_files/raman/fair_unoriented.zip"

    response_excellentoriented = requests.get(excellentoriented_url, timeout=DOWNLOAD_TIMEOUT)
    response_excellentunoriented = requests.get(excellentunoriented_url, timeout=DOWNLOAD_TIMEOUT)
    response_fairoriented = requests.get(fairoriented_url, timeout=DOWNLOAD_TIMEOUT)
    response_fairunoriented = requests.get(fairunoriented_url, timeout=DOWNLOAD_TIMEOUT)

    responses = (
        response_excellentoriented,
        response_excellentunoriented,
        response_fairoriented,
        response_fairunoriented,
    )

    # All-or-nothing: rows are inserted only when every archive downloaded.
    if all(response.status_code == 200 for response in responses):
        for response in responses:
            _insert_archive(cursor, response.content)
        # Single commit at the end, so an error part-way through leaves the
        # table without a partial import.
        conn.commit()
        print("Data download and insertion completed.")
    else:
        print("Failed to download data.")
finally:
    # Close the connection even when a download or parse step raises.
    conn.close()

Data download and insertion completed.
